-- AvroStorage aliases used by this script. The schema class names are supplied
-- through Pig parameter substitution when the script is launched.

-- Loader bound to the schema class given in $schema_input_document_to_dataset.
DEFINE avro_load_document_to_dataset org.apache.pig.piggybank.storage.avro.AvroStorage(
    'input_schema_class', '$schema_input_document_to_dataset'
);

-- Loader bound to the schema class given in $schema_input_document_id.
DEFINE avro_load_document_id org.apache.pig.piggybank.storage.avro.AvroStorage(
    'input_schema_class', '$schema_input_document_id'
);

-- Storer bound to the schema class given in $schema_output_identifier.
-- NOTE(review): 'index' is presumably the per-STORE identifier that piggybank
-- AvroStorage uses to tell store statements apart -- confirm against its docs.
DEFINE avro_store_identifier org.apache.pig.piggybank.storage.avro.AvroStorage(
    'index', '0',
    'output_schema_class', '$schema_output_identifier'
);

-- Emits every distinct datasetId found in $input_document_to_dataset that has
-- no equal value in the first field of $input_document_id.

-- Ids to match against: first field of each $input_document_id record.
knownRecords = LOAD '$input_document_id' USING avro_load_document_id;
knownIds = FOREACH knownRecords GENERATE $0 AS id;

-- Candidate ids: the datasetId field of each relation record, de-duplicated so
-- that every id is emitted at most once.
relationRecords = LOAD '$input_document_to_dataset' USING avro_load_document_to_dataset;
referencedIds = FOREACH relationRecords GENERATE datasetId AS id;
uniqueReferencedIds = DISTINCT referencedIds;

-- Anti-join: a left outer join keeps every candidate id; rows whose right-hand
-- side is null are the candidates without a match among the known ids.
-- NOTE(review): a null datasetId would presumably never match and therefore
-- pass this filter -- confirm the input schema rules out null datasetId values.
matched = JOIN uniqueReferencedIds BY id LEFT OUTER, knownIds BY id;
unmatched = FILTER matched BY knownIds::id IS NULL;
missingIds = FOREACH unmatched GENERATE uniqueReferencedIds::id AS id;

STORE missingIds INTO '$output_identifier' USING avro_store_identifier;
