-- Load function for the document-with-inferenced-data input.
-- It is the piggybank AvroStorage, given an input_schema_class option whose
-- value comes from the schema_input_document_with_inferenced_data parameter.
DEFINE avro_load_document_with_inferenced_data
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'input_schema_class',
        '$schema_input_document_with_inferenced_data'
    );

-- Load function for the document-metadata input.
-- It is the piggybank AvroStorage, given an input_schema_class option whose
-- value comes from the schema_input_document_metadata parameter.
DEFINE avro_load_document_metadata
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'input_schema_class',
        '$schema_input_document_metadata'
    );


-- Store function for the identifier output.
-- It is the piggybank AvroStorage, given an index option of 0 and an
-- output_schema_class option whose value comes from the
-- schema_output_identifier parameter.
-- NOTE(review): index presumably tells apart multiple AvroStorage store
-- functions within one script - confirm against the AvroStorage docs.
DEFINE avro_store_identifier
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'index', '0',
        'output_schema_class', '$schema_output_identifier'
    );


-- Goal: write out every id that appears in the inferenced-data input
-- but has no matching id in the metadata input.

-- Left side: unique ids found in the inferenced-data input.
inferencedDocs = LOAD '$input_document_with_inferenced_data' USING avro_load_document_with_inferenced_data;
inferencedIdsRaw = FOREACH inferencedDocs GENERATE id;
inferencedIds = DISTINCT inferencedIdsRaw;

-- Right side: unique ids found in the metadata input.
metadataDocs = LOAD '$input_document_metadata' USING avro_load_document_metadata;
metadataIdsRaw = FOREACH metadataDocs GENERATE id;
metadataIds = DISTINCT metadataIdsRaw;

-- The left outer join keeps every left-side id. Rows with no partner on the
-- right carry a null right-side id, so the null filter leaves exactly the
-- ids that are absent from the metadata input.
idsJoined = JOIN inferencedIds BY id LEFT, metadataIds BY id;
idsUnmatched = FILTER idsJoined BY metadataIds::id IS NULL;
missingIds = FOREACH idsUnmatched GENERATE inferencedIds::id AS id;

STORE missingIds INTO '$output_identifier' USING avro_store_identifier;
