-- Avro loader aliases, one per input relation. Each alias binds the piggybank
-- AvroStorage function to an input schema class whose name is supplied through
-- a script parameter.
DEFINE avro_load_input_person
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'input_schema_class', '$schema_input_person'
    );

DEFINE avro_load_input_author_to_author_statistics
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'input_schema_class', '$schema_input_author_to_author_statistics'
    );

DEFINE avro_load_input_persons_with_website_usage_similarities
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'input_schema_class', '$schema_input_persons_with_website_usage_similarities'
    );


-- Avro writer alias used by the store statement of this script. The output
-- schema class name is supplied through a script parameter.
-- NOTE(review): the index option is passed as 0 - confirm its exact meaning
-- against the AvroStorage build used by this project.
DEFINE avro_store_person_with_inferenced_data
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'index', '0',
        'output_schema_class', '$schema_output_person_with_inferenced_data'
    );


-- Short aliases for project UDFs.
-- NOTE(review): only FIRST_NOT_NULL_STR is referenced by the statements visible
-- in this script - the other two aliases look unused here, confirm before removing.
DEFINE FIRST_NOT_NULL_STR
    eu.dnetlib.iis.transformers.udfs.StringFirstNotEmpty;
DEFINE NULL_EMPTY
    eu.dnetlib.iis.transformers.udfs.EmptyBagToNull;
DEFINE NULL_EMPTY_TUPLE_FIELDS
    eu.dnetlib.iis.transformers.udfs.NullTupleFieldsToNull;

-- Read the three input relations. Paths come from script parameters and each
-- relation is read with its matching Avro loader alias.
person =
    LOAD '$input_person'
    USING avro_load_input_person;

author_to_author_statistics =
    LOAD '$input_author_to_author_statistics'
    USING avro_load_input_author_to_author_statistics;

persons_with_website_usage_similarities =
    LOAD '$input_persons_with_website_usage_similarities'
    USING avro_load_input_persons_with_website_usage_similarities;


-- Collapse the similarity records into one row per personId. Each output row
-- carries the grouping key as id plus a bag of (personId, similarities) tuples,
-- where the inner personId is taken from otherPersonId of the source record.
similaritiesPerPerson =
    GROUP persons_with_website_usage_similarities BY personId;

outputSimilarities = FOREACH similaritiesPerPerson {
    websiteUsageSimilarities =
        FOREACH persons_with_website_usage_similarities
        GENERATE otherPersonId AS personId, similarities AS similarities;
    GENERATE group AS id, websiteUsageSimilarities;
}

-- Full outer join of author statistics with the per-person similarity bags, so
-- a row is kept when only one of the two sides has data for a given key.
statsAndSimilarities =
    JOIN author_to_author_statistics BY authorId FULL OUTER,
         outputSimilarities BY id;

-- Merge the two key columns into a single id and keep the payload of each side.
-- NOTE(review): FIRST_NOT_NULL_STR presumably returns the first non-empty
-- argument - confirm against the UDF implementation.
joinedCleaned =
    FOREACH statsAndSimilarities
    GENERATE
        FIRST_NOT_NULL_STR(author_to_author_statistics::authorId, outputSimilarities::id) AS id,
        author_to_author_statistics::statistics AS statistics,
        outputSimilarities::websiteUsageSimilarities AS websiteUsageSimilarities;

-- Full outer join of the person records with the inferred data, so a row is
-- kept when only one of the two sides has data for a given id.
personAndInferences =
    JOIN person BY id FULL OUTER,
         joinedCleaned BY id;

-- Flatten the joined row into the output layout: a single id, the name fields
-- from person, then statistics and similarities from the inferred side.
-- NOTE(review): FIRST_NOT_NULL_STR presumably returns the first non-empty
-- argument - confirm against the UDF implementation.
joinedFullCleaned =
    FOREACH personAndInferences
    GENERATE
        FIRST_NOT_NULL_STR(person::id, joinedCleaned::id) AS id,
        person::firstname AS firstname,
        person::secondnames AS secondnames,
        person::fullname AS fullname,
        joinedCleaned::statistics AS statistics,
        joinedCleaned::websiteUsageSimilarities AS websiteUsageSimilarities;

-- Write the merged person records to the output path given by a script
-- parameter, using the Avro writer alias defined at the top of the script.
STORE joinedFullCleaned
    INTO '$output_person_with_inferenced_data'
    USING avro_store_person_with_inferenced_data;

