package eu.dnetlib.iis.ingest.pmc.citations;

import java.io.IOException;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import eu.dnetlib.iis.common.schemas.IdentifierMapping;
import eu.dnetlib.iis.ingest.pmc.citations.schemas.PmidMapping;

/**
 * {@link PmidMapping} reducer writing at most one {@link IdentifierMapping} for each pmid.
 * <p>
 * Selection rules applied to the values grouped under a single key:
 * <ul>
 * <li>the first value whose entity type equals {@link #ENTITY_TYPE_RESEARCH_ARTICLE} is written
 * and all remaining values are ignored,</li>
 * <li>when no such value exists and the group consists of exactly one value, that value is written,</li>
 * <li>otherwise (no 'research-article' among two or more values) nothing is written.</li>
 * </ul>
 * Values carrying a <code>null</code> datum are never written.
 * 
 * @author mhorst
 *
 */
public class PmidToOaidReducer 
extends Reducer<AvroKey<String>, AvroValue<PmidMapping>, AvroKey<IdentifierMapping>, NullWritable> {
	
	public static final String ENTITY_TYPE_RESEARCH_ARTICLE = "research-article";
	
	/**
	 * Selects and writes a single identifier mapping for the group of values sharing the given key.
	 * 
	 * @param key grouping key; not read by this method
	 * @param values mappings grouped under the key
	 * @param context reducer context the selected mapping is written to
	 * @throws IOException propagated from {@link Context#write(Object, Object)}
	 * @throws InterruptedException propagated from {@link Context#write(Object, Object)}
	 */
	@Override
	public void reduce(AvroKey<String> key, 
			Iterable<AvroValue<PmidMapping>> values, Context context)
	throws IOException, InterruptedException {
		int valuesSeen = 0;
		PmidMapping firstRecord = null;
		for (AvroValue<PmidMapping> value : values) {
			PmidMapping datum = value.datum();
			if (isResearchArticle(datum)) {
//				writing only the first applicable 'research-article'
				writeMapping(datum, context);
				return;
			}
			if (valuesSeen == 0) {
//				NOTE(review): Hadoop may reuse value objects between iterations; this reference
//				is only read when the group held exactly one value, so reuse should not affect it
				firstRecord = datum;
			}
			valuesSeen++;
		}
//		no 'research-article' found but also no duplicates found, writing record;
//		a single value with null datum has nothing to write and must not be dereferenced
		if (valuesSeen == 1 && firstRecord != null) {
			writeMapping(firstRecord, context);
		}
	}
	
	/**
	 * Checks whether given record is non-null and describes a 'research-article' entity.
	 */
	private static boolean isResearchArticle(PmidMapping record) {
		if (record == null) {
			return false;
		}
//		compared through toString() so the check also holds if the generated getter
//		returns a non-String CharSequence; identical result for plain String values
		Object entityType = record.getEntityType();
		return entityType != null 
				&& ENTITY_TYPE_RESEARCH_ARTICLE.equals(entityType.toString());
	}
	
	/**
	 * Writes pmid to oaid mapping built from given record, paired with {@link NullWritable} value.
	 */
	private void writeMapping(PmidMapping record, Context context) 
	throws IOException, InterruptedException {
		IdentifierMapping mapping = IdentifierMapping.newBuilder()
				.setOriginalId(record.getPmId())
				.setNewId(record.getOaId())
				.build();
		context.write(new AvroKey<IdentifierMapping>(mapping), NullWritable.get());
	}
}
