package eu.dnetlib.iis.ingest.pmc.citations;

import java.io.IOException;
import java.util.Properties;

import cascading.avro.AvroScheme;
import cascading.avro.PackedAvroScheme;
import cascading.flow.Flow;
import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.pipe.SubAssembly;
import cascading.property.AppProps;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import eu.dnetlib.iis.common.schemas.IdentifierMapping;
import eu.dnetlib.iis.ingest.pmc.citations.schemas.Citation;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ExtractedDocumentMetadata;

/**
 * Command-line entry point that assembles and runs the Cascading flow importing
 * resolved PMC citations.
 *
 * <p>The flow reads {@link ExtractedDocumentMetadata} records together with three
 * {@link IdentifierMapping} datasets (dedup, PMID-to-OAID and DOI-to-OAID), feeds
 * them into {@link ResolvedCitationsSubAssembly} and writes the first tail of that
 * assembly as {@link Citation} records.
 *
 * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl)
 * @author mhorst
 */
public class ResolvedCitationsImporter {

    /** Number of positional arguments {@link #main(String[])} requires. */
    private static final int REQUIRED_ARGS_COUNT = 5;

    /** Name of the Hadoop task timeout property set on the flow. */
    private static final String TASK_TIMEOUT_PROPERTY = "mapred.task.timeout";

    /**
     * Value assigned to {@link #TASK_TIMEOUT_PROPERTY}; Hadoop interprets it as
     * milliseconds, which makes this 30 minutes.
     */
    private static final String TASK_TIMEOUT_VALUE = "1800000";

    /**
     * Builds the citation-resolving flow and blocks until it completes.
     *
     * @param args exactly these positional arguments, in order (any extra
     *             arguments are ignored):
     *             <ol>
     *               <li>input path with {@link ExtractedDocumentMetadata} records</li>
     *               <li>path with dedup {@link IdentifierMapping} records</li>
     *               <li>path with PMID-to-OAID {@link IdentifierMapping} records</li>
     *               <li>path with DOI-to-OAID {@link IdentifierMapping} records</li>
     *               <li>output path for {@link Citation} records</li>
     *             </ol>
     * @throws IllegalArgumentException when fewer than five arguments are supplied
     * @throws IOException declared for callers; presumably raised by the property
     *                     helpers in {@code CascadingUtils} (not visible here)
     */
    public static void main(String[] args) throws IOException {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < REQUIRED_ARGS_COUNT) {
            throw new IllegalArgumentException(
                    "Expected " + REQUIRED_ARGS_COUNT + " arguments: "
                    + "<inPath> <dedupPath> <pmidToOaidPath> <doiToOaidPath> <outPath>, but got "
                    + (args == null ? 0 : args.length));
        }

        String inPath = args[0];
        String dedupPath = args[1];
        String pmidToOaidPath = args[2];
        String doiToOaidPath = args[3];
        String outPath = args[4];

        Properties properties = new Properties();
        AppProps.setApplicationJarClass(properties, ResolvedCitationsImporter.class);
        CascadingUtils.addLibJars(properties);
        CascadingUtils.copyBasicProperties(properties);
        // Set after copyBasicProperties so this timeout is the value that ends up in effect.
        properties.setProperty(TASK_TIMEOUT_PROPERTY, TASK_TIMEOUT_VALUE);
        HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

        // Sources: document metadata plus the three identifier mappings.
        Tap docTap = new Hfs(new PackedAvroScheme<ExtractedDocumentMetadata>(
                ExtractedDocumentMetadata.getClassSchema()), inPath);
        Tap dedupTap = identifierMappingTap(dedupPath);
        Tap pmidToOaidTap = identifierMappingTap(pmidToOaidPath);
        Tap doiToOaidTap = identifierMappingTap(doiToOaidPath);

        Pipe docPipe = new Pipe("doc");
        Pipe dedupMapPipe = new Pipe("dedup");
        Pipe pmidToOaidPipe = new Pipe("pmid_to_oaid");
        Pipe doiToOaidPipe = new Pipe("doi_to_oaid");

        Tap outTap = new Hfs(new PackedAvroScheme<Citation>(Citation.getClassSchema()), outPath);

        SubAssembly main = new ResolvedCitationsSubAssembly(docPipe, dedupMapPipe,
                pmidToOaidPipe, doiToOaidPipe);

        // Only the first tail of the sub-assembly is written out.
        FlowDef flowDef = FlowDef.flowDef()
                .addSource(docPipe, docTap)
                .addSource(dedupMapPipe, dedupTap)
                .addSource(pmidToOaidPipe, pmidToOaidTap)
                .addSource(doiToOaidPipe, doiToOaidTap)
                .addTailSink(main.getTails()[0], outTap);

        Flow flow = flowConnector.connect(flowDef);
        flow.complete();
    }

    /** Creates an Hfs tap over {@link IdentifierMapping} Avro records stored at {@code path}. */
    private static Tap identifierMappingTap(String path) {
        return new Hfs(new AvroScheme(IdentifierMapping.getClassSchema()), path);
    }
}
