package eu.dnetlib.iis.mainworkflows.importer.acm;

import eu.dnetlib.iis.core.java.HadoopContext;
import eu.dnetlib.iis.core.java.PortBindings;
import eu.dnetlib.iis.core.java.Process;
import eu.dnetlib.iis.core.java.io.DataStore;
import eu.dnetlib.iis.core.java.io.FileSystemPath;
import eu.dnetlib.iis.core.java.porttype.AvroPortType;
import eu.dnetlib.iis.core.java.porttype.PortType;
import eu.dnetlib.iis.importer.dataset.DataFileRecordReceiver;
import eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata;
import java.io.InputStream;
import java.security.InvalidParameterException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/* loaded from: input_file:eu/dnetlib/iis/mainworkflows/importer/acm/AcmXmlImporter.class */
/**
 * Workflow process that reads an ACM XML dump from a Hadoop file system and
 * writes the parsed records to the {@code document_metadata} output port as an
 * Avro data file of {@link ExtractedDocumentMetadata}.
 *
 * <p>The dump location is taken from the {@link #PARAM_ACM_XML_DUMP_PATH}
 * parameter. It may point either to a single file or to a directory; directories
 * are traversed recursively and every non-directory entry is parsed as XML.
 */
public class AcmXmlImporter implements Process {
    /** Name of the single output port this process declares. */
    private static final String PORT_OUT_DOCUMENT_METADATA = "document_metadata";

    /** Name of the required parameter holding the path of the ACM XML dump. */
    public static final String PARAM_ACM_XML_DUMP_PATH = "import.acm.xmldump.path";

    /**
     * Output port declarations. Built once and immutable, so that creating
     * importer instances does not write to shared static state and callers of
     * {@link #getOutputPorts()} cannot alter it.
     */
    private static final Map<String, PortType> outputPorts = Collections.<String, PortType>singletonMap(
            PORT_OUT_DOCUMENT_METADATA, new AvroPortType(ExtractedDocumentMetadata.SCHEMA$));

    /** Creates the importer; the port declarations are static and need no per-instance setup. */
    public AcmXmlImporter() {
    }

    /**
     * Returns the input ports of this process.
     *
     * @return an empty map, as this process declares no input ports
     */
    public Map<String, PortType> getInputPorts() {
        return Collections.emptyMap();
    }

    /**
     * Returns the output ports of this process.
     *
     * @return an unmodifiable map with the single {@code document_metadata} Avro port
     */
    public Map<String, PortType> getOutputPorts() {
        return outputPorts;
    }

    /**
     * Imports the ACM XML dump into the {@code document_metadata} output port.
     *
     * @param portBindings  bindings from which the output location of the {@code document_metadata} port is read
     * @param hadoopContext context supplying the Hadoop configuration used to obtain the file system
     * @param map           process parameters; must contain a non-null {@link #PARAM_ACM_XML_DUMP_PATH} entry
     * @throws InvalidParameterException when the dump path parameter is absent or {@code null}
     * @throws Exception                 when reading, parsing or writing fails
     */
    public void run(PortBindings portBindings, HadoopContext hadoopContext, Map<String, String> map) throws Exception {
        // Validate first so that a misconfigured run fails before any file system
        // access and before the output data file is created.
        String dumpPath = map.get(PARAM_ACM_XML_DUMP_PATH);
        if (dumpPath == null) {
            throw new InvalidParameterException("required parameter '" + PARAM_ACM_XML_DUMP_PATH + "' is missing!");
        }
        FileSystem fileSystem = FileSystem.get(hadoopContext.getConfiguration());
        FileSystemPath outputLocation =
                new FileSystemPath(fileSystem, (Path) portBindings.getOutput().get(PORT_OUT_DOCUMENT_METADATA));
        // try-with-resources: the writer is always closed, and a failure raised by
        // close() is attached as suppressed instead of replacing the original error.
        try (DataFileWriter<ExtractedDocumentMetadata> dataFileWriter =
                DataStore.create(outputLocation, ExtractedDocumentMetadata.SCHEMA$)) {
            processNode(fileSystem, new Path(dumpPath), dataFileWriter);
        }
    }

    /**
     * Processes a single file system node: descends into directories recursively
     * and parses every other node as an XML document whose SAX events are handled
     * by an {@link AcmDumpXmlHandler} feeding records into the given writer.
     *
     * @param fileSystem     file system the node lives on
     * @param path           file or directory to process
     * @param dataFileWriter destination for the records produced by the XML handler
     * @throws Exception when listing, opening or parsing fails
     */
    protected void processNode(FileSystem fileSystem, Path path, DataFileWriter<ExtractedDocumentMetadata> dataFileWriter) throws Exception {
        if (fileSystem.isDirectory(path)) {
            for (FileStatus fileStatus : fileSystem.listStatus(path)) {
                processNode(fileSystem, fileStatus.getPath(), dataFileWriter);
            }
            return;
        }
        // NOTE(review): the parser is created with the factory's default settings.
        // If dump files can originate from an untrusted source, consider disabling
        // external entity resolution here - confirm the dumps do not rely on it first.
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        try (InputStream inputStream = fileSystem.open(path)) {
            saxParser.parse(inputStream, new AcmDumpXmlHandler(new DataFileRecordReceiver(dataFileWriter)));
        }
    }
}
