package eu.dnetlib.iis.importer.content;

import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
import eu.dnetlib.data.objectstore.rmi.ObjectStoreService;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl;
import eu.dnetlib.iis.core.java.HadoopContext;
import eu.dnetlib.iis.core.java.PortBindings;
import eu.dnetlib.iis.core.java.Process;
import eu.dnetlib.iis.core.java.porttype.AvroPortType;
import eu.dnetlib.iis.core.java.porttype.PortType;
import eu.dnetlib.iis.importer.AvroWriterRunnable;
import eu.dnetlib.iis.importer.ObjectWithPath;
import eu.dnetlib.iis.importer.Poison;
import eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl;
import eu.dnetlib.iis.importer.auxiliary.schemas.UrlWithMimeType;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

/* loaded from: input_file:eu/dnetlib/iis/importer/content/ObjectStoreDocumentContentUrlImporterProcess.class */
/**
 * Process that retrieves object descriptors from a set of object stores and writes a
 * {@link DocumentContentUrl} record for each of them to one of three output ports
 * ({@code content_url}, {@code plaintext_url}, {@code xml_pmc_url}), chosen by the
 * mime type of the object.
 *
 * <p>Records are handed over to an {@link AvroWriterRunnable} running on a dedicated
 * thread through a bounded queue; a {@link Poison} instance is queued once all object
 * stores have been read (presumably the writer's end-of-stream marker - confirm against
 * {@code AvroWriterRunnable}).
 */
public class ObjectStoreDocumentContentUrlImporterProcess implements Process {
    public static final String IMPORT_CONTENT_MIMETYPES_PDF = "import.content.mime.types.pdf.csv";
    public static final String IMPORT_CONTENT_MIMETYPES_TEXT = "import.content.mime.types.text.csv";
    public static final String IMPORT_CONTENT_MIMETYPES_XML_PMC = "import.content.mime.types.xml.pmc.csv";
    private static final String PORT_OUT_DOCUMENT_CONTENT_URL = "content_url";
    private static final String PORT_OUT_PLAINTEXT_URL = "plaintext_url";
    private static final String PORT_OUT_XML_PMC_URL = "xml_pmc_url";

    /** Placeholder value treated the same way as a missing parameter. */
    private static final String UNDEFINED_VALUE = "$UNDEFINED$";

    /** Result set page size used when no page size parameter is provided. */
    private static final int DEFAULT_PAGESIZE = 100;

    private static final Logger log = Logger.getLogger(ObjectStoreDocumentContentUrlImporterProcess.class);

    /** Output port definitions shared by all instances; populated once at class initialization. */
    private static final Map<String, PortType> outputPorts = new HashMap<String, PortType>();

    static {
        outputPorts.put(PORT_OUT_DOCUMENT_CONTENT_URL, new AvroPortType(DocumentContentUrl.SCHEMA$));
        outputPorts.put(PORT_OUT_PLAINTEXT_URL, new AvroPortType(DocumentContentUrl.SCHEMA$));
        outputPorts.put(PORT_OUT_XML_PMC_URL, new AvroPortType(DocumentContentUrl.SCHEMA$));
    }

    /** Capacity of the queue between the retrieval loop and the writer thread. */
    private int queueSize = 5;

    /** Number of processed elements between two progress log entries. */
    private int progresLogInterval = 10000;

    private Collection<String> mimeTypesPdf;
    private Collection<String> mimeTypesText;
    private Collection<String> mimeTypesXmlPmc;

    public ObjectStoreDocumentContentUrlImporterProcess() {
        // output ports are registered in the static initializer
    }

    /**
     * @return an empty map, this process declares no input ports
     */
    public Map<String, PortType> getInputPorts() {
        return Collections.emptyMap();
    }

    /**
     * @return the three Avro output ports, all typed with the {@link DocumentContentUrl} schema
     */
    public Map<String, PortType> getOutputPorts() {
        return outputPorts;
    }

    /**
     * Reads objects from every resolved object store and queues a {@link DocumentContentUrl}
     * record for each object whose mime type matches one of the configured mime type lists.
     * Objects with any other mime type are only logged.
     *
     * @param portBindings bindings providing the output paths of the three output ports
     * @param hadoopContext source of the Hadoop configuration, used for the file system and
     *        as a fallback for parameters missing in {@code map}
     * @param map runtime parameters
     * @throws RuntimeException when the object store location is not provided, or when the
     *         IS Lookup location is needed to resolve datasource ids but is not provided
     * @throws Exception when the writer reports it was interrupted, when the writer task
     *         terminates before all records were queued, or when any service call fails
     */
    public void run(PortBindings portBindings, HadoopContext hadoopContext, Map<String, String> map) throws Exception {
        this.mimeTypesPdf = resolveMimeTypes(map, IMPORT_CONTENT_MIMETYPES_PDF, this.mimeTypesPdf);
        this.mimeTypesText = resolveMimeTypes(map, IMPORT_CONTENT_MIMETYPES_TEXT, this.mimeTypesText);
        this.mimeTypesXmlPmc = resolveMimeTypes(map, IMPORT_CONTENT_MIMETYPES_XML_PMC, this.mimeTypesXmlPmc);

        Long readTimeout = map.containsKey("import.resultset.client.read.timeout")
                ? Long.valueOf(map.get("import.resultset.client.read.timeout"))
                : null;

        String objectStoreLocation = resolveParam(map, hadoopContext, "import.content.object.store.location");
        if (objectStoreLocation == null || objectStoreLocation.isEmpty()) {
            throw new RuntimeException("unknown object store service location: no parameter provided: 'import.content.object.store.location'");
        }

        String[] objectStoreIds = resolveObjectStoreIds(map, hadoopContext);

        ObjectStoreService objectStoreService = (ObjectStoreService) new JaxwsServiceResolverImpl()
                .getService(ObjectStoreService.class, buildEndpoint(objectStoreLocation));

        Path contentPath = (Path) portBindings.getOutput().get(PORT_OUT_DOCUMENT_CONTENT_URL);
        Path plaintextPath = (Path) portBindings.getOutput().get(PORT_OUT_PLAINTEXT_URL);
        Path xmlPmcPath = (Path) portBindings.getOutput().get(PORT_OUT_XML_PMC_URL);
        FileSystem fileSystem = FileSystem.get(hadoopContext.getConfiguration());

        // The element type expected by AvroWriterRunnable is not visible from this file,
        // hence the queue is intentionally left raw.
        @SuppressWarnings("rawtypes")
        ArrayBlockingQueue queue = new ArrayBlockingQueue(this.queueSize);
        @SuppressWarnings("unchecked")
        AvroWriterRunnable writer = new AvroWriterRunnable(fileSystem, queue, 1);

        ExecutorService writerExecutor = Executors.newSingleThreadExecutor();
        try {
            Future<?> writerFuture = writerExecutor.submit(writer);
            long startTime = System.currentTimeMillis();
            long intervalStartTime = startTime;
            int processed = 0;
            log.warn("starting url retrieval...");
            for (String objectStoreId : objectStoreIds) {
                log.warn("starting importing process from object store: " + objectStoreId);
                W3CEndpointReference resultSetEpr = objectStoreService.deliverObjects(
                        objectStoreId, Double.valueOf(0.0d), Double.valueOf(System.currentTimeMillis()));
                log.warn("obtained ObjectStore ResultSet EPR: " + resultSetEpr.toString());

                ResultSetClientFactory resultSetClientFactory = new ResultSetClientFactory();
                if (readTimeout != null) {
                    resultSetClientFactory.setTimeout(readTimeout.longValue());
                }
                resultSetClientFactory.setServiceResolver(new JaxwsServiceResolverImpl());
                resultSetClientFactory.setPageSize(map.containsKey("import.content.objectstore.resultset.pagesize")
                        ? Integer.valueOf(map.get("import.content.objectstore.resultset.pagesize")).intValue()
                        : DEFAULT_PAGESIZE);

                Iterator<?> records = resultSetClientFactory.getClient(resultSetEpr).iterator();
                while (records.hasNext()) {
                    ObjectStoreFile objectStoreFile = ObjectStoreFile.createObject((String) records.next());
                    if (objectStoreFile != null) {
                        String resultId = ObjectStoreContentProviderUtils.extractResultIdFromObjectId(objectStoreFile.getObjectID());
                        DocumentContentUrl.Builder documentBuilder = DocumentContentUrl.newBuilder();
                        documentBuilder.setId(resultId);
                        UrlWithMimeType.Builder urlBuilder = UrlWithMimeType.newBuilder();
                        urlBuilder.setUrl(objectStoreFile.getURI());
                        urlBuilder.setMimeType(objectStoreFile.getMimeType());
                        documentBuilder.setUrl(Collections.singletonList(urlBuilder.build()));

                        Object mimeType = urlBuilder.getMimeType();
                        Path targetPath = selectOutputPath(mimeType, contentPath, plaintextPath, xmlPmcPath);
                        if (targetPath != null) {
                            enqueue(queue, writer, writerFuture, new ObjectWithPath(documentBuilder.build(), targetPath));
                        } else {
                            log.warn("got unhandled mime type: " + mimeType + " for object: " + resultId);
                        }
                    }
                    // a non-positive interval disables progress logging instead of dividing by zero
                    if (processed > 0 && this.progresLogInterval > 0 && processed % this.progresLogInterval == 0) {
                        log.warn("content retrieval progress: " + processed + ", time taken to process " + this.progresLogInterval + " elements: " + ((System.currentTimeMillis() - intervalStartTime) / 1000) + " secs");
                        intervalStartTime = System.currentTimeMillis();
                    }
                    processed++;
                }
                log.warn("URL importing process from object store: " + objectStoreId + " has finished");
            }
            // The end marker goes through the same blocking path as regular records:
            // a plain add() throws IllegalStateException when the bounded queue is still full.
            enqueue(queue, writer, writerFuture, new Poison());
            log.warn("waiting for writer thread finishing writing content");
            writerFuture.get();
            log.warn("content retrieval for " + processed + " documents finished in " + ((System.currentTimeMillis() - startTime) / 1000) + " secs");
        } finally {
            // Always release the worker thread. On the success path the task has already
            // completed; on failure this interrupts the writer so it does not linger.
            writerExecutor.shutdownNow();
        }
    }

    /**
     * Offers an item to the writer queue, retrying every 10 seconds until it is accepted.
     *
     * @throws Exception when the writer reports it was interrupted, or when the writer task
     *         has terminated while the item could still not be queued
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static void enqueue(ArrayBlockingQueue queue, AvroWriterRunnable writer, Future<?> writerFuture, Object item) throws Exception {
        boolean accepted = false;
        while (!accepted) {
            accepted = queue.offer(item, 10L, TimeUnit.SECONDS);
            if (writer.isWasInterrupted()) {
                throw new Exception("worker thread was interrupted, exitting");
            }
            if (!accepted && writerFuture.isDone()) {
                // nothing drains the queue any more: surface the writer failure instead of retrying forever
                writerFuture.get();
                throw new IllegalStateException("writer thread terminated before all records were queued");
            }
        }
    }

    /**
     * Picks the output path matching the given mime type, checking the PDF, text and
     * XML PMC mime type lists in that order.
     *
     * @return the matching path or {@code null} when the mime type is on none of the lists
     */
    private Path selectOutputPath(Object mimeType, Path contentPath, Path plaintextPath, Path xmlPmcPath) {
        if (this.mimeTypesPdf.contains(mimeType)) {
            return contentPath;
        }
        if (this.mimeTypesText.contains(mimeType)) {
            return plaintextPath;
        }
        if (this.mimeTypesXmlPmc.contains(mimeType)) {
            return xmlPmcPath;
        }
        return null;
    }

    /**
     * Resolves the identifiers of the object stores to read from: either taken directly
     * from 'import.content.approved.objectstores.csv' or, when that is undefined, looked up
     * through the IS Lookup service for every id in 'import.approved.datasources.csv'.
     *
     * @return object store identifiers, an empty array when neither parameter is defined
     */
    private static String[] resolveObjectStoreIds(Map<String, String> params, HadoopContext hadoopContext) throws Exception {
        String objectStoresCsv = resolveParam(params, hadoopContext, "import.content.approved.objectstores.csv");
        if (!isUndefined(objectStoresCsv)) {
            return StringUtils.split(objectStoresCsv, ',');
        }
        String datasourcesCsv = resolveParam(params, hadoopContext, "import.approved.datasources.csv");
        if (isUndefined(datasourcesCsv)) {
            log.warn("unable to locate object stores containing contents: neither 'import.content.approved.objectstores.csv' nor 'import.approved.datasources.csv' parameter provided! Empty content and text datastores will be created!");
            return new String[0];
        }
        String lookupLocation = resolveParam(params, hadoopContext, "import.content.lookup.service.location");
        if (lookupLocation == null || lookupLocation.isEmpty()) {
            throw new RuntimeException("unable to get objectstore id based on datasource id, unknown IS Lookup service location: no parameter provided: 'import.content.lookup.service.location'");
        }
        ISLookUpService lookupService = (ISLookUpService) new JaxwsServiceResolverImpl()
                .getService(ISLookUpService.class, buildEndpoint(lookupLocation));
        String[] datasourceIds = StringUtils.split(datasourcesCsv, ',');
        String[] objectStoreIds = new String[datasourceIds.length];
        for (int i = 0; i < datasourceIds.length; i++) {
            objectStoreIds[i] = ObjectStoreContentProviderUtils.objectStoreIdLookup(lookupService, datasourceIds[i]);
        }
        return objectStoreIds;
    }

    /**
     * Reads a parameter from the runtime parameters, falling back to the Hadoop
     * configuration when the key is absent from the parameters map.
     */
    private static String resolveParam(Map<String, String> params, HadoopContext hadoopContext, String key) {
        return params.containsKey(key) ? params.get(key) : hadoopContext.getConfiguration().get(key);
    }

    /** Builds an endpoint reference pointing at the given service address. */
    private static W3CEndpointReference buildEndpoint(String address) {
        W3CEndpointReferenceBuilder endpointBuilder = new W3CEndpointReferenceBuilder();
        endpointBuilder.address(address);
        return endpointBuilder.build();
    }

    /** Tells whether a parameter value is missing, empty or set to the undefined placeholder. */
    private static boolean isUndefined(String value) {
        return value == null || value.isEmpty() || UNDEFINED_VALUE.equals(value);
    }

    /**
     * Resolves one mime type list. When the key is present its comma separated value
     * replaces the current list, otherwise the current list is kept. A list that is
     * missing, empty or holds only the undefined placeholder becomes an empty list.
     */
    private Collection<String> resolveMimeTypes(Map<String, String> params, String key, Collection<String> current) {
        Collection<String> resolved = current;
        if (params.containsKey(key)) {
            String csv = params.get(key);
            // a key mapped to null is treated as undefined rather than failing in Arrays.asList
            resolved = csv == null ? null : Arrays.asList(StringUtils.split(csv, ','));
        }
        return skipProcessing(resolved) ? Collections.<String>emptyList() : resolved;
    }

    /**
     * @return {@code true} when the collection is null, empty or consists solely of the
     *         undefined placeholder
     */
    private boolean skipProcessing(Collection<String> collection) {
        return collection == null || collection.isEmpty() || (collection.size() == 1 && UNDEFINED_VALUE.equals(collection.iterator().next()));
    }

    /**
     * Sets the capacity of the queue created on the next {@code run} invocation.
     */
    public void setQueueSize(int i) {
        this.queueSize = i;
    }

    /**
     * Sets the number of processed elements between two progress log entries;
     * a value of zero or less disables progress logging.
     */
    public void setProgresLogInterval(int i) {
        this.progresLogInterval = i;
    }
}
