package org.dlese.dpc.oai.harvester;

import com.mongodb.util.JSONCallback;
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.SimpleTimeZone;
import java.util.zip.GZIPInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.hadoop.hdfs.HftpFileSystem;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.archive.net.UURIFactory;
import org.dlese.dpc.oai.OAIArgs;
import org.dlese.dpc.oai.OAIUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXParseException;

/* loaded from: input_file:WEB-INF/lib/jOAI-2.0.9.3.jar:org/dlese/dpc/oai/harvester/Harvester.class */
public class Harvester implements ErrorHandler {
    // Granularity codes. NOTE(review): not referenced by name in this file; they presumably
    // correspond to the literals 1 and 2 that getGranularity() returns - confirm.
    private static final int GRAN_DAY = 1;
    private static final int GRAN_SECOND = 2;
    // Counter shared by all instances; each constructor adds its current value to the
    // wall-clock time to build harvestId, then increments it.
    private static long nextIdIter = 0;
    // Debug level: extra diagnostics are printed when this is >= 1. The constructors set it to 0.
    private int bugs;
    // Parser errors / warnings collected by the ErrorHandler callbacks; reset by getDoc().
    private String xmlerrors;
    private String xmlwarnings;
    // Number of records stored so far (see extractContent()).
    private int recordCount;
    // Number of non-empty resumption tokens seen so far (see extractRecords()).
    private int resumpCount;
    // Exposed through getStartTime()/getEndTime(). NOTE(review): presumably set by doHarvest(),
    // whose body is not available here - confirm.
    private long startTime;
    private long endTime;
    // Exposed through isRunning(); initialised to false by the constructors.
    private boolean isRunning;
    // Initialised to false by the constructors; not read anywhere in the visible code.
    private boolean hasDoneHarvest;
    // Set by kill(); checked once per record in extractRecords().
    private boolean killed;
    // Exposed through getHarvestedRecordsDir(); initialised to "".
    private String outputDir;
    // Optional progress listener; may be null.
    private HarvestMessageHandler msgHandler;
    // A status message is sent to msgHandler every messagingNum records (default 100).
    private int messagingNum;
    // Identifier of this harvester instance, see getHarvestUid().
    private long harvestId;

    /**
     * Prints the given error followed by the command-line usage summary, then
     * terminates the JVM with exit status 1.
     *
     * @param str description of what was wrong with the arguments
     */
    private static void badparms(String str) {
        prtln("Error: " + str);
        String[] usageLines = {
            "Parms:",
            "    outdir",
            "    baseURL",
            "    metadataPrefix",
            "    [ set ]",
            "    [ from ]",
            "    [ until ]"
        };
        for (int lineNo = 0; lineNo < usageLines.length; lineNo++) {
            prtln(usageLines[lineNo]);
        }
        System.exit(1);
    }

    /**
     * Command-line entry point.
     *
     * <p>Expected arguments, in order (see {@link #badparms(String)}):
     * {@code outdir baseURL metadataPrefix [set] [from] [until]}. An empty
     * {@code outdir} or the literal {@code null} (any case) means "no output
     * directory"; an empty {@code metadataPrefix} or {@code set} is treated as
     * absent. Dates are parsed with {@link #parseDate(String)}.
     *
     * <p>On any harvesting or date-parsing failure the reason is written to
     * standard error and the JVM exits with status 1. When the harvest returns
     * a result matrix, each row is printed to standard output.
     *
     * @param strArr the command-line arguments (3 to 6 entries)
     */
    public static void main(String[] strArr) {
        if (strArr.length < 3 || strArr.length > 6) {
            badparms("wrong num parms");
        }

        String outdir = strArr[0];
        if (outdir.length() == 0 || outdir.equalsIgnoreCase("null")) {
            outdir = null;
        }
        String baseUrl = strArr[1];
        String metadataPrefix = strArr[2];
        if (metadataPrefix.length() == 0) {
            metadataPrefix = null;
        }

        // The optional arguments sit at fixed positions 3, 4 and 5.
        String setSpec = null;
        if (strArr.length >= 4) {
            setSpec = strArr[3];
            if (setSpec.length() == 0) {
                setSpec = null;
            }
        }

        Date from = null;
        Date until = null;
        try {
            if (strArr.length >= 5) {
                from = parseDate(strArr[4]);
            }
            if (strArr.length >= 6) {
                until = parseDate(strArr[5]);
            }
        } catch (Hexception e) {
            prtlnErr("Error: " + e.getMessage());
            System.exit(1);
        }

        String[][] results = null;
        try {
            results = harvest(baseUrl, metadataPrefix, setSpec, from, until, outdir, new SimpleHarvestMessageHandler(), false);
        } catch (Hexception e2) {
            prtlnErr("Error: " + e2.getMessage());
            System.exit(1);
        } catch (OAIErrorException e3) {
            prtlnErr("Error: " + e3.getMessage());
            System.exit(1);
        }

        if (results != null) {
            for (int row = 0; row < results.length; row++) {
                prtln("resmat " + row + ": \"" + results[row][0] + UURIFactory.QUOT);
                prtln(results[row][1]);
                prtln("\n");
            }
        }
    }

    /**
     * Creates a {@code Harvester} and runs a single harvest with it.
     *
     * <p>All arguments except the handler are forwarded unchanged, in the same
     * order, to {@code doHarvest}. When {@code harvestMessageHandler} is
     * {@code null} the no-argument constructor is used; otherwise the handler
     * is passed to the constructor.
     *
     * @return whatever {@code doHarvest} returns
     * @throws Hexception        propagated from {@code doHarvest}
     * @throws OAIErrorException propagated from {@code doHarvest}
     */
    public static String[][] harvest(String str, String str2, String str3, Date date, Date date2, String str4, HarvestMessageHandler harvestMessageHandler, boolean z) throws Hexception, OAIErrorException {
        Harvester harvester;
        if (harvestMessageHandler == null) {
            harvester = new Harvester();
        } else {
            harvester = new Harvester(harvestMessageHandler);
        }
        return harvester.doHarvest(str, str2, str3, date, date2, str4, z);
    }

    /**
     * Convenience overload of the eight-argument {@code harvest} that supplies
     * a fresh {@code SimpleHarvestMessageHandler} as the message handler.
     *
     * @return whatever the eight-argument overload returns
     * @throws Hexception        propagated from the eight-argument overload
     * @throws OAIErrorException propagated from the eight-argument overload
     */
    public static String[][] harvest(String str, String str2, String str3, Date date, Date date2, String str4, boolean z) throws Hexception, OAIErrorException {
        HarvestMessageHandler defaultHandler = new SimpleHarvestMessageHandler();
        return harvest(str, str2, str3, date, date2, str4, defaultHandler, z);
    }

    /**
     * Creates a harvester without a message handler. Status notification
     * interval defaults to 100 records, and a harvest id is derived from the
     * current time plus the shared instance counter.
     */
    public Harvester() {
        // Counters, timestamps and debug level.
        bugs = 0;
        recordCount = 0;
        resumpCount = 0;
        startTime = 0L;
        endTime = 0L;

        // Lifecycle flags.
        isRunning = false;
        hasDoneHarvest = false;
        killed = false;

        outputDir = "";
        messagingNum = 100;
        msgHandler = null;

        // Current time plus the pre-increment counter value.
        harvestId = System.currentTimeMillis() + nextIdIter++;
    }

    /**
     * Creates a harvester that reports progress to the given handler.
     *
     * <p>When the handler is non-null, its
     * {@code getNumRecordsForStatusNotification()} value replaces the default
     * notification interval of 100 records. A harvest id is derived from the
     * current time plus the shared instance counter.
     *
     * @param harvestMessageHandler progress listener; may be {@code null}
     */
    public Harvester(HarvestMessageHandler harvestMessageHandler) {
        // Counters, timestamps and debug level.
        bugs = 0;
        recordCount = 0;
        resumpCount = 0;
        startTime = 0L;
        endTime = 0L;

        // Lifecycle flags.
        isRunning = false;
        hasDoneHarvest = false;
        killed = false;

        outputDir = "";
        messagingNum = 100;
        msgHandler = harvestMessageHandler;
        if (msgHandler != null) {
            messagingNum = msgHandler.getNumRecordsForStatusNotification();
        }

        // Current time plus the pre-increment counter value.
        harvestId = System.currentTimeMillis() + nextIdIter++;
    }

    /**
     * Requests that the harvest stop: logs the request and raises the
     * {@code killed} flag, which {@code extractRecords} checks before each record.
     */
    public void kill() {
        prtln("Harvester kill() ");
        killed = true;
    }

    /**
     * Sets how many records are processed between status messages.
     *
     * @param i the new notification interval, in records
     */
    public void setNumRecordsForNotification(int i) {
        messagingNum = i;
    }

    /** @return the current value of the {@code startTime} field (0 until set) */
    public long getStartTime() {
        return startTime;
    }

    /** @return the current value of the {@code outputDir} field (initially the empty string) */
    public String getHarvestedRecordsDir() {
        return outputDir;
    }

    /** @return the id assigned to this harvester when it was constructed */
    public long getHarvestUid() {
        return harvestId;
    }

    /** @return the current value of the {@code endTime} field (0 until set) */
    public long getEndTime() {
        return endTime;
    }

    /** @return the number of records counted so far by this harvester */
    public int getNumRecordsHarvested() {
        return recordCount;
    }

    /** @return the number of non-empty resumption tokens seen so far */
    public int getNumResumptionTokensIssued() {
        return resumpCount;
    }

    /** @return the current value of the {@code isRunning} flag */
    public boolean isRunning() {
        return isRunning;
    }

    /* JADX WARN: Code restructure failed: missing block: B:53:0x022d, code lost:
    
        r18 = r18 + 1;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Instance entry point used by the static {@code harvest(...)} factories.
     *
     * <p>WARNING: the decompiler did not recover this method's body. As it
     * stands here the method performs no harvesting and unconditionally throws
     * {@link UnsupportedOperationException}; the real logic must be restored
     * from the original class file or source before this class is usable.
     *
     * <p>NOTE(review): judging from the argument order used by {@code main},
     * the parameters are presumably (baseURL, metadataPrefix, set, from,
     * until, outdir, flag) - confirm against the original source.
     *
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    public java.lang.String[][] doHarvest(java.lang.String r11, java.lang.String r12, java.lang.String r13, java.util.Date r14, java.util.Date r15, java.lang.String r16, boolean r17) throws org.dlese.dpc.oai.harvester.Hexception, org.dlese.dpc.oai.harvester.OAIErrorException {
        /*
            Method dump skipped, instructions count: 834
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in lieu of the real body.
        throw new UnsupportedOperationException("Method not decompiled: org.dlese.dpc.oai.harvester.Harvester.doHarvest(java.lang.String, java.lang.String, java.lang.String, java.util.Date, java.util.Date, java.lang.String, boolean):java.lang.String[][]");
    }

    /**
     * Processes every {@code record} element of one ListRecords response and
     * returns the resumption token that follows them, if any.
     *
     * <p>If the document root has an {@code error} child, an
     * {@link OAIErrorException} carrying its {@code code} attribute and text is
     * thrown. Otherwise each record's {@code header} is handed to
     * {@code extractContent}. Iteration stops at the end of the sibling list
     * or at the first resumption-token element; a non-empty token increments
     * {@code resumpCount}.
     *
     * @param str        forwarded unchanged to {@code extractContent}
     * @param document   the parsed response
     * @param linkedList forwarded unchanged to {@code extractContent}
     * @param str2       forwarded unchanged to {@code extractContent}
     * @param str3       forwarded unchanged to {@code extractContent}
     * @param z          forwarded unchanged to {@code extractContent}
     * @return the resumption token text, or {@code null} when the response has
     *         no token or an empty one
     * @throws OAIErrorException if the response contains an {@code error} element
     * @throws Hexception        if the response has no records, is malformed,
     *                           a record cannot be stored, or the harvest was killed
     */
    private String extractRecords(String str, Document document, LinkedList linkedList, String str2, String str3, boolean z) throws Hexception, OAIErrorException {
        Element root = document.getDocumentElement();

        Element oaiError = findChild(root, "error");
        if (oaiError != null) {
            throw new OAIErrorException(oaiError.getAttribute("code"), getContent(oaiError));
        }

        // When there is no ListRecords wrapper, look for records directly under the root.
        Element container;
        try {
            container = mustFindChild(root, OAIArgs.LIST_RECORDS);
        } catch (Hexception e) {
            container = root;
        }

        // Note: every Hexception raised inside this try (including the kill signal and
        // failures from extractContent) is re-reported by the handler below.
        try {
            String resumptionToken = null;
            Element current = mustFindChild(container, "record");
            while (current != null) {
                if (this.killed) {
                    throw new Hexception("Harvest received kill signal");
                }
                extractContent(str, mustFindChild(current, "header"), linkedList, str2, document, str3, z);

                current = findSibling(current, "record", OAIArgs.RESUMPTION_TOKEN);
                if (current != null && current.getNodeName().equals(OAIArgs.RESUMPTION_TOKEN)) {
                    resumptionToken = getContent(current);
                    if (resumptionToken.length() == 0) {
                        resumptionToken = null;
                    } else {
                        this.resumpCount++;
                    }
                    // The token element is not a record: stop here instead of
                    // looping again and looking for a <header> inside it.
                    break;
                }
            }
            return resumptionToken;
        } catch (Hexception e2) {
            if (findChild(container, "requestURL") != null) {
                throw new Hexception("No matching records were returned by the data provider (protocol version 1.x)");
            }
            throw new Hexception("The data provider returned an invalid response to the ListRecords request: " + e2.getMessage());
        }
    }

    /**
     * Stores (or deletes) the record described by one OAI {@code header} element.
     *
     * <p>The record identifier, its {@code status} attribute, the first element
     * child of the sibling {@code metadata} element and all {@code setSpec}
     * siblings after the datestamp are read from the DOM. A record whose
     * status equals the deleted marker (ignoring case) is treated as deleted.
     *
     * <p>When {@code str2} is {@code null} the result is appended to
     * {@code linkedList} as a two-element {@code String[]}: the encoded
     * identifier plus either the deleted marker or the serialized metadata.
     * Otherwise files are written under the directory returned by
     * {@code OAIUtils.getHarvestedDirPath(str2, setSpec, str, str3)}, once per
     * setSpec (or once with an empty setSpec when the record has none); for a
     * deleted record the corresponding {@code .xml} and {@code _hdr.xml} files
     * are removed instead.
     *
     * @param str        passed as the third argument of {@code getHarvestedDirPath}
     * @param element    the record's {@code header} element
     * @param linkedList receives in-memory results when {@code str2} is {@code null}
     * @param str2       passed as the first argument of {@code getHarvestedDirPath};
     *                   {@code null} selects in-memory mode
     * @param document   the owning document, used to configure file output
     * @param str3       passed as the fourth argument of {@code getHarvestedDirPath}
     * @param z          when {@code true}, the header is also written to a {@code _hdr.xml} file
     * @throws Hexception if a required element is missing, or serializing or writing fails
     */
    private void extractContent(String str, Element element, LinkedList linkedList, String str2, Document document, String str3, boolean z) throws Hexception {
        String status = element.getAttribute("status");
        Element identifierElement = mustFindChild(element, OAIArgs.IDENTIFIER);
        String identifier = getContent(identifierElement);
        boolean deleted = status != null && status.equalsIgnoreCase(OAIConfigurationReader.DELETED_FIELD);

        Thread.yield();

        // Live records must carry a <metadata> sibling with at least one element child.
        Element metadataRoot = null;
        if (!deleted) {
            Node candidate = mustFindSibling(element, "metadata").getFirstChild();
            while (candidate != null && candidate.getNodeType() != Node.ELEMENT_NODE) {
                candidate = candidate.getNextSibling();
            }
            if (candidate == null) {
                throw new Hexception("meta content not found");
            }
            metadataRoot = (Element) candidate;
        }

        Element datestampElement = mustFindSibling(identifierElement, OAIConfigurationReader.DATESTAMP_FIELD);
        LinkedList setSpecs = new LinkedList();
        for (Element setSpec = findSibling(datestampElement, "setSpec"); setSpec != null; setSpec = findSibling(setSpec, "setSpec")) {
            setSpecs.add(getContent(setSpec));
        }
        if (setSpecs.isEmpty()) {
            // A record without sets is filed once, under the empty set name.
            setSpecs.add("");
        }

        if (str2 == null) {
            // In-memory mode: hand the result back through linkedList.
            if (deleted) {
                linkedList.add(new String[]{encode(identifier), OAIConfigurationReader.DELETED_FIELD});
                return;
            }
            StringWriter serialized = new StringWriter();
            try {
                new XMLSerializer(serialized, (OutputFormat) null).serialize(metadataRoot);
                serialized.close();
            } catch (IOException e) {
                throw new Hexception("cannot serialize.  reason: " + e);
            }
            linkedList.add(new String[]{encode(identifier), serialized.toString()});
            countHarvestedRecord();
            return;
        }

        // File mode: one copy per setSpec.
        for (Iterator it = setSpecs.iterator(); it.hasNext();) {
            String dirPath = OAIUtils.getHarvestedDirPath(str2, (String) it.next(), str, str3);
            String fileBase = dirPath + "/" + encode(identifier);
            if (deleted) {
                new File(fileBase + ".xml").delete();
                new File(fileBase + "_hdr.xml").delete();
            } else {
                mkdirs(dirPath);
                if (this.bugs >= 1) {
                    prtln("fnamebase: \"" + fileBase + UURIFactory.QUOT);
                }
                if (z) {
                    writedoc(fileBase + "_hdr.xml", element, document);
                }
                writedoc(fileBase + ".xml", metadataRoot, document);
                countHarvestedRecord();
            }
        }
    }

    /**
     * Increments the record counter and, every {@code messagingNum} records,
     * sends a status message to the handler. A missing handler or a
     * non-positive interval disables notification (and avoids a division by zero).
     */
    private void countHarvestedRecord() {
        this.recordCount++;
        if (this.msgHandler != null && this.messagingNum > 0 && this.recordCount % this.messagingNum == 0) {
            this.msgHandler.statusMessage(this.recordCount, this.resumpCount);
        }
    }

    /**
     * Ensures that the directory {@code str} exists, creating it and any
     * missing parent directories.
     *
     * <p>Creation is best-effort: a failure is not reported here and will
     * surface when the caller subsequently tries to write into the directory.
     *
     * @param str path of the directory to create
     */
    private void mkdirs(String str) {
        File directory = new File(str);
        if (directory.exists()) {
            return;
        }
        if (this.bugs >= 1) {
            prtln("mkdir: \"" + directory + UURIFactory.QUOT);
        }
        // File.mkdirs() handles absolute paths and the platform separator,
        // which the previous hand-rolled walk over "/" did not.
        directory.mkdirs();
    }

    /**
     * Fetches {@code str} over HTTP and parses the response body into a DOM document.
     *
     * <p>The request advertises gzip support and transparently unwraps a
     * gzip-encoded body. Any status other than 200, any I/O or parse failure,
     * and any error or warning reported by the XML parser results in an
     * {@link Hexception} whose message starts with a fixed explanation
     * followed by the underlying detail.
     *
     * <p>Because the document comes from a remote provider, resolution of
     * external entities and external DTDs is switched off where the parser
     * supports those features.
     *
     * @param str the complete request URL
     * @return the parsed document
     * @throws Hexception if the document cannot be retrieved or parsed cleanly
     */
    private Document getDoc(String str) throws Hexception {
        if (this.bugs >= 1) {
            prtln("getDoc: request: \"" + str + UURIFactory.QUOT);
        }
        HttpURLConnection connection = null;
        InputStream inputStream = null;
        try {
            connection = (HttpURLConnection) new URL(str).openConnection();
            connection.setRequestProperty("Connection", "close");
            connection.setRequestProperty("Accept-Encoding", "gzip;q=1.0, identity;q=0.5, *;q=0");
            // Per-connection setting; the static setter would change JVM-wide state
            // and would not affect a connection that has already been opened.
            connection.setInstanceFollowRedirects(true);
            connection.connect();

            int responseCode = connection.getResponseCode();
            if (responseCode != 200) {
                throw new Hexception("document not found: respcode: " + responseCode);
            }

            inputStream = connection.getInputStream();
            String contentEncoding = connection.getContentEncoding();
            if (contentEncoding != null && contentEncoding.equalsIgnoreCase("gzip")) {
                inputStream = new GZIPInputStream(inputStream);
            }

            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setCoalescing(true);
            factory.setExpandEntityReferences(true);
            factory.setIgnoringComments(true);
            factory.setNamespaceAware(true);
            factory.setValidating(false);
            factory.setIgnoringElementContentWhitespace(false);

            // XXE hardening: never let a remote document pull in external entities or DTDs.
            String[] externalEntityFeatures = {
                "http://xml.org/sax/features/external-general-entities",
                "http://xml.org/sax/features/external-parameter-entities",
                "http://apache.org/xml/features/nonvalidating/load-external-dtd"
            };
            for (int f = 0; f < externalEntityFeatures.length; f++) {
                try {
                    factory.setFeature(externalEntityFeatures[f], false);
                } catch (ParserConfigurationException ignored) {
                    // This parser does not know the feature; keep going with the others.
                }
            }

            DocumentBuilder builder = factory.newDocumentBuilder();
            this.xmlerrors = "";
            this.xmlwarnings = "";
            builder.setErrorHandler(this);
            Document parsed = builder.parse(inputStream);

            if (this.xmlerrors.length() == 0 && this.xmlwarnings.length() == 0) {
                return parsed;
            }
            StringBuffer problems = new StringBuffer("XML validation failed.\n");
            if (this.xmlerrors.length() > 0) {
                problems.append("Errors:\n").append(this.xmlerrors);
            }
            if (this.xmlwarnings.length() > 0) {
                problems.append("Warnings:\n").append(this.xmlwarnings);
            }
            throw new Hexception(problems.toString());
        } catch (Exception e) {
            // Some exceptions carry no message; fall back to toString() rather than hit a NullPointerException.
            String detail = e.getMessage();
            boolean badHttpStatus = detail != null && detail.matches(".*respcode.*");
            if (detail == null) {
                detail = e.toString();
            }
            if (badHttpStatus) {
                throw new Hexception("The request for data resulted in an invalid response from the provider. The baseURL indicated may be incorrect or the service may be unavailable. HTTP response: " + detail);
            }
            throw new Hexception("The request for data resulted in an invalid response from the provider. Error: " + detail);
        } finally {
            // Release the stream and the connection on every path, including failures.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Serializes {@code element} as XML into the file {@code str}, replacing any existing file.
     *
     * <p>The file is written with the same charset that is declared in the
     * serializer's output format, so the encoding named in the XML
     * declaration matches the bytes on disk regardless of the platform
     * default. The file handle is released even when serialization fails.
     *
     * @param str      path of the file to write
     * @param element  the element to serialize
     * @param document the owning document, used to build the output format
     * @throws Hexception if the file cannot be opened or written
     */
    private void writedoc(String str, Element element, Document document) throws Hexception {
        FileOutputStream fileOut = null;
        boolean completed = false;
        try {
            fileOut = new FileOutputStream(str);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fileOut, ContentStreamBase.DEFAULT_CHARSET));
            new XMLSerializer(writer, new OutputFormat(document, ContentStreamBase.DEFAULT_CHARSET, false)).serialize(element);
            // close() flushes; a failure here is still reported through the catch below.
            writer.close();
            completed = true;
        } catch (IOException e) {
            throw new Hexception("cannot write file \"" + str + "\"  reason: " + e);
        } finally {
            if (!completed && fileOut != null) {
                try {
                    fileOut.close();
                } catch (IOException ignored) {
                    // The original failure is already being reported.
                }
            }
        }
    }

    /**
     * Like {@code findChild}, but a missing child is an error.
     *
     * @param element the parent to search
     * @param str     the node name to look for
     * @return the first matching child element, never {@code null}
     * @throws Hexception if no child element has that name
     */
    private Element mustFindChild(Element element, String str) throws Hexception {
        Element match = findChild(element, str);
        if (match != null) {
            return match;
        }
        throw new Hexception("Element not found: \"" + str + UURIFactory.QUOT);
    }

    /**
     * Returns the first direct child of {@code element} that is an element
     * node whose node name equals {@code str}.
     *
     * @param element the parent to search
     * @param str     the node name to look for
     * @return the matching child, or {@code null} if there is none
     */
    private Element findChild(Element element, String str) {
        for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(str)) {
                return (Element) child;
            }
        }
        return null;
    }

    /**
     * Like the two-argument {@code findSibling}, but a missing sibling is an error.
     *
     * @param element the node whose following siblings are searched
     * @param str     the node name to look for
     * @return the first matching following sibling, never {@code null}
     * @throws Hexception if no following sibling element has that name
     */
    private Element mustFindSibling(Element element, String str) throws Hexception {
        Element match = findSibling(element, str);
        if (match != null) {
            return match;
        }
        throw new Hexception("Element not found: \"" + str + UURIFactory.QUOT);
    }

    /**
     * Returns the first sibling after {@code element} that is an element node
     * whose node name equals {@code str}.
     *
     * @param element the node whose following siblings are searched
     * @param str     the node name to look for
     * @return the matching sibling, or {@code null} if there is none
     */
    private Element findSibling(Element element, String str) {
        for (Node sibling = element.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
            if (sibling.getNodeType() == Node.ELEMENT_NODE && sibling.getNodeName().equals(str)) {
                return (Element) sibling;
            }
        }
        return null;
    }

    /**
     * Returns the first sibling after {@code element} that is an element node
     * whose node name equals either {@code str} or {@code str2}.
     *
     * <p>Reconstructed from the decompiler's instruction dump, which walks the
     * following siblings and stops at the first element node matching either
     * name; the previous placeholder body unconditionally threw
     * {@link UnsupportedOperationException}.
     *
     * @param element the node whose following siblings are searched
     * @param str     the first acceptable node name
     * @param str2    the second acceptable node name
     * @return the matching sibling, or {@code null} if there is none
     */
    private Element findSibling(Element element, String str, String str2) {
        Element found = null;
        Node sibling = element.getNextSibling();
        while (sibling != null) {
            if (sibling.getNodeType() == Node.ELEMENT_NODE
                    && (sibling.getNodeName().equals(str) || sibling.getNodeName().equals(str2))) {
                found = (Element) sibling;
                break;
            }
            sibling = sibling.getNextSibling();
        }
        return found;
    }

    /**
     * Collects the text found in {@code node} and its descendants, as gathered
     * by {@code getContentSub}.
     *
     * @param node the node to read
     * @return the collected text; empty (never {@code null}) when there is none
     */
    private String getContent(Node node) {
        StringBuffer collected = new StringBuffer();
        getContentSub(node, collected);
        return collected.toString();
    }

    /**
     * Recursive worker for {@code getContent}.
     *
     * <p>Element nodes are descended into, child by child. Text and CDATA
     * nodes contribute their trimmed value. Every other node type is skipped.
     *
     * @param node         the node to visit
     * @param stringBuffer accumulator that receives the text
     */
    private void getContentSub(Node node, StringBuffer stringBuffer) {
        short nodeType = node.getNodeType();
        if (nodeType == Node.ELEMENT_NODE) {
            for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
                getContentSub(child, stringBuffer);
            }
        } else if (nodeType == Node.TEXT_NODE || nodeType == Node.CDATA_SECTION_NODE) {
            stringBuffer.append(node.getNodeValue().trim());
        }
    }

    /**
     * Issues an Identify request against {@code str} and reports the date
     * granularity the provider declares.
     *
     * <p>Returns {@code GRAN_DAY} for {@code YYYY-MM-DD} and
     * {@code GRAN_SECOND} for {@code yyyy-mm-ddThh:mm:ssZ} (compared
     * case-insensitively). If the granularity cannot be determined but the
     * root element has a {@code protocolVersion} child, day granularity is assumed.
     *
     * @param str the provider's base URL
     * @return {@code GRAN_DAY} or {@code GRAN_SECOND}
     * @throws OAIErrorException if the response contains an {@code error} element
     * @throws Hexception        if the request fails or the response is not usable
     */
    private int getGranularity(String str) throws Hexception, OAIErrorException {
        Element root = getDoc(str + "?verb=Identify").getDocumentElement();

        Element oaiError = findChild(root, "error");
        if (oaiError != null) {
            throw new OAIErrorException(oaiError.getAttribute("code"), getContent(oaiError));
        }

        try {
            Element identify = mustFindChild(root, OAIArgs.IDENTIFY);
            String declared = getContent(mustFindChild(identify, "granularity"));
            if (declared.equals("YYYY-MM-DD")) {
                if (this.bugs >= 1) {
                    prtln("granularity: day");
                }
                return GRAN_DAY;
            }
            if (declared.toLowerCase().equals("yyyy-mm-ddthh:mm:ssz")) {
                if (this.bugs >= 1) {
                    prtln("granularity: second");
                }
                return GRAN_SECOND;
            }
            throw new Hexception("provider supports an invalid granularity: " + declared);
        } catch (Hexception e) {
            // Fallback: a protocolVersion element directly under the root means day granularity.
            try {
                mustFindChild(root, "protocolVersion");
                return GRAN_DAY;
            } catch (Throwable th) {
                throw new Hexception("The data provider returned an invalid response to the Identify request: " + th.getMessage());
            }
        }
    }

    /**
     * Issues a ListMetadataFormats request against {@code str} and returns the
     * advertised metadata prefixes, with every {@code :} replaced by {@code /}.
     *
     * @param str the provider's base URL
     * @return the prefixes in document order
     * @throws OAIErrorException if the response contains an {@code error} element
     * @throws Hexception        if the request fails or a required element is missing
     */
    private String[] getPrefices(String str) throws Hexception, OAIErrorException {
        Element root = getDoc(str + "?verb=ListMetadataFormats").getDocumentElement();

        Element oaiError = findChild(root, "error");
        if (oaiError != null) {
            throw new OAIErrorException(oaiError.getAttribute("code"), getContent(oaiError));
        }

        Element formatList = mustFindChild(root, OAIArgs.LIST_METADATA_FORMATS);
        LinkedList rawPrefixes = new LinkedList();
        for (Element format = mustFindChild(formatList, "metadataFormat"); format != null; format = findSibling(format, "metadataFormat")) {
            rawPrefixes.add(getContent(mustFindChild(format, OAIArgs.METADATA_PREFIX)));
        }

        String[] prefixes = new String[rawPrefixes.size()];
        int slot = 0;
        for (Iterator it = rawPrefixes.iterator(); it.hasNext(); slot++) {
            prefixes[slot] = ((String) it.next()).replaceAll(":", "/");
            if (this.bugs >= 1) {
                prtln("prefix: \"" + prefixes[slot] + UURIFactory.QUOT);
            }
        }
        return prefixes;
    }

    /**
     * Delegates to {@code OAIUtils.encode}, converting any failure into an
     * {@link Hexception} that carries the original message.
     *
     * @param str the text to encode
     * @return the encoded text
     * @throws Hexception if {@code OAIUtils.encode} throws
     */
    private String encode(String str) throws Hexception {
        String encoded;
        try {
            encoded = OAIUtils.encode(str);
        } catch (Exception failure) {
            throw new Hexception(failure.getMessage());
        }
        return encoded;
    }

    /**
     * Formats {@code date} for use in a request, at the given granularity.
     *
     * <p>For {@code GRAN_SECOND} the seconds-resolution pattern
     * ({@code JSONCallback._secDateFormat}) is used with a zero-offset time
     * zone; for any other value the date is formatted as {@code yyyy-MM-dd}
     * in the default time zone.
     *
     * @param i    granularity code ({@code GRAN_DAY} or {@code GRAN_SECOND})
     * @param date the date to format
     * @return the formatted date
     */
    private static String formatDate(int i, Date date) {
        SimpleDateFormat format;
        if (i == GRAN_SECOND) {
            format = new SimpleDateFormat(JSONCallback._secDateFormat);
            // The first constructor argument is the raw offset from GMT in milliseconds.
            // The previous literal 2 (which looks like SimpleTimeZone.UTC_TIME passed by
            // mistake) shifted every timestamp by 2 ms; 0 gives a true zero offset.
            format.setTimeZone(new SimpleTimeZone(0, HftpFileSystem.HFTP_TIMEZONE));
        } else {
            format = new SimpleDateFormat("yyyy-MM-dd");
        }
        return format.format(date);
    }

    /**
     * Parses a date argument.
     *
     * <p>Text containing a {@code :} is parsed with the seconds-resolution
     * pattern ({@code JSONCallback._secDateFormat}) in a zero-offset time
     * zone; anything else is parsed as {@code yyyy-MM-dd} in the default time
     * zone. The result is also logged to standard output.
     *
     * @param str the text to parse; may be {@code null} or empty
     * @return the parsed date, or {@code null} when {@code str} is {@code null} or empty
     * @throws Hexception if {@code str} is non-empty but not a valid date
     */
    private static Date parseDate(String str) throws Hexception {
        Date parsed = null;
        if (str != null && str.length() > 0) {
            SimpleDateFormat format;
            if (str.indexOf(":") >= 0) {
                format = new SimpleDateFormat(JSONCallback._secDateFormat);
                // Raw offset is in milliseconds: 0 means no offset from GMT. The previous
                // literal 2 introduced a 2 ms skew.
                format.setTimeZone(new SimpleTimeZone(0, HftpFileSystem.HFTP_TIMEZONE));
            } else {
                format = new SimpleDateFormat("yyyy-MM-dd");
            }
            try {
                parsed = format.parse(str);
            } catch (ParseException e) {
                throw new Hexception("invalid date: \"" + str + UURIFactory.QUOT);
            }
        }
        // String concatenation prints "null" for an absent date instead of
        // dereferencing it, which previously caused a NullPointerException.
        prtln("parseDate() returning: " + parsed);
        return parsed;
    }

    /**
     * Writes one line to standard output.
     *
     * @param str the text to print
     */
    private static void prtln(String str) {
        System.out.println(str);
    }

    /**
     * Writes one line to standard error.
     *
     * @param str the text to print
     */
    private static void prtlnErr(String str) {
        System.err.println(str);
    }

    /**
     * Parser callback: appends the fatal error's string form to the
     * accumulated {@code xmlerrors} text.
     *
     * @param sAXParseException the problem reported by the parser
     */
    @Override // org.xml.sax.ErrorHandler
    public void fatalError(SAXParseException sAXParseException) {
        xmlerrors = xmlerrors + sAXParseException;
    }

    /**
     * Parser callback: appends the error's string form to the accumulated
     * {@code xmlerrors} text.
     *
     * @param sAXParseException the problem reported by the parser
     */
    @Override // org.xml.sax.ErrorHandler
    public void error(SAXParseException sAXParseException) {
        xmlerrors = xmlerrors + sAXParseException;
    }

    /**
     * Parser callback: appends the warning's string form to the accumulated
     * {@code xmlwarnings} text.
     *
     * @param sAXParseException the problem reported by the parser
     */
    @Override // org.xml.sax.ErrorHandler
    public void warning(SAXParseException sAXParseException) {
        xmlwarnings = xmlwarnings + sAXParseException;
    }
}
