package org.dlese.dpc.services.idmapper;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.hadoop.fs.FsConstants;
import org.archive.net.UURIFactory;
import org.dlese.dpc.util.DpcErrors;
import org.hsqldb.DatabaseURL;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:WEB-INF/lib/jOAI-2.0.9.3.jar:org/dlese/dpc/services/idmapper/PageDesc.class */
public class PageDesc {
    // Not referenced by any code visible in this file.
    // NOTE(review): presumably a cap on the number of content bytes read by the fetch methods - confirm.
    private static final int MAXCONTENTLENGTH = 100000;

    // URL/field type codes. Each value is also the index of its display name in typenames.
    static final int TP_ID = 1;
    static final int TP_PRIMARY_URL = 2;
    static final int TP_MIRROR_URL = 3;
    static final int TP_OTHER_URL = 4;
    static final int TP_EMAIL = 5;
    // Display names indexed by the TP_* codes; index 0 is the "unknown" placeholder.
    static final String[] typenames = {"unknown", "id", "primary URL", "mirror URL", "other URL", "email"};

    // Resource this page belongs to; supplies the id, file name and "URL only" tests.
    ResourceDesc rsd;
    // Type code passed to the constructor. NOTE(review): presumably one of the TP_* values - confirm with callers.
    int urltype;
    // URL of the page, as a string.
    String urlstg;
    // Result code of the last processPage() call (a DpcErrors.IDMAP_* value; 0 while in progress).
    int respcode;
    // Elapsed time of the last HTTP fetch, in seconds (milliseconds * 0.001).
    double resptime;
    // Warnings collected for this page; created lazily by addWarning().
    WarnBuf pageWarnBuf;
    // Response headers; looked up with lower-case names ("content-type", "location").
    HashMap hdrmap;
    // Cleared at the end of processPage(); assigned outside the code visible here.
    String hdrstg;
    // Raw page content; cleared at the end of processPage().
    byte[] contentbuf;
    // Most recent raw line read by getFtpResponse().
    String ftpline;
    // Not referenced by any code visible in this file.
    ScanThread scanThread;
    // CRC32 of the page summary bytes, computed by processPage(); 0 if the page could not be processed.
    long pageChecksum = 0;
    // Text the checksum is computed over; cleared at the end of processPage().
    String pagesummary = null;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.dlese.dpc.services.idmapper.PageDesc$1Sumspec, reason: invalid class name */
    /* loaded from: input_file:WEB-INF/lib/jOAI-2.0.9.3.jar:org/dlese/dpc/services/idmapper/PageDesc$1Sumspec.class */
    /**
     * Describes one kind of page section that contributes to the page summary:
     * a regex marking where the section starts, a regex marking where it ends,
     * and a fallback length used when no end marker is found.
     */
    public class C1Sumspec {
        /** Regex that marks the start of the section. */
        String startstg;
        /** Regex source that marks the end of the section. */
        String endstg;
        /** Maximum section length used when the end pattern does not match. */
        int maxlen;
        /** Compiled form of {@link #endstg}. */
        Pattern endpat;
        /** Enclosing instance, kept explicitly as emitted by the decompiler. */
        private final PageDesc this$0;

        /**
         * @param pageDesc enclosing page descriptor
         * @param str      start regex
         * @param str2     end regex
         * @param i        fallback maximum length of the section
         */
        C1Sumspec(PageDesc pageDesc, String str, String str2, int i) {
            this.this$0 = pageDesc;
            this.startstg = str;
            this.maxlen = i;
            this.endstg = str2;
            // Same flag set as the numeric constant 42 (2 | 8 | 32).
            int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL;
            this.endpat = Pattern.compile(this.endstg, flags);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a descriptor for one URL of a resource. Nothing is fetched here;
     * call {@link #processPage(int, int, int)} to retrieve and checksum the page.
     *
     * @param resourceDesc resource that owns this page
     * @param i            type code of the URL, stored in {@code urltype}
     * @param str          the URL, stored in {@code urlstg}
     */
    public PageDesc(ResourceDesc resourceDesc, int i, String str) {
        this.urlstg = str;
        this.urltype = i;
        this.rsd = resourceDesc;
    }

    /**
     * Returns a multi-line debug description of this page: URL, response code
     * and its message, response time, warnings, header/content sizes and checksum.
     */
    public String toString() {
        StringBuffer out = new StringBuffer();
        out.append("page: urlstg: \"").append(this.urlstg).append("\"\n");
        out.append("  respcode: ").append(this.respcode);
        out.append(" \"").append(DpcErrors.getMessage(this.respcode)).append("\"\n");
        out.append("  resptime: ").append(this.resptime).append("\n");
        out.append("  pageWarnBuf: ").append(this.pageWarnBuf).append("\n");

        if (this.hdrstg != null) {
            out.append("  hdrstg len: ").append(this.hdrstg.length()).append("\n");
        } else {
            out.append("  hdrstg: null\n");
        }

        if (this.contentbuf != null) {
            out.append("  contentbuf len: ").append(this.contentbuf.length).append("\n");
        } else {
            out.append("  contentbuf: null\n");
        }

        out.append("  pageChecksum: ").append(this.pageChecksum);
        return out.toString();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Fetches the page at {@code urlstg}, derives its summary text, and stores
     * the CRC32 of that summary in {@code pageChecksum}.
     *
     * <p>On success {@code respcode} is {@code DpcErrors.IDMAP_OK}. On failure
     * {@code respcode} holds the matching {@code DpcErrors.IDMAP_*} code and a
     * warning is added; {@code IOException} and {@code PageDescException} are
     * handled here rather than propagated. The header string, content buffer
     * and summary are released before returning.
     *
     * @param i  debug level; values of 50 or more enable tracing on standard output
     * @param i2 not used by this method
     * @param i3 forwarded unchanged to the HTTP/FTP fetch methods
     */
    public void processPage(int i, int i2, int i3) {
        final boolean trace = i >= 50;

        this.respcode = 0;
        this.pageChecksum = 0L;
        this.hdrmap = new HashMap();
        if (trace) {
            prtln("processPage: start page: \"" + this.urlstg + UURIFactory.QUOT);
        }

        Exception failure = null;
        try {
            URL pageUrl = new URL(this.urlstg);
            if (pageUrl.getProtocol().equals("http")) {
                getHttpContent(i, i3, this.urlstg);
            } else if (pageUrl.getProtocol().equals(FsConstants.FTP_SCHEME)) {
                getFtpContent(i, i3, this.urlstg);
            } else {
                throwResponse(DpcErrors.IDMAP_UNKNOWN_PROTOCOL, null, this.urlstg, null);
            }

            // Only HTML fetched over HTTP is reduced to a summary; anything
            // else is checksummed in full.
            String contentType = (String) this.hdrmap.get("content-type");
            boolean summarize = this.urlstg.startsWith(DatabaseURL.S_HTTP)
                    && contentType != null
                    && contentType.equalsIgnoreCase("text/html");
            String rawText = new String(this.contentbuf);
            this.pagesummary = summarize ? extractSummary(i, rawText, this.urlstg) : rawText;

            CRC32 checksum = new CRC32();
            checksum.update(this.pagesummary.getBytes());
            this.pageChecksum = checksum.getValue();
            this.respcode = DpcErrors.IDMAP_OK;
        } catch (SocketTimeoutException timeout) {
            failure = timeout;
            this.respcode = DpcErrors.IDMAP_TIMEOUT;
            addWarning(i, this.respcode, this.rsd.getId(), this.rsd.getFileName(), null, this.urlstg, null);
        } catch (ConnectException refused) {
            failure = refused;
            this.respcode = DpcErrors.IDMAP_CONNECT_REFUSED;
            addWarning(i, this.respcode, this.rsd.getId(), this.rsd.getFileName(), null, this.urlstg, null);
        } catch (UnknownHostException unknownHost) {
            failure = unknownHost;
            this.respcode = DpcErrors.IDMAP_UNKNOWN_HOST;
            addWarning(i, this.respcode, this.rsd.getId(), this.rsd.getFileName(), null, this.urlstg, null);
        } catch (IOException other) {
            // Any remaining I/O failure: report it with the exception text.
            failure = other;
            this.respcode = DpcErrors.IDMAP_MISC;
            addWarning(i, this.respcode, this.rsd.getId(), this.rsd.getFileName(), other.toString(), this.urlstg, null);
        } catch (PageDescException pageFailure) {
            // The exception already carries its own response code and details.
            failure = pageFailure;
            this.respcode = pageFailure.respcode;
            addWarning(i, this.respcode, this.rsd.getId(), this.rsd.getFileName(),
                    pageFailure.message, pageFailure.info1, pageFailure.info2);
        }

        if (trace && failure != null) {
            prtln("processPage: caught: " + failure);
            failure.printStackTrace(System.out);
        }

        // Drop the large buffers; only the checksum and response code are kept.
        this.hdrstg = null;
        this.contentbuf = null;
        this.pagesummary = null;
        System.gc();

        if (trace) {
            prtln("processPage: final page: " + this);
        }
    }

    /**
     * Fetches {@code str} over HTTP, following up to 10 redirects, and records
     * the total elapsed time in {@code resptime} (seconds).
     *
     * <p>Each hop resets {@code hdrmap} and calls
     * {@link #getSingleHttpContent(int, int, String)}. Any status other than
     * 200 is treated as a redirect and must carry a "location" header. A
     * location that does not start with the "http://" prefix is resolved
     * against the scheme and host of the URL that was just requested.
     *
     * <p>The elapsed time is recorded on every exit path, including the
     * redirect-limit failure, so {@code resptime} never keeps a stale value
     * from an earlier fetch.
     *
     * @param i   debug level, forwarded to the single-request fetch
     * @param i2  forwarded unchanged to the single-request fetch
     * @param str URL to start from
     * @throws PageDescException if a redirect has no location header
     *         ({@code IDMAP_NOT_FOUND}) or the redirect limit is exceeded
     *         ({@code IDMAP_REDIRECT_LIMIT}), or as thrown by the fetch itself
     * @throws IOException on network failure
     */
    void getHttpContent(int i, int i2, String str) throws PageDescException, IOException {
        final int maxHops = 10;
        final long startMillis = System.currentTimeMillis();
        try {
            int status = 0;
            String target = str;
            for (int hop = 0; hop < maxHops; hop++) {
                this.hdrmap = new HashMap();
                status = getSingleHttpContent(i, i2, target);
                if (status == 200) {
                    break;
                }
                String location = (String) this.hdrmap.get("location");
                if (location == null) {
                    throwResponse(DpcErrors.IDMAP_NOT_FOUND, "redirect, but no new loc", this.urlstg, null);
                }
                if (location.startsWith(DatabaseURL.S_HTTP)) {
                    target = location;
                } else {
                    // Relative location: keep scheme and host of the current URL.
                    int pathStart = target.indexOf("/", DatabaseURL.S_HTTP.length());
                    String hostPart = pathStart < 0 ? target : target.substring(0, pathStart);
                    if (!location.startsWith("/")) {
                        hostPart = hostPart + "/";
                    }
                    target = hostPart + location;
                }
            }
            if (status != 200) {
                throwResponse(DpcErrors.IDMAP_REDIRECT_LIMIT, null, this.urlstg, null);
            }
        } finally {
            this.resptime = 0.001d * (System.currentTimeMillis() - startMillis);
        }
    }

    /*  JADX ERROR: JadxRuntimeException in pass: BlockProcessor
        jadx.core.utils.exceptions.JadxRuntimeException: Unreachable block: B:57:0x0387
        	at jadx.core.dex.visitors.blocks.BlockProcessor.checkForUnreachableBlocks(BlockProcessor.java:88)
        	at jadx.core.dex.visitors.blocks.BlockProcessor.processBlocksTree(BlockProcessor.java:52)
        	at jadx.core.dex.visitors.blocks.BlockProcessor.visit(BlockProcessor.java:44)
        */
    /**
     * Decompiler placeholder: the original body could not be reconstructed, so
     * this method unconditionally throws {@link UnsupportedOperationException}.
     * That exception is not among the types caught by processPage(), so it
     * propagates to the caller of processPage().
     *
     * NOTE(review): getHttpContent() compares the return value with 200 and then
     * reads the "location" entry of hdrmap, so the real implementation presumably
     * performs one HTTP request, fills hdrmap (and the content buffer) and returns
     * the status code - restore the body from the original source before use.
     */
    int getSingleHttpContent(int r8, int r9, java.lang.String r10) throws org.dlese.dpc.services.idmapper.PageDescException, java.io.IOException {
        /*
            Method dump skipped, instructions count: 1110
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.dlese.dpc.services.idmapper.PageDesc.getSingleHttpContent(int, int, java.lang.String):int");
    }

    /*  JADX ERROR: JadxRuntimeException in pass: BlockProcessor
        jadx.core.utils.exceptions.JadxRuntimeException: Unreachable block: B:67:0x03e0
        	at jadx.core.dex.visitors.blocks.BlockProcessor.checkForUnreachableBlocks(BlockProcessor.java:88)
        	at jadx.core.dex.visitors.blocks.BlockProcessor.processBlocksTree(BlockProcessor.java:52)
        	at jadx.core.dex.visitors.blocks.BlockProcessor.visit(BlockProcessor.java:44)
        */
    /**
     * Decompiler placeholder: the original body could not be reconstructed, so
     * this method unconditionally throws {@link UnsupportedOperationException}.
     * That exception is not among the types caught by processPage(), so it
     * propagates to the caller of processPage().
     *
     * NOTE(review): processPage() calls this for "ftp" URLs and afterwards reads
     * contentbuf, so the real implementation presumably retrieves the file over
     * FTP (likely via putFtpRequest/getFtpResponse/chkFtpOk) into contentbuf -
     * restore the body from the original source before use.
     */
    void getFtpContent(int r7, int r8, java.lang.String r9) throws org.dlese.dpc.services.idmapper.PageDescException, java.io.IOException {
        /*
            Method dump skipped, instructions count: 1000
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.dlese.dpc.services.idmapper.PageDesc.getFtpContent(int, int, java.lang.String):void");
    }

    /**
     * Sends one command line to the FTP server: the command followed by CR LF,
     * encoded with the platform default charset, then flushes the stream.
     *
     * @param i            debug level; 50 or more traces the command
     * @param outputStream control-connection output stream
     * @param str          command text without line terminator
     * @throws IOException if writing or flushing fails
     */
    void putFtpRequest(int i, OutputStream outputStream, String str) throws IOException {
        if (i >= 50) {
            prtln("putFtpRequest: \"" + str + UURIFactory.QUOT);
        }
        byte[] wireBytes = (str + "\r\n").getBytes();
        outputStream.write(wireBytes);
        outputStream.flush();
    }

    /**
     * Reads lines from the FTP control stream until one looks like a final
     * reply line: longer than four characters, three leading digits, then a
     * space. Other lines are skipped. The accepted line stays in
     * {@code ftpline}.
     *
     * @param i           debug level; 50 or more enables tracing
     * @param inputStream control-connection input stream
     * @return the three-digit reply code as an integer
     * @throws PageDescException with {@code IDMAP_NO_SERVICE} when no line
     *         could be read before a reply was found
     * @throws IOException on read failure
     */
    int getFtpResponse(int i, InputStream inputStream) throws PageDescException, IOException {
        final boolean trace = i >= 50;
        if (trace) {
            prtln("getFtpResponse: entry");
        }

        boolean isReply;
        do {
            this.ftpline = getRawLine(i, inputStream);
            if (this.ftpline == null) {
                throwResponse(DpcErrors.IDMAP_NO_SERVICE, null, this.urlstg, null);
            }
            isReply = this.ftpline.length() > 4
                    && Character.isDigit(this.ftpline.charAt(0))
                    && Character.isDigit(this.ftpline.charAt(1))
                    && Character.isDigit(this.ftpline.charAt(2))
                    && this.ftpline.charAt(3) == ' ';
        } while (!isReply);

        int hundreds = this.ftpline.charAt(0) - '0';
        int tens = this.ftpline.charAt(1) - '0';
        int ones = this.ftpline.charAt(2) - '0';
        int replyCode = (100 * hundreds) + (10 * tens) + (1 * ones);

        if (trace) {
            prtln("getFtpResponse: ires: " + replyCode + "  ftpline: \"" + this.ftpline + UURIFactory.QUOT);
        }
        return replyCode;
    }

    /**
     * Verifies that an FTP reply code signals success as defined by
     * {@link #isFtpOk(int)}.
     *
     * @param i   FTP reply code to check
     * @param str name of the step being checked; used in the error message
     * @throws PageDescException with {@code IDMAP_FTP_MISC} and the message
     *         "<step> failed" when the code is not a success code
     */
    void chkFtpOk(int i, String str) throws PageDescException {
        if (!isFtpOk(i)) {
            String problem = str + " failed";
            throwResponse(DpcErrors.IDMAP_FTP_MISC, problem, this.urlstg, String.valueOf(i));
        }
    }

    /**
     * Tells whether an FTP reply code is in the range 200 to 399 inclusive.
     *
     * @param i FTP reply code
     * @return {@code true} for codes 200 through 399
     */
    boolean isFtpOk(int i) {
        if (i < 200) {
            return false;
        }
        return i < 400;
    }

    /**
     * Tells whether an FTP reply code is in the range 100 to 199 inclusive.
     *
     * @param i FTP reply code
     * @return {@code true} for codes 100 through 199
     */
    boolean isFtpMark(int i) {
        if (i < 100) {
            return false;
        }
        return i < 200;
    }

    /**
     * Reads the HTTP status line and splits it on runs of spaces into version,
     * status code and reason phrase. The reason phrase is the untouched rest
     * of the line and may itself contain spaces.
     *
     * @param i           debug level; 50 or more traces the parsed parts
     * @param inputStream stream positioned at the start of the response
     * @return a four-element array: version, status, reason, raw line
     * @throws PageDescException with {@code IDMAP_HTTP_STATUSLINE} when no line
     *         is available or any of the three parts is empty
     * @throws IOException on read failure
     */
    String[] getStatusLine(int i, InputStream inputStream) throws PageDescException, IOException {
        String line = getRawLine(i, inputStream);
        if (line == null) {
            throwResponse(DpcErrors.IDMAP_HTTP_STATUSLINE, "no status line", this.urlstg, null);
        }
        final int len = line.length();

        // First token: protocol version.
        int pos = 0;
        while (pos < len && line.charAt(pos) != ' ') {
            pos++;
        }
        String version = line.substring(0, pos);

        while (pos < len && line.charAt(pos) == ' ') {
            pos++;
        }

        // Second token: status code, kept as text.
        int statusStart = pos;
        while (pos < len && line.charAt(pos) != ' ') {
            pos++;
        }
        String status = line.substring(statusStart, pos);

        while (pos < len && line.charAt(pos) == ' ') {
            pos++;
        }

        // Everything that remains is the reason phrase.
        String reason = line.substring(pos);

        boolean incomplete = version.length() == 0 || status.length() == 0 || reason.length() == 0;
        if (incomplete) {
            throwResponse(DpcErrors.IDMAP_HTTP_STATUSLINE, null, this.urlstg, line);
        }

        if (i >= 50) {
            prtln("getStatusLine:");
            prtln("  version: \"" + version + UURIFactory.QUOT);
            prtln("  status: \"" + status + UURIFactory.QUOT);
            prtln("  reason: \"" + reason + UURIFactory.QUOT);
            prtln("  rawline: \"" + line + UURIFactory.QUOT);
        }
        return new String[]{version, status, reason, line};
    }

    /**
     * Reads one HTTP header line and splits it at the first colon.
     *
     * <p>The name is trimmed and lower-cased with a fixed English locale, so
     * lookups such as "content-type" and "location" work regardless of the
     * JVM default locale. With the default locale, a Turkish environment would
     * map 'I' to a dotless 'i' and break those lookups. The value is trimmed.
     *
     * @param i           debug level; 50 or more enables tracing
     * @param inputStream stream positioned at the start of a header line
     * @return a three-element array (lower-case name, value, raw line), or
     *         {@code null} when {@link #getRawLine(int, InputStream)} yields
     *         no line
     * @throws PageDescException with {@code IDMAP_HTTP_HEADER} when the line
     *         has no colon or the name is empty
     * @throws IOException on read failure
     */
    String[] getHeader(int i, InputStream inputStream) throws PageDescException, IOException {
        final boolean trace = i >= 50;

        String line = getRawLine(i, inputStream);
        if (line == null) {
            if (trace) {
                prtln("getHeader: ret null");
            }
            return null;
        }

        int colon = line.indexOf(':');
        if (colon < 0) {
            throwResponse(DpcErrors.IDMAP_HTTP_HEADER, "no colon", this.urlstg, line);
        }

        // Header names are protocol tokens: case-fold them locale-independently.
        String name = line.substring(0, colon).trim().toLowerCase(java.util.Locale.ENGLISH);
        // trim() also removes the optional spaces that follow the colon.
        String value = line.substring(colon + 1).trim();

        if (name.length() == 0) {
            throwResponse(DpcErrors.IDMAP_HTTP_HEADER, "empty name", this.urlstg, line);
        }
        if (trace) {
            prtln("getHeader: \"" + name + "\"    \"" + value + UURIFactory.QUOT);
        }
        return new String[]{name, value, line};
    }

    /**
     * Reads one line from the stream, one byte at a time. A line ends at LF,
     * at CR LF, or at end of stream; the terminator is consumed but not
     * returned. Each byte is widened directly to a {@code char}.
     *
     * <p>Reading stops as soon as the CR LF pair has been consumed. Without
     * that, the loop would run on into the following line and glue several
     * lines together.
     *
     * <p>An empty line and end of stream both produce {@code null}.
     * {@link #getHeader(int, InputStream)} passes that {@code null} on to its
     * caller, so the convention is kept as is.
     *
     * @param i           debug level; 50 or more traces the result
     * @param inputStream stream to read from
     * @return the line without its terminator, or {@code null} if it is empty
     * @throws PageDescException with {@code IDMAP_HTTP_HEADER} ("no eol") when
     *         a CR is not followed by LF
     * @throws IOException on read failure
     */
    String getRawLine(int i, InputStream inputStream) throws PageDescException, IOException {
        StringBuffer line = new StringBuffer();
        while (true) {
            int b = inputStream.read();
            if (b == -1 || b == '\n') {
                break;
            }
            if (b == '\r') {
                if (inputStream.read() != '\n') {
                    throwResponse(DpcErrors.IDMAP_HTTP_HEADER, "no eol", this.urlstg, line.toString());
                }
                // CR LF fully consumed: the line is complete.
                break;
            }
            line.append((char) b);
        }

        String result = line.length() > 0 ? line.toString() : null;
        if (i >= 50) {
            String shown = result == null ? "null" : UURIFactory.QUOT + result + UURIFactory.QUOT;
            prtln("getRawLine: res: " + shown);
        }
        return result;
    }

    /**
     * Builds the text that the page checksum is computed over.
     *
     * <p>If the URL contains one of the resource's "URL only" test strings,
     * the summary is the URL itself. Otherwise the page is scanned for the
     * section starts listed below (head, headings, table cells, definition
     * lists, labelled lines, large fonts). Each section runs to the end of its
     * end-pattern match, or for the section's maximum length when no end is
     * found. Volatile fragments (image tags, runs of five or more digits,
     * session/cookie attributes) are stripped repeatedly until none remain,
     * and the cleaned sections are concatenated. Scanning resumes at the end
     * of each section. An empty result falls back to the URL.
     *
     * @param i    debug level; 50 or more enables tracing
     * @param str  page content as text
     * @param str2 URL of the page
     * @return the summary text, never empty unless the URL is empty
     * @throws PageDescException with {@code IDMAP_MISC} if a start match
     *         cannot be attributed to any section specification
     */
    String extractSummary(int i, String str, String str2) throws PageDescException {
        final boolean trace = i >= 50;
        // Equivalent to the numeric flag value 42.
        final int sectionFlags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL;

        if (trace) {
            prtln("extractSummary.entry: urlstg: " + str2);
        }

        C1Sumspec[] specs = {
            new C1Sumspec(this, "<head", "</head>", 1000),
            new C1Sumspec(this, "<h1", "</h", 200),
            new C1Sumspec(this, "<h2", "</h", 200),
            new C1Sumspec(this, "<h3", "</h", 200),
            new C1Sumspec(this, "<h4", "</h", 200),
            new C1Sumspec(this, "<table", ">", 200),
            new C1Sumspec(this, "<th", "</t|<t", 200),
            new C1Sumspec(this, "<td", "</t|<t", 200),
            new C1Sumspec(this, "<dt", "</d|<d", 200),
            new C1Sumspec(this, "<dd", "</d|<d", 200),
            new C1Sumspec(this, "title:", "\\.", 200),
            new C1Sumspec(this, "overview:", "\\.", 200),
            new C1Sumspec(this, "purpose:", "\\.", 200),
            new C1Sumspec(this, "font.{0,100}size=[\" ]?[3-7]", "</font>", 200),
            new C1Sumspec(this, "font.{0,100}size=[\" ]?\\+", "</font>", 200)
        };
        String[] excludes = {"<img[^>]*>", "<img[^>]*$", "\\d\\d\\d\\d\\d+", "session[^>]*>", "cookie[^>]*>"};

        // One alternation covering every fragment to strip.
        StringBuffer excludeAlt = new StringBuffer();
        for (int k = 0; k < excludes.length; k++) {
            if (k > 0) {
                excludeAlt.append("|");
            }
            excludeAlt.append(excludes[k]);
        }
        String excludeRegex = excludeAlt.toString();
        if (trace) {
            prtln("extractSummary: allexcludestg: \"" + excludeRegex + UURIFactory.QUOT);
        }
        Pattern excludePattern = Pattern.compile(excludeRegex, Pattern.CASE_INSENSITIVE);

        // Does the resource ask for this URL to be compared by URL only?
        boolean urlOnly = false;
        for (int k = 0; !urlOnly && k < this.rsd.getUrlOnlyTestsLength(); k++) {
            urlOnly = str2.indexOf(this.rsd.getUrlOnlyTests(k)) >= 0;
        }

        StringBuffer summary = new StringBuffer();
        if (urlOnly) {
            summary.append(str2);
        } else {
            // One capturing group per specification, in array order, so the
            // index of the matching group identifies the specification.
            StringBuffer startAlt = new StringBuffer();
            for (int k = 0; k < specs.length; k++) {
                if (k > 0) {
                    startAlt.append("|");
                }
                startAlt.append(DefaultExpressionEngine.DEFAULT_INDEX_START)
                        .append(specs[k].startstg)
                        .append(DefaultExpressionEngine.DEFAULT_INDEX_END);
            }
            String startRegex = startAlt.toString();
            if (trace) {
                prtln("extractSummary: allstartstg: \"" + startRegex + UURIFactory.QUOT);
            }

            Matcher startMatcher = Pattern.compile(startRegex, sectionFlags).matcher(str);
            int searchFrom = 0;
            while (startMatcher.find(searchFrom)) {
                int group = -1;
                for (int g = 1; g <= specs.length; g++) {
                    if (startMatcher.group(g) != null) {
                        group = g;
                        break;
                    }
                }
                if (group < 0) {
                    throwResponse(DpcErrors.IDMAP_MISC, "invalid sumspecs", str2, String.valueOf(group));
                }
                C1Sumspec spec = specs[group - 1];

                int sectionStart = startMatcher.start();
                Matcher endMatcher = spec.endpat.matcher(str);
                int sectionEnd;
                if (endMatcher.find(startMatcher.end())) {
                    sectionEnd = endMatcher.end();
                } else {
                    sectionEnd = Math.min(sectionStart + spec.maxlen, str.length());
                }
                String section = str.substring(sectionStart, sectionEnd);
                if (trace) {
                    prtln("extractSummary: istart " + sectionStart + "  iend: " + sectionEnd);
                    prtln("    extract: \"" + section + UURIFactory.QUOT);
                }

                // Remove the first excluded fragment and rescan from the
                // beginning until nothing matches any more.
                Matcher junk = excludePattern.matcher(section);
                while (junk.find()) {
                    int junkStart = junk.start();
                    int junkEnd = junk.end();
                    if (trace) {
                        prtln("extractSummary: excluded substring: \"" + section.substring(junkStart, junkEnd) + UURIFactory.QUOT);
                    }
                    section = section.substring(0, junkStart) + section.substring(junkEnd);
                    junk = excludePattern.matcher(section);
                }

                summary.append(section);
                searchFrom = sectionEnd;
            }
        }

        if (summary.length() == 0) {
            summary.append(str2);
            if (trace) {
                prtln("extractSummary: extract was empty; using url");
            }
        }
        if (trace) {
            prtln("extractSummary: final extract: \"" + summary + UURIFactory.QUOT);
        }
        return summary.toString();
    }

    /**
     * Always throws a {@link PageDescException} built from the arguments; it
     * never returns normally.
     *
     * @param i    response code for the exception
     * @param str  message, may be {@code null}
     * @param str2 first piece of additional information
     * @param str3 second piece of additional information
     * @throws PageDescException always
     */
    void throwResponse(int i, String str, String str2, String str3) throws PageDescException {
        PageDescException failure = new PageDescException(i, str, str2, str3);
        throw failure;
    }

    /**
     * Appends a warning to this page's warning buffer, creating the buffer on
     * first use.
     *
     * @param i    debug level; 10 or more prints the warning
     * @param i2   first argument of the {@code Warning} constructor
     * @param str  second argument of the {@code Warning} constructor
     * @param str2 third argument of the {@code Warning} constructor
     * @param str3 fourth argument of the {@code Warning} constructor
     * @param str4 fifth argument of the {@code Warning} constructor
     * @param str5 sixth argument of the {@code Warning} constructor
     */
    void addWarning(int i, int i2, String str, String str2, String str3, String str4, String str5) {
        WarnBuf buffer = this.pageWarnBuf;
        if (buffer == null) {
            buffer = new WarnBuf();
            this.pageWarnBuf = buffer;
        }
        Warning entry = new Warning(i2, str, str2, str3, str4, str5);
        if (i >= 10) {
            prtln("PageDesc: add warning: " + entry);
        }
        buffer.add(entry);
    }

    /**
     * Writes the text to standard output without a line terminator.
     *
     * @param str text to print
     */
    static void prtstg(String str) {
        String text = str;
        System.out.print(text);
    }

    /**
     * Writes the text to standard output followed by the platform line
     * terminator.
     *
     * @param str text to print
     */
    static void prtln(String str) {
        String text = str;
        System.out.println(text);
    }
}
