package org.archive.io.arc;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpParser;
import org.apache.commons.httpclient.StatusLine;
import org.apache.commons.httpclient.util.EncodingUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.oozie.cli.OozieCLI;
import org.archive.io.ArchiveFileConstants;
import org.archive.io.ArchiveRecord;
import org.archive.io.ArchiveRecordHeader;
import org.archive.io.RecoverableIOException;
import org.archive.io.arc.ARCConstants;
import org.archive.net.UURIFactory;
import org.archive.util.InetAddressUtil;
import org.archive.util.TextUtils;
import org.quartz.impl.jdbcjobstore.Constants;

/* loaded from: input_file:WEB-INF/lib/heritrix-commons-3.1.0.jar:org/archive/io/arc/ARCRecord.class */
public class ARCRecord extends ArchiveRecord implements ARCConstants {
    /** Parsed HTTP status line; stays null unless readHttpHeader() parses one successfully. */
    private StatusLine httpStatus;
    /**
     * Buffered copy of the HTTP status line and headers produced by readHttpHeader().
     * The read() overrides serve bytes from it first and null it out once it is drained.
     */
    private InputStream httpHeaderStream;
    /** HTTP headers parsed by readHttpHeader(); null when the HTTP header was never parsed. */
    private Header[] httpHeaders;
    /** Names of the ARC metadata-line fields, in the order they are matched against the parsed values. */
    private final String[] headerFieldNameKeysArray;
    /** List view over {@link #headerFieldNameKeysArray}, handed to computeMetaData() as the key list. */
    private final List<String> headerFieldNameKeys;
    /** Set to -1 by the constructors; readHttpHeader() adds line lengths to it as it reads header lines. */
    public long httpHeaderBytesRead;
    // NOTE(review): the five counters below are never assigned in this class;
    // presumably they are filled in by whatever reader creates the record -- confirm.
    public long recordDeclaredLength;
    public long compressedBytes;
    public long uncompressedBytes;
    public long httpPayloadDeclaredLength;
    public long httpPayloadActualLength;
    /** Problems noticed while parsing this record; readHttpHeader() appends to it. */
    public List<ARCConstants.ArcRecordErrors> errors;
    /** Tokens of the header line most recently read by getTokenizedHeaderLine(), joined with single spaces. */
    private String headerString;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/heritrix-commons-3.1.0.jar:org/archive/io/arc/ARCRecord$DeletedARCRecordIOException.class */
    /**
     * Recoverable I/O error thrown by readHttpHeader() when the line found where an HTTP
     * status line was expected starts with the "deleted" marker instead.
     */
    public static class DeletedARCRecordIOException extends RecoverableIOException {
        private static final long serialVersionUID = 1;

        /**
         * @param str the offending line, used as the exception message
         */
        public DeletedARCRecordIOException(String str) {
            super(str);
        }
    }

    /**
     * Returns the header line most recently tokenized by this record, re-joined with
     * single spaces.
     *
     * @return the joined header tokens, or null if this record never tokenized a header
     *         line (e.g. it was constructed from an already-built header)
     */
    public String getHeaderString() {
        return this.headerString;
    }

    /**
     * Creates a record over a stream whose metadata line has already been parsed.
     * Delegates to the six-argument constructor with 0, true, false and true, so the
     * HTTP header is parsed eagerly.
     *
     * @param inputStream stream positioned at the record content
     * @param archiveRecordHeader pre-built header for this record
     * @throws IOException if reading the HTTP header fails
     */
    public ARCRecord(InputStream inputStream, ArchiveRecordHeader archiveRecordHeader) throws IOException {
        this(inputStream, archiveRecordHeader, 0, true, false, true);
    }

    public ARCRecord(InputStream inputStream, ArchiveRecordHeader archiveRecordHeader, int i, boolean z, boolean z2, boolean z3) throws IOException {
        super(inputStream, archiveRecordHeader, i, z, z2);
        this.httpStatus = null;
        this.httpHeaderStream = null;
        this.httpHeaders = null;
        this.headerFieldNameKeysArray = new String[]{ArchiveFileConstants.URL_FIELD_KEY, ARCConstants.IP_HEADER_FIELD_KEY, ArchiveFileConstants.DATE_FIELD_KEY, "content-type", "length"};
        this.headerFieldNameKeys = Arrays.asList(this.headerFieldNameKeysArray);
        this.httpHeaderBytesRead = -1L;
        this.errors = new ArrayList();
        if (z3) {
            this.httpHeaderStream = readHttpHeader();
        }
    }

    public ARCRecord(InputStream inputStream, String str, long j, boolean z, boolean z2, boolean z3, boolean z4, String str2) throws IOException {
        super(inputStream, null, 0, z, z2);
        this.httpStatus = null;
        this.httpHeaderStream = null;
        this.httpHeaders = null;
        this.headerFieldNameKeysArray = new String[]{ArchiveFileConstants.URL_FIELD_KEY, ARCConstants.IP_HEADER_FIELD_KEY, ArchiveFileConstants.DATE_FIELD_KEY, "content-type", "length"};
        this.headerFieldNameKeys = Arrays.asList(this.headerFieldNameKeysArray);
        this.httpHeaderBytesRead = -1L;
        this.errors = new ArrayList();
        setHeader(parseHeaders(inputStream, str, j, z2, z4, str2));
        if (z3) {
            this.httpHeaderStream = readHttpHeader();
        }
    }

    /**
     * Convenience constructor: delegates to the eight-argument constructor with the
     * last two arguments set to false and null.
     *
     * @throws IOException if the metadata line or the HTTP header cannot be read
     */
    public ARCRecord(InputStream inputStream, String str, long j, boolean z, boolean z2, boolean z3) throws IOException {
        this(inputStream, str, j, z, z2, z3, false, null);
    }

    /**
     * Reads the record's metadata line from the stream and builds the record header.
     * When {@code j} is 0 and {@code z2} is set, two more lines are consumed: the first
     * supplies the version ("token0.token1"), the second is discarded. The bytes used by
     * those two extra lines become the body offset; otherwise the body offset is 0.
     *
     * @param inputStream stream positioned at the metadata line
     * @param str passed through to the metadata object
     * @param j absolute offset of the record
     * @param z not used by this method
     * @param z2 whether the extra version lines should be parsed when {@code j} is 0
     * @param str2 version to use when it is not read from the stream
     * @return the header built from the parsed values
     * @throws IOException if a header line cannot be read or parsed
     */
    private ArchiveRecordHeader parseHeaders(InputStream inputStream, String str, long j, boolean z, boolean z2, String str2) throws IOException {
        final List<String> recordFields = new ArrayList<>(20);
        getTokenizedHeaderLine(inputStream, recordFields);

        String version = str2;
        int extraHeaderBytes = 0;
        if (j == 0 && z2) {
            final List<String> versionFields = new ArrayList<>(20);
            extraHeaderBytes = getTokenizedHeaderLine(inputStream, versionFields);
            version = versionFields.get(0) + "." + versionFields.get(1);
            extraHeaderBytes += getTokenizedHeaderLine(inputStream, null);
        }
        setBodyOffset(extraHeaderBytes);
        return computeMetaData(this.headerFieldNameKeys, recordFields, version, j, str);
    }

    /**
     * Reads one newline-terminated header line from the stream and splits it on spaces.
     * Newlines seen while the current token is empty do not end the line; reading simply
     * continues. Unless the record is strict, a space that directly follows another space
     * is ignored rather than producing an empty token.
     *
     * <p>Side effect: {@code headerString} is overwritten with the tokens joined by spaces.
     * NOTE(review): when {@code list} is null the field receives whatever
     * {@code StringUtils.join} yields for a null collection -- confirm that is intended.
     *
     * @param inputStream stream to read from, one byte at a time
     * @param list receives the tokens; may be null to read and discard the line
     * @return number of bytes consumed from the stream, including the terminating newline
     * @throws IOException if EOF is hit first, the line exceeds 102400 bytes, or the
     *         resulting token count is below 3 or above 100
     */
    private int getTokenizedHeaderLine(InputStream inputStream, List<String> list) throws IOException {
        final int maxLineLength = 102400;
        final StringBuilder token = new StringBuilder(2068);
        int bytesRead = 0;
        int current = -1;
        for (;;) {
            final int previous = current;
            final int raw = inputStream.read();
            if (raw == -1) {
                throw new RecoverableIOException("Hit EOF before header EOL.");
            }
            current = raw & 255;
            bytesRead++;
            if (bytesRead > maxLineLength) {
                throw new IOException("Header line longer than max allowed  -- " + maxLineLength + " -- or passed buffer doesn't contain a line (Read: " + token.length() + ").  Here's some of what was read: " + token.substring(0, Math.min(token.length(), 256)));
            }

            if (current == '\n') {
                if (token.length() == 0) {
                    // Nothing pending in the current token: keep reading.
                    continue;
                }
                if (list != null) {
                    list.add(token.toString());
                    final int tokenCount = list.size();
                    if (tokenCount < 3 || tokenCount > 100) {
                        throw new IOException("Unparseable header line: " + list);
                    }
                }
                this.headerString = StringUtils.join(list, " ");
                return bytesRead;
            }

            if (current != ' ') {
                token.append((char) current);
                continue;
            }

            // A space ends the current token, except for repeated spaces in lenient mode.
            if (isStrict() || previous != ' ') {
                if (list != null) {
                    list.add(token.toString());
                }
                token.setLength(0);
            }
        }
    }

    /**
     * Pairs the metadata field names with the values parsed from the metadata line and
     * wraps them in an {@link ARCRecordMetaData}.
     *
     * <p>When the counts differ and the record is not strict, three repairs are tried in
     * order: re-joining a URL that was split on spaces, merging a content type that was
     * split before a "charset=" parameter, and inserting "-" for a missing content type.
     * A successful repair is reported on stderr together with the values as originally
     * read.
     *
     * @param list field names, in metadata-line order
     * @param list2 field values parsed from the metadata line
     * @param str value stored under the "version" key
     * @param j absolute offset of the record, stored in the metadata
     * @param str2 first argument of the {@code ARCRecordMetaData} constructor
     * @return the metadata for this record
     * @throws IOException if the value count still does not match the key count
     */
    private ARCRecordMetaData computeMetaData(List<String> list, List<String> list2, String str, long j, String str2) throws IOException {
        final int expected = list.size();
        if (expected != list2.size()) {
            // Keep the values as read so the warning below can show both forms.
            final List<String> originalValues = list2;
            if (!isStrict()) {
                list2 = fixSpaceInURL(list2, expected);
                if (expected != list2.size()) {
                    if (list2.size() == expected + 1 && list2.get(4).toLowerCase().startsWith("charset=")) {
                        // "type; charset=x" was split on its space: glue the two halves together.
                        final List<String> merged = new ArrayList<>(expected);
                        merged.add(list2.get(0));
                        merged.add(list2.get(1));
                        merged.add(list2.get(2));
                        merged.add(list2.get(3) + list2.get(4));
                        merged.add(list2.get(5));
                        list2 = merged;
                    } else if (list2.size() + 1 == expected && isLegitimateIPValue(list2.get(1)) && isDate(list2.get(2)) && isNumber(list2.get(3))) {
                        // Content type is missing altogether: substitute the "-" placeholder.
                        final List<String> padded = new ArrayList<>(expected);
                        padded.add(list2.get(0));
                        padded.add(list2.get(1));
                        padded.add(list2.get(2));
                        padded.add("-");
                        padded.add(list2.get(3));
                        list2 = padded;
                    }
                }
            }
            if (expected != list2.size()) {
                throw new IOException("Size of field name keys does not match count of field values: " + list2);
            }
            System.err.println(Level.WARNING.toString() + "Fixed spaces in metadata line at offset " + j + " Original: " + originalValues + ", New: " + list2);
        }

        final HashMap<String, Object> fields = new HashMap<>(expected + 2);
        for (int idx = 0; idx < expected; idx++) {
            fields.put(list.get(idx), list2.get(idx));
        }

        // Tabs in the URL are percent-encoded. The pattern is a literal tab; the decompiled
        // source borrowed the same value from an unrelated library constant.
        final String url = (String) fields.get(ArchiveFileConstants.URL_FIELD_KEY);
        if (url != null && url.indexOf('\t') >= 0) {
            fields.put(ArchiveFileConstants.URL_FIELD_KEY, TextUtils.replaceAll("\t", url, "%09"));
        }
        fields.put("version", str);
        fields.put(ArchiveFileConstants.ABSOLUTE_OFFSET_KEY, Long.valueOf(j));
        return new ARCRecordMetaData(str2, fields);
    }

    /**
     * Tries to repair a metadata line whose URL contained unescaped spaces and was
     * therefore split into too many tokens. If the fourth-from-last token looks like an
     * IP value and the third-from-last looks like a date, every token before those last
     * four is joined with the escaped-space sequence to form the URL again.
     *
     * @param list tokens as parsed from the metadata line
     * @param i expected number of tokens
     * @return a new list holding the rebuilt URL followed by the last four tokens, or
     *         {@code list} itself when no repair applies
     */
    private List<String> fixSpaceInURL(List<String> list, int i) {
        final int count = list.size();
        if (count <= i || count < 4) {
            return list;
        }
        final int tailStart = count - 4;
        final boolean tailLooksValid = isDate(list.get(count - 3)) && isLegitimateIPValue(list.get(tailStart));
        if (!tailLooksValid) {
            return list;
        }

        final List<String> repaired = new ArrayList<>(i);
        final StringBuilder url = new StringBuilder();
        for (int idx = 0; idx < tailStart; idx++) {
            if (idx > 0) {
                url.append(UURIFactory.ESCAPED_SPACE);
            }
            url.append(list.get(idx));
        }
        repaired.add(url.toString());
        repaired.addAll(list.subList(tailStart, count));
        return repaired;
    }

    /**
     * Tells whether the token has the shape of a 14-digit timestamp.
     *
     * @param str token to test
     * @return true if {@code str} is exactly 14 characters long and all of them are digits
     */
    private boolean isDate(String str) {
        return str.length() == 14 && isNumber(str);
    }

    /**
     * Tells whether every character of the token is a digit.
     *
     * @param str token to test
     * @return false as soon as a non-digit character is found; true otherwise, which
     *         includes the empty string
     */
    private boolean isNumber(String str) {
        for (char ch : str.toCharArray()) {
            if (!Character.isDigit(ch)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the token is acceptable in the IP field of the metadata line.
     *
     * @param str token to test
     * @return true for the "-" placeholder or for a value that fully matches
     *         {@code InetAddressUtil.IPV4_QUADS}
     */
    private boolean isLegitimateIPValue(String str) {
        return "-".equals(str) || InetAddressUtil.IPV4_QUADS.matcher(str).matches();
    }

    /**
     * Discards whatever is left of the buffered HTTP header so that the next read
     * returns content from the underlying record. Does nothing when no header is buffered.
     *
     * @throws IOException if reading from the buffered header fails
     */
    public void skipHttpHeader() throws IOException {
        if (this.httpHeaderStream == null) {
            return;
        }
        int pending = this.httpHeaderStream.available();
        while (pending > 0) {
            read(new byte[pending], 0, pending);
            // read() clears the field once the buffer is drained, so re-check it each pass.
            pending = (this.httpHeaderStream == null) ? 0 : this.httpHeaderStream.available();
        }
    }

    /**
     * Writes whatever is left of the buffered HTTP header to {@code System.out},
     * consuming it in the process. Does nothing when no header is buffered.
     *
     * @throws IOException if reading from the buffered header fails
     */
    public void dumpHttpHeader() throws IOException {
        if (this.httpHeaderStream == null) {
            return;
        }
        int pending = this.httpHeaderStream.available();
        while (pending > 0) {
            final byte[] chunk = new byte[pending];
            final int count = read(chunk, 0, pending);
            System.out.write(chunk, 0, count);
            // read() clears the field once the buffer is drained, so re-check it each pass.
            pending = (this.httpHeaderStream == null) ? 0 : this.httpHeaderStream.available();
        }
    }

    /**
     * Reads the HTTP status line and headers from the underlying stream, parses them,
     * and returns them buffered so callers can still read them as part of the record.
     *
     * <p>Side effects: sets {@code httpStatus} and {@code httpHeaders}, updates
     * {@code httpHeaderBytesRead}, appends to {@code errors}, and stores the content
     * start position and status code on the record metadata. If the stream ends in the
     * middle of the headers the record is marked truncated and end-of-record is set;
     * whatever complete lines were read are still parsed.
     *
     * @return a stream over the raw status line and header bytes, reset to its start, or
     *         null when the URL does not start with "http" or the record is not longer
     *         than {@code MIN_HTTP_HEADER_LENGTH}
     * @throws DeletedARCRecordIOException if the first line carries the deleted marker
     * @throws IOException if the status line or a header line cannot be read
     */
    private InputStream readHttpHeader() throws IOException {
        Logger logger = Logger.getLogger(getClass().getName());
        ArchiveRecordHeader header = getHeader();
        if (!header.getUrl().startsWith("http") || header.getLength() <= MIN_HTTP_HEADER_LENGTH) {
            return null;
        }

        byte[] statusBytes = HttpParser.readRawLine(getIn());
        int statusEol = getEolCharsCount(statusBytes);
        if (statusEol <= 0) {
            throw new RecoverableIOException("Failed to read http status where one was expected: " + (statusBytes == null ? "" : new String(statusBytes)));
        }
        String statusText = EncodingUtil.getString(statusBytes, 0, statusBytes.length - statusEol, "ISO-8859-1");
        if (statusText == null || !StatusLine.startsWithHTTP(statusText)) {
            // Guard the dereference: the outer test lets a null status text through.
            if (statusText != null && statusText.startsWith(Constants.STATE_DELETED)) {
                throw new DeletedARCRecordIOException(statusText);
            }
            this.errors.add(ARCConstants.ArcRecordErrors.HTTP_STATUS_LINE_INVALID);
        }
        try {
            this.httpStatus = new StatusLine(statusText);
        } catch (IOException e) {
            // Best effort: note the problem and carry on reading the headers.
            logger.warning(e.getMessage() + " at offset: " + header.getOffset());
            this.errors.add(ARCConstants.ArcRecordErrors.HTTP_STATUS_LINE_EXCEPTION);
        }

        ByteArrayOutputStream rawHeader = new ByteArrayOutputStream(statusBytes.length + 4096);
        rawHeader.write(statusBytes);
        while (true) {
            byte[] lineBytes = HttpParser.readRawLine(getIn());
            int lineEol = getEolCharsCount(lineBytes);
            if (lineEol <= 0) {
                if (getIn().available() != 0) {
                    throw new IOException("Failed reading http headers: " + (lineBytes != null ? new String(lineBytes) : null));
                }
                // NOTE(review): the status-line length is what gets added here, as in
                // the original code -- confirm that is the intended accounting.
                this.httpHeaderBytesRead += statusBytes.length;
                logger.warning("HTTP header truncated at offset: " + header.getOffset());
                this.errors.add(ARCConstants.ArcRecordErrors.HTTP_HEADER_TRUNCATED);
                setEor(true);
                // Nothing more can be read; without this exit the loop never terminates.
                break;
            }
            this.httpHeaderBytesRead += lineBytes.length;
            rawHeader.write(lineBytes);
            if (lineBytes.length - lineEol <= 0) {
                // A line holding only its EOL characters ends the header block.
                break;
            }
        }

        byte[] headerBytes = rawHeader.toByteArray();
        getMetaData().setContentBegin(headerBytes.length);
        ByteArrayInputStream headerStream = new ByteArrayInputStream(headerBytes);
        if (!headerStream.markSupported()) {
            throw new IOException("ByteArrayInputStream does not support mark");
        }
        headerStream.mark(headerBytes.length);
        // Step over the status line so the parser starts at the first header line.
        headerStream.read(statusBytes, 0, statusBytes.length);
        this.httpHeaders = HttpParser.parseHeaders(headerStream, "ISO-8859-1");
        getMetaData().setStatusCode(Integer.toString(getStatusCode()));
        headerStream.reset();
        return headerStream;
    }

    /**
     * Returns the HTTP status code of this record.
     *
     * @return the code from the parsed status line, or -1 when no status line was parsed
     */
    public int getStatusCode() {
        return (this.httpStatus != null) ? this.httpStatus.getStatusCode() : -1;
    }

    /**
     * Counts the end-of-line bytes that terminate a raw line.
     *
     * @param bArr raw line bytes; may be null
     * @return 2 when the line ends in CR LF, 1 when it ends in a bare LF, and 0 when it
     *         is null, empty, or does not end in LF
     */
    private int getEolCharsCount(byte[] bArr) {
        if (bArr == null || bArr.length < 1 || bArr[bArr.length - 1] != '\n') {
            return 0;
        }
        final boolean precededByCr = bArr.length >= 2 && bArr[bArr.length - 2] == '\r';
        return precededByCr ? 2 : 1;
    }

    /**
     * Returns this record's header cast to {@link ARCRecordMetaData}.
     *
     * @return the record header as ARC metadata
     */
    public ARCRecordMetaData getMetaData() {
        return (ARCRecordMetaData) getHeader();
    }

    /**
     * Returns the HTTP headers parsed for this record.
     *
     * @return the parsed headers, or null when the HTTP header was never parsed
     */
    public Header[] getHttpHeaders() {
        return this.httpHeaders;
    }

    /**
     * Returns the errors recorded while parsing this record.
     *
     * @return the internal, mutable error list (not a copy)
     */
    public List<ARCConstants.ArcRecordErrors> getErrors() {
        return this.errors;
    }

    /**
     * Tells whether any error was recorded while parsing this record.
     *
     * @return true when the error list is not empty
     */
    public boolean hasErrors() {
        return !this.errors.isEmpty();
    }

    /**
     * Reads a single byte, serving the buffered HTTP header first and falling back to the
     * underlying record once that buffer is empty or absent. Bytes served from the buffer
     * advance the record position by one; the buffer is dropped as soon as it is drained.
     *
     * @return the byte read, or whatever the superclass returns once the header is exhausted
     * @throws IOException if the underlying read fails
     */
    @Override // org.archive.io.ArchiveRecord, java.io.InputStream
    public int read() throws IOException {
        final InputStream headerStream = this.httpHeaderStream;
        if (headerStream == null || headerStream.available() <= 0) {
            return super.read();
        }
        final int value = headerStream.read();
        if (headerStream.available() <= 0) {
            this.httpHeaderStream = null;
        }
        incrementPosition();
        return value;
    }

    /**
     * Reads up to {@code i2} bytes, serving the buffered HTTP header first and falling
     * back to the underlying record once that buffer is empty or absent. A single call
     * never mixes header bytes with record bytes. Bytes served from the buffer advance
     * the record position; the buffer is dropped as soon as it is drained.
     *
     * <p>A zero-length request made while header bytes are still buffered returns 0 and
     * leaves the position untouched, as the {@link InputStream} contract requires,
     * instead of reporting end of stream.
     *
     * @param bArr destination buffer
     * @param i offset in {@code bArr} at which to store the bytes
     * @param i2 maximum number of bytes to read
     * @return number of bytes read, or whatever the superclass returns once the header is exhausted
     * @throws IOException if the underlying read fails
     */
    @Override // org.archive.io.ArchiveRecord, java.io.InputStream
    public int read(byte[] bArr, int i, int i2) throws IOException {
        if (this.httpHeaderStream == null || this.httpHeaderStream.available() <= 0) {
            return super.read(bArr, i, i2);
        }
        if (i2 == 0) {
            // Header bytes remain, so this is not EOF and no position change is due.
            return 0;
        }
        final int wanted = Math.min(i2, this.httpHeaderStream.available());
        final int count = this.httpHeaderStream.read(bArr, i, wanted);
        if (this.httpHeaderStream.available() <= 0) {
            this.httpHeaderStream = null;
        }
        if (count > 0) {
            // Never move the position backwards on a -1 result.
            incrementPosition(count);
        }
        return count;
    }

    /**
     * Returns the content-begin position stored on this record's metadata.
     *
     * @return the value of {@code getContentBegin()} on the metadata
     */
    public int getBodyOffset() {
        return getMetaData().getContentBegin();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the IP value to report for the given header, preferring the one held by
     * ARC metadata.
     *
     * @param archiveRecordHeader header to inspect
     * @return the metadata's IP when the header is ARC metadata with a non-null IP,
     *         otherwise the superclass result
     */
    @Override // org.archive.io.ArchiveRecord
    public String getIp4Cdx(ArchiveRecordHeader archiveRecordHeader) {
        if (archiveRecordHeader instanceof ARCRecordMetaData) {
            final String ip = ((ARCRecordMetaData) archiveRecordHeader).getIp();
            if (ip != null) {
                return ip;
            }
        }
        return super.getIp4Cdx(archiveRecordHeader);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the status code to report for the given header, preferring the one held by
     * ARC metadata.
     *
     * @param archiveRecordHeader header to inspect
     * @return the metadata's status code when the header is ARC metadata with a non-null
     *         status code, otherwise the superclass result
     */
    @Override // org.archive.io.ArchiveRecord
    public String getStatusCode4Cdx(ArchiveRecordHeader archiveRecordHeader) {
        if (archiveRecordHeader instanceof ARCRecordMetaData) {
            final String statusCode = ((ARCRecordMetaData) archiveRecordHeader).getStatusCode();
            if (statusCode != null) {
                return statusCode;
            }
        }
        return super.getStatusCode4Cdx(archiveRecordHeader);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the digest to report for the given header, preferring the one held by
     * ARC metadata.
     *
     * @param archiveRecordHeader header to inspect
     * @return the metadata's digest when the header is ARC metadata with a non-null
     *         digest, otherwise the superclass result
     */
    @Override // org.archive.io.ArchiveRecord
    public String getDigest4Cdx(ArchiveRecordHeader archiveRecordHeader) {
        if (archiveRecordHeader instanceof ARCRecordMetaData) {
            final String digest = ((ARCRecordMetaData) archiveRecordHeader).getDigest();
            if (digest != null) {
                return digest;
            }
        }
        return super.getDigest4Cdx(archiveRecordHeader);
    }
}
