package org.apache.pdfbox;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.Map;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.apache.pdfbox.util.PDFText2HTML;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.tika.metadata.Metadata;

/* loaded from: input_file:WEB-INF/lib/pdfbox-1.8.3.jar:org/apache/pdfbox/ExtractText.class */
/**
 * Command-line tool that extracts the text of a PDF document, plus the text of any
 * PDF files embedded in it, and writes the result to a file or to the console.
 *
 * <p>The supported options are listed by {@link #usage()}.</p>
 */
public class ExtractText {
    private static final String PASSWORD = "-password";
    private static final String ENCODING = "-encoding";
    private static final String CONSOLE = "-console";
    private static final String START_PAGE = "-startPage";
    private static final String END_PAGE = "-endPage";
    private static final String SORT = "-sort";
    private static final String IGNORE_BEADS = "-ignoreBeads";
    private static final String DEBUG = "-debug";
    private static final String HTML = "-html";
    private static final String FORCE = "-force";
    private static final String NONSEQ = "-nonSeq";

    /** Subtype that marks an embedded file as a PDF whose text is extracted as well. */
    private static final String PDF_SUBTYPE = "application/pdf";

    /** When set, progress and timing messages are printed to {@code System.err}. */
    private boolean debug = false;

    private ExtractText() {
    }

    /**
     * Entry point: runs a text extraction configured by the command-line arguments.
     *
     * @param strArr the command-line arguments
     * @throws Exception if loading, decrypting or extracting the document fails
     */
    public static void main(String[] strArr) throws Exception {
        new ExtractText().startExtraction(strArr);
    }

    /**
     * Parses the command-line arguments, loads the PDF and writes its text.
     *
     * <p>The first argument that is not an option is taken as the PDF file; any later
     * one is taken as the output file. If no PDF file is given, the usage text is
     * printed and the JVM exits.</p>
     *
     * @param strArr the command-line arguments
     * @throws IOException if the document does not permit content extraction, or on I/O failure
     * @throws Exception if loading or decrypting the document fails
     */
    public void startExtraction(String[] strArr) throws Exception {
        boolean toConsole = false;
        boolean toHtml = false;
        boolean force = false;
        boolean sort = false;
        boolean separateBeads = true;
        boolean useNonSeqParser = false;
        String password = "";
        String encoding = null;
        String pdfFile = null;
        String outputFile = null;
        String ext = ".txt";
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if (arg.equals(PASSWORD)) {
                password = optionValue(strArr, ++i);
            } else if (arg.equals(ENCODING)) {
                encoding = optionValue(strArr, ++i);
            } else if (arg.equals(START_PAGE)) {
                startPage = Integer.parseInt(optionValue(strArr, ++i));
            } else if (arg.equals(HTML)) {
                toHtml = true;
                ext = ".html";
            } else if (arg.equals(SORT)) {
                sort = true;
            } else if (arg.equals(IGNORE_BEADS)) {
                separateBeads = false;
            } else if (arg.equals(DEBUG)) {
                this.debug = true;
            } else if (arg.equals(END_PAGE)) {
                endPage = Integer.parseInt(optionValue(strArr, ++i));
            } else if (arg.equals(CONSOLE)) {
                toConsole = true;
            } else if (arg.equals(FORCE)) {
                force = true;
            } else if (arg.equals(NONSEQ)) {
                useNonSeqParser = true;
            } else if (pdfFile == null) {
                pdfFile = arg;
            } else {
                outputFile = arg;
            }
        }

        if (pdfFile == null) {
            usage();
            return;
        }

        OutputStreamWriter output = null;
        PDDocument document = null;
        try {
            long loadStart = startProcessing("Loading PDF " + pdfFile);
            if (outputFile == null) {
                // Default output name: replace the last four characters of the input
                // name (e.g. ".pdf") with the output extension. A name of four
                // characters or fewer has nothing to strip, so the extension is
                // appended instead; otherwise the output name would stay null and
                // opening the output file would fail with a NullPointerException.
                String baseName = pdfFile.length() > 4
                        ? pdfFile.substring(0, pdfFile.length() - 4)
                        : pdfFile;
                outputFile = new File(baseName + ext).getAbsolutePath();
            }

            if (useNonSeqParser) {
                document = PDDocument.loadNonSeq(new File(pdfFile), (RandomAccess) null, password);
            } else {
                document = PDDocument.load(pdfFile, force);
                if (document.isEncrypted()) {
                    document.openProtection(new StandardDecryptionMaterial(password));
                }
            }
            if (!document.getCurrentAccessPermission().canExtractContent()) {
                throw new IOException("You do not have permission to extract text");
            }
            stopProcessing("Time for loading: ", loadStart);

            // HTML output defaults to UTF-8 when no encoding was requested.
            if (encoding == null && toHtml) {
                encoding = "UTF-8";
            }
            output = openOutput(toConsole, outputFile, encoding);

            PDFTextStripper stripper = toHtml ? new PDFText2HTML(encoding) : new PDFTextStripper(encoding);
            stripper.setForceParsing(force);
            stripper.setSortByPosition(sort);
            stripper.setShouldSeparateByBeads(separateBeads);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);

            long extractStart = startProcessing("Starting text extraction");
            if (this.debug) {
                System.err.println("Writing to " + outputFile);
            }
            stripper.writeText(document, output);
            writeEmbeddedPdfs(document, stripper, output);
            stopProcessing("Time for extraction: ", extractStart);
        } finally {
            // Nested so that the document is still closed when closing the writer fails.
            try {
                if (output != null) {
                    output.close();
                }
            } finally {
                if (document != null) {
                    document.close();
                }
            }
        }
    }

    /**
     * Returns the value that follows an option, printing the usage text (which exits
     * the JVM) when the option is the last argument.
     *
     * @param args the command-line arguments
     * @param index the position at which the option's value is expected
     * @return the argument at {@code index}
     */
    private static String optionValue(String[] args, int index) {
        if (index >= args.length) {
            usage();
        }
        return args[index];
    }

    /**
     * Opens the writer the extracted text is sent to.
     *
     * @param toConsole {@code true} to write to {@code System.out} instead of a file
     * @param outputFile path of the output file; unused when {@code toConsole} is set
     * @param encoding charset name for file output, or {@code null} for the platform default
     * @return a writer on the console or on the output file
     * @throws IOException if the file cannot be opened or the encoding is not supported
     */
    private static OutputStreamWriter openOutput(boolean toConsole, String outputFile, String encoding)
            throws IOException {
        if (toConsole) {
            return new OutputStreamWriter(System.out);
        }
        FileOutputStream fileStream = new FileOutputStream(outputFile);
        try {
            return encoding != null
                    ? new OutputStreamWriter(fileStream, encoding)
                    : new OutputStreamWriter(fileStream);
        } catch (IOException e) {
            // An unsupported encoding is reported after the file is already open;
            // release the file handle before propagating the failure.
            fileStream.close();
            throw e;
        }
    }

    /**
     * Appends the text of every embedded file whose subtype is {@code application/pdf}
     * to the output. Does nothing when the document has no embedded-file names.
     *
     * @param document the document whose embedded files are inspected
     * @param stripper the configured stripper used for the main document
     * @param output the writer receiving the extracted text
     * @throws IOException if an embedded file cannot be read or its text cannot be written
     */
    private void writeEmbeddedPdfs(PDDocument document, PDFTextStripper stripper, OutputStreamWriter output)
            throws IOException {
        PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
        if (nameDictionary == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = nameDictionary.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        Map<String, COSObjectable> names = embeddedFiles.getNames();
        if (names == null) {
            return;
        }
        for (Map.Entry<String, COSObjectable> entry : names.entrySet()) {
            if (this.debug) {
                System.err.println("Processing embedded file " + entry.getKey() + ":");
            }
            PDEmbeddedFile embeddedFile = ((PDComplexFileSpecification) entry.getValue()).getEmbeddedFile();
            // Skip entries without an embedded file or without a subtype instead of
            // failing on them with a NullPointerException.
            if (embeddedFile == null || !PDF_SUBTYPE.equals(embeddedFile.getSubtype())) {
                continue;
            }
            if (this.debug) {
                System.err.println("  is PDF (size=" + embeddedFile.getSize() + ")");
            }
            PDDocument embeddedDocument;
            InputStream embeddedStream = embeddedFile.createInputStream();
            try {
                embeddedDocument = PDDocument.load(embeddedStream);
            } finally {
                embeddedStream.close();
            }
            try {
                stripper.writeText(embeddedDocument, output);
            } finally {
                embeddedDocument.close();
            }
        }
    }

    /**
     * Prints the message to {@code System.err} in debug mode and returns the current time.
     *
     * @param str the message describing the stage that is starting
     * @return the current time in milliseconds, to be passed to {@link #stopProcessing}
     */
    private long startProcessing(String str) {
        if (this.debug) {
            System.err.println(str);
        }
        return System.currentTimeMillis();
    }

    /**
     * In debug mode, prints the time elapsed since {@code j} in seconds.
     *
     * @param str the message prefix
     * @param j the start time in milliseconds, as returned by {@link #startProcessing}
     */
    private void stopProcessing(String str, long j) {
        if (this.debug) {
            float elapsedSeconds = ((float) (System.currentTimeMillis() - j)) / 1000.0f;
            System.err.println(str + elapsedSeconds + " seconds");
        }
    }

    /**
     * Prints the usage text to {@code System.err} and exits the JVM with status 1.
     */
    private static void usage() {
        System.err.println("Usage: java -jar pdfbox-app-x.y.z.jar ExtractText [OPTIONS] <PDF file> [Text File]\n"
                + "  -password  <password>        Password to decrypt document\n"
                + "  -encoding  <output encoding> (ISO-8859-1,UTF-16BE,UTF-16LE,...)\n"
                + "  -console                     Send text to console instead of file\n"
                + "  -html                        Output in HTML format instead of raw text\n"
                + "  -sort                        Sort the text before writing\n"
                + "  -ignoreBeads                 Disables the separation by beads\n"
                + "  -force                       Enables pdfbox to ignore corrupt objects\n"
                + "  -debug                       Enables debug output about the time consumption of every stage\n"
                + "  -startPage <number>          The first page to start extraction(1 based)\n"
                + "  -endPage <number>            The last page to extract(inclusive)\n"
                + "  -nonSeq                      Enables the new non-sequential parser\n"
                + "  <PDF file>                   The PDF document to use\n"
                + "  [Text File]                  The file to write the text to\n");
        System.exit(1);
    }
}
