package eu.dnetlib.iis.utils.contents.arxiv;

import eu.dnetlib.iis.core.java.io.DataStore;
import eu.dnetlib.iis.core.java.io.FileSystemPath;
import eu.dnetlib.iis.utils.contents.Utils;
import eu.dnetlib.iis.utils.contents.schemas.arxiv.ArXiv2OpenAIRE;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.avro.file.DataFileWriter;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/* loaded from: input_file:eu/dnetlib/iis/utils/contents/arxiv/ArXivConverter.class */
/**
 * Command-line tool that walks an input directory recursively and, for every
 * file found, appends the file content to an {@link ArXivOutput} and appends an
 * arXiv-ID / OpenAIRE-ID pair to a separate mapping data store.
 *
 * <p>The arXiv ID of a document is taken to be the file name with its last
 * extension removed.
 */
public class ArXivConverter {
    private final FileSystemPath inputDir;
    private final FileSystemPath outputDir;
    private final FileSystemPath outputMappingDir;
    private final ArXivOutput output;

    /**
     * Program entry point: builds a converter from the command line and runs it.
     *
     * @param strArr command-line arguments, see {@link #parse(String[])}
     * @throws IOException if reading the input or writing any output fails
     */
    public static void main(String[] strArr) throws IOException {
        parse(strArr).run();
    }

    /**
     * Builds a converter from exactly four command-line arguments:
     * plaintext flag, input directory, output directory, mapping output directory.
     * Terminates the JVM with a usage message when the argument count is wrong.
     *
     * @param args raw command-line arguments
     * @return a converter configured according to {@code args}
     * @throws IOException declared for the path-wrapper construction
     */
    private static ArXivConverter parse(String[] args) throws IOException {
        if (args.length != 4) {
            parseError("This program converts plaintext documents corresponding to PDF versions of documents coming from arXiv repository. The following arguments should be given: 1) \"true\" if we should extract plaintext, \"false\" if we should extract PDF files; 2) \"input dir\" which corresponds do directory structure with {arXiv plaintext contained in separate UTF-8-encoded text files} or {PDFcontents contained in separate PDF files} where arXiv ID is the name of the file after removing its extension; 3) output dir for data store; 4) output dir for data store with arXiv and OpenAIRE IDs mapping");
        }
        // Boolean.parseBoolean yields true only for "true" (case-insensitive);
        // every other value selects the PDF output.
        boolean extractPlaintext = Boolean.parseBoolean(args[0]);
        String inputDirName = args[1];
        String outputDirName = args[2];
        String mappingDirName = args[3];

        ArXivOutput selectedOutput;
        if (extractPlaintext) {
            selectedOutput = new PlaintextOutput();
        } else {
            selectedOutput = new PDFContentOutput();
        }
        return new ArXivConverter(
                new FileSystemPath(new File(inputDirName)),
                new FileSystemPath(new File(outputDirName)),
                new FileSystemPath(new File(mappingDirName)),
                selectedOutput);
    }

    /**
     * Prints a command-line parsing error to standard error and exits with status 1.
     *
     * @param str description of the problem
     */
    private static void parseError(String str) {
        System.err.println("ERROR while parsing command line: " + str);
        System.exit(1);
    }

    /**
     * Creates a converter.
     *
     * @param inputDir         directory scanned recursively for input files
     * @param outputDir        location handed to {@code output.open(...)}
     * @param outputMappingDir location of the arXiv-to-OpenAIRE ID mapping data store
     * @param output           sink receiving the content of every input file
     */
    public ArXivConverter(FileSystemPath inputDir, FileSystemPath outputDir, FileSystemPath outputMappingDir, ArXivOutput output) {
        this.inputDir = inputDir;
        this.outputDir = outputDir;
        this.outputMappingDir = outputMappingDir;
        this.output = output;
    }

    /**
     * Processes every file below the input directory: writes one ID-mapping
     * record and forwards the file content to the output under its OpenAIRE ID.
     *
     * <p>The output, the mapping writer and each input stream are released even
     * when processing fails part-way through.
     *
     * @throws IOException if listing, reading or writing fails
     * @throws RuntimeException if a listed entry is not a file or its name has no dot
     */
    public void run() throws IOException {
        this.output.open(this.outputDir);
        try {
            // The writer is created inside the outer try so that a failure here
            // still closes the already opened output.
            try (DataFileWriter<ArXiv2OpenAIRE> mappingWriter =
                    DataStore.create(this.outputMappingDir, ArXiv2OpenAIRE.SCHEMA$)) {
                RemoteIterator<LocatedFileStatus> files =
                        this.inputDir.getFileSystem().listFiles(this.inputDir.getPath(), true);
                // A single record instance is refilled and appended for every file.
                ArXiv2OpenAIRE mapping = new ArXiv2OpenAIRE();
                while (files != null && files.hasNext()) {
                    LocatedFileStatus status = files.next();
                    assertValidFile(status);
                    Path path = status.getPath();
                    String arXivId = getNameWithoutExtension(path.getName());
                    String openAireId = convertToOpenAIREId(arXivId);

                    mapping.setArXivId(arXivId);
                    mapping.setOpenAIREId(openAireId);
                    mappingWriter.append(mapping);

                    // try-with-resources closes the stream even if append() throws.
                    try (FSDataInputStream content = this.inputDir.getFileSystem().open(path)) {
                        this.output.append(content, openAireId);
                    }
                }
            }
        } finally {
            this.output.close();
        }
    }

    /**
     * Collects the names of {@code path} and of all its ancestors, starting
     * with the name of {@code path} itself. Not referenced elsewhere in this class.
     *
     * @param path path to decompose; may be {@code null}, giving an empty list
     * @return names ordered from the given path up to its topmost ancestor
     */
    private static ArrayList<String> getPathElements(Path path) {
        ArrayList<String> names = new ArrayList<>();
        for (Path current = path; current != null; current = current.getParent()) {
            names.add(current.getName());
        }
        return names;
    }

    /**
     * Verifies that the listed entry is a regular file.
     *
     * @param locatedFileStatus entry returned by the file listing
     * @throws IllegalArgumentException if the entry is not a file
     */
    private static void assertValidFile(LocatedFileStatus locatedFileStatus) {
        if (!locatedFileStatus.isFile()) {
            throw new IllegalArgumentException(locatedFileStatus.getPath() + " is not a file");
        }
    }

    /**
     * Strips the last extension from a file name, e.g. {@code "a.b.txt"} becomes
     * {@code "a.b"}.
     *
     * @param str file name
     * @return the part of {@code str} before its last dot
     * @throws IllegalArgumentException if {@code str} contains no dot
     */
    private static String getNameWithoutExtension(String str) {
        int lastDot = str.lastIndexOf('.');
        if (lastDot == -1) {
            throw new IllegalArgumentException("Dot not found in name of file \"" + str + "\"");
        }
        return str.substring(0, lastDot);
    }

    /**
     * Derives the OpenAIRE ID for an arXiv ID by delegating to
     * {@code Utils.convertToOpenAIREId} with the fixed prefixes
     * {@code "od________18::"} and {@code "oai:arXiv.org:"}.
     *
     * @param str arXiv ID
     * @return the corresponding OpenAIRE ID
     */
    private static String convertToOpenAIREId(String str) {
        return Utils.convertToOpenAIREId("od________18::", "oai:arXiv.org:", str);
    }
}
