/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.collection.orcid;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ORCIDExtractor
extends Thread {
    private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
    private final FileSystem fileSystem;
    private final String id;
    private final Path sourcePath;
    private final String baseOutputPath;

    public ORCIDExtractor(FileSystem fileSystem, String id, Path sourcePath, String baseOutputPath) {
        this.fileSystem = fileSystem;
        this.id = id;
        this.sourcePath = sourcePath;
        this.baseOutputPath = baseOutputPath;
    }

    private Map<String, SequenceFile.Writer> createMap() {
        try {
            log.info("Thread {} Creating sequence files starting from this input Path {}", (Object)this.id, (Object)this.sourcePath.getName());
            HashMap<String, SequenceFile.Writer> res = new HashMap<String, SequenceFile.Writer>();
            if (this.sourcePath.getName().contains("summaries")) {
                String summaryPath = String.format("%s/summaries_%s", this.baseOutputPath, this.id);
                SequenceFile.Writer summary_file = SequenceFile.createWriter((Configuration)this.fileSystem.getConf(), (SequenceFile.Writer.Option[])new SequenceFile.Writer.Option[]{SequenceFile.Writer.file((Path)new Path(summaryPath)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)});
                log.info("Thread {} Creating only summary path here {}", (Object)this.id, (Object)summaryPath);
                res.put("summary", summary_file);
                return res;
            }
            String employmentsPath = String.format("%s/employments_%s", this.baseOutputPath, this.id);
            SequenceFile.Writer employments_file = SequenceFile.createWriter((Configuration)this.fileSystem.getConf(), (SequenceFile.Writer.Option[])new SequenceFile.Writer.Option[]{SequenceFile.Writer.file((Path)new Path(employmentsPath)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)});
            res.put("employments", employments_file);
            log.info("Thread {} Creating employments path here {}", (Object)this.id, (Object)employmentsPath);
            String worksPath = String.format("%s/works_%s", this.baseOutputPath, this.id);
            SequenceFile.Writer works_file = SequenceFile.createWriter((Configuration)this.fileSystem.getConf(), (SequenceFile.Writer.Option[])new SequenceFile.Writer.Option[]{SequenceFile.Writer.file((Path)new Path(worksPath)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)});
            res.put("works", works_file);
            log.info("Thread {} Creating works path here {}", (Object)this.id, (Object)worksPath);
            return res;
        }
        catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void run() {
        CompressionCodecFactory factory = new CompressionCodecFactory(this.fileSystem.getConf());
        CompressionCodec codec = factory.getCodec(this.sourcePath);
        if (codec == null) {
            System.err.println("No codec found for " + this.sourcePath.getName());
            System.exit(1);
        }
        CompressionInputStream gzipInputStream = null;
        try {
            gzipInputStream = codec.createInputStream((InputStream)this.fileSystem.open(this.sourcePath));
            Map<String, SequenceFile.Writer> fileMap = this.createMap();
            this.iterateTar(fileMap, (InputStream)gzipInputStream);
        }
        catch (IOException e) {
            try {
                throw new RuntimeException(e);
            }
            catch (Throwable throwable) {
                log.info("Closing gzip stream");
                org.apache.hadoop.io.IOUtils.closeStream(gzipInputStream);
                throw throwable;
            }
        }
        log.info("Closing gzip stream");
        org.apache.hadoop.io.IOUtils.closeStream((Closeable)gzipInputStream);
    }

    private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> fileMap, String path) {
        if (this.sourcePath.getName().contains("summaries")) {
            return fileMap.get("summary");
        }
        if (path.contains("works")) {
            return fileMap.get("works");
        }
        if (path.contains("employments")) {
            return fileMap.get("employments");
        }
        return null;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private void iterateTar(Map<String, SequenceFile.Writer> fileMap, InputStream gzipInputStream) throws IOException {
        int extractedItem = 0;
        try (TarArchiveInputStream tais = new TarArchiveInputStream(gzipInputStream);){
            TarArchiveEntry entry;
            while ((entry = tais.getNextTarEntry()) != null) {
                SequenceFile.Writer fl;
                if (!entry.isFile() || (fl = this.retrieveFile(fileMap, entry.getName())) == null) continue;
                Text key = new Text(entry.getName());
                Text value = new Text(IOUtils.toString((Reader)new BufferedReader(new InputStreamReader((InputStream)tais))));
                fl.append((Writable)key, (Writable)value);
                if (++extractedItem % 100000 != 0) continue;
                log.info("Thread {}: Extracted {} items", (Object)this.id, (Object)extractedItem);
            }
        }
        finally {
            for (SequenceFile.Writer k : fileMap.values()) {
                log.info("Thread {}: Completed processed {} items", (Object)this.id, (Object)extractedItem);
                k.hflush();
                k.close();
            }
        }
    }
}

