/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.oa.graph.clean;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class GenerateBlacklistSparkJob {
    private static final Logger log = LoggerFactory.getLogger(GenerateBlacklistSparkJob.class);
    private ArgumentApplicationParser parser;

    public GenerateBlacklistSparkJob(ArgumentApplicationParser parser) {
        this.parser = parser;
    }

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils.toString((InputStream)GenerateBlacklistSparkJob.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_blacklist_parameters.json"));
        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);
        Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged")).map(Boolean::valueOf).orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", (Object)isSparkSessionManaged);
        new GenerateBlacklistSparkJob(parser).run(isSparkSessionManaged);
    }

    public void run(Boolean isSparkSessionManaged) throws ISLookUpException, ClassNotFoundException {
        String inputPath = this.parser.get("inputPath");
        log.info("inputPath: {}", (Object)inputPath);
        String outputPath = this.parser.get("outputPath");
        log.info("outputPath: {}", (Object)outputPath);
        String zenodoWithdrawn = this.parser.get("zenodoWithdrawn");
        log.info("zenodoWithdrawn: {}", (Object)zenodoWithdrawn);
        SparkConf conf = new SparkConf();
        SparkSessionSupport.runWithSparkSession((SparkConf)conf, (Boolean)isSparkSessionManaged, spark -> {
            HdfsSupport.remove((String)outputPath, (Configuration)spark.sparkContext().hadoopConfiguration());
            Dataset zenodo_withdrawn_dois = spark.read().load(zenodoWithdrawn);
            for (Map.Entry e : ModelSupport.oafTypes.entrySet()) {
                Class clazz = (Class)e.getValue();
                if (!Result.class.isAssignableFrom(clazz)) continue;
                spark.read().schema(Encoders.bean(Result.class).schema()).json(inputPath + "/" + (String)e.getKey()).where("array_contains(instance.hostedby.value, 'ZENODO')").selectExpr(new String[]{"id", "explode(instance) as instance"}).selectExpr(new String[]{"id", "explode(instance.pid) as pid"}).join(zenodo_withdrawn_dois, zenodo_withdrawn_dois.col("doi").equalTo((Object)new Column("pid.value")), "left_semi").select("id", new String[0]).distinct().write().mode("append").option("compression", "gzip").parquet(outputPath);
            }
        });
    }
}

