/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.oa.provision;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.schema.oaf.Relation;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.Window;
import org.apache.spark.sql.expressions.WindowSpec;
import org.apache.spark.sql.functions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Spark job that reads {@link Relation} records as JSON, filters and caps them, and writes the
 * result as parquet.
 *
 * <p>Processing steps, in order:
 * <ol>
 *   <li>drop relations whose {@code source} or {@code target} starts with {@code unresolved};</li>
 *   <li>keep only relations passing {@code datainfo.deletedbyinference != true};</li>
 *   <li>drop relations whose lower-cased {@code relClass} is listed in the optional
 *       {@code relationFilter} argument;</li>
 *   <li>cap the relations per ({@code source}, {@code subRelType}) and then per
 *       ({@code target}, {@code subRelType}) using a {@code row_number()} window;</li>
 *   <li>coalesce to {@code relPartitions} partitions and write to {@code outputPath}.</li>
 * </ol>
 */
public class PrepareRelationsJob {
    private static final Logger log = LoggerFactory.getLogger(PrepareRelationsJob.class);

    /** Classpath location of the JSON document describing the accepted command line arguments. */
    private static final String INPUT_PARAMS_RESOURCE = "/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json";

    /** Default used for both {@code sourceMaxRelations} and {@code targetMaxRelations}. */
    public static final int MAX_RELS = 100;

    /** Default used for {@code relPartitions}. */
    public static final int DEFAULT_NUM_PARTITIONS = 3000;

    /**
     * Job entry point: parses the arguments, configures Kryo serialization and runs the job
     * inside a Spark session.
     *
     * @param args command line arguments, interpreted according to the bundled parameter
     *             specification
     * @throws Exception if the parameter specification cannot be read, the arguments cannot be
     *                   parsed, or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        final String jsonConfiguration;
        // Close the classpath stream deterministically and decode it with an explicit charset.
        try (InputStream in = PrepareRelationsJob.class.getResourceAsStream(INPUT_PARAMS_RESOURCE)) {
            if (in == null) {
                throw new IllegalStateException("Missing classpath resource: " + INPUT_PARAMS_RESOURCE);
            }
            jsonConfiguration = IOUtils.toString(in, StandardCharsets.UTF_8);
        }

        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputRelationsPath = parser.get("inputRelationsPath");
        log.info("inputRelationsPath: {}", inputRelationsPath);

        String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        int relPartitions = Optional.ofNullable(parser.get("relPartitions"))
            .map(Integer::valueOf)
            .orElse(DEFAULT_NUM_PARTITIONS);
        log.info("relPartitions: {}", relPartitions);

        Set<String> relationFilter = Optional.ofNullable(parser.get("relationFilter"))
            .map(PrepareRelationsJob::parseRelationFilter)
            .orElseGet(HashSet::new);
        log.info("relationFilter: {}", relationFilter);

        int sourceMaxRelations = Optional.ofNullable(parser.get("sourceMaxRelations"))
            .map(Integer::valueOf)
            .orElse(MAX_RELS);
        log.info("sourceMaxRelations: {}", sourceMaxRelations);

        int targetMaxRelations = Optional.ofNullable(parser.get("targetMaxRelations"))
            .map(Integer::valueOf)
            .orElse(MAX_RELS);
        log.info("targetMaxRelations: {}", targetMaxRelations);

        SparkConf conf = new SparkConf();
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());

        SparkSessionSupport.runWithSparkSession(conf, isSparkSessionManaged, spark -> {
            removeOutputDir(spark, outputPath);
            prepareRelationsRDD(spark, inputRelationsPath, outputPath, relationFilter, sourceMaxRelations, targetMaxRelations, relPartitions);
        });
    }

    /**
     * Turns the comma separated {@code relationFilter} argument into a set of lower-cased
     * relation class names. Surrounding whitespace is trimmed and blank entries are dropped, so
     * an empty argument yields an empty set (i.e. no filtering).
     *
     * @param csv the raw argument value, never {@code null}
     * @return a mutable set with the lower-cased, non-blank entries
     */
    private static Set<String> parseRelationFilter(String csv) {
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
        String lowered = csv.toLowerCase(Locale.ROOT);
        return Sets.newHashSet(Splitter.on(',').trimResults().omitEmptyStrings().split(lowered));
    }

    /**
     * Reads the relations, applies the filters and the per-source / per-target caps, and writes
     * the surviving relations as parquet (overwriting {@code outputPath}).
     *
     * @param spark              the active Spark session
     * @param inputRelationsPath path of the JSON encoded relations
     * @param outputPath         destination path of the parquet output
     * @param relationFilter     lower-cased {@code relClass} values to exclude; empty means no
     *                           exclusion
     * @param sourceMaxRelations exclusive upper bound on the 1-based window position kept per
     *                           ({@code source}, {@code subRelType})
     * @param targetMaxRelations exclusive upper bound on the 1-based window position kept per
     *                           ({@code target}, {@code subRelType})
     * @param relPartitions      number of partitions the result is coalesced to before writing
     */
    private static void prepareRelationsRDD(SparkSession spark, String inputRelationsPath, String outputPath, Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) {
        WindowSpec source_w = Window.partitionBy("source", "subRelType")
            .orderBy(functions.col("target").desc_nulls_last());
        WindowSpec target_w = Window.partitionBy("target", "subRelType")
            .orderBy(functions.col("source").desc_nulls_last());

        // An empty SQL string is not a valid filter expression, so fall back to a constant TRUE
        // predicate when there is nothing to exclude. Building the predicate with the Column API
        // also avoids hand-quoting the values into a SQL fragment.
        Column relClassAllowed = relationFilter.isEmpty()
            ? functions.lit(true)
            : functions.not(functions.lower(functions.col("relClass")).isin(relationFilter.toArray()));

        // NOTE(review): row_number() is 1-based, so "< max" keeps at most (max - 1) rows per
        // group. Left as-is to preserve the current output; confirm whether "<=" was intended.
        spark.read()
            .schema(Encoders.bean(Relation.class).schema())
            .json(inputRelationsPath)
            .where("source NOT LIKE 'unresolved%' AND  target  NOT LIKE 'unresolved%'")
            .where("datainfo.deletedbyinference != true")
            .where(relClassAllowed)
            .withColumn("source_w_pos", functions.row_number().over(source_w))
            .where("source_w_pos < " + sourceMaxRelations)
            .drop("source_w_pos")
            .withColumn("target_w_pos", functions.row_number().over(target_w))
            .where("target_w_pos < " + targetMaxRelations)
            .drop("target_w_pos")
            .coalesce(relPartitions)
            .write()
            .mode(SaveMode.Overwrite)
            .parquet(outputPath);
    }

    /**
     * Removes {@code path} through {@link HdfsSupport}, using the Hadoop configuration of the
     * given Spark session.
     *
     * @param spark the active Spark session
     * @param path  the path to remove
     */
    private static void removeOutputDir(SparkSession spark, String path) {
        HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
    }
}

