/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.actionmanager.webcrawl;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.webcrawl.CreateActionSetFromWebEntries;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark job that rewrites an action set, dropping the relations that touch a blacklisted identifier.
 *
 * <p>The job reads a Hadoop sequence file of {@code Text}/{@code Text} pairs, parses every value as a
 * JSON document with the fields {@code clazz} and {@code payload}, deserializes each payload into a
 * {@link Relation}, discards the relations whose {@code source} or {@code target} equals one of the
 * identifiers derived from the blacklist, and writes the survivors back as a BZip2-compressed
 * sequence file of {@link AtomicAction}s.
 */
public class RemoveRelationFromActionSet
implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(RemoveRelationFromActionSet.class);

    /** Classpath location of the JSON document describing the command-line parameters of the job. */
    private static final String PARAMETERS_RESOURCE = "/eu/dnetlib/dhp/actionmanager/webcrawl/as_parameters.json";

    /**
     * Number of leading characters dropped from each blacklisted {@code doi} value before it is
     * turned into an identifier.
     * NOTE(review): 16 is the length of "https://doi.org/", so the blacklist presumably stores DOIs
     * as resolver URLs - confirm against the blacklist format.
     */
    private static final int DOI_PREFIX_LENGTH = 16;

    /** Mapper used to deserialize the relation payloads read from the input action set. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Schema of the rows built from the (key, value) pairs of the input sequence file. */
    private static final StructType KV_SCHEMA = StructType$.MODULE$.apply(
        Arrays.asList(
            StructField$.MODULE$.apply("key", DataTypes.StringType, false, Metadata.empty()),
            StructField$.MODULE$.apply("value", DataTypes.StringType, false, Metadata.empty())));

    /** Schema used to parse the JSON stored in the {@code value} column. */
    private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
        Arrays.asList(
            StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
            StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())));

    /** Mapper used to serialize the atomic actions written to the output action set. */
    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Entry point: parses the arguments and runs the clean-up inside a Spark session.
     *
     * <p>Arguments read from the parser: {@code isSparkSessionManaged} (optional, defaults to
     * {@code true}), {@code sourcePath}, {@code outputPath} and {@code blackListPath}.
     *
     * @param args command-line arguments, interpreted according to the parameters resource
     * @throws Exception if the parameters cannot be loaded or parsed, or if the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(readParameters());
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        String blackListInputPath = parser.get("blackListPath");
        log.info("blackListInputPath: {}", blackListInputPath);

        SparkConf conf = new SparkConf();
        SparkSessionSupport.runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> removeFromActionSet(spark, inputPath, outputPath, blackListInputPath));
    }

    /**
     * Loads the JSON description of the job parameters from the classpath.
     *
     * @return the content of the parameters resource
     * @throws Exception if the resource is missing or cannot be read
     */
    private static String readParameters() throws Exception {
        // try-with-resources: the stream was previously left open after being read
        try (InputStream in = RemoveRelationFromActionSet.class.getResourceAsStream(PARAMETERS_RESOURCE)) {
            if (in == null) {
                throw new IllegalStateException("Missing classpath resource: " + PARAMETERS_RESOURCE);
            }
            // explicit charset: do not depend on the platform default to decode the JSON
            return IOUtils.toString(in, "UTF-8");
        }
    }

    /**
     * Reads the action set at {@code inputPath}, removes the relations whose source or target is
     * blacklisted and stores the remaining ones at {@code outputPath}.
     *
     * @param spark the Spark session to run on
     * @param inputPath path of the input sequence file
     * @param outputPath path the filtered sequence file is written to
     * @param blackListInputPath path of the JSON blacklist
     */
    private static void removeFromActionSet(SparkSession spark, String inputPath, String outputPath, String blackListInputPath) {
        // identifiers to be removed, computed from the "doi" column of the blacklist
        Dataset<String> blackList = readBlackList(spark, blackListInputPath)
            .map(
                (MapFunction<Row, String>) r -> IdentifierFactory
                    .idFromPid("50", "doi", r.<String>getAs("doi").substring(DOI_PREFIX_LENGTH), true),
                Encoders.STRING());

        // old action set: every value of the sequence file is a JSON document {clazz, payload}
        JavaPairRDD<Text, Text> seq = JavaSparkContext
            .fromSparkContext(spark.sparkContext())
            .sequenceFile(inputPath, Text.class, Text.class);
        JavaRDD<Row> rdd = seq.map(x -> RowFactory.create(x._1().toString(), x._2().toString()));

        Dataset<Row> actionSet = spark
            .createDataFrame(rdd, KV_SCHEMA)
            .withColumn("atomic_action", functions.from_json(functions.col("value"), ATOMIC_ACTION_SCHEMA))
            .select(functions.expr("atomic_action.*"));

        // every payload is deserialized as a Relation
        Dataset<Relation> relations = actionSet
            .map(
                (MapFunction<Row, Relation>) r -> MAPPER.readValue(r.<String>getAs("payload"), Relation.class),
                Encoders.bean(Relation.class));

        // drop first the relations with a blacklisted source, then those with a blacklisted target
        Dataset<Relation> relNoSource = dropBlackListed(relations, blackList, "source");
        Dataset<Relation> cleaned = dropBlackListed(relNoSource, blackList, "target");

        cleaned
            .toJavaRDD()
            .map(p -> new AtomicAction<>(Relation.class, p))
            .mapToPair(
                aa -> new Tuple2<>(
                    new Text(aa.getClazz().getCanonicalName()),
                    new Text(OBJECT_MAPPER.writeValueAsString(aa))))
            .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class);
    }

    /**
     * Keeps only the relations whose {@code column} value does not appear in the blacklist.
     *
     * <p>Implemented as a left join followed by a filter on the unmatched rows, i.e. the rows for
     * which the right-hand side of the joined pair is {@code null}.
     *
     * @param relations the relations to filter
     * @param blackList the blacklisted identifiers (single column named {@code value})
     * @param column the relation column compared with the blacklist
     * @return the relations that have no match in the blacklist
     */
    private static Dataset<Relation> dropBlackListed(Dataset<Relation> relations, Dataset<String> blackList, String column) {
        return relations
            .joinWith(blackList, relations.col(column).equalTo(blackList.col("value")), "left")
            .filter((FilterFunction<Tuple2<Relation, String>>) t2 -> t2._2() == null)
            .map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
    }

    /**
     * Reads the blacklist as JSON and projects it on its {@code doi} column.
     *
     * @param spark the Spark session to run on
     * @param inputPath path of the JSON blacklist
     * @return a dataset with the single column {@code doi}
     */
    private static Dataset<Row> readBlackList(SparkSession spark, String inputPath) {
        return spark.read().json(inputPath).select("doi");
    }
}

