package eu.dnetlib.dhp.migration;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;

/* loaded from: input_file:eu/dnetlib/dhp/migration/ExtractEntitiesFromHDFSJob.class */
/**
 * Spark job that splits raw {@code Text}/{@code Text} sequence files into one text dataset per
 * entity type.
 *
 * <p>For each supported entity class (publication, dataset, software, other research product,
 * datasource, organization, project, relation) the job reads every existing source path, keeps
 * the records whose key ends (after the first {@code ':'}) with the lower-cased simple class name,
 * and writes the record values under {@code <graphRawPath>/<type>}.
 *
 * <p>The lambdas passed to Spark are serializable; the compiler generates the matching
 * lambda-deserialization support on its own, so it must not be declared by hand in this class.
 */
public class ExtractEntitiesFromHDFSJob {
    private static final Log log = LogFactory.getLog(ExtractEntitiesFromHDFSJob.class);

    /**
     * Job entry point.
     *
     * <p>Reads the argument definitions from {@code extract_entities_from_hdfs_parameters.json},
     * then uses the {@code master}, {@code sourcePaths} (comma separated) and {@code graphRawPath}
     * arguments to run the extraction once per entity type.
     *
     * @param strArr command line arguments, parsed by {@link ArgumentApplicationParser}
     * @throws Exception if the arguments cannot be parsed or the Spark job fails
     */
    public static void main(String[] strArr) throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(MigrateMongoMdstoresApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/extract_entities_from_hdfs_parameters.json")));
        parser.parseArgument(strArr);

        SparkSession spark = SparkSession.builder()
                .appName(ExtractEntitiesFromHDFSJob.class.getSimpleName())
                .master(parser.get("master"))
                .getOrCreate();

        // try-with-resources closes the context on both the success and the failure path,
        // attaching any close() failure as a suppressed exception.
        try (JavaSparkContext sc = new JavaSparkContext(spark.sparkContext())) {
            // Only paths that actually exist are handed to Spark.
            List<String> sourcePaths = Arrays.stream(parser.get("sourcePaths").split(","))
                    .filter(path -> exists(sc, path))
                    .collect(Collectors.toList());
            String targetPath = parser.get("graphRawPath");

            processEntity(sc, Publication.class, sourcePaths, targetPath);
            processEntity(sc, Dataset.class, sourcePaths, targetPath);
            processEntity(sc, Software.class, sourcePaths, targetPath);
            processEntity(sc, OtherResearchProduct.class, sourcePaths, targetPath);
            processEntity(sc, Datasource.class, sourcePaths, targetPath);
            processEntity(sc, Organization.class, sourcePaths, targetPath);
            processEntity(sc, Project.class, sourcePaths, targetPath);
            processEntity(sc, Relation.class, sourcePaths, targetPath);
        }
    }

    /**
     * Extracts the records of one entity type from all source files and saves them as text.
     *
     * @param javaSparkContext Spark context used to read the sequence files
     * @param cls entity class; its lower-cased simple name is both the key suffix to match and the
     *     name of the output sub-directory
     * @param list sequence file paths to read
     * @param str base output path; the result is written to {@code str + "/" + type}
     */
    private static void processEntity(JavaSparkContext javaSparkContext, Class<?> cls, List<String> list, String str) {
        final String type = cls.getSimpleName().toLowerCase();
        log.info(String.format("Processing entities (%s) in files:", type));
        list.forEach(log::info);

        // Start from an empty RDD and union in the matching records of each source file.
        JavaRDD<String> records = javaSparkContext.emptyRDD();
        for (String sourcePath : list) {
            records = records.union(
                    javaSparkContext.sequenceFile(sourcePath, Text.class, Text.class)
                            // Convert the Hadoop Text pair to plain strings before filtering.
                            .map(kv -> new Tuple2<>(kv._1().toString(), kv._2().toString()))
                            .filter(kv -> isEntityType(kv._1(), type))
                            .map(Tuple2::_2));
        }
        records.saveAsTextFile(str + "/" + type);
    }

    /**
     * Tells whether a record key designates the given entity type.
     *
     * @param str record key; the part after its first {@code ':'} is compared
     * @param str2 entity type name to compare against, ignoring case
     * @return {@code true} when the key suffix equals {@code str2} ignoring case
     */
    private static boolean isEntityType(String str, String str2) {
        return StringUtils.substringAfter(str, ":").equalsIgnoreCase(str2);
    }

    /**
     * Checks whether a path exists on the file system resolved from the context's Hadoop
     * configuration.
     *
     * @param javaSparkContext context providing the Hadoop configuration
     * @param str path to check
     * @return {@code true} if the path exists
     * @throws RuntimeException wrapping the {@link IOException} raised by the file system
     */
    public static boolean exists(JavaSparkContext javaSparkContext, String str) {
        try {
            return FileSystem.get(javaSparkContext.hadoopConfiguration()).exists(new Path(str));
        } catch (IOException e) {
            throw new RuntimeException("Unable to check existence of path: " + str, e);
        }
    }
}
