package eu.dnetlib.jobs;

import eu.dnetlib.Deduper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldValueImpl;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.util.MapDocumentUtil;
import eu.dnetlib.support.ArgumentApplicationParser;
import eu.dnetlib.support.Block;
import eu.dnetlib.support.ConnectedComponent;
import eu.dnetlib.support.Relation;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.codehaus.jackson.map.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:eu/dnetlib/jobs/SparkComputeStatistics.class */
public class SparkComputeStatistics extends AbstractSparkJob {
    private static final Logger log = LoggerFactory.getLogger(SparkComputeStatistics.class);

    /**
     * Creates the statistics job.
     *
     * <p>Both arguments are passed unchanged to the {@link AbstractSparkJob} constructor.
     *
     * @param argumentApplicationParser parser holding the job arguments
     * @param sparkSession Spark session handed to the superclass
     */
    public SparkComputeStatistics(ArgumentApplicationParser argumentApplicationParser, SparkSession sparkSession) {
        super(argumentApplicationParser, sparkSession);
    }

    /**
     * Command-line entry point: builds the argument parser from the bundled parameter
     * description, parses {@code strArr}, obtains a Spark session and runs the job.
     *
     * @param strArr raw command-line arguments
     * @throws Exception if reading the parameter description, parsing the arguments or running the job fails
     */
    public static void main(String[] strArr) throws Exception {
        // NOTE(review): the parameter file is looked up via SparkCreateSimRels.class rather than
        // this class — presumably a copy-paste leftover; confirm before changing it.
        final ArgumentApplicationParser arguments = new ArgumentApplicationParser(
            readResource("/jobs/parameters/computeStatistics_parameters.json", SparkCreateSimRels.class));
        arguments.parseArgument(strArr);

        final SparkSession session = getSparkSession(new SparkConf());
        final SparkComputeStatistics job = new SparkComputeStatistics(arguments, session);
        job.run();
    }

    /**
     * Computes the deduplication statistics, prints them to standard output and writes them
     * to {@code <workingPath>/stats_file.txt}.
     *
     * <p>Inputs, all taken from the argument parser:
     * <ul>
     *   <li>{@code entitiesPath} – text file of entities, one record per line;</li>
     *   <li>{@code workingPath} – directory containing {@code mergerels}, {@code simrels} and
     *       {@code groupentities};</li>
     *   <li>{@code dedupConfPath} – location of the dedup configuration;</li>
     *   <li>{@code groundTruthFieldJPath} – JPath used to extract the ground-truth label of a record;</li>
     *   <li>{@code numPartitions} – optional, defaults to 1000.</li>
     * </ul>
     *
     * @throws IOException declared by the overridden method and by the stats-file writer
     */
    @Override // eu.dnetlib.jobs.AbstractSparkJob
    public void run() throws IOException {
        final String entitiesPath = this.parser.get("entitiesPath");
        final String workingPath = this.parser.get("workingPath");
        final String dedupConfPath = this.parser.get("dedupConfPath");
        final String groundTruthFieldJPath = this.parser.get("groundTruthFieldJPath");
        final int numPartitions = Optional
            .ofNullable(this.parser.get("numPartitions"))
            .map(Integer::valueOf)
            .orElse(1000);

        log.info("entitiesPath:          '{}'", entitiesPath);
        log.info("workingPath:           '{}'", workingPath);
        log.info("numPartitions:         '{}'", numPartitions);
        log.info("dedupConfPath:         '{}'", dedupConfPath);
        log.info("groundTruthFieldJPath: '{}'", groundTruthFieldJPath);

        final JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(this.spark.sparkContext());
        final DedupConfig dedupConfig = loadDedupConfig(dedupConfPath);

        // Every entity becomes (identifier, document) with its ground-truth label stored as an extra field.
        final JavaPairRDD<String, MapDocument> documentsById = sparkContext
            .textFile(entitiesPath)
            .repartition(numPartitions)
            .mapToPair(json -> {
                MapDocument document = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, json);
                document.getFieldMap().put(
                    "groundTruth",
                    new FieldValueImpl(Type.String, "groundTruth", MapDocumentUtil.getJPathString(groundTruthFieldJPath, json)));
                return new Tuple2<>(document.getIdentifier(), document);
            });

        // One ground-truth label per entity.
        final JavaRDD<String> entityLabels = documentsById
            .map(entry -> entry._2().getFieldMap().get("groundTruth").stringValue());

        // One list of ground-truth labels per block.
        final JavaRDD<List<String>> blockLabels = Deduper
            .createSortedBlocks(documentsById, dedupConfig)
            .map(keyedBlock -> keyedBlock._2().getDocuments().stream()
                .map(document -> document.getFieldMap().get("groundTruth").stringValue())
                .collect(Collectors.toList()));

        final JavaRDD<Relation> mergeRels = this.spark.read()
            .load(workingPath + "/mergerels")
            .as(Encoders.bean(Relation.class))
            .toJavaRDD();
        final JavaRDD<Relation> simRels = this.spark.read()
            .load(workingPath + "/simrels")
            .as(Encoders.bean(Relation.class))
            .toJavaRDD();

        // One list of ground-truth labels per connected component read from "groupentities".
        final JavaRDD<List<String>> groupLabels = sparkContext
            .textFile(workingPath + "/groupentities")
            .map(line -> new ObjectMapper().readValue(line, ConnectedComponent.class))
            .map(component -> component.getDocs().stream()
                .map(doc -> MapDocumentUtil.getJPathString(groundTruthFieldJPath, doc))
                .collect(Collectors.toList()));

        // The Spark actions below run in the same order as before.
        final long entityCount = entityLabels.count();
        final long blockCount = blockLabels.count();
        final double blocksRandIndex = randIndex(blockLabels);
        final long simRelCount = simRels.count();
        final long mergeRelCount = mergeRels.count();
        final double groupsRandIndex = randIndex(groupLabels);
        final long groupCount = groupLabels.count();
        final long groundTruthCount = entityLabels
            .filter(label -> !label.isEmpty())
            .count();
        // A group counts as "correct" when all of its members carry the same label.
        final long correctGroupCount = groupLabels
            .filter(labels -> labels.stream().distinct().count() == 1)
            .count();

        System.out.println(
            "Entities : " + entityCount
                + "\nGround Truth : " + groundTruthCount
                + "\nBlocks : " + blockCount
                + "\nBlocks RI : " + blocksRandIndex
                + "\nSimRels : " + simRelCount
                + "\nMergeRels : " + mergeRelCount
                + "\nGroups : " + groupCount
                + " (correct: " + correctGroupCount
                + ", wrong: " + (groupCount - correctGroupCount)
                + ")\nGroups RI : " + groupsRandIndex);

        writeStatsFileToHDFS(
            groundTruthCount,
            entityCount,
            blocksRandIndex,
            groupsRandIndex,
            blockCount,
            simRelCount,
            mergeRelCount,
            groupCount,
            workingPath + "/stats_file.txt");
    }

    /**
     * Writes the statistics summary as a text file, replacing whatever is at the target path.
     *
     * <p>Any existing entry at {@code str} is deleted (recursively) first. Failures are reported
     * to the caller through the declared {@link IOException} instead of being printed and
     * swallowed, so a job cannot finish "successfully" without its stats file.
     *
     * @param j number of entities with a non-empty ground truth (written as "Ground Truth")
     * @param j2 number of entities (written as "Entities")
     * @param d blocks index value (written as "Blocks RI")
     * @param d2 groups index value (written as "Groups RI")
     * @param j3 number of blocks (written as "Blocks")
     * @param j4 number of similarity relations (written as "SimRels")
     * @param j5 number of merge relations (written as "MergeRels")
     * @param j6 number of groups (written as "Groups")
     * @param str path of the file to write
     * @throws IOException if the old file cannot be removed, the path still exists after the
     *     delete, or the file cannot be created or written
     */
    public static void writeStatsFileToHDFS(long j, long j2, double d, double d2, long j3, long j4, long j5, long j6, String str) throws IOException {
        // A single handle is used for all operations. It is intentionally not closed here:
        // FileSystem.get may hand out a shared, cached instance.
        FileSystem fileSystem = FileSystem.get(new Configuration());
        Path target = new Path(str);

        fileSystem.delete(target, true);
        if (fileSystem.exists(target)) {
            throw new IOException("Output file already exists: " + str);
        }

        String report = "Entities : " + j2
            + "\nGround Truth : " + j
            + "\nBlocks : " + j3
            + "\nBlocks RI : " + d
            + "\nSimRels : " + j4
            + "\nMergeRels : " + j5
            + "\nGroups : " + j6
            + "\nGroups RI : " + d2;

        // try-with-resources closes the stream on both the success and the failure path.
        try (FSDataOutputStream out = fileSystem.create(target)) {
            out.writeBytes(report);
        }
    }

    /**
     * Computes the fraction of element pairs that share the same non-empty label.
     *
     * <p>For every list in {@code javaRDD} two pair counts are taken: the number of pairs whose
     * two elements carry the same non-empty label, and the number of all pairs in the list
     * (empty labels included). Both counts are summed over the whole RDD and the first sum is
     * divided by the second.
     *
     * <p>Counts are accumulated as {@code long} and the division is carried out in floating
     * point, so the result is a real ratio in {@code [0, 1]} rather than a truncated integer.
     *
     * @param javaRDD lists of labels, one list per block or group
     * @return the ratio described above, or {@code 0.0} when the RDD is empty or contains no
     *     list with at least two elements (no pairs to evaluate)
     */
    public double randIndex(JavaRDD<List<String>> javaRDD) {
        Tuple2<Long, Long> totals = javaRDD
            .map(labels -> {
                // Count each non-empty label once, then turn every count into a number of pairs.
                long agreeingPairs = labels.stream()
                    .filter(label -> !label.isEmpty())
                    .collect(Collectors.groupingBy(label -> label, Collectors.counting()))
                    .values()
                    .stream()
                    .mapToLong(occurrences -> pairCount(occurrences))
                    .sum();
                return new Tuple2<Long, Long>(agreeingPairs, pairCount(labels.size()));
            })
            // fold with a neutral element also works on an empty RDD, where reduce would throw.
            .fold(
                new Tuple2<Long, Long>(0L, 0L),
                (left, right) -> new Tuple2<Long, Long>(left._1() + right._1(), left._2() + right._2()));

        long agreeingPairs = totals._1();
        long allPairs = totals._2();
        if (allPairs == 0L) {
            return 0.0d;
        }
        return (double) agreeingPairs / allPairs;
    }

    /** Returns the number of unordered pairs that can be formed from {@code n} elements. */
    private static long pairCount(long n) {
        return n * (n - 1) / 2;
    }

    /**
     * Returns the number of unordered pairs among {@code i} elements, i.e. {@code i * (i - 1) / 2}.
     *
     * <p>The product is computed in {@code long} so that it cannot overflow before the division;
     * results that fit in an {@code int} are therefore always exact.
     *
     * @param i number of elements
     * @return {@code i * (i - 1) / 2}
     * @throws ArithmeticException if the result does not fit in an {@code int} (instead of
     *     silently wrapping around)
     */
    private static int binomialCoefficient(int i) {
        long n = i;
        return Math.toIntExact(n * (n - 1) / 2);
    }

    /**
     * Synthetic (compiler-generated) hook that rebuilds a lambda of this class from its
     * serialized form.
     *
     * <p>The lambda is selected by its implementation method name. Before a replacement lambda
     * is returned, the recorded method kind, functional interface, interface method name and
     * signature, implementing class and implementation signature are all checked. Captured
     * arguments, where present, are read back from {@code serializedLambda}.
     *
     * @param serializedLambda serialized description of the lambda to recreate
     * @return a lambda equivalent to the serialized one
     * @throws IllegalArgumentException if no known lambda matches the description
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // NOTE(review): "boolean z = -1" together with the "case false"/"case true" labels below is
        // not valid Java — presumably the decompiler's rendering of an int selector. Confirm against
        // the bytecode before relying on which second-stage case belongs to which lambda.
        boolean z = -1;
        // Stage 1: switch on the name's hash code, then confirm with equals() and pick a selector.
        switch (implMethodName.hashCode()) {
            case -2130069967:
                if (implMethodName.equals("lambda$run$85c6f98c$1")) {
                    z = 5;
                    break;
                }
                break;
            case -1585602389:
                if (implMethodName.equals("lambda$run$f881ac61$1")) {
                    z = 4;
                    break;
                }
                break;
            case -1334328546:
                if (implMethodName.equals("lambda$randIndex$5f1bdc95$1")) {
                    z = 6;
                    break;
                }
                break;
            case -649458549:
                if (implMethodName.equals("lambda$run$4d83fd32$1")) {
                    z = false;
                    break;
                }
                break;
            case -583358664:
                if (implMethodName.equals("lambda$run$717dc991$1")) {
                    z = true;
                    break;
                }
                break;
            case 60987495:
                if (implMethodName.equals("lambda$run$af3fe14$1")) {
                    z = 2;
                    break;
                }
                break;
            case 327714189:
                if (implMethodName.equals("lambda$run$8162aac8$1")) {
                    z = 3;
                    break;
                }
                break;
            case 892640073:
                if (implMethodName.equals("lambda$run$e5bd2eef$1")) {
                    z = 7;
                    break;
                }
                break;
            case 1730218207:
                if (implMethodName.equals("lambda$randIndex$77ff6b0b$1")) {
                    z = 8;
                    break;
                }
                break;
        }
        // Stage 2: for the selected lambda, verify every recorded signature and return a new instance.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/pace/config/DedupConfig;Ljava/lang/String;Ljava/lang/String;)Lscala/Tuple2;")) {
                    // Captured values: the dedup configuration and the ground-truth JPath.
                    DedupConfig dedupConfig = (DedupConfig) serializedLambda.getCapturedArg(0);
                    String str = (String) serializedLambda.getCapturedArg(1);
                    return str5 -> {
                        MapDocument asMapDocumentWithJPath = MapDocumentUtil.asMapDocumentWithJPath(dedupConfig, str5);
                        asMapDocumentWithJPath.getFieldMap().put("groundTruth", new FieldValueImpl(Type.String, "groundTruth", MapDocumentUtil.getJPathString(str, str5)));
                        return new Tuple2(asMapDocumentWithJPath.getIdentifier(), asMapDocumentWithJPath);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Ljava/lang/Boolean;")) {
                    return str7 -> {
                        return Boolean.valueOf(!str7.isEmpty());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Ljava/lang/String;")) {
                    return tuple2 -> {
                        return ((Field) ((MapDocument) tuple2._2()).getFieldMap().get("groundTruth")).stringValue();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;Leu/dnetlib/support/ConnectedComponent;)Ljava/util/List;")) {
                    // Captured value: the ground-truth JPath.
                    String str2 = (String) serializedLambda.getCapturedArg(0);
                    return connectedComponent -> {
                        return (List) connectedComponent.getDocs().stream().map(str72 -> {
                            return MapDocumentUtil.getJPathString(str2, str72);
                        }).collect(Collectors.toList());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Leu/dnetlib/support/ConnectedComponent;")) {
                    return str6 -> {
                        return (ConnectedComponent) new ObjectMapper().readValue(str6, ConnectedComponent.class);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Ljava/util/List;)Ljava/lang/Boolean;")) {
                    return list -> {
                        return Boolean.valueOf(list.stream().distinct().count() == 1);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function2") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;Lscala/Tuple2;)Lscala/Tuple2;")) {
                    return (tuple22, tuple23) -> {
                        return new Tuple2(Integer.valueOf(((Integer) tuple22._1()).intValue() + ((Integer) tuple23._1()).intValue()), Integer.valueOf(((Integer) tuple22._2()).intValue() + ((Integer) tuple23._2()).intValue()));
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Ljava/util/List;")) {
                    return tuple222 -> {
                        return (List) ((Block) tuple222._2()).getDocuments().stream().map(mapDocument -> {
                            return ((Field) mapDocument.getFieldMap().get("groundTruth")).stringValue();
                        }).collect(Collectors.toList());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/jobs/SparkComputeStatistics") && serializedLambda.getImplMethodSignature().equals("(Ljava/util/List;)Lscala/Tuple2;")) {
                    return list2 -> {
                        int i = 0;
                        for (String str3 : (List) list2.stream().distinct().filter(str22 -> {
                            return !str22.isEmpty();
                        }).collect(Collectors.toList())) {
                            i += binomialCoefficient((int) list2.stream().filter(str32 -> {
                                return str32.equals(str3);
                            }).count());
                        }
                        return new Tuple2(Integer.valueOf(i), Integer.valueOf(binomialCoefficient(list2.size())));
                    };
                }
                break;
        }
        // No known lambda matched the serialized description.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
