package eu.dnetlib.doiboost.orcidnodoi;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.orcid.AuthorData;
import eu.dnetlib.dhp.schema.orcid.AuthorSummary;
import eu.dnetlib.dhp.schema.orcid.Contributor;
import eu.dnetlib.dhp.schema.orcid.Work;
import eu.dnetlib.dhp.schema.orcid.WorkDetail;
import eu.dnetlib.doiboost.orcid.json.JsonHelper;
import eu.dnetlib.doiboost.orcid.util.HDFSUtil;
import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf;
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.SerializedLambda;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.class */
public class SparkGenEnrichedOrcidWorks {
    /** Logger of this job; package-visible and non-final as declared. */
    static Logger logger = LoggerFactory.getLogger(SparkGenEnrichedOrcidWorks.class);
    /** Jackson mapper used to parse the input JSON lines and to serialize the output atomic actions. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Job entry point. Joins the ORCID works that carry no DOI with the data of their
     * author, converts every enriched work into a {@link Publication} and stores the
     * result as a compressed sequence file of JSON-serialized {@link AtomicAction}s.
     *
     * <p>Arguments read from the command line: {@code isSparkSessionManaged} (optional,
     * defaults to {@code true}), {@code hdfsServerUri}, {@code workingPath},
     * {@code outputEnrichedWorksPath} and {@code orcidDataFolder}. Authors are read from
     * {@code workingPath + orcidDataFolder + "/authors/*"}, works from
     * {@code workingPath + orcidDataFolder + "/works/*"}.
     *
     * @param strArr command line arguments, parsed against the bundled
     *               {@code gen_orcid-no-doi_params.json} definition
     * @throws Exception if the arguments cannot be parsed or the Spark job fails
     */
    public static void main(String[] strArr) throws Exception {
        ArgumentApplicationParser parser = new ArgumentApplicationParser(loadParameterDefinition());
        parser.parseArgument(strArr);
        Boolean isSparkSessionManaged = Optional
                .ofNullable(parser.get("isSparkSessionManaged"))
                .map(Boolean::valueOf)
                .orElse(Boolean.TRUE);
        final String hdfsServerUri = parser.get("hdfsServerUri");
        final String workingPath = parser.get("workingPath");
        final String outputEnrichedWorksPath = parser.get("outputEnrichedWorksPath");
        final String orcidDataFolder = parser.get("orcidDataFolder");

        SparkSessionSupport.runWithSparkSession(new SparkConf(), isSparkSessionManaged, spark -> {
            final String lastUpdateDate = extractLastUpdateDate(
                    HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"));
            JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
            final String orcidDataPath = workingPath.concat(orcidDataFolder);

            // Authors: one JSON AuthorSummary per line; summaries without author data are dropped.
            Dataset<AuthorData> authors = spark.createDataset(
                    sc.textFile(orcidDataPath.concat("/authors/*"))
                            .map(line -> OBJECT_MAPPER.readValue(line, AuthorSummary.class))
                            .filter(summary -> summary.getAuthorData() != null)
                            .map(summary -> summary.getAuthorData())
                            .rdd(),
                    Encoders.bean(AuthorData.class));
            // count() triggers a full evaluation of the dataset; kept because the figure is logged.
            logger.info("Authors data loaded: {}", authors.count());

            // Works: keep only error-free work details that declare no external id of type "doi".
            Dataset<WorkDetail> works = spark.createDataset(
                    sc.textFile(orcidDataPath.concat("/works/*"))
                            .map(line -> OBJECT_MAPPER.readValue(line, Work.class))
                            .filter(work -> work.getWorkDetail() != null)
                            .map(work -> work.getWorkDetail())
                            .filter(detail -> detail.getErrorCode() == null)
                            // A work without any external id list cannot carry a DOI, so it is kept
                            // instead of failing the whole job with a NullPointerException.
                            .filter(detail -> detail.getExtIds() == null
                                    || detail.getExtIds().stream()
                                            .noneMatch(extId -> "doi".equalsIgnoreCase(extId.getType())))
                            .rdd(),
                    Encoders.bean(WorkDetail.class));
            logger.info("Works data loaded: {}", works.count());

            LongAccumulator warnNotFoundContributors =
                    spark.sparkContext().longAccumulator("warnNotFoundContributors");
            JavaRDD<Tuple2<String, String>> enrichedWorks = works
                    .joinWith(authors, works.col("oid").equalTo(authors.col("oid")), "inner")
                    .map(pair -> {
                        WorkDetail workDetail = pair._1();
                        AuthorData author = pair._2();
                        if (workDetail.getContributors() == null || workDetail.getContributors().isEmpty()) {
                            // No contributor declared on the work: fall back to the joined author.
                            Contributor fallback = new Contributor();
                            fallback.setName(author.getName());
                            fallback.setSurname(author.getSurname());
                            fallback.setCreditName(author.getCreditName());
                            fallback.setOid(author.getOid());
                            workDetail.setContributors(Arrays.asList(fallback));
                            warnNotFoundContributors.add(1L);
                        } else {
                            AuthorMatcher.match(author, workDetail.getContributors());
                        }
                        return new Tuple2<>(author.getOid(), JsonHelper.createOidWork(workDetail));
                    }, Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
                    .filter(Objects::nonNull)
                    .toJavaRDD();
            logger.info("Enriched works RDD ready.");

            LongAccumulator parsedPublications = spark.sparkContext().longAccumulator("parsedPublications");
            LongAccumulator enrichedPublications = spark.sparkContext().longAccumulator("enrichedPublications");
            LongAccumulator errorsGeneric = spark.sparkContext().longAccumulator("errorsGeneric");
            LongAccumulator errorsInvalidTitle = spark.sparkContext().longAccumulator("errorsInvalidTitle");
            LongAccumulator errorsNotFoundAuthors = spark.sparkContext().longAccumulator("errorsNotFoundAuthors");
            LongAccumulator errorsInvalidType = spark.sparkContext().longAccumulator("errorsInvalidType");
            LongAccumulator otherTypeFound = spark.sparkContext().longAccumulator("otherTypeFound");
            LongAccumulator deactivatedFound = spark.sparkContext().longAccumulator("deactivated_found");
            LongAccumulator titleNotProvidedFound = spark.sparkContext().longAccumulator("Title_not_provided_found");
            LongAccumulator noUrlFound = spark.sparkContext().longAccumulator("no_url_found");
            PublicationToOaf publicationToOaf = new PublicationToOaf(
                    parsedPublications, enrichedPublications, errorsGeneric, errorsInvalidTitle,
                    errorsNotFoundAuthors, errorsInvalidType, otherTypeFound, deactivatedFound,
                    titleNotProvidedFound, noUrlFound, lastUpdateDate);

            // The converter yields null for works it rejects; those are filtered out.
            // The cast only pins the element type of the RDD.
            JavaRDD<Publication> publications = enrichedWorks
                    .map(enriched -> (Publication) publicationToOaf.generatePublicationActionsFromJson(enriched._2()))
                    .filter(publication -> publication != null);

            sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true");
            publications
                    .mapToPair(publication -> new Tuple2<>(
                            new Text(publication.getClass().toString()),
                            new Text(OBJECT_MAPPER.writeValueAsString(
                                    new AtomicAction<>(Publication.class, publication)))))
                    .saveAsNewAPIHadoopFile(
                            outputEnrichedWorksPath,
                            Text.class,
                            Text.class,
                            SequenceFileOutputFormat.class,
                            sc.hadoopConfiguration());

            // Accumulator values are only meaningful after the save action above has run.
            logger.info("parsedPublications: {}", parsedPublications.value());
            logger.info("enrichedPublications: {}", enrichedPublications.value());
            logger.info("warnNotFoundContributors: {}", warnNotFoundContributors.value());
            logger.info("errorsGeneric: {}", errorsGeneric.value());
            logger.info("errorsInvalidTitle: {}", errorsInvalidTitle.value());
            logger.info("errorsNotFoundAuthors: {}", errorsNotFoundAuthors.value());
            logger.info("errorsInvalidType: {}", errorsInvalidType.value());
            logger.info("otherTypeFound: {}", otherTypeFound.value());
            logger.info("deactivatedAcc: {}", deactivatedFound.value());
            logger.info("titleNotProvidedAcc: {}", titleNotProvidedFound.value());
            logger.info("noUrlAcc: {}", noUrlFound.value());
        });
    }

    /**
     * Reads the bundled JSON description of the job arguments from the classpath.
     *
     * @return the content of {@code gen_orcid-no-doi_params.json}, decoded as UTF-8
     * @throws IOException if the resource cannot be read
     * @throws IllegalStateException if the resource is missing from the classpath
     */
    private static String loadParameterDefinition() throws IOException {
        final String resource = "/eu/dnetlib/dhp/doiboost/gen_orcid-no-doi_params.json";
        try (InputStream in = SparkGenEnrichedOrcidWorks.class.getResourceAsStream(resource)) {
            if (in == null) {
                throw new IllegalStateException("parameter definition not found on classpath: " + resource);
            }
            // Explicit charset: the single-argument overload decodes with the platform default.
            return IOUtils.toString(in, StandardCharsets.UTF_8.name());
        }
    }

    /**
     * Returns the first 10 characters of the content of {@code last_update.txt}
     * (presumably a {@code yyyy-MM-dd} date; confirm against the producer of that file).
     *
     * @param lastUpdate raw content of the last-update file, may be {@code null}
     * @return the 10-character prefix of {@code lastUpdate}
     * @throws RuntimeException if the content is blank or shorter than 10 characters
     */
    private static String extractLastUpdateDate(String lastUpdate) {
        final int prefixLength = 10;
        if (StringUtils.isBlank(lastUpdate)) {
            throw new RuntimeException("last update info not found");
        }
        if (lastUpdate.length() < prefixLength) {
            throw new IllegalStateException("last update info is too short: '" + lastUpdate + "'");
        }
        return lastUpdate.substring(0, prefixLength);
    }

    /**
     * Synthetic lambda-deserialization hook: given the serialized form of a lambda of this
     * class, returns the matching lambda instance.
     *
     * <p>The method works in two steps. The first switch maps the implementation method name
     * (matched by {@code hashCode()} and confirmed with {@code equals}) to a selector
     * {@code z}. The second switch checks the remaining metadata of the serialized lambda
     * (implementation method kind, functional interface class, method name and signature,
     * implementing class and implementation signature) and, when everything matches,
     * returns the lambda, re-binding captured arguments through {@code getCapturedArg}.
     *
     * <p>NOTE(review): as rendered here the selector is declared {@code boolean z = -1} and
     * most labels of the second switch read {@code case true:}, which is not valid Java.
     * This looks like decompiler output of an int-based switch (see the "loaded from ...
     * .class" comment on the class) - TODO confirm; this method is normally generated by
     * the compiler rather than kept in source.
     *
     * @param serializedLambda serialized form of the lambda to re-create
     * @return the lambda matching {@code serializedLambda}
     * @throws IllegalArgumentException if no known lambda matches
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // Step 1: resolve the implementation method name to a selector value.
        switch (implMethodName.hashCode()) {
            case -1905630611:
                if (implMethodName.equals("lambda$null$9ac3d6c9$1")) {
                    z = 2;
                    break;
                }
                break;
            case -1822474549:
                if (implMethodName.equals("lambda$null$77917a73$1")) {
                    z = 5;
                    break;
                }
                break;
            case -1746938237:
                if (implMethodName.equals("lambda$null$b24f27d3$1")) {
                    z = 12;
                    break;
                }
                break;
            case -1746938236:
                if (implMethodName.equals("lambda$null$b24f27d3$2")) {
                    z = 10;
                    break;
                }
                break;
            case -1746938235:
                if (implMethodName.equals("lambda$null$b24f27d3$3")) {
                    z = 8;
                    break;
                }
                break;
            case 11295434:
                if (implMethodName.equals("lambda$null$2da4c869$1")) {
                    z = true;
                    break;
                }
                break;
            case 11295435:
                if (implMethodName.equals("lambda$null$2da4c869$2")) {
                    z = 4;
                    break;
                }
                break;
            case 965125825:
                if (implMethodName.equals("lambda$null$efddbc6$1")) {
                    z = 3;
                    break;
                }
                break;
            case 1273583375:
                if (implMethodName.equals("lambda$null$7d6a2419$1")) {
                    z = false;
                    break;
                }
                break;
            case 1273583376:
                if (implMethodName.equals("lambda$null$7d6a2419$2")) {
                    z = 7;
                    break;
                }
                break;
            case 1273583377:
                if (implMethodName.equals("lambda$null$7d6a2419$3")) {
                    z = 6;
                    break;
                }
                break;
            case 1273583378:
                if (implMethodName.equals("lambda$null$7d6a2419$4")) {
                    z = 11;
                    break;
                }
                break;
            case 1273583379:
                if (implMethodName.equals("lambda$null$7d6a2419$5")) {
                    z = 9;
                    break;
                }
                break;
            case 2123019764:
                if (implMethodName.equals("nonNull")) {
                    z = 13;
                    break;
                }
                break;
        }
        // Step 2: validate the remaining lambda metadata and return the matching lambda.
        // Every case requires getImplMethodKind() == 6 (presumably
        // MethodHandleInfo.REF_invokeStatic - confirm against the JDK constants).
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Leu/dnetlib/dhp/schema/orcid/Work;")) {
                    return str6 -> {
                        return (Work) OBJECT_MAPPER.readValue(str6, Work.class);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/oaf/Publication;)Lscala/Tuple2;")) {
                    return publication2 -> {
                        return new Tuple2(publication2.getClass().toString(), OBJECT_MAPPER.writeValueAsString(new AtomicAction(Publication.class, publication2)));
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/spark/util/LongAccumulator;Lscala/Tuple2;)Lscala/Tuple2;")) {
                    // The accumulator captured by the original lambda is restored from the serialized form.
                    LongAccumulator longAccumulator = (LongAccumulator) serializedLambda.getCapturedArg(0);
                    return tuple2 -> {
                        WorkDetail workDetail3 = (WorkDetail) tuple2._1;
                        AuthorData authorData = (AuthorData) tuple2._2;
                        if (workDetail3.getContributors() == null || (workDetail3.getContributors() != null && workDetail3.getContributors().size() == 0)) {
                            Contributor contributor = new Contributor();
                            contributor.setName(authorData.getName());
                            contributor.setSurname(authorData.getSurname());
                            contributor.setCreditName(authorData.getCreditName());
                            contributor.setOid(authorData.getOid());
                            workDetail3.setContributors(Arrays.asList(contributor));
                            if (longAccumulator != null) {
                                longAccumulator.add(1L);
                            }
                        } else {
                            AuthorMatcher.match(authorData, workDetail3.getContributors());
                        }
                        return new Tuple2(authorData.getOid(), JsonHelper.createOidWork(workDetail3));
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/oaf/Publication;)Ljava/lang/Boolean;")) {
                    return publication -> {
                        return Boolean.valueOf(publication != null);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Lscala/Tuple2;")) {
                    return tuple23 -> {
                        return new Tuple2(new Text((String) tuple23._1()), new Text((String) tuple23._2()));
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf;Lscala/Tuple2;)Leu/dnetlib/dhp/schema/oaf/Publication;")) {
                    // The converter captured by the original lambda is restored from the serialized form.
                    PublicationToOaf publicationToOaf = (PublicationToOaf) serializedLambda.getCapturedArg(0);
                    return tuple22 -> {
                        return publicationToOaf.generatePublicationActionsFromJson((String) tuple22._2());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/orcid/Work;)Leu/dnetlib/dhp/schema/orcid/WorkDetail;")) {
                    return work2 -> {
                        return work2.getWorkDetail();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/orcid/Work;)Ljava/lang/Boolean;")) {
                    return work -> {
                        return Boolean.valueOf(work.getWorkDetail() != null);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/orcid/AuthorSummary;)Leu/dnetlib/dhp/schema/orcid/AuthorData;")) {
                    return authorSummary2 -> {
                        return authorSummary2.getAuthorData();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/orcid/WorkDetail;)Ljava/lang/Boolean;")) {
                    return workDetail2 -> {
                        return Boolean.valueOf(workDetail2.getExtIds().stream().filter(externalId -> {
                            return externalId.getType() != null;
                        }).noneMatch(externalId2 -> {
                            return externalId2.getType().equalsIgnoreCase("doi");
                        }));
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/orcid/AuthorSummary;)Ljava/lang/Boolean;")) {
                    return authorSummary -> {
                        return Boolean.valueOf(authorSummary.getAuthorData() != null);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/orcid/WorkDetail;)Ljava/lang/Boolean;")) {
                    return workDetail -> {
                        return Boolean.valueOf(workDetail.getErrorCode() == null);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Leu/dnetlib/dhp/schema/orcid/AuthorSummary;")) {
                    return str5 -> {
                        return (AuthorSummary) OBJECT_MAPPER.readValue(str5, AuthorSummary.class);
                    };
                }
                break;
            case true:
                // Unlike the other cases, this one targets a method of java.util.Objects, not of this class.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FilterFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Z") && serializedLambda.getImplClass().equals("java/util/Objects") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/Object;)Z")) {
                    return (v0) -> {
                        return Objects.nonNull(v0);
                    };
                }
                break;
        }
        // Unknown method name, or metadata that did not match the expected shape.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
