/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.enrich.orcid;

import eu.dnetlib.dhp.application.AbstractScalaApplication;
import eu.dnetlib.dhp.enrich.orcid.MatchData;
import eu.dnetlib.dhp.enrich.orcid.ORCIDAuthorEnricher$;
import eu.dnetlib.dhp.enrich.orcid.ORCIDAuthorEnricherResult;
import eu.dnetlib.dhp.enrich.orcid.SparkEnrichGraphWithOrcidAuthors$;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.functions$;
import org.slf4j.Logger;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001\u00154A!\u0001\u0002\u0001\u001b\t\u00013\u000b]1sW\u0016s'/[2i\u000fJ\f\u0007\u000f[,ji\"|%oY5e\u0003V$\bn\u001c:t\u0015\t\u0019A!A\u0003pe\u000eLGM\u0003\u0002\u0006\r\u00051QM\u001c:jG\"T!a\u0002\u0005\u0002\u0007\u0011D\u0007O\u0003\u0002\n\u0015\u00059AM\\3uY&\u0014'\"A\u0006\u0002\u0005\u0015,8\u0001A\n\u0003\u00019\u0001\"a\u0004\n\u000e\u0003AQ!!\u0005\u0004\u0002\u0017\u0005\u0004\b\u000f\\5dCRLwN\\\u0005\u0003'A\u0011\u0001$\u00112tiJ\f7\r^*dC2\f\u0017\t\u001d9mS\u000e\fG/[8o\u0011%)\u0002A!A!\u0002\u00131\u0002%\u0001\u0007qe>\u0004XM\u001d;z!\u0006$\b\u000e\u0005\u0002\u0018;9\u0011\u0001dG\u0007\u00023)\t!$A\u0003tG\u0006d\u0017-\u0003\u0002\u001d3\u00051\u0001K]3eK\u001aL!AH\u0010\u0003\rM#(/\u001b8h\u0015\ta\u0012$\u0003\u0002\u0016%!I!\u0005\u0001B\u0001B\u0003%1EJ\u0001\u0005CJ<7\u000fE\u0002\u0019IYI!!J\r\u0003\u000b\u0005\u0013(/Y=\n\u0005\t\u0012\u0002\u0002\u0003\u0015\u0001\u0005\u0003\u0005\u000b\u0011B\u0015\u0002\u00071|w\r\u0005\u0002+_5\t1F\u0003\u0002-[\u0005)1\u000f\u001c45U*\ta&A\u0002pe\u001eL!\u0001M\u0016\u0003\r1{wmZ3s\u0011\u0015\u0011\u0004\u0001\"\u00014\u0003\u0019a\u0014N\\5u}Q!AGN\u001c9!\t)\u0004!D\u0001\u0003\u0011\u0015)\u0012\u00071\u0001\u0017\u0011\u0015\u0011\u0013\u00071\u0001$\u0011\u0015A\u0013\u00071\u0001*\u0011\u0015Q\u0004\u0001\"\u0011<\u0003\r\u0011XO\u001c\u000b\u0002yA\u0011\u0001$P\u0005\u0003}e\u0011A!\u00168ji\")\u0001\t\u0001C\u0005\u0003\u0006iq-\u001a8fe\u0006$Xm\u0012:ba\"$B\u0001\u0010\"E\r\")1i\u0010a\u0001-\u0005IqM]1qQB\u000bG\u000f\u001b\u0005\u0006\u000b~\u0002\rAF\u0001\u000bo>\u00148.\u001b8h\t&\u0014\b\"B$@\u0001\u00041\u0012A\u0003;be\u001e,G\u000fU1uQ\")\u0011\n\u0001C\u0005\u0015\u0006\u00192M]3bi\u0016$V-\u001c9pe\u0006\u0014\u0018\u0010R1uCR!Ah\u0013'O\u0011\u0015\u0019\u0005\n1\u0001\u0017\u0011\u0015i\u0005\n1\u0001\u0017\u0003%y'oY5e!\u0006$\b\u000eC\u0003H\u0011\u0002\u0007a\u0003C\u0003Q\u0001\u0011%\u0011+\u0001\u0005b]\u0006d\u0017n]=t)\ta$\u000b
C\u0003H\u001f\u0002\u0007acB\u0003U\u0005!\u0005Q+\u0001\u0011Ta\u0006\u00148.\u00128sS\u000eDwI]1qQ^KG\u000f[(sG&$\u0017)\u001e;i_J\u001c\bCA\u001bW\r\u0015\t!\u0001#\u0001X'\t1\u0006\f\u0005\u0002\u00193&\u0011!,\u0007\u0002\u0007\u0003:L(+\u001a4\t\u000bI2F\u0011\u0001/\u0015\u0003UCq\u0001\u000b,C\u0002\u0013\u0005a,F\u0001*\u0011\u0019\u0001g\u000b)A\u0005S\u0005!An\\4!\u0011\u0015\u0011g\u000b\"\u0001d\u0003\u0011i\u0017-\u001b8\u0015\u0005q\"\u0007\"\u0002\u0012b\u0001\u0004\u0019\u0003")
public class SparkEnrichGraphWithOrcidAuthors
extends AbstractScalaApplication {
    private final Logger log;

    /**
     * JVM entry point. Forwards the command-line arguments unchanged to the
     * {@code SparkEnrichGraphWithOrcidAuthors$.MODULE$} singleton, which holds the
     * actual {@code main} implementation.
     *
     * @param stringArray the command-line arguments
     */
    public static void main(String[] stringArray) {
        final SparkEnrichGraphWithOrcidAuthors$ companion = SparkEnrichGraphWithOrcidAuthors$.MODULE$;
        companion.main(stringArray);
    }

    /**
     * Executes the job: reads its four path arguments from the argument parser,
     * logs each of them, and then runs the three phases in order.
     *
     * <ol>
     *   <li>{@code createTemporaryData(graphPath, orcidPath, workingDir)}</li>
     *   <li>{@code analisys(workingDir)}</li>
     *   <li>{@code generateGraph(graphPath, workingDir, targetPath)}</li>
     * </ol>
     *
     * The arguments read are {@code graphPath}, {@code orcidPath}, {@code targetPath}
     * and {@code workingDir}.
     */
    public void run() {
        String graphPath = this.parser().get("graphPath");
        this.log.info("graphPath is '{}'", graphPath);
        String orcidPath = this.parser().get("orcidPath");
        this.log.info("orcidPath is '{}'", orcidPath);
        String targetPath = this.parser().get("targetPath");
        this.log.info("targetPath is '{}'", targetPath);
        String workingDir = this.parser().get("workingDir");
        // Label fixed: the working directory used to be logged as "targetPath is '...'",
        // which made the two log lines indistinguishable.
        this.log.info("workingDir is '{}'", workingDir);
        this.createTemporaryData(graphPath, orcidPath, workingDir);
        this.analisys(workingDir);
        this.generateGraph(graphPath, workingDir, targetPath);
    }

    /**
     * Writes the final graph: for every entity type in {@code ModelSupport.entityTypes}
     * for which {@code ModelSupport.isResult} is true, the original records are read,
     * left-joined with the matching output on {@code id}, and written out with the
     * {@code author} column replaced where an enriched author list is available.
     *
     * <p>Paths used per result type (where {@code resultType} is the enum name of the entity type):
     * <ul>
     *   <li>input records: JSON at {@code graphPath/resultType}, read with the bean schema of the entity class;</li>
     *   <li>matches: parquet at {@code workingDir/resultType_matched}, read with the bean schema of
     *       {@code ORCIDAuthorEnricherResult}, keeping only {@code id} and {@code enriched_author};</li>
     *   <li>output: gzip-compressed JSON at {@code targetPath/resultType}, written in overwrite mode.</li>
     * </ul>
     *
     * @param graphPath  base path of the input graph
     * @param workingDir base path holding the {@code _matched} parquet datasets
     * @param targetPath base path where the enriched graph is written
     */
    private void generateGraph(String graphPath, String workingDir, String targetPath) {
        // Keep only the entity types that ModelSupport classifies as results.
        ((IterableLike)((TraversableLike)JavaConverters$.MODULE$.mapAsScalaMapConverter(ModelSupport.entityTypes).asScala()).filter((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final boolean apply(Tuple2<EntityType, Class<?>> e) {
                return ModelSupport.isResult((EntityType)((EntityType)e._1()));
            }
        })).foreach((Function1)new Serializable(this, graphPath, workingDir, targetPath){
            public static final long serialVersionUID = 0L;
            private final /* synthetic */ SparkEnrichGraphWithOrcidAuthors $outer;
            private final String graphPath$2;
            private final String workingDir$1;
            private final String targetPath$3;

            public final void apply(Tuple2<EntityType, Class<?>> e) {
                String resultType = ((Enum)e._1()).name();
                Encoder enc = Encoders$.MODULE$.bean((Class)e._2());
                // Matching output for this result type, reduced to (id, enriched_author).
                Dataset matched = this.$outer.spark().read().schema(Encoders$.MODULE$.bean(ORCIDAuthorEnricherResult.class).schema()).parquet(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", "_matched"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.workingDir$1, resultType}))).selectExpr((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"id", "enriched_author"}));
                // Left join keeps every input record; "author" is overwritten only when
                // size(enriched_author) > 0, otherwise the original "author" value is kept.
                this.$outer.spark().read().schema(enc.schema()).json(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.graphPath$2, resultType}))).join(matched, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"id"})), "left").withColumn("author", functions$.MODULE$.when(functions$.MODULE$.size(functions$.MODULE$.col("enriched_author")).gt((Object)BoxesRunTime.boxToInteger((int)0)), (Object)functions$.MODULE$.col("enriched_author")).otherwise((Object)functions$.MODULE$.col("author"))).drop("enriched_author").write().mode(SaveMode.Overwrite).option("compression", "gzip").json(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.targetPath$3, resultType})));
            }
            {
                // Closure initializer as emitted by the decompiler: rejects a null outer
                // instance, then stores the captured values.
                if ($outer == null) {
                    throw null;
                }
                this.$outer = $outer;
                this.graphPath$2 = graphPath$2;
                this.workingDir$1 = workingDir$1;
                this.targetPath$3 = targetPath$3;
            }
        });
    }

    /**
     * Builds, for every result entity type, a parquet dataset that pairs each record's
     * own author list with the ORCID authors whose works share a persistent identifier
     * with that record.
     *
     * <p>Steps:
     * <ol>
     *   <li>Load {@code orcidPath/Authors}, keeping {@code orcid}, {@code familyName},
     *       {@code givenName}, {@code creditName} and {@code otherNames}.</li>
     *   <li>Load {@code orcidPath/Works}, explode {@code pids} into {@code identifier} and keep
     *       only identifiers whose schema is one of doi, pmid, pmc, arxiv, handle.</li>
     *   <li>Join the two on {@code orcid} and project lower-cased {@code pid_schema} /
     *       {@code pid_value} plus an {@code author} struct; the result is cached.</li>
     *   <li>For each entity type accepted by {@code ModelSupport.isResult}: extract the
     *       lower-cased (pid_schema, pid_value, id) triples from the records at
     *       {@code graphPath/resultType}, inner-join them with the ORCID data, collect the
     *       distinct authors per {@code id} as {@code orcid_authors}, join with the record's
     *       own {@code author} column (renamed {@code graph_authors}) and write the result as
     *       gzip-compressed parquet to {@code targetPath/resultType_unmatched}.</li>
     *   <li>Unpersist the cached ORCID dataset.</li>
     * </ol>
     *
     * @param graphPath  base path of the input graph (JSON, one directory per result type)
     * @param orcidPath  base path containing the {@code Authors} and {@code Works} datasets
     * @param targetPath base path for the {@code _unmatched} output; note that {@code run()}
     *                   passes its {@code workingDir} value here, not its {@code targetPath}
     */
    private void createTemporaryData(String graphPath, String orcidPath, String targetPath) {
        Dataset orcidAuthors = this.spark().read().load(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/Authors"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{orcidPath}))).select("orcid", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"familyName", "givenName", "creditName", "otherNames"}));
        // One row per (orcid, identifier), restricted to the listed identifier schemas.
        Dataset orcidWorks = this.spark().read().load(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/Works"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{orcidPath}))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("orcid"), functions$.MODULE$.explode(functions$.MODULE$.col("pids")).alias("identifier")})).where("identifier.schema IN('doi','pmid','pmc','arxiv','handle')");
        // Schema and value are lower-cased on both sides of the later join so the match
        // on (pid_schema, pid_value) is case-insensitive.
        Dataset orcidWorksWithAuthors = orcidAuthors.join(orcidWorks, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"orcid"}))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.lower(functions$.MODULE$.col("identifier.schema")).alias("pid_schema"), functions$.MODULE$.lower(functions$.MODULE$.col("identifier.value")).alias("pid_value"), functions$.MODULE$.struct((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("orcid"), functions$.MODULE$.col("givenName"), functions$.MODULE$.col("familyName"), functions$.MODULE$.col("creditName"), functions$.MODULE$.col("otherNames")})).alias("author")})).cache();
        // Keep only the entity types that ModelSupport classifies as results.
        ((IterableLike)((TraversableLike)JavaConverters$.MODULE$.mapAsScalaMapConverter(ModelSupport.entityTypes).asScala()).filter((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final boolean apply(Tuple2<EntityType, Class<?>> e) {
                return ModelSupport.isResult((EntityType)((EntityType)e._1()));
            }
        })).foreach((Function1)new Serializable(this, graphPath, targetPath, orcidWorksWithAuthors){
            public static final long serialVersionUID = 0L;
            private final /* synthetic */ SparkEnrichGraphWithOrcidAuthors $outer;
            private final String graphPath$1;
            private final String targetPath$1;
            private final Dataset orcidWorksWithAuthors$1;

            public final void apply(Tuple2<EntityType, Class<?>> e) {
                String resultType = ((Enum)e._1()).name();
                Encoder enc = Encoders$.MODULE$.bean((Class)e._2());
                // (pid_schema, pid_value, id) for every pid of every instance, restricted to
                // rows passing the filter "datainfo.deletedbyinference != true".
                Dataset oaEntities = this.$outer.spark().read().schema(enc.schema()).json(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.graphPath$1, resultType}))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("id"), functions$.MODULE$.col("datainfo"), functions$.MODULE$.col("instance")})).where("datainfo.deletedbyinference != true").drop("datainfo").withColumn("instances", functions$.MODULE$.explode(functions$.MODULE$.col("instance"))).withColumn("pids", functions$.MODULE$.explode(functions$.MODULE$.col("instances.pid"))).select((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.lower(functions$.MODULE$.col("pids.qualifier.classid")).alias("pid_schema"), functions$.MODULE$.lower(functions$.MODULE$.col("pids.value")).alias("pid_value"), functions$.MODULE$.col("id")}));
                // Distinct ORCID author structs per record id.
                Dataset orcidDnet = this.orcidWorksWithAuthors$1.join(oaEntities, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"pid_schema", "pid_value"})), "inner").groupBy((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("id")})).agg(functions$.MODULE$.collect_set(functions$.MODULE$.col("author")).alias("orcid_authors"), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0])).select("id", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"orcid_authors"}));
                // The same input is read a second time, this time for the record's own authors.
                Dataset result = this.$outer.spark().read().schema(enc.schema()).json(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.graphPath$1, resultType}))).selectExpr((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"id", "author as graph_authors"}));
                // Joined on "id" with no explicit join type passed.
                result.join(orcidDnet, (Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"id"}))).write().mode(SaveMode.Overwrite).option("compression", "gzip").parquet(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", "_unmatched"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.targetPath$1, resultType})));
            }
            {
                // Closure initializer as emitted by the decompiler: rejects a null outer
                // instance, then stores the captured values.
                if ($outer == null) {
                    throw null;
                }
                this.$outer = $outer;
                this.graphPath$1 = graphPath$1;
                this.targetPath$1 = targetPath$1;
                this.orcidWorksWithAuthors$1 = orcidWorksWithAuthors$1;
            }
        });
        orcidWorksWithAuthors.unpersist();
    }

    /**
     * Runs the author matching step for every entity type accepted by
     * {@code ModelSupport.isResult}.
     *
     * <p>For each such type, the parquet dataset at {@code targetPath/resultType_unmatched}
     * is read, rows with an empty {@code graph_authors} list are discarded, each remaining
     * row is decoded as a {@code MatchData} bean and passed to
     * {@code ORCIDAuthorEnricher.enrichOrcid(id, graph_authors, orcid_authors)}, and the
     * resulting {@code ORCIDAuthorEnricherResult} beans are written as gzip-compressed parquet
     * to {@code targetPath/resultType_matched} in overwrite mode.
     *
     * @param targetPath base path that holds the {@code _unmatched} inputs and receives the
     *                   {@code _matched} outputs; {@code run()} passes its {@code workingDir} here
     */
    private void analisys(String targetPath) {
        // Keep only the entity types that ModelSupport classifies as results.
        ((IterableLike)((TraversableLike)JavaConverters$.MODULE$.mapAsScalaMapConverter(ModelSupport.entityTypes).asScala()).filter((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final boolean apply(Tuple2<EntityType, Class<?>> e) {
                return ModelSupport.isResult((EntityType)((EntityType)e._1()));
            }
        })).foreach((Function1)new Serializable(this, targetPath){
            public static final long serialVersionUID = 0L;
            private final /* synthetic */ SparkEnrichGraphWithOrcidAuthors $outer;
            private final String targetPath$2;

            public final void apply(Tuple2<EntityType, Class<?>> e) {
                String resultType = ((Enum)e._1()).name();
                // Read <targetPath>/<resultType>_unmatched, enrich row by row, write <targetPath>/<resultType>_matched.
                this.$outer.spark().read().parquet(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", "_unmatched"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.targetPath$2, resultType}))).where("size(graph_authors) > 0").as(Encoders$.MODULE$.bean(MatchData.class)).map((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final ORCIDAuthorEnricherResult apply(MatchData md) {
                        return ORCIDAuthorEnricher$.MODULE$.enrichOrcid(md.id(), md.graph_authors(), md.orcid_authors());
                    }
                }, Encoders$.MODULE$.bean(ORCIDAuthorEnricherResult.class)).write().option("compression", "gzip").mode("overwrite").parquet(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"", "/", "_matched"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.targetPath$2, resultType})));
            }
            {
                // Closure initializer as emitted by the decompiler: rejects a null outer
                // instance, then stores the captured values.
                if ($outer == null) {
                    throw null;
                }
                this.$outer = $outer;
                this.targetPath$2 = targetPath$2;
            }
        });
    }

    /**
     * Creates the application, storing the logger locally and forwarding all three
     * arguments to the {@code AbstractScalaApplication} constructor.
     *
     * @param propertyPath passed through to the superclass constructor
     * @param args         passed through to the superclass constructor
     * @param log          logger used by {@code run()}; also passed to the superclass
     */
    public SparkEnrichGraphWithOrcidAuthors(String propertyPath, String[] args, Logger log) {
        // NOTE(review): the field is assigned before super(...) is invoked. This ordering is
        // presumably how the decompiler rendered the Scala constructor; javac versions that
        // require super(...) as the first statement will reject it. Confirm whether the
        // superclass constructor relies on `log` being set before reordering.
        this.log = log;
        super(propertyPath, args, log);
    }
}

