package eu.dnetlib.dhp.oa.dedup;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.config.DedupConfig;
import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.lang.invoke.SerializedLambda;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.class */
/**
 * Base class for the Spark actions of the deduplication workflow.
 *
 * <p>It holds the parsed application arguments and the Spark session, loads the
 * {@link DedupConfig} list from the information service, and offers a few static
 * helpers for writing datasets and normalising OAF model values.
 */
public abstract class AbstractSparkAction implements Serializable {

    /** Partition count constant made available to subclasses (not used in this class). */
    protected static final int NUM_PARTITIONS = 1000;

    /** Connection count constant made available to subclasses (not used in this class). */
    protected static final int NUM_CONNECTIONS = 20;

    /** Separator placed between a property value and its qualifier class id. */
    protected static final String TYPE_VALUE_SEPARATOR = "###";

    /** Separator placed between serialised structured properties. */
    protected static final String SP_SEPARATOR = "@@@";

    /** Shared lenient JSON mapper: unknown properties are ignored while deserialising. */
    protected static final ObjectMapper OBJECT_MAPPER =
            new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /** Parsed command line arguments of the action. */
    public ArgumentApplicationParser parser;

    /** Spark session used by the action. */
    public SparkSession spark;

    /**
     * Creates the action.
     *
     * @param argumentApplicationParser parsed application arguments
     * @param sparkSession Spark session the action runs on
     */
    public AbstractSparkAction(ArgumentApplicationParser argumentApplicationParser, SparkSession sparkSession) {
        this.parser = argumentApplicationParser;
        this.spark = sparkSession;
    }

    /**
     * Loads every dedup configuration referenced by the scan sequence of an action set profile.
     *
     * @param iSLookUpService lookup service used to fetch the profiles
     * @param str action set id used to select the orchestration profile
     * @return one {@link DedupConfig} per {@code SCAN} element, in document order
     * @throws ISLookUpException if a lookup query fails
     * @throws DocumentException if the orchestration profile is not parseable XML
     * @throws IOException if a configuration cannot be deserialised
     */
    public List<DedupConfig> getConfigurations(ISLookUpService iSLookUpService, String str)
            throws ISLookUpException, DocumentException, IOException {
        final String profileQuery =
                String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", str);
        // NOTE(review): the SAXReader uses its default settings; if profiles can ever come from
        // an untrusted source, DTD/external-entity processing should be disabled here.
        final Document profile =
                new SAXReader().read(new StringReader(iSLookUpService.getResourceProfileByQuery(profileQuery)));
        final String actionSetId = profile.valueOf("//DEDUPLICATION/ACTION_SET/@id");

        final List<DedupConfig> configurations = new ArrayList<>();
        for (final Object scanNode : profile.selectNodes("//SCAN_SEQUENCE/SCAN")) {
            configurations.add(loadConfig(iSLookUpService, actionSetId, scanNode));
        }
        return configurations;
    }

    /**
     * Fetches and initialises the dedup configuration referenced by a single {@code SCAN} node.
     *
     * @param isLookUpService lookup service used to fetch the configuration profile
     * @param actionSetId value stored as the configuration id of the loaded workflow settings
     * @param scanNode the {@code SCAN} element; its {@code id} attribute identifies the profile
     * @return the initialised configuration
     * @throws ISLookUpException if the lookup query fails
     * @throws IOException if the configuration JSON cannot be deserialised
     */
    private DedupConfig loadConfig(ISLookUpService isLookUpService, String actionSetId, Object scanNode)
            throws ISLookUpException, IOException {
        final String configProfileId = ((Element) scanNode).attributeValue("id");
        final String configQuery = String.format(
                "for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()",
                configProfileId);

        // A default-configured mapper is used on purpose instead of OBJECT_MAPPER, so that the
        // original (non-lenient) handling of the configuration JSON is preserved.
        final DedupConfig config =
                new ObjectMapper().readValue(isLookUpService.getResourceProfileByQuery(configQuery), DedupConfig.class);
        config.getPace().initModel();
        config.getPace().initTranslationMap();
        config.getWf().setConfigurationId(actionSetId);
        return config;
    }

    /**
     * Executes the action.
     *
     * @param iSLookUpService lookup service available to the action
     * @throws DocumentException declared for implementations that parse XML profiles
     * @throws IOException declared for implementations that perform I/O
     * @throws ISLookUpException declared for implementations that query the lookup service
     */
    abstract void run(ISLookUpService iSLookUpService) throws DocumentException, IOException, ISLookUpException;

    /**
     * Returns the Spark session for the given configuration, creating it if none exists yet.
     *
     * @param sparkConf Spark configuration
     * @return the existing or newly created session
     */
    public static SparkSession getSparkSession(SparkConf sparkConf) {
        return SparkSession.builder().config(sparkConf).getOrCreate();
    }

    /**
     * Writes a dataset as JSON with the {@code compression} option set to {@code gzip}.
     *
     * @param dataset dataset to write
     * @param str output path
     * @param saveMode save mode applied to the output path
     * @param <T> element type of the dataset
     */
    public static <T> void save(Dataset<T> dataset, String str, SaveMode saveMode) {
        dataset.write().option("compression", "gzip").mode(saveMode).json(str);
    }

    /**
     * Writes a dataset as Parquet with the {@code compression} option set to {@code gzip}.
     *
     * @param dataset dataset to write
     * @param str output path
     * @param saveMode save mode applied to the output path
     * @param <T> element type of the dataset
     */
    public static <T> void saveParquet(Dataset<T> dataset, String str, SaveMode saveMode) {
        dataset.write().option("compression", "gzip").mode(saveMode).parquet(str);
    }

    /**
     * Removes the given path through {@link HdfsSupport}, using the session's Hadoop configuration.
     *
     * @param sparkSession session providing the Hadoop configuration
     * @param str path to remove
     */
    public static void removeOutputDir(SparkSession sparkSession, String str) {
        HdfsSupport.remove(str, sparkSession.sparkContext().hadoopConfiguration());
    }

    /**
     * Serialises structured properties as {@code value###classid} tokens joined by {@code @@@}.
     *
     * <p>Null entries, entries without a qualifier, and entries with a blank class id or a blank
     * value are skipped.
     *
     * @param list properties to serialise; may be {@code null}
     * @return the joined string, or an empty string when the list is {@code null} or nothing qualifies
     */
    public static String structuredPropertyListToString(List<StructuredProperty> list) {
        if (list == null) {
            return "";
        }
        return list.stream()
                .filter(Objects::nonNull)
                .filter(property -> property.getQualifier() != null)
                .filter(property -> StringUtils.isNotBlank(property.getQualifier().getClassid()))
                .filter(property -> StringUtils.isNotBlank(property.getValue()))
                .map(property -> property.getValue() + TYPE_VALUE_SEPARATOR + property.getQualifier().getClassid())
                .collect(Collectors.joining(SP_SEPARATOR));
    }

    /**
     * Returns a function that parses a JSON string into a {@link Relation} and guarantees that
     * the result carries a non-null {@link DataInfo}.
     *
     * @return the mapping function
     */
    public static MapFunction<String, Relation> patchRelFn() {
        return json -> {
            final Relation relation = OBJECT_MAPPER.readValue(json, Relation.class);
            if (relation.getDataInfo() == null) {
                relation.setDataInfo(new DataInfo());
            }
            return relation;
        };
    }

    /**
     * Tells whether the relation lists a collected-from entry whose value is "OpenOrgs Database".
     *
     * @param relation relation to inspect
     * @return {@code true} if such an entry exists; {@code false} otherwise, including when the
     *     collected-from list is {@code null}
     */
    public boolean isOpenorgs(Relation relation) {
        return Optional.ofNullable(relation.getCollectedfrom())
                .map(collectedFrom -> collectedFrom.stream()
                        .filter(Objects::nonNull)
                        .anyMatch(keyValue -> "OpenOrgs Database".equals(keyValue.getValue())))
                .orElse(false);
    }

    /**
     * Parses a textual tri-state flag.
     *
     * @param field field holding the textual value; may be {@code null}
     * @return {@code null} when the field is {@code null}, blank or the literal "null" (ignoring
     *     case); otherwise whether the value equals "true" ignoring case
     */
    public static Boolean parseECField(Field<String> field) {
        if (field == null) {
            return null;
        }
        final String value = field.getValue();
        if (StringUtils.isBlank(value) || "null".equalsIgnoreCase(value)) {
            return null;
        }
        return Boolean.valueOf("true".equalsIgnoreCase(value));
    }
}
