package eu.dnetlib.dhp.oa.dedup;

import com.google.common.collect.Sets;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.codec.binary.Hex;
import org.apache.spark.SparkContext;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/* loaded from: input_file:eu/dnetlib/dhp/oa/dedup/DedupUtility.class */
/**
 * Static helpers used by the deduplication jobs: accumulator registration, id hashing,
 * working-path construction and loading of {@link DedupConfig} instances through the
 * IS lookup service.
 */
public class DedupUtility {

    /** Accumulator names have the form {@code <entityType>::<label>}. */
    private static final String ACCUMULATOR_NAME_FORMAT = "%s::%s";

    /**
     * Registers the long accumulators used while deduplicating one entity type.
     *
     * <p>Each accumulator is created on {@code sparkContext} under a name of the form
     * {@code <entityType>::<label>} and stored in the returned map under that same name.
     *
     * @param dedupConfig  configuration providing the entity type, order field, maximum
     *                     group size and threshold that appear in the accumulator names
     * @param sparkContext context on which the accumulators are created
     * @return a new mutable map from accumulator name to accumulator
     */
    public static Map<String, LongAccumulator> constructAccumulator(DedupConfig dedupConfig, SparkContext sparkContext) {
        Map<String, LongAccumulator> accumulators = new HashMap<>();
        registerAccumulator(accumulators, sparkContext, dedupConfig, "records per hash key = 1");
        registerAccumulator(accumulators, sparkContext, dedupConfig, "missing " + dedupConfig.getWf().getOrderField());
        registerAccumulator(accumulators, sparkContext, dedupConfig,
                String.format("Skipped records for count(%s) >= %s", dedupConfig.getWf().getOrderField(), dedupConfig.getWf().getGroupMaxSize()));
        registerAccumulator(accumulators, sparkContext, dedupConfig, "skip list");
        registerAccumulator(accumulators, sparkContext, dedupConfig, "dedupSimilarity (x2)");
        registerAccumulator(accumulators, sparkContext, dedupConfig, "d < " + dedupConfig.getWf().getThreshold());
        return accumulators;
    }

    /** Creates one accumulator named {@code <entityType>::<label>} and stores it in {@code target}. */
    private static void registerAccumulator(Map<String, LongAccumulator> target, SparkContext sparkContext, DedupConfig dedupConfig, String label) {
        String name = String.format(ACCUMULATOR_NAME_FORMAT, dedupConfig.getWf().getEntityType(), label);
        target.put(name, sparkContext.longAccumulator(name));
    }

    /**
     * Collects the keys produced by
     * {@link BlacklistAwareClusteringCombiner#filterAndCombine} for a document.
     *
     * @param dedupConfig configuration passed to the combiner
     * @param mapDocument document passed to the combiner
     * @return a new hash set containing the combiner's result
     */
    public static Set<String> getGroupingKeys(DedupConfig dedupConfig, MapDocument mapDocument) {
        return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(mapDocument, dedupConfig));
    }

    /**
     * Computes the MD5 digest of the UTF-8 bytes of {@code str}, hex-encoded with
     * {@link Hex#encodeHex(byte[])}.
     *
     * @param str text to hash; may be {@code null}
     * @return the hex-encoded digest, or {@code null} when {@code str} is {@code null}
     *         (in which case "Error creating id" is written to standard error)
     * @throws IllegalStateException if the runtime provides no MD5 implementation
     */
    public static String md5(String str) {
        if (str == null) {
            // Callers have always received null (plus this message) for a null input; keep that contract.
            System.err.println("Error creating id");
            return null;
        }
        try {
            MessageDigest digest = MessageDigest.getInstance("MD5");
            return new String(Hex.encodeHex(digest.digest(str.getBytes(StandardCharsets.UTF_8))));
        } catch (NoSuchAlgorithmException e) {
            // A missing MD5 provider is an environment fault, not something to hide behind a null id.
            throw new IllegalStateException("MD5 digest algorithm is not available", e);
        }
    }

    /**
     * Builds the path {@code <str>/<str2>/<str3>_deduprecord}.
     *
     * @param str  first path segment
     * @param str2 second path segment
     * @param str3 prefix of the last segment
     * @return the formatted path
     */
    public static String createDedupRecordPath(String str, String str2, String str3) {
        return String.format("%s/%s/%s_deduprecord", str, str2, str3);
    }

    /**
     * Builds the path {@code <str>/<str2>}.
     *
     * @param str  first path segment
     * @param str2 second path segment
     * @return the formatted path
     */
    public static String createEntityPath(String str, String str2) {
        return String.format("%s/%s", str, str2);
    }

    /**
     * Builds the path {@code <str>/<str2>/<str3>_simrel}.
     *
     * @param str  first path segment
     * @param str2 second path segment
     * @param str3 prefix of the last segment
     * @return the formatted path
     */
    public static String createSimRelPath(String str, String str2, String str3) {
        return String.format("%s/%s/%s_simrel", str, str2, str3);
    }

    /**
     * Builds the path {@code <str>/<str2>/<str3>_mergerel}.
     *
     * @param str  first path segment
     * @param str2 second path segment
     * @param str3 prefix of the last segment
     * @return the formatted path
     */
    public static String createMergeRelPath(String str, String str2, String str3) {
        return String.format("%s/%s/%s_mergerel", str, str2, str3);
    }

    /**
     * Loads one {@link DedupConfig} per {@code //SCAN_SEQUENCE/SCAN} element of the resource
     * profile whose {@code DEDUPLICATION/ACTION_SET/@id} equals {@code str2}.
     *
     * @param str  passed to {@link ISLookupClientFactory#getLookUpService} to obtain the lookup service
     * @param str2 action set id used to select the resource profile
     * @return the loaded configurations, in the order the SCAN elements are returned
     * @throws ISLookUpException if a lookup query fails
     * @throws DocumentException if the returned profile cannot be parsed as XML
     */
    public static List<DedupConfig> getConfigurations(String str, String str2) throws ISLookUpException, DocumentException {
        ISLookUpService lookUpService = ISLookupClientFactory.getLookUpService(str);
        // NOTE(review): str2 is interpolated into the query unescaped; a value containing a
        // single quote would alter the query. Confirm callers only pass trusted ids.
        String profileQuery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", str2);
        // NOTE(review): SAXReader is used with its default settings; confirm the lookup
        // service is a trusted source of XML.
        Document profile = new SAXReader().read(new StringReader(lookUpService.getResourceProfileByQuery(profileQuery)));
        String actionSetId = profile.valueOf("//DEDUPLICATION/ACTION_SET/@id");
        List<DedupConfig> configurations = new ArrayList<>();
        // Iterating as Object keeps this compiling whether selectNodes is declared raw or generic.
        for (Object scan : profile.selectNodes("//SCAN_SEQUENCE/SCAN")) {
            configurations.add(loadConfig(lookUpService, actionSetId, scan));
        }
        return configurations;
    }

    /**
     * Loads the configuration referenced by a SCAN element and stamps it with the action set id.
     *
     * @param iSLookUpService service used to fetch the configuration text
     * @param str             action set id stored on the loaded configuration
     * @param obj             SCAN node; cast to {@link Element} to read its {@code id} attribute
     * @return the loaded configuration
     * @throws ISLookUpException if the lookup query fails
     */
    private static DedupConfig loadConfig(ISLookUpService iSLookUpService, String str, Object obj) throws ISLookUpException {
        String resourceId = ((Element) obj).attributeValue("id");
        String query = String.format("for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", resourceId);
        DedupConfig config = DedupConfig.load(iSLookUpService.getResourceProfileByQuery(query));
        config.getWf().setConfigurationId(str);
        return config;
    }
}
