/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.oa.dedup;

import com.google.common.collect.Sets;
import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.Person;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkContext;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import scala.Tuple2;

/**
 * Static helpers for the dedup package: Spark accumulator setup, MD5 id hashing,
 * merging of author lists (with pid enrichment), output path construction and
 * loading of {@link DedupConfig} instances through the IS lookup service.
 */
public class DedupUtility {
    /** A base author receives a pid only when the name similarity is strictly above this value. */
    private static final double THRESHOLD = 0.95;

    // Patterns used by normalize(); compiled once instead of on every call.
    // They are deliberately kept as separate expressions and applied in this order.
    private static final Pattern NON_WORD = Pattern.compile("(\\W)+");
    private static final Pattern DIACRITICS = Pattern.compile("(\\p{InCombiningDiacriticalMarks})+");
    private static final Pattern PUNCTUATION = Pattern.compile("(\\p{Punct})+");
    private static final Pattern DIGITS = Pattern.compile("(\\d)+");
    private static final Pattern NEWLINES = Pattern.compile("(\\n)+");

    /**
     * Creates the long accumulators used while deduplicating one entity type.
     *
     * <p>Every accumulator name has the form {@code <entityType>::<label>}; the same
     * string is used as the map key and as the accumulator name registered on the context.
     *
     * @param dedupConf configuration providing entity type, order field, group max size and threshold
     * @param context   Spark context on which the accumulators are registered
     * @return map from accumulator name to accumulator
     */
    public static Map<String, LongAccumulator> constructAccumulator(DedupConfig dedupConf, SparkContext context) {
        Map<String, LongAccumulator> accumulators = new HashMap<>();
        registerAccumulator(accumulators, context, dedupConf, "records per hash key = 1");
        registerAccumulator(accumulators, context, dedupConf, "missing " + dedupConf.getWf().getOrderField());
        registerAccumulator(accumulators, context, dedupConf,
                String.format("Skipped records for count(%s) >= %s", dedupConf.getWf().getOrderField(), dedupConf.getWf().getGroupMaxSize()));
        registerAccumulator(accumulators, context, dedupConf, "skip list");
        registerAccumulator(accumulators, context, dedupConf, "dedupSimilarity (x2)");
        registerAccumulator(accumulators, context, dedupConf, "d < " + dedupConf.getWf().getThreshold());
        return accumulators;
    }

    /** Registers one accumulator named {@code <entityType>::<label>} and stores it under that name. */
    private static void registerAccumulator(Map<String, LongAccumulator> accumulators, SparkContext context, DedupConfig dedupConf, String label) {
        String name = String.format("%s::%s", dedupConf.getWf().getEntityType(), label);
        accumulators.put(name, context.longAccumulator(name));
    }

    /**
     * Returns the distinct keys produced by
     * {@code BlacklistAwareClusteringCombiner.filterAndCombine} for the given document.
     *
     * @param conf dedup configuration passed to the combiner
     * @param doc  document to compute the keys for
     * @return a new mutable set holding the keys
     */
    static Set<String> getGroupingKeys(DedupConfig conf, MapDocument doc) {
        return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(doc, conf));
    }

    /**
     * Computes the hex-encoded MD5 digest of the UTF-8 bytes of {@code s}.
     *
     * @param s text to hash
     * @return the hex digest, or {@code null} when {@code s} is {@code null} or the
     *         MD5 algorithm is unavailable (an error line is written to stderr in both cases)
     */
    public static String md5(String s) {
        if (s == null) {
            // Same outcome the former blanket catch produced for a null input.
            System.err.println("Error creating id");
            return null;
        }
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            md.update(s.getBytes(StandardCharsets.UTF_8));
            return new String(Hex.encodeHex(md.digest()));
        }
        catch (NoSuchAlgorithmException e) {
            // Keep the best-effort contract (null result) but do not hide the cause.
            System.err.println("Error creating id: " + e);
            return null;
        }
    }

    /**
     * Merges two author lists by choosing one as the base and enriching it with
     * pids found only in the other.
     *
     * <p>The base is the list with more pid-bearing authors; when both have the same
     * number, the longer list wins and, on equal length, {@code b} is the base.
     * The chosen base list is returned and its authors may be modified in place.
     *
     * @param a first author list, may be {@code null}
     * @param b second author list, may be {@code null}
     * @return the list selected as base (possibly {@code null} when the chosen list is {@code null})
     */
    public static List<Author> mergeAuthor(List<Author> a, List<Author> b) {
        int pa = DedupUtility.countAuthorsPids(a);
        int pb = DedupUtility.countAuthorsPids(b);
        int sa = DedupUtility.authorsSize(a);
        int sb = DedupUtility.authorsSize(b);
        boolean preferA = pa == pb ? sa > sb : pa > pb;
        List<Author> base = preferA ? a : b;
        List<Author> enrich = preferA ? b : a;
        DedupUtility.enrichPidFromList(base, enrich);
        return base;
    }

    /**
     * Copies onto the authors of {@code base} every pid of {@code enrich} whose
     * comparable string is not already present among the pids of {@code base}.
     *
     * <p>Each missing pid is attached to the base author most similar to the pid's
     * owner, provided the similarity is strictly above {@link #THRESHOLD}. Does nothing
     * when either list is {@code null}.
     */
    private static void enrichPidFromList(List<Author> base, List<Author> enrich) {
        if (base == null || enrich == null) {
            return;
        }
        // Comparable strings of all pids already carried by base authors.
        Set<String> basePidKeys = base.stream()
                .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
                .flatMap(a -> a.getPid().stream())
                .map(p -> p.toComparableString())
                .collect(Collectors.toSet());
        enrich.stream()
                .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
                .flatMap(a -> a.getPid().stream()
                        .filter(p -> !basePidKeys.contains(p.toComparableString()))
                        .map(p -> new Tuple2<>(p, a)))
                // Materialise all (pid, owner) pairs before touching any pid list.
                .collect(Collectors.toList())
                .forEach(pidAndOwner -> DedupUtility.findMostSimilar(base, pidAndOwner._2())
                        .ifPresent(target -> {
                            // The best match is searched among ALL base authors, so it may
                            // have no pid list yet; create one instead of failing with an NPE.
                            if (target.getPid() == null) {
                                target.setPid(new ArrayList<>());
                            }
                            target.getPid().add(pidAndOwner._1());
                        }));
    }

    /**
     * Finds the author in {@code base} with the highest similarity to {@code candidate}.
     *
     * @return the best match, or empty when {@code base} is empty or the best
     *         similarity is not strictly above {@link #THRESHOLD}
     */
    private static Optional<Author> findMostSimilar(List<Author> base, Author candidate) {
        return base.stream()
                .map(ba -> new Tuple2<>(DedupUtility.sim(ba, candidate), ba))
                .max(Comparator.comparing(Tuple2::_1))
                .filter(best -> best._1() > THRESHOLD)
                .map(Tuple2::_2);
    }

    /** Builds {@code <basePath>/<actionSetId>/<entityType>_deduprecord}. */
    public static String createDedupRecordPath(String basePath, String actionSetId, String entityType) {
        return String.format("%s/%s/%s_deduprecord", basePath, actionSetId, entityType);
    }

    /** Builds {@code <basePath>/<entityType>}. */
    public static String createEntityPath(String basePath, String entityType) {
        return String.format("%s/%s", basePath, entityType);
    }

    /** Builds {@code <basePath>/<actionSetId>/<entityType>_simrel}. */
    public static String createSimRelPath(String basePath, String actionSetId, String entityType) {
        return String.format("%s/%s/%s_simrel", basePath, actionSetId, entityType);
    }

    /** Builds {@code <basePath>/<actionSetId>/<entityType>_mergerel}. */
    public static String createMergeRelPath(String basePath, String actionSetId, String entityType) {
        return String.format("%s/%s/%s_mergerel", basePath, actionSetId, entityType);
    }

    /**
     * Jaro-Winkler similarity between two authors: computed on the normalized surnames
     * when both parsed persons report {@code isAccurate()}, otherwise on the normalized
     * full names.
     */
    private static Double sim(Author a, Author b) {
        Person pa = DedupUtility.parse(a);
        Person pb = DedupUtility.parse(b);
        if (pa.isAccurate() && pb.isAccurate()) {
            return new JaroWinkler().score(DedupUtility.normalize(pa.getSurnameString()), DedupUtility.normalize(pb.getSurnameString()));
        }
        return new JaroWinkler().score(DedupUtility.normalize(pa.getNormalisedFullname()), DedupUtility.normalize(pb.getNormalisedFullname()));
    }

    /**
     * NFD-decomposes and lower-cases {@code s}, then replaces runs of non-word characters,
     * combining diacritical marks, punctuation, digits and newlines with a single space
     * (in that order) and trims the result.
     */
    private static String normalize(String s) {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale.
        String text = DedupUtility.nfd(s).toLowerCase(Locale.ROOT);
        text = NON_WORD.matcher(text).replaceAll(" ");
        text = DIACRITICS.matcher(text).replaceAll(" ");
        text = PUNCTUATION.matcher(text).replaceAll(" ");
        text = DIGITS.matcher(text).replaceAll(" ");
        text = NEWLINES.matcher(text).replaceAll(" ");
        return text.trim();
    }

    /** Returns the canonical decomposition (NFD) of {@code s}. */
    private static String nfd(String s) {
        return Normalizer.normalize(s, Normalizer.Form.NFD);
    }

    /**
     * Builds a {@link Person} from {@code "<surname>, <name>"} when the surname is not
     * blank, otherwise from the author's full name.
     */
    private static Person parse(Author author) {
        if (StringUtils.isNotBlank(author.getSurname())) {
            return new Person(author.getSurname() + ", " + author.getName(), false);
        }
        return new Person(author.getFullname(), false);
    }

    /** Counts the authors for which {@link #hasPid(Author)} holds; {@code 0} for a {@code null} list. */
    private static int countAuthorsPids(List<Author> authors) {
        if (authors == null) {
            return 0;
        }
        return (int) authors.stream().filter(DedupUtility::hasPid).count();
    }

    /** Size of the list, treating {@code null} as empty. */
    private static int authorsSize(List<Author> authors) {
        return authors == null ? 0 : authors.size();
    }

    /** True when the author is non-null and has at least one non-null pid with a non-blank value. */
    private static boolean hasPid(Author a) {
        if (a == null || a.getPid() == null || a.getPid().isEmpty()) {
            return false;
        }
        return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
    }

    /**
     * Loads every dedup configuration referenced by an orchestrator profile.
     *
     * <p>The profile whose {@code DEDUPLICATION/ACTION_SET/@id} equals {@code orchestrator}
     * is fetched from the lookup service and parsed; one configuration is loaded for each
     * {@code //SCAN_SEQUENCE/SCAN} node, in document order.
     *
     * <p>NOTE(review): {@code orchestrator} is interpolated into the XQuery without escaping;
     * confirm callers only pass trusted identifiers.
     *
     * @param isLookUpUrl  URL used to obtain the lookup service client
     * @param orchestrator action set id identifying the orchestrator profile
     * @return the loaded configurations
     * @throws ISLookUpException if a lookup query fails
     * @throws DocumentException if the orchestrator profile cannot be parsed as XML
     */
    public static List<DedupConfig> getConfigurations(String isLookUpUrl, String orchestrator) throws ISLookUpException, DocumentException {
        ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl);
        String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator);
        String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery);
        Document doc = new SAXReader().read(new StringReader(orchestratorProfile));
        String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id");
        List<DedupConfig> configurations = new ArrayList<>();
        for (Object o : doc.selectNodes("//SCAN_SEQUENCE/SCAN")) {
            configurations.add(DedupUtility.loadConfig(isLookUpService, actionSetId, o));
        }
        return configurations;
    }

    /**
     * Loads the configuration referenced by one SCAN element: its {@code id} attribute
     * selects the profile, whose {@code DEDUPLICATION} text is parsed by
     * {@code DedupConfig.load}; the action set id is then stored as the configuration id.
     *
     * @param o the SCAN node, expected to be a dom4j {@link Element}
     */
    private static DedupConfig loadConfig(ISLookUpService isLookUpService, String actionSetId, Object o) throws ISLookUpException {
        Element scan = (Element) o;
        String configProfileId = scan.attributeValue("id");
        String conf = isLookUpService.getResourceProfileByQuery(String.format("for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()", configProfileId));
        DedupConfig dedupConfig = DedupConfig.load(conf);
        dedupConfig.getWf().setConfigurationId(actionSetId);
        return dedupConfig;
    }
}

