/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.pace.util;

import com.google.common.collect.Lists;
import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.WfConfig;
import eu.dnetlib.pace.model.Document;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.MapDocumentComparator;
import eu.dnetlib.pace.tree.AuthorsMatch;
import eu.dnetlib.pace.tree.InstanceTypeMatch;
import eu.dnetlib.pace.tree.JsonListMatch;
import eu.dnetlib.pace.tree.LevensteinTitle;
import eu.dnetlib.pace.tree.TitleVersionMatch;
import eu.dnetlib.pace.tree.support.TreeProcessor;
import eu.dnetlib.pace.util.Reporter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Queue;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Compares the documents of one blocking key pairwise and reports the outcome
 * of every comparison through a {@link Reporter}.
 *
 * <p>For each block the documents are de-duplicated by identifier, ordered by
 * the configured order field and then compared pivot-against-rest. A positive
 * comparison is emitted via {@code Reporter.emit}; every comparison also bumps
 * one of the counters whose names are collected in {@link #accumulators}.
 */
public class BlockProcessorForTesting {
    /**
     * Counter names registered by {@link #constructAccumulator(DedupConfig)},
     * each in the form {@code <entityType>::<counter name>}.
     */
    public static final List<String> accumulators = new ArrayList<String>();
    private static final Log log = LogFactory.getLog(BlockProcessorForTesting.class);

    // Counter names shared by constructAccumulator and the increment sites, so the two cannot drift apart.
    private static final String COUNTER_SINGLE_RECORD = "records per hash key = 1";
    private static final String COUNTER_SKIP_LIST = "skip list";
    private static final String COUNTER_SIMILARITY = "dedupSimilarity (x2)";

    private final DedupConfig dedupConf;

    /**
     * Appends the names of all counters this processor may increment to
     * {@link #accumulators}. Every call appends again; the list is not cleared.
     *
     * @param dedupConf configuration supplying entity type, order field,
     *                  group max size and threshold used in the counter names
     */
    public static void constructAccumulator(DedupConfig dedupConf) {
        WfConfig wf = dedupConf.getWf();
        String entityType = wf.getEntityType();
        accumulators.add(String.format("%s::%s", entityType, COUNTER_SINGLE_RECORD));
        accumulators.add(String.format("%s::%s", entityType, missingOrderFieldCounter(wf)));
        accumulators.add(String.format("%s::%s", entityType, oversizedGroupCounter(wf)));
        accumulators.add(String.format("%s::%s", entityType, COUNTER_SKIP_LIST));
        accumulators.add(String.format("%s::%s", entityType, COUNTER_SIMILARITY));
        accumulators.add(String.format("%s::%s", entityType, belowThresholdCounter(wf)));
    }

    /**
     * @param dedupConf configuration used for ordering, windowing, skip list
     *                  and comparison
     */
    public BlockProcessorForTesting(DedupConfig dedupConf) {
        this.dedupConf = dedupConf;
    }

    /**
     * Processes a block given as a list. Unlike
     * {@link #process(String, Iterable, Reporter, boolean, boolean)} the
     * prepared queue is compared directly, without grouping by order field.
     *
     * <p>NOTE(review): the prepared queue is a {@link PriorityQueue}, whose
     * iterator does not guarantee sorted traversal; the non-pivot documents
     * are therefore not necessarily visited in order-field order here.
     *
     * @param key       blocking key of the block (not used by this method)
     * @param documents documents sharing the blocking key
     * @param context   receiver of counters and emitted pairs
     * @param useTree   compare with {@link TreeProcessor} instead of the built-in publication comparison
     * @param noMatch   emit the negated {@link TreeProcessor} result; takes precedence over {@code useTree}
     */
    public void processSortedBlock(String key, List<MapDocument> documents, Reporter context, boolean useTree, boolean noMatch) {
        if (documents.size() > 1) {
            this.process(this.prepare(documents), context, useTree, noMatch);
        } else {
            context.incrementCounter(this.dedupConf.getWf().getEntityType(), COUNTER_SINGLE_RECORD, 1L);
        }
    }

    /**
     * Processes a block: de-duplicates and orders the documents, drops groups
     * of equal order-field value that are too large, then compares the rest.
     *
     * @param key       blocking key of the block
     * @param documents documents sharing the blocking key
     * @param context   receiver of counters and emitted pairs
     * @param useTree   compare with {@link TreeProcessor} instead of the built-in publication comparison
     * @param noMatch   emit the negated {@link TreeProcessor} result; takes precedence over {@code useTree}
     */
    public void process(String key, Iterable<MapDocument> documents, Reporter context, boolean useTree, boolean noMatch) {
        Queue<MapDocument> q = this.prepare(documents);
        if (q.size() > 1) {
            this.process(this.simplifyQueue(q, key, context), context, useTree, noMatch);
        } else {
            context.incrementCounter(this.dedupConf.getWf().getEntityType(), COUNTER_SINGLE_RECORD, 1L);
        }
    }

    /**
     * Builds a priority queue ordered by the configured order field, keeping
     * only the first document seen for each identifier. Documents are accepted
     * while the queue size is {@code <= queueMaxSize}, so the result holds at
     * most {@code queueMaxSize + 1} entries.
     */
    private Queue<MapDocument> prepare(Iterable<MapDocument> documents) {
        Queue<MapDocument> queue = new PriorityQueue<>(100, new MapDocumentComparator(this.dedupConf.getWf().getOrderField()));
        HashSet<String> seen = new HashSet<String>();
        int queueMaxSize = this.dedupConf.getWf().getQueueMaxSize();
        documents.forEach(doc -> {
            // Set.add returns false for an identifier already present, which filters duplicates.
            if (queue.size() <= queueMaxSize && seen.add(doc.getIdentifier())) {
                queue.add(doc);
            }
        });
        return queue;
    }

    /**
     * Drains {@code queue} into a new FIFO queue, grouping consecutive
     * documents whose cleaned-up order-field value is equal. Each group is
     * kept or dropped as a whole by
     * {@link #populateSimplifiedQueue(Queue, List, Reporter, String, String)}.
     * Documents without an order-field value are counted as "missing" and
     * discarded.
     */
    private Queue<MapDocument> simplifyQueue(Queue<MapDocument> queue, String ngram, Reporter context) {
        WfConfig wf = this.dedupConf.getWf();
        Queue<MapDocument> simplified = new LinkedList<MapDocument>();
        List<MapDocument> group = new ArrayList<>();
        String fieldRef = "";
        while (!queue.isEmpty()) {
            MapDocument result = queue.remove();
            Field orderFieldValue = result.values(wf.getOrderField());
            // A null field is treated like an empty one, matching the null guard used when comparing.
            if (orderFieldValue == null || orderFieldValue.isEmpty()) {
                context.incrementCounter(wf.getEntityType(), missingOrderFieldCounter(wf), 1L);
                continue;
            }
            String field = NGramUtils.cleanupForOrdering(orderFieldValue.stringValue());
            if (!field.equals(fieldRef)) {
                // Order-field value changed: flush the finished group and start a new one.
                this.populateSimplifiedQueue(simplified, group, context, fieldRef, ngram);
                group.clear();
                fieldRef = field;
            }
            group.add(result);
        }
        this.populateSimplifiedQueue(simplified, group, context, fieldRef, ngram);
        return simplified;
    }

    /**
     * Appends {@code tempResults} to {@code q} when the group is smaller than
     * the configured group max size; otherwise counts its members as skipped.
     * {@code fieldRef} and {@code ngram} are currently unused.
     */
    private void populateSimplifiedQueue(Queue<MapDocument> q, List<MapDocument> tempResults, Reporter context, String fieldRef, String ngram) {
        WfConfig wf = this.dedupConf.getWf();
        if (tempResults.size() < wf.getGroupMaxSize()) {
            q.addAll(tempResults);
        } else {
            context.incrementCounter(wf.getEntityType(), oversizedGroupCounter(wf), tempResults.size());
        }
    }

    /**
     * Repeatedly removes the head of {@code queue} as pivot and compares it
     * with the documents still queued, until the queue is empty.
     *
     * <p>For one pivot the scan stops as soon as a queued document belongs to
     * the skip list, or once more than {@code slidingWindowSize} comparisons
     * have been made for that pivot.
     */
    private void process(Queue<MapDocument> queue, Reporter context, boolean useTree, boolean noMatch) {
        WfConfig wf = this.dedupConf.getWf();
        while (!queue.isEmpty()) {
            MapDocument pivot = queue.remove();
            String idPivot = pivot.getIdentifier();
            Field fieldsPivot = pivot.values(wf.getOrderField());
            String fieldPivot = fieldsPivot == null || fieldsPivot.isEmpty() ? "" : fieldsPivot.stringValue();
            if (fieldPivot == null) {
                continue;
            }
            int compared = 0;
            for (MapDocument curr : queue) {
                String idCurr = curr.getIdentifier();
                if (this.mustSkip(idCurr)) {
                    context.incrementCounter(wf.getEntityType(), COUNTER_SKIP_LIST, 1L);
                    break;
                }
                if (compared > wf.getSlidingWindowSize()) {
                    break;
                }
                Field fieldsCurr = curr.values(wf.getOrderField());
                String fieldCurr = fieldsCurr == null || fieldsCurr.isEmpty() ? null : fieldsCurr.stringValue();
                if (idCurr.equals(idPivot) || fieldCurr == null) {
                    continue;
                }
                this.emitOutput(this.matches(pivot, curr, useTree, noMatch), idPivot, idCurr, context);
                // Count the comparison; without this the sliding-window bound above never triggers.
                compared++;
            }
        }
    }

    /** Selects the comparison strategy from the flags; {@code noMatch} wins over {@code useTree}. */
    private boolean matches(MapDocument pivot, MapDocument curr, boolean useTree, boolean noMatch) {
        if (noMatch) {
            return !new TreeProcessor(this.dedupConf).compare(pivot, curr);
        }
        if (useTree) {
            return new TreeProcessor(this.dedupConf).compare(pivot, curr);
        }
        return this.publicationCompare(pivot, curr, this.dedupConf);
    }

    /**
     * Compares the {@code "instance"} fields of two documents with an
     * {@link InstanceTypeMatch} built from empty parameters.
     *
     * @return {@code true} when the comparator's score is at least 1.0
     */
    protected static boolean compareInstanceType(MapDocument a, MapDocument b, DedupConfig conf) {
        HashMap<String, String> params = new HashMap<String, String>();
        InstanceTypeMatch instanceTypeMatch = new InstanceTypeMatch(params);
        double compare = instanceTypeMatch.compare(a.getFieldMap().get("instance"), b.getFieldMap().get("instance"), conf);
        return compare >= 1.0;
    }

    /**
     * Scores two documents with fixed weights and reports a match when the
     * total reaches 0.5:
     * <ul>
     *   <li>title Levenshtein score {@code >= 0.9}: +0.2</li>
     *   <li>pid JSON list match {@code >= 1.0}: +0.5</li>
     *   <li>title version match {@code < 0} or {@code >= 1.0}: +0.1</li>
     *   <li>authors match {@code < 0} or {@code >= 0.6}: +0.2</li>
     * </ul>
     * The additions are kept in this order so the floating-point sum is unchanged.
     */
    private boolean publicationCompare(MapDocument a, MapDocument b, DedupConfig config) {
        HashMap<String, String> params = new HashMap<String, String>();
        params.put("jpath_value", "$.value");
        params.put("jpath_classid", "$.qualifier.classid");
        params.put("mode", "count");
        double score = 0.0;

        LevensteinTitle levensteinTitle = new LevensteinTitle(params);
        if (levensteinTitle.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config) >= 0.9) {
            score += 0.2;
        }

        JsonListMatch jsonListMatch = new JsonListMatch(params);
        if (jsonListMatch.compare(a.getFieldMap().get("pid"), b.getFieldMap().get("pid"), config) >= 1.0) {
            score += 0.5;
        }

        TitleVersionMatch titleVersionMatch = new TitleVersionMatch(params);
        double result1 = titleVersionMatch.compare(a.getFieldMap().get("title"), b.getFieldMap().get("title"), config);
        if (result1 < 0.0 || result1 >= 1.0) {
            score += 0.1;
        }

        // The authors comparator is built from the same map, but without the "mode" entry.
        params.remove("mode");
        AuthorsMatch authorsMatch = new AuthorsMatch(params);
        double result2 = authorsMatch.compare(a.getFieldMap().get("authors"), b.getFieldMap().get("authors"), config);
        if (result2 < 0.0 || result2 >= 0.6) {
            score += 0.2;
        }
        return score >= 0.5;
    }

    /**
     * Reports one comparison: a match is emitted and counted under
     * "dedupSimilarity (x2)", a non-match is counted under "d < threshold".
     */
    private void emitOutput(boolean result, String idPivot, String idCurr, Reporter context) {
        WfConfig wf = this.dedupConf.getWf();
        if (result) {
            this.writeSimilarity(context, idPivot, idCurr);
            context.incrementCounter(wf.getEntityType(), COUNTER_SIMILARITY, 1L);
        } else {
            context.incrementCounter(wf.getEntityType(), belowThresholdCounter(wf), 1L);
        }
    }

    /** Returns whether the namespace prefix of the identifier is in the configured skip list. */
    private boolean mustSkip(String idPivot) {
        return this.dedupConf.getWf().getSkipList().contains(this.getNsPrefix(idPivot));
    }

    /** Extracts the text between the first {@code "|"} and the following {@code "::"} of the identifier. */
    private String getNsPrefix(String id) {
        return StringUtils.substringBetween(id, "|", "::");
    }

    /** Emits the pair {@code (from, to)} under the configured entity type. */
    private void writeSimilarity(Reporter context, String from, String to) {
        String type = this.dedupConf.getWf().getEntityType();
        context.emit(type, from, to);
    }

    /** Name of the counter for documents lacking the order field. */
    private static String missingOrderFieldCounter(WfConfig wf) {
        return "missing " + wf.getOrderField();
    }

    /** Name of the counter for documents dropped because their group reached the group max size. */
    private static String oversizedGroupCounter(WfConfig wf) {
        return String.format("Skipped records for count(%s) >= %s", wf.getOrderField(), wf.getGroupMaxSize());
    }

    /** Name of the counter for comparisons that did not produce a match. */
    private static String belowThresholdCounter(WfConfig wf) {
        return "d < " + wf.getThreshold();
    }
}

