package com.wcohen.ss;

import com.wcohen.ss.api.StringDistance;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.expt.Blocker;
import com.wcohen.ss.expt.ClusterNGramBlocker;
import com.wcohen.ss.expt.MatchData;
import com.wcohen.ss.tokens.SimpleTokenizer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.codehaus.jackson.util.MinimalPrettyPrinter;

/* loaded from: input_file:WEB-INF/lib/secondstring-1.0.0.jar:com/wcohen/ss/SoftTokenFelligiSunter.class */
/**
 * Token-based string distance that scores a pair of strings by summing
 * {@code -log(df / collectionSize)} weights over the tokens of the first string.
 *
 * <p>For each token of the first string:
 * <ul>
 *   <li>if the second string contains the same token, the token's weight is added;</li>
 *   <li>otherwise, if some token of the second string scores at least
 *       {@code tokenMatchThreshold} against it under the inner {@code tokenDistance},
 *       a weight based on the token's <em>neighborhood</em> document frequency is added;</li>
 *   <li>otherwise the token's weight, scaled by {@code mismatchFactor}, is subtracted.</li>
 * </ul>
 *
 * <p>The neighborhood map is built lazily on the first call to
 * {@link #score(StringWrapper, StringWrapper)} and then cached.
 * NOTE(review): the cache is not refreshed if document frequencies change after it
 * was built (e.g. by further training in the superclass) - confirm whether that can happen.
 */
public class SoftTokenFelligiSunter extends AbstractStatisticalTokenDistance {
    private static final StringDistance DEFAULT_TOKEN_DISTANCE = new JaroWinkler();

    /** Multiplier applied to the weight of a token that has no exact or soft match. */
    private double mismatchFactor;
    /** Inner distance used to compare individual token values. */
    private StringDistance tokenDistance;
    /** Minimum inner-distance score for two tokens to count as a soft match. */
    private double tokenMatchThreshold;
    /** True once {@link #neighborMap} has been built for the current threshold. */
    private boolean tokenDistancesComputed;
    /** Maps a token value to its soft-matching neighbors, ordered by descending score. */
    private Map<String, Set<TokenNeighbor>> neighborMap;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/secondstring-1.0.0.jar:com/wcohen/ss/SoftTokenFelligiSunter$TokenNeighbor.class */
    /**
     * A token that is similar to some other token, together with the similarity
     * score and the neighbor's own document frequency.
     *
     * <p>The natural ordering is by descending score, with ties broken by token value
     * so that distinct neighbors with the same score can coexist in a sorted set.
     */
    public class TokenNeighbor implements Comparable<TokenNeighbor> {
        public String tokVal;
        public int freq;
        public double score;

        /**
         * @param str token value of the neighbor
         * @param d   similarity score between the neighbor and the token it is attached to
         */
        public TokenNeighbor(String str, double d) {
            this.tokVal = str;
            this.score = d;
            this.freq = SoftTokenFelligiSunter.this.getDocumentFrequency(SoftTokenFelligiSunter.this.tokenizer.intern(str));
        }

        /**
         * Orders neighbors by descending score, then by ascending token value.
         *
         * <p>The secondary key matters: without it two different tokens with the same
         * score compare as equal and a {@link TreeSet} keeps only one of them.
         */
        @Override // java.lang.Comparable
        public int compareTo(TokenNeighbor tokenNeighbor) {
            // Arguments are swapped on purpose: higher scores sort first.
            int byScore = Double.compare(tokenNeighbor.score, this.score);
            if (byScore != 0) {
                return byScore;
            }
            return this.tokVal.compareTo(tokenNeighbor.tokVal);
        }

        /** Two neighbors are equal when both token value and score are equal. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof TokenNeighbor)) {
                return false;
            }
            TokenNeighbor other = (TokenNeighbor) obj;
            return Double.compare(this.score, other.score) == 0 && this.tokVal.equals(other.tokVal);
        }

        /** Hash of the token value only; consistent with {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            return this.tokVal.hashCode();
        }
    }

    /**
     * @param tokenizer      tokenizer passed to the superclass
     * @param stringDistance inner distance used to compare token values
     * @param d              token match threshold
     * @param d2             mismatch factor
     */
    public SoftTokenFelligiSunter(Tokenizer tokenizer, StringDistance stringDistance, double d, double d2) {
        super(tokenizer);
        this.tokenDistancesComputed = false;
        this.tokenDistance = stringDistance;
        this.tokenMatchThreshold = d;
        this.mismatchFactor = d2;
    }

    /** Uses the default simple tokenizer, Jaro-Winkler, threshold 0.9 and mismatch factor 0.5. */
    public SoftTokenFelligiSunter() {
        this(SimpleTokenizer.defaultTokenizer(), DEFAULT_TOKEN_DISTANCE, 0.9d, 0.5d);
    }

    /** Sets the multiplier applied to the weight of unmatched tokens. */
    public void setMismatchFactor(double d) {
        this.mismatchFactor = d;
    }

    /**
     * Boxed overload of {@link #setMismatchFactor(double)}.
     *
     * @throws NullPointerException if {@code d} is null
     */
    public void setMismatchFactor(Double d) {
        setMismatchFactor(d.doubleValue());
    }

    /**
     * Sets the minimum inner-distance score for a soft token match.
     *
     * <p>The cached neighbor map was filtered with the previous threshold, so it is
     * invalidated when the threshold changes and rebuilt on the next call to
     * {@link #score(StringWrapper, StringWrapper)}.
     */
    public void setTokenMatchThreshold(double d) {
        if (d != this.tokenMatchThreshold) {
            this.tokenDistancesComputed = false;
        }
        this.tokenMatchThreshold = d;
    }

    /**
     * Boxed overload of {@link #setTokenMatchThreshold(double)}.
     *
     * @throws NullPointerException if {@code d} is null
     */
    public void setTokenMatchThreshold(Double d) {
        setTokenMatchThreshold(d.doubleValue());
    }

    /**
     * Scores the first string against the second; see the class description for the rule.
     *
     * @param stringWrapper  first string; must be a {@code BagOfTokens}
     * @param stringWrapper2 second string; must be a {@code BagOfTokens}
     * @return the accumulated score
     * @throws ClassCastException if either argument is not a {@code BagOfTokens}
     */
    @Override // com.wcohen.ss.AbstractStringDistance, com.wcohen.ss.api.StringDistance
    public double score(StringWrapper stringWrapper, StringWrapper stringWrapper2) {
        computeTokenDistances();
        BagOfTokens sBag = (BagOfTokens) stringWrapper;
        BagOfTokens tBag = (BagOfTokens) stringWrapper2;
        double sim = 0.0d;
        for (Iterator<Token> sIt = sBag.tokenIterator(); sIt.hasNext(); ) {
            Token tok = sIt.next();
            double df = getDocumentFrequency(tok);
            if (tBag.contains(tok)) {
                sim += negLogRelativeFrequency(df);
                continue;
            }
            // Look for the best soft match at or above the threshold; on ties the
            // later token wins, which only affects which token is considered matched.
            boolean matched = false;
            double bestScore = this.tokenMatchThreshold;
            for (Iterator<Token> tIt = tBag.tokenIterator(); tIt.hasNext(); ) {
                Token candidate = tIt.next();
                double candidateScore = this.tokenDistance.score(tok.getValue(), candidate.getValue());
                if (candidateScore >= bestScore) {
                    matched = true;
                    bestScore = candidateScore;
                }
            }
            if (matched) {
                sim += negLogRelativeFrequency(neighborhoodDocumentFrequency(tok, bestScore));
            } else {
                sim -= negLogRelativeFrequency(df) * this.mismatchFactor;
            }
        }
        return sim;
    }

    /**
     * Returns {@code -log(df / collectionSize)}.
     *
     * <p>Taking the frequency as a {@code double} guarantees floating-point division
     * even when the caller has an {@code int} frequency and the collection size is integral.
     */
    private double negLogRelativeFrequency(double df) {
        return -Math.log(df / this.collectionSize);
    }

    /** Tokenizes {@code str} and wraps the tokens in a {@code BagOfTokens}. */
    @Override // com.wcohen.ss.AbstractStringDistance, com.wcohen.ss.api.StringDistance
    public StringWrapper prepare(String str) {
        return new BagOfTokens(str, this.tokenizer.tokenize(str));
    }

    /**
     * Lists the tokens of the first string that also occur in the second, with the
     * weight the second bag reports for each, followed by the overall score.
     */
    @Override // com.wcohen.ss.AbstractStringDistance, com.wcohen.ss.api.StringDistance
    public String explainScore(StringWrapper stringWrapper, StringWrapper stringWrapper2) {
        BagOfTokens sBag = (BagOfTokens) stringWrapper;
        BagOfTokens tBag = (BagOfTokens) stringWrapper2;
        PrintfFormat fmt = new PrintfFormat("%.3f");
        StringBuilder out = new StringBuilder("Common tokens: ");
        for (Iterator<Token> it = sBag.tokenIterator(); it.hasNext(); ) {
            Token tok = it.next();
            if (tBag.contains(tok)) {
                out.append(" ").append(tok.getValue()).append(": ");
                out.append(fmt.sprintf(tBag.getWeight(tok)));
            }
        }
        out.append("\nscore = ").append(score(stringWrapper, stringWrapper2));
        return out.toString();
    }

    @Override
    public String toString() {
        return "[SoftTokenFelligiSunter]";
    }

    /**
     * Builds {@link #neighborMap} if it has not been built yet.
     *
     * <p>Every token value known to the document-frequency table is fed to a
     * {@link ClusterNGramBlocker}; each pair the blocker returns is scored with the
     * inner token distance and recorded as a neighbor when it reaches the threshold.
     */
    private void computeTokenDistances() {
        if (this.tokenDistancesComputed) {
            return;
        }
        this.neighborMap = new HashMap<String, Set<TokenNeighbor>>();
        MatchData tokenData = new MatchData();
        for (Token token : this.documentFrequency.keySet()) {
            tokenData.addInstance("tokens", token.getValue(), token.getValue());
        }
        ClusterNGramBlocker blocker = new ClusterNGramBlocker();
        blocker.block(tokenData);
        int pairCount = blocker.size();
        for (int i = 0; i < pairCount; i++) {
            Blocker.Pair pair = blocker.getPair(i);
            String a = pair.getA().unwrap();
            String b = pair.getB().unwrap();
            double pairScore = this.tokenDistance.score(a, b);
            if (pairScore >= this.tokenMatchThreshold) {
                addNeighbor(a, b, pairScore);
            }
        }
        this.tokenDistancesComputed = true;
    }

    /** Records {@code str2} as a neighbor of {@code str} with similarity {@code d}. */
    private void addNeighbor(String str, String str2, double d) {
        Set<TokenNeighbor> neighbors = this.neighborMap.get(str);
        if (neighbors == null) {
            // TreeSet keeps neighbors in TokenNeighbor order (descending score),
            // which neighborhoodDocumentFrequency relies on for its early exit.
            neighbors = new TreeSet<TokenNeighbor>();
            this.neighborMap.put(str, neighbors);
        }
        neighbors.add(new TokenNeighbor(str2, d));
    }

    /**
     * Returns the document frequency of {@code token} plus the frequencies of all
     * recorded neighbors whose score is at least {@code d}.
     */
    private int neighborhoodDocumentFrequency(Token token, double d) {
        int total = getDocumentFrequency(token);
        Set<TokenNeighbor> neighbors = this.neighborMap.get(token.getValue());
        if (neighbors == null) {
            return total;
        }
        for (TokenNeighbor neighbor : neighbors) {
            if (neighbor.score < d) {
                // Neighbors are sorted by descending score; nothing further qualifies.
                break;
            }
            total += neighbor.freq;
        }
        return total;
    }

    public static void main(String[] strArr) {
        doMain(new SoftTokenFelligiSunter(), strArr);
    }
}
