/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.pace.clustering;

import eu.dnetlib.pace.clustering.AbstractClusteringFunction;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Clustering function that emits character n-grams taken from the beginning of
 * each token of the input string.
 *
 * <p>Four integer settings are looked up by name through the inherited
 * {@code param(...)} accessor: {@code ngramLen}, {@code max},
 * {@code maxPerToken} and {@code minNgramLen}. Their meaning is described on
 * {@link #getNgrams(String, int, int, int, int)}.
 */
public class Ngrams extends AbstractClusteringFunction {

    /**
     * Creates the function, handing the parameter map unchanged to the superclass.
     *
     * @param params configuration values keyed by parameter name
     */
    public Ngrams(Map<String, Integer> params) {
        super(params);
    }

    /**
     * Extracts the n-grams of {@code s} using the configured {@code ngramLen},
     * {@code max}, {@code maxPerToken} and {@code minNgramLen} parameters.
     *
     * @param s the string to split into n-grams
     * @return the n-grams produced by {@link #getNgrams(String, int, int, int, int)}
     */
    @Override
    protected Collection<String> doApply(String s) {
        return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen"));
    }

    /**
     * Collects n-grams from every token of {@code s}.
     *
     * <p>The input is split with a default {@link StringTokenizer} (delimiters:
     * space, tab, newline, carriage return, form feed). For each token, windows of
     * {@code ngramLen} characters are taken at offsets 0, 1, 2, ... until either
     * {@code maxPerToken} windows have been taken or the window would run past the
     * end of the token. A token shorter than {@code ngramLen} therefore contributes
     * nothing. Each window is trimmed and kept only if at least
     * {@code minNgramLen} characters remain.
     *
     * @param s           the string to process; a {@code null} value makes the
     *                    {@code StringTokenizer} constructor throw
     *                    {@code NullPointerException}
     * @param ngramLen    number of characters in each window before trimming
     * @param max         upper bound on the size of the result; the method returns
     *                    as soon as the result holds this many entries
     * @param maxPerToken maximum number of windows examined per token
     * @param minNgramLen minimum length a trimmed window must have to be kept
     * @return the distinct n-grams in the order they were first encountered
     */
    protected Collection<String> getNgrams(String s, int ngramLen, int max, int maxPerToken, int minNgramLen) {
        final Collection<String> ngrams = new LinkedHashSet<String>();
        final StringTokenizer tokens = new StringTokenizer(s);

        // StringTokenizer never yields an empty token, so no emptiness check is needed.
        while (tokens.hasMoreTokens()) {
            final String token = tokens.nextToken();

            // The guard keeps the window inside the token, so the token can be
            // sliced directly; no padding copy is required.
            for (int start = 0; start < maxPerToken && ngramLen + start <= token.length(); ++start) {
                // trim() can still shorten the window: control characters other than
                // the tokenizer delimiters survive tokenization but are stripped here.
                final String ngram = token.substring(start, ngramLen + start).trim();

                if (ngrams.size() >= max) {
                    return ngrams;
                }
                if (ngram.length() >= minNgramLen) {
                    ngrams.add(ngram);
                }
            }
        }
        return ngrams;
    }
}

