/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.pace.clustering;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import eu.dnetlib.pace.clustering.ClusteringFunction;
import eu.dnetlib.pace.common.AbstractPaceFunctions;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.gt.Author;
import eu.dnetlib.pace.model.gt.GTAuthor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;

public class PersonClustering
extends AbstractPaceFunctions
implements ClusteringFunction {
    private Map<String, Integer> params;
    private static final int MAX_TOKENS = 5;

    public PersonClustering(Map<String, Integer> params) {
        this.params = params;
    }

    @Override
    public Collection<String> apply(List<Field> fields) {
        HashSet hashes = Sets.newHashSet();
        for (Field f : fields) {
            GTAuthor gta = GTAuthor.fromDNGFJson(f.stringValue());
            Author a = gta.getAuthor();
            if (a.isWellFormed()) {
                hashes.add(this.firstLC(a.getFirstname()) + a.getSecondnames().toLowerCase());
                continue;
            }
            for (String token1 : this.tokens(a.getFullname())) {
                for (String token2 : this.tokens(a.getFullname())) {
                    if (token1.equals(token2)) continue;
                    hashes.add(this.firstLC(token1) + token2);
                }
            }
        }
        return hashes;
    }

    private String firstLC(String s) {
        return StringUtils.substring((String)s, (int)0, (int)1).toLowerCase();
    }

    private Iterable<String> tokens(String s) {
        return Iterables.limit((Iterable)Splitter.on((String)" ").omitEmptyStrings().trimResults().split((CharSequence)s), (int)5);
    }

    @Override
    public Map<String, Integer> getParams() {
        return this.params;
    }
}

