package eu.dnetlib.data.utility.cleaner;

import com.google.common.base.Joiner;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/* loaded from: input_file:eu/dnetlib/data/utility/cleaner/VocabularyRule.class */
public class VocabularyRule extends XPATHCleaningRule {
    private Set<String> vocabularies;
    private static final Log log = LogFactory.getLog(VocabularyRule.class);
    private Map<String, String> synonyms = Maps.newHashMap();
    private Set<String> validTerms = Sets.newHashSet();

    public VocabularyRule(Set<String> set, ISLookUpService iSLookUpService) throws CleanerException {
        this.vocabularies = set;
        loadSynonymsAndTerms(iSLookUpService);
    }

    @Override // eu.dnetlib.data.utility.cleaner.XPATHCleaningRule
    protected String calculateNewValue(String str) throws CleanerException {
        log.debug("calculating new value for: " + str);
        if (this.synonyms.isEmpty()) {
            log.warn("Vocabulary terms is void, vocabularies: " + this.vocabularies);
        }
        String str2 = null;
        if (this.synonyms.containsKey(str.toLowerCase())) {
            str2 = this.synonyms.get(str.toLowerCase());
        }
        if (str2 != null) {
            return str2;
        }
        log.debug("Synonym " + str + " not found in vocabulary");
        return str;
    }

    private void loadSynonymsAndTerms(ISLookUpService iSLookUpService) throws CleanerException {
        for (String str : this.vocabularies) {
            try {
                for (String str2 : iSLookUpService.quickSearchProfile("for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')//RESOURCE_PROFILE[.//VOCABULARY_NAME/@code='" + str + "']//TERM return ( concat($x/@code,'|-:-|', $x/@code), concat($x/@english_name,'|-:-|', $x/@code), concat($x/@native_name,'|-:-|', $x/@code), for $y in $x//SYNONYM return concat($y/@term,'|-:-|', $x/@code) )")) {
                    log.debug("SYNONYM : " + str2);
                    String[] split = str2.split("\\|-:-\\|");
                    if (split[0] != null && !split[0].isEmpty()) {
                        this.synonyms.put(split[0].toLowerCase(), split[1]);
                        this.validTerms.add(split[1].toLowerCase());
                    }
                }
                log.info("VOCABULARY " + str.trim() + " - terms size " + this.synonyms.size());
            } catch (Exception e) {
                throw new CleanerException("Error obtaining vocabulary " + str, e);
            }
        }
    }

    @Override // eu.dnetlib.data.utility.cleaner.XPATHCleaningRule
    protected Map<String, String> verifyValue(String str) throws CleanerException {
        if (this.synonyms.isEmpty()) {
            log.warn("Vocabulary terms is void, vocabularies: " + this.vocabularies);
        }
        if (this.validTerms.contains(str.toLowerCase())) {
            return null;
        }
        HashMap hashMap = new HashMap();
        hashMap.put("term", str);
        hashMap.put("vocabularies", this.vocabularies.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
        hashMap.put("xpath", getXpath());
        return hashMap;
    }

    public Map<String, String> getVocabularyTerms() {
        return this.synonyms;
    }

    public String toString() {
        return "VOCABULARIES: [" + Joiner.on(", ").join(this.vocabularies) + "]";
    }
}
