package com.rapidminer.operator.extraction.util;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.extraction.AttributeQueryMap;
import com.rapidminer.operator.extraction.ExtractionException;
import com.rapidminer.operator.extraction.RegexExtractor;
import com.rapidminer.operator.extraction.TextExtractor;
import com.rapidminer.operator.extraction.XPathExtractor;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.Parameters;
import com.rapidminer.parameter.UndefinedParameterError;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.PatternSyntaxException;
import org.hsqldb.Tokens;
import org.jaxen.JaxenException;

/* JADX WARN: Classes with same name are omitted:
  input_file:builds/deps.jar:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class
  input_file:builds/deps.jar:rapidMinerPluginText.jar:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class
  input_file:builds/deps.jar:tmp-src.zip:rapidMinerPluginText.jar:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class
  input_file:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class
  input_file:rapidMinerPluginText.jar:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class
  input_file:rapidMinerPluginText.jar:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class
 */
/* loaded from: input_file:tmp-src.zip:rapidMinerPluginText.jar:com/rapidminer/operator/extraction/util/FeatureExtractionUtil.class */
/**
 * Static helpers shared by feature-extraction operators: they create the
 * "attributes" and "namespaces" list parameters and turn the configured
 * values into an {@link AttributeQueryMap} of attributes and extractors.
 */
public class FeatureExtractionUtil {
    /** Key of the list parameter holding (attribute name, query) pairs. */
    public static final String PARAMETER_ATTRIBUTES = "attributes";

    /** Key of the list parameter holding (identifier, namespace) pairs. */
    public static final String PARAMETER_NAMESPACES = "namespaces";

    /**
     * Creates the list parameter in which attribute names are paired with
     * extraction queries.
     *
     * <p>NOTE(review): the help text of the inner parameter describes XPath
     * queries as {@code ?<xpath>}, while {@link #getExtractor(String, Map)}
     * selects the XPath extractor for queries starting with {@code '/'} --
     * confirm which of the two is intended.
     *
     * @return a new list parameter registered under {@link #PARAMETER_ATTRIBUTES}
     */
    public static ParameterType createQueryParameter() {
        return new ParameterTypeList(PARAMETER_ATTRIBUTES, "Specifies a list of attribute names and extraction queries. These queries can be XPath or a regular expression. If a regular expression is used, the query must have the following form: '<regex-expression> <replacement-pattern>', where the <replacement_pattern> states how a match is replaced to generate the final information. '$1' would yield the first matching group as result. A number sign in front of an attribute name marks the attribute as numeric. In these cases, the operator uses different heuristicts to parse a number from the extracted string. An ! in front of an attribute name marks it as binary. For both XPath and regex, only the first match is used.", new ParameterTypeString("query_expr", "query expression: either ?<xpath> where <xpath> is an XPath expression or <regex> <target> where <regex> is a regular expression and target is a string referencing the match, e.g. \"[0-9]+\\sEuro $1\"", false));
    }

    /**
     * Creates the list parameter in which identifiers are paired with the
     * namespaces they should be bound to in XPath queries.
     *
     * @return a new list parameter registered under {@link #PARAMETER_NAMESPACES}
     */
    public static ParameterType createNamespaceParameter() {
        return new ParameterTypeList(PARAMETER_NAMESPACES, "Specifies pairs of identifier and namespace for use in XPath queries. The namespace for (x)html is bound automatically to the identifier h.", new ParameterTypeString("name_space", "An id and the namespace to which it should be bound.", false));
    }

    /**
     * Reads the {@link #PARAMETER_NAMESPACES} parameter and converts it into
     * a map from identifier (first element of each entry) to namespace
     * (second element of each entry).
     *
     * @param parameters the parameters to read from
     * @return a new mutable map; empty when the parameter is undefined or
     *         yields no list
     */
    public static Map<String, String> getNamespaceMapping(Parameters parameters) {
        Map<String, String> mapping = new HashMap<String, String>();
        List<String[]> entries = readListParameter(parameters, PARAMETER_NAMESPACES);
        if (entries == null) {
            return mapping;
        }
        for (String[] entry : entries) {
            mapping.put(entry[0], entry[1]);
        }
        return mapping;
    }

    /**
     * Reads the {@link #PARAMETER_ATTRIBUTES} parameter and builds one
     * attribute plus extractor per entry.
     *
     * <p>The first character of the attribute name selects the attribute's
     * value type: {@code '#'} and {@code '!'} are stripped from the name and
     * select the type codes 4 and 6 respectively; every other name (including
     * an empty one) is used unchanged with type code 1.
     *
     * @param parameters the parameters to read from
     * @return the populated map; empty when the parameter is undefined or
     *         yields no list
     * @throws ExtractionException if a query cannot be turned into an extractor
     */
    public static AttributeQueryMap getAttributeQueryMap(Parameters parameters) throws ExtractionException {
        Map<String, String> namespaceMapping = getNamespaceMapping(parameters);
        AttributeQueryMap attributeQueryMap = new AttributeQueryMap();
        List<String[]> entries = readListParameter(parameters, PARAMETER_ATTRIBUTES);
        if (entries == null) {
            return attributeQueryMap;
        }
        for (String[] entry : entries) {
            Attribute attribute = createAttributeForName(entry[0]);
            TextExtractor extractor = getExtractor(entry[1], namespaceMapping);
            if (extractor != null) {
                attributeQueryMap.addQuery(attribute, extractor);
            }
        }
        return attributeQueryMap;
    }

    /**
     * Creates the extractor for a single query: an {@link XPathExtractor}
     * when the query starts with {@code '/'}, a {@link RegexExtractor}
     * otherwise (this includes the empty query).
     *
     * @param str the query expression
     * @param map namespace bindings handed to the XPath extractor
     * @return the extractor for the query
     * @throws ExtractionException wrapping a {@link UserError} when the XPath
     *         or the regular expression cannot be compiled
     */
    public static TextExtractor getExtractor(String str, Map<String, String> map) throws ExtractionException {
        // startsWith instead of charAt(0): an empty query must not fail with
        // StringIndexOutOfBoundsException before an extractor can validate it.
        if (str.startsWith("/")) {
            try {
                return new XPathExtractor(str, map);
            } catch (JaxenException e) {
                // NOTE(review): Tokens.PRECISION is an HSQLDB constant that looks
                // unrelated to user errors; presumably the decompiler substituted
                // it for a numeric error code of equal value -- confirm the code.
                throw new ExtractionException("", e, new UserError((Operator) null, Tokens.PRECISION, str, e));
            }
        }
        try {
            return new RegexExtractor(str);
        } catch (PatternSyntaxException e2) {
            throw new ExtractionException("", e2, new UserError((Operator) null, 206, str, e2));
        }
    }

    /**
     * Reads a list-valued parameter, treating an undefined parameter as absent.
     *
     * @return the parsed entries, or {@code null} when the parameter is undefined
     */
    private static List<String[]> readListParameter(Parameters parameters, String key) {
        try {
            return Parameters.transformString2List(parameters.getParameter(key));
        } catch (UndefinedParameterError ignored) {
            // Deliberate best effort: callers fall back to an empty result.
            return null;
        }
    }

    /**
     * Creates the attribute for a configured name, honouring the '#' and '!'
     * type prefixes.
     */
    private static Attribute createAttributeForName(String name) {
        // startsWith instead of charAt(0) so that an empty name falls through
        // to the default branch instead of throwing.
        if (name.startsWith("#")) {
            // Value type 4: presumably a real-valued numeric type -- TODO confirm
            // against the value type constants used by AttributeFactory.
            return AttributeFactory.createAttribute(name.substring(1), 4);
        }
        if (name.startsWith("!")) {
            // Value type 6: presumably a binominal type -- TODO confirm.
            Attribute binary = AttributeFactory.createAttribute(name.substring(1), 6);
            // Register "false" before "true"; the order is kept from the original.
            binary.getMapping().mapString("false");
            binary.getMapping().mapString("true");
            return binary;
        }
        // Value type 1: presumably a nominal type -- TODO confirm.
        return AttributeFactory.createAttribute(name, 1);
    }
}
