package eu.dnetlib.data.collective.transformation.engine.functions;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:WEB-INF/lib/unibi-data-collective-transformation-common-2.2.0.jar:eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.class */
/**
 * Transformation function that extracts identifiers from the text content of XML nodes.
 *
 * <p>The nodes are selected with a list of XPath expressions; every substring of their text
 * that matches a regular expression is collected (duplicates removed) and returned as DOM
 * nodes, see {@link #extract(List, Node, String, Document, XPath)}.
 */
public class IdentifierExtract extends AbstractTransformationFunction {
    public static final Log log = LogFactory.getLog(IdentifierExtract.class);

    // Parameter-name constants. They are not referenced inside this class;
    // presumably callers use them as keys when configuring the function (TODO confirm).
    public static final String paramXpathExprJson = "xpathExprJson";
    public static final String paramXpathExprInSource = "xpathExprInputSource";
    public static final String paramRegExpr = "regExpr";

    /**
     * No-op implementation of the generic entry point; the actual work is done by
     * {@link #extract(List, Node, String, Document, XPath)}.
     *
     * @return always {@code null}
     */
    @Override // eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * Evaluates each XPath expression against {@code node}, scans the text content of every
     * selected node for matches of {@code str}, and returns the distinct matches as DOM nodes.
     *
     * @param list     XPath expressions, each expected to select a node-set
     * @param node     context node the expressions are evaluated against
     * @param str      regular expression; every match (whole match, not a group) is an identifier
     * @param document owner document used to create the result nodes
     * @param xPath    XPath evaluator
     * @return a node list holding a single {@code root} element with one {@code value} child
     *         per distinct identifier, in order of first occurrence
     * @throws ProcessingException if an XPath expression cannot be evaluated
     * @throws java.util.regex.PatternSyntaxException if {@code str} is not a valid regular expression
     */
    public NodeList extract(List<String> list, Node node, String str, Document document, XPath xPath) throws ProcessingException {
        log.debug("xpathExprList: " + list);
        log.debug("regExpr: " + str);
        // Insertion-ordered set: removes duplicates while keeping the output order deterministic.
        Set<String> identifiers = new LinkedHashSet<String>();
        Pattern pattern = Pattern.compile(str);
        try {
            for (String text : extractText(list, node, xPath)) {
                log.debug("text as input : " + text);
                Matcher matcher = pattern.matcher(text);
                while (matcher.find()) {
                    String identifier = matcher.group();
                    log.debug("extracted identifier: " + identifier);
                    identifiers.add(identifier);
                }
            }
        } catch (XPathExpressionException e) {
            // The cause is preserved in the wrapper; no separate stack-trace dump to stderr.
            throw new ProcessingException(e);
        }
        return toNodeList(identifiers, document);
    }

    /**
     * Wraps the given values into DOM nodes: a document fragment containing one {@code root}
     * element, which in turn holds one {@code value} element per entry of {@code set}.
     *
     * @param set      values to wrap, emitted in the set's iteration order
     * @param document owner document used as node factory
     * @return the child nodes of the fragment, i.e. a list containing the {@code root} element
     */
    private NodeList toNodeList(Set<String> set, Document document) {
        DocumentFragment fragment = document.createDocumentFragment();
        Element root = document.createElement("root");
        fragment.appendChild(root);
        for (String value : set) {
            Element valueElement = document.createElement("value");
            valueElement.setTextContent(value);
            root.appendChild(valueElement);
        }
        return fragment.getChildNodes();
    }

    /**
     * Evaluates every XPath expression as a node-set against {@code node} and collects the
     * text content of all selected nodes.
     *
     * @param list  XPath expressions to evaluate
     * @param node  context node
     * @param xPath XPath evaluator
     * @return text content of the selected nodes, in evaluation order; nodes without text
     *         content are skipped
     * @throws XPathExpressionException if an expression cannot be evaluated
     */
    private List<String> extractText(List<String> list, Node node, XPath xPath) throws XPathExpressionException {
        List<String> texts = new ArrayList<String>();
        for (String expression : list) {
            NodeList selected = (NodeList) xPath.evaluate(expression, node, XPathConstants.NODESET);
            int length = selected.getLength();
            log.debug("extract text: nodelist length: " + length);
            for (int i = 0; i < length; i++) {
                // getTextContent() is null for some node types (e.g. Document);
                // a null would make Pattern.matcher() throw a NullPointerException.
                String text = selected.item(i).getTextContent();
                if (text != null) {
                    texts.add(text);
                }
            }
        }
        return texts;
    }
}
