package eu.dnetlib.msro.workflows.nodes.datacite;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Queue;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.google.common.collect.Maps;

/**
 * The Class SplitterDatasetsIterator.
 */
public class SplitterDatasetsIterator {

	/** The Constant log. */
	private static final Log log = LogFactory.getLog(SplitterDatasetsIterator.class);

	/** The end queue. */
	public static String END_QUEUE = "END_QUEUE";

	/** The publications. */
	private Queue<String> publications;

	/** The input epr. */
	private Iterable<String> inputEPR;

	/** The root name. */
	private String rootName;

	/**
	 * Instantiates a new splitter datasets iterator.
	 *
	 * @param datasetsQueue
	 *            the datasets queue
	 * @param publicationsQueue
	 *            the publications queue
	 * @param inputEPR
	 *            the input epr
	 * @param rootName
	 *            the root name
	 */
	public SplitterDatasetsIterator(final Queue<String> publicationsQueue, final Iterable<String> inputEPR, final String rootName) {
		this.publications = publicationsQueue;
		this.inputEPR = inputEPR;
		this.rootName = rootName;

	}

	/**
	 * Populate queues.
	 */
	public void populateQueues() {

		if (this.inputEPR == null) return;
		for (String inputXML : inputEPR) {

			List<String> publication_extracted = extractByTag(new ByteArrayInputStream(inputXML.getBytes(StandardCharsets.UTF_8)), "publication", inputXML);

			if (publication_extracted != null) {
				publications.addAll(publication_extracted);

			}
		}
		publications.add(END_QUEUE);

	}

	/**
	 * Extract by tag.
	 *
	 * @param inputXML
	 *            the input xml
	 * @param tag
	 *            the tag
	 * @return the list
	 */
	private List<String> extractByTag(final InputStream inputXML, final String tag, final String input) {
		try {

			DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
			Document doc = dbf.newDocumentBuilder().parse(inputXML);

			XPath xpath = XPathFactory.newInstance().newXPath();

			Node rootNode = (Node) xpath.evaluate("//*[local-name()='" + this.rootName + "']", doc, XPathConstants.NODE);

			NamedNodeMap attributes = rootNode.getAttributes();

			Map<String, String> nameSpaces = Maps.newHashMap();

			for (int i = 0; i < attributes.getLength(); i++) {
				Node node = attributes.item(i);
				String name = node.getNodeName();
				if (name.startsWith("xmlns:")) {
					nameSpaces.put(StringUtils.substringAfter(name, "xmlns:"), node.getNodeValue());
				}

			}
			xpath = XPathFactory.newInstance().newXPath();
			NodeList nodes = (NodeList) xpath.evaluate("//*[local-name()='" + tag + "']/*[local-name()='record']", doc, XPathConstants.NODESET);

			if ((nodes != null) && (nodes.getLength() > 0)) {
				List<String> result = new ArrayList<String>();
				for (int i = 0; i < nodes.getLength(); i++) {
					Document currentDoc = dbf.newDocumentBuilder().newDocument();
					Node imported = currentDoc.importNode(nodes.item(i), true);
					for (String key : nameSpaces.keySet()) {
						Element element = (Element) imported;
						element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, "xmlns:" + key, nameSpaces.get(key));
					}
					Transformer transformer = TransformerFactory.newInstance().newTransformer();
					DOMSource mydoc = new DOMSource(imported);
					StringWriter writer = new StringWriter();
					transformer.transform(mydoc, new StreamResult(writer));
					String record = writer.toString();
					result.add(record);
				}
				return result;
			}
		} catch (Exception e) {
			log.error("Error on extracting " + tag, e);
			return null;
		}
		return null;
	}
}
