package eu.dnetlib.data.collector.plugins.filesystem;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.Node;
import org.dom4j.QName;
import org.dom4j.io.SAXReader;
import org.json.JSONObject;
import org.json.XML;

import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;

import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;

/**
 * Iterable over the files found below a base directory of the local filesystem. Each file is read and returned as a
 * String containing an XML record.
 * <ul>
 * <li>XML files are passed through {@link XmlCleaner#cleanAllEntities(String)} and, optionally, enriched with a
 * {@code dri:objIdentifier} derived from the file name.</li>
 * <li>JSON files are wrapped into a {@code record} object (with an optional {@code header}) and serialized to XML.</li>
 * </ul>
 * Files that cannot be read or processed are logged and yield an empty string instead of interrupting the iteration.
 *
 * @author Sandro, Michele, Andrea
 */
public class FilesystemIterable implements Iterable<String> {

	/**
	 * The Constant log.
	 */
	private static final Log log = LogFactory.getLog(FilesystemIterable.class);

	/**
	 * Encoding used to decode the input files. It is set explicitly so that the result does not depend on the platform
	 * default charset of the JVM running the collector.
	 */
	private static final String FILE_ENCODING = "UTF-8";

	/**
	 * Byte order mark, as it appears once a UTF-8 file starting with a BOM has been decoded.
	 */
	private static final String BOM = "\uFEFF";

	/**
	 * The base dir.
	 */
	private File baseDir;

	/**
	 * The extensions, as found in the "extensions" parameter of the interface descriptor. The value is handed over
	 * unchanged to the FileSystemIterator.
	 */
	private String extensions;

	/**
	 * File format (json / xml)
	 **/
	private String fileFormat = "xml";

	private final List<String> supportedFormats = Lists.newArrayList("xml", "json");

	/**
	 * When true, the base name of each file is used as object identifier of the generated record.
	 */
	private boolean setObjIdentifierFromFileName = false;

	/**
	 * The from date, handed over unchanged to the FileSystemIterator.
	 */
	private String fromDate;

	/**
	 * Instantiates a new filesystem iterable.
	 *
	 * @param descriptor
	 *            the descriptor: its base URL identifies the directory to read, its parameters may contain "extensions",
	 *            "fileFormat" (xml or json, default xml) and "setObjIdentifierFromFileName" (default false)
	 * @param fromDate
	 *            the from date, passed as it is to the underlying FileSystemIterator
	 * @throws CollectorServiceException
	 *             if the base URL is malformed, the base directory does not exist or the file format is not supported
	 */
	public FilesystemIterable(final InterfaceDescriptor descriptor, final String fromDate) throws CollectorServiceException {
		try {
			final String baseUrl = descriptor.getBaseUrl();
			final URL basePath = new URL(baseUrl);
			this.baseDir = new File(basePath.getPath());
			if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base URL %s, does not exist", basePath.getPath())); }
			this.extensions = descriptor.getParams().get("extensions");
			if (descriptor.getParams().containsKey("fileFormat")) {
				fileFormat = descriptor.getParams().get("fileFormat");
			}
			if (!supportedFormats.contains(fileFormat)) {
				throw new CollectorServiceException("File format " + fileFormat + " not supported. Supported formats are: " + StringUtils
					.join(supportedFormats, ','));
			}
			if (descriptor.getParams().containsKey("setObjIdentifierFromFileName")) {
				setObjIdentifierFromFileName = Boolean.parseBoolean(descriptor.getParams().get("setObjIdentifierFromFileName"));
			}
			this.fromDate = fromDate;
		} catch (final MalformedURLException e) {
			throw new CollectorServiceException("Filesystem collector failed! ", e);
		}
	}

	/**
	 * {@inheritDoc}
	 *
	 * Every call creates a new FileSystemIterator on the base directory and lazily transforms each file name it returns
	 * into the corresponding XML record.
	 *
	 * @see java.lang.Iterable#iterator()
	 */
	@Override
	public Iterator<String> iterator() {
		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extensions, fromDate);
		return Iterators.transform(fsi, this::toRecord);
	}

	/**
	 * Reads the given file and converts its content into an XML record.
	 *
	 * @param inputFileName
	 *            the path of the file to process
	 * @return the XML record, or an empty string if the file cannot be read or processed (the error is logged)
	 */
	private String toRecord(final String inputFileName) {
		try {
			// The BOM is removed for both formats: org.json rejects a text that does not start with '{'.
			final String content = stripBom(readFile(inputFileName));
			if (fileFormat.equalsIgnoreCase("json")) { return jsonToRecord(content, inputFileName); }

			final String cleanedXML = XmlCleaner.cleanAllEntities(content);
			if (setObjIdentifierFromFileName) {
				return addObjIdentifier(cleanedXML, FilenameUtils.getBaseName(inputFileName));
			}
			return cleanedXML;
		} catch (final DocumentException e) {
			log.error("Cannot process XML to set the objIdentifier " + inputFileName, e);
			return "";
		} catch (final Exception e) {
			// Deliberately broad: a single broken file (I/O error, invalid JSON, ...) must not stop the collection.
			log.error("Unable to read " + inputFileName, e);
			return "";
		}
	}

	/**
	 * Reads the whole content of a file as a UTF-8 string. The stream is always closed.
	 *
	 * @param fileName
	 *            the path of the file
	 * @return the content of the file
	 * @throws IOException
	 *             if the file cannot be opened, read or closed
	 */
	private static String readFile(final String fileName) throws IOException {
		try (FileInputStream in = new FileInputStream(fileName)) {
			return IOUtils.toString(in, FILE_ENCODING);
		}
	}

	/**
	 * Removes the leading byte order mark, if any.
	 *
	 * @param s
	 *            the text
	 * @return the text without the leading BOM
	 */
	private static String stripBom(final String s) {
		return s.startsWith(BOM) ? s.substring(BOM.length()) : s;
	}

	/**
	 * Wraps a JSON document into a record (optional header with the objIdentifier + metadata) and serializes it to XML.
	 *
	 * @param jsonText
	 *            the JSON document
	 * @param inputFileName
	 *            the path of the file the JSON comes from, used to compute the objIdentifier when required
	 * @return the XML serialization of the record, rooted at the element "record"
	 */
	private String jsonToRecord(final String jsonText, final String inputFileName) {
		final JSONObject json = new JSONObject(jsonText);
		final JSONObject obj = new JSONObject();
		if (setObjIdentifierFromFileName) {
			obj.put("header", new JSONObject().put("objIdentifier", FilenameUtils.getBaseName(inputFileName)));
		}
		obj.put("metadata", json);
		// Avoid serializing the whole object when the debug level is disabled.
		if (log.isDebugEnabled()) {
			log.debug(obj.toString());
		}
		return XML.toString(obj, "record");
	}

	/**
	 * Sets the dri:objIdentifier of an XML record.
	 * <ul>
	 * <li>If the document has no element with local name "header", the original root is wrapped into a new
	 * record/metadata structure with a new header containing the identifier.</li>
	 * <li>Otherwise the objIdentifier child of the first header found is created or overwritten.</li>
	 * </ul>
	 *
	 * @param xml
	 *            the XML document
	 * @param objidentifier
	 *            the identifier to set
	 * @return the XML of the updated document
	 * @throws DocumentException
	 *             if the given XML cannot be parsed
	 */
	private String addObjIdentifier(final String xml, final String objidentifier) throws DocumentException {
		final Document doc = DocumentHelper.parseText(xml);

		final Namespace driNs = new Namespace("dri", "http://www.driver-repository.eu/namespace/dri");

		final Node headerNode = doc.selectSingleNode("//*[local-name()='header']");
		if (headerNode == null) {
			final Element newRoot = DocumentHelper.createElement("record");
			newRoot.addElement("header").addElement(new QName("objIdentifier", driNs)).setText(objidentifier);
			newRoot.addElement("metadata").add(doc.getRootElement().detach());
			return DocumentHelper.createDocument(newRoot).asXML();
		} else {
			final Node node = headerNode.selectSingleNode("./*[local-name()='objIdentifier']");
			if (node == null) {
				((Element) headerNode).addElement(new QName("objIdentifier", driNs)).setText(objidentifier);
			} else {
				node.setText(objidentifier);
			}
			return doc.asXML();
		}
	}
}
