package eu.dnetlib.data.collector.plugins.filesystem;

import java.io.File;
import java.io.FileInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.stream.Stream;

import eu.dnetlib.data.collector.plugins.oai.engine.XmlCleaner;
import eu.dnetlib.enabling.tools.DnetStreamSupport;
import eu.dnetlib.rmi.data.CollectorServiceException;
import eu.dnetlib.rmi.data.InterfaceDescriptor;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Iterable over the textual content of the files found under a base directory of the local filesystem.
 * <p>
 * The base directory is taken from the path component of the base URL of the given {@link InterfaceDescriptor}.
 * Each call to {@link #iterator()} creates a new {@link FileSystemIterator} over that directory and lazily maps
 * every file name it yields to the (cleaned) content of the file.
 *
 * @author Sandro, Michele, Andrea
 */
public class FilesystemIterable implements Iterable<String> {

	/**
	 * The Constant log.
	 */
	private static final Log log = LogFactory.getLog(FilesystemIterable.class);

	/**
	 * Encoding used to decode the files. It is stated explicitly so that the result does not depend on the platform
	 * default charset of the JVM running the collector.
	 */
	private static final String FILE_ENCODING = "UTF-8";

	/**
	 * Byte order mark (U+FEFF) that may appear as first character of a decoded file.
	 */
	private static final String BOM = "\uFEFF";

	/**
	 * The base dir.
	 */
	private final File baseDir;

	/**
	 * Value of the "extensions" parameter of the descriptor; handed over as-is to the {@link FileSystemIterator}.
	 */
	private final String extension;

	/**
	 * Instantiates a new filesystem iterable.
	 *
	 * @param descriptor the descriptor providing the base URL and the "extensions" parameter
	 * @throws CollectorServiceException if the base URL is malformed or its path does not exist on the filesystem
	 */
	public FilesystemIterable(final InterfaceDescriptor descriptor) throws CollectorServiceException {
		final URL basePath;
		try {
			basePath = new URL(descriptor.getBaseUrl());
		} catch (MalformedURLException e) {
			throw new CollectorServiceException("Filesystem collector failed! ", e);
		}

		// Only the path component of the URL is used to locate the directory.
		this.baseDir = new File(basePath.getPath());
		if (!baseDir.exists()) { throw new CollectorServiceException(String.format("The base URL %s, does not exist", basePath.getPath())); }
		this.extension = descriptor.getParams().get("extensions");
	}

	/**
	 * {@inheritDoc}
	 * <p>
	 * Files are read lazily, one at a time, while the returned iterator is consumed. A file that cannot be read or
	 * cleaned produces an empty string instead of interrupting the iteration.
	 *
	 * @see Iterable#iterator()
	 */
	@Override
	public Iterator<String> iterator() {
		final FileSystemIterator fsi = new FileSystemIterator(baseDir.getAbsolutePath(), extension);
		final Stream<String> fileNames = DnetStreamSupport.generateStreamFromIterator(fsi);

		return fileNames.map(this::readRecord).iterator();
	}

	/**
	 * Reads the given file as UTF-8 text, strips a leading byte order mark (if any) and passes the result through
	 * {@link XmlCleaner#cleanAllEntities(String)}.
	 *
	 * @param inputFileName the path of the file to read
	 * @return the cleaned content of the file, or an empty string if the file could not be processed
	 */
	private String readRecord(final String inputFileName) {
		try (FileInputStream fileInputStream = new FileInputStream(inputFileName)) {
			final String content = IOUtils.toString(fileInputStream, FILE_ENCODING);
			return XmlCleaner.cleanAllEntities(content.startsWith(BOM) ? content.substring(1) : content);
		} catch (Exception e) {
			// Deliberately best-effort: a single broken file must not stop the whole collection.
			// The exception is logged so that the cause of the failure is not lost.
			log.error("Unable to read " + inputFileName, e);
			return "";
		}
	}
}
