package eu.dnetlib.msro.workflows.nodes.objectstore;

import java.io.File;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
import eu.dnetlib.rmi.data.ObjectStoreFile;
import eu.dnetlib.rmi.data.Protocols;

/**
 * {@link IteratorRepository} that turns each input PDF file into the JSON form of an
 * {@link ObjectStoreFile} whose downloaded URL points below {@code http://arxiv.org/abs/}.
 *
 * <p>
 * The identifier appended to that URL is derived from the file name: the trailing {@code .pdf}
 * extension is removed and, when the name does not start with a digit, a {@code /} is inserted
 * right before its first digit (for example {@code abc123} becomes {@code abc/123}).
 * </p>
 */
public class ArxivIteratorRepository extends IteratorRepository {

	/** Extension stripped from the end of the file name before the identifier is built. */
	private static final String PDF_SUFFIX = ".pdf";

	/** Base of the URL stored as downloaded URL of every generated record. */
	private static final String ARXIV_ABS_URL = "http://arxiv.org/abs/";

	/** Matches one decimal digit; used to locate the first digit of a file name. Compiled once for all instances. */
	private static final Pattern FIRST_DIGIT = Pattern.compile("[0-9]");

	/**
	 * Creates the repository; all arguments are handed over unchanged to the superclass.
	 *
	 * @param input
	 *            the files to describe
	 * @param repositoryPrefix
	 *            prefix placed in front of the generated object identifiers
	 * @param oaiPrefix
	 *            prefix prepended to the identifier before it is hashed
	 */
	public ArxivIteratorRepository(final Iterable<File> input, final String repositoryPrefix, final String oaiPrefix) {
		super(input, repositoryPrefix, oaiPrefix);
	}

	/**
	 * Consumes the next input file and describes it as an {@link ObjectStoreFile}.
	 *
	 * <p>
	 * The object identifier has the form
	 * {@code repositoryPrefix::md5(oaiPrefix + identifier)::md5(downloadedURL)}.
	 * </p>
	 *
	 * @return the JSON serialization of the record, or {@code null} when the canonical path has no
	 *         file name component or when any exception is raised while building the record
	 */
	@Override
	String generateNextElement() {
		try {
			final String inputname = this.input.next().getCanonicalPath();
			final String baseName = extractFileName(inputname);
			if (baseName == null) {
				return null;
			}
			final String fileName = toArxivIdentifier(baseName);

			final ObjectStoreFile info = new ObjectStoreFile();
			info.setDownloadedURL(ARXIV_ABS_URL + fileName);
			info.setAccessProtocol(Protocols.None);
			// The prefix used to be hard-coded as "oai:arXiv.org:"; it is now supplied by the caller.
			final String value = this.oaiPrefix + fileName;
			final String resultID = this.repositoryPrefix + "::" + DnetXsltFunctions.md5(value);
			info.setObjectID(resultID + "::" + DnetXsltFunctions.md5(info.getDownloadedURL()));
			info.setMimeType("pdf");
			info.setURI(inputname);
			return info.toJSON();
		} catch (final Exception e) {
			// Best effort, kept from the original behaviour: every failure (including the ones raised by
			// input.next()) is reported to the caller as null instead of being propagated.
			return null;
		}
	}

	/**
	 * Returns the last component of a canonical path, using the separator of the running platform
	 * rather than a hard-coded {@code /}.
	 *
	 * @param canonicalPath
	 *            the path as returned by {@link File#getCanonicalPath()}
	 * @return the file name, or {@code null} when the path contains no separator or nothing follows
	 *         the last one (for example the file system root)
	 */
	private static String extractFileName(final String canonicalPath) {
		final int lastSeparator = canonicalPath.lastIndexOf(File.separatorChar);
		if (lastSeparator < 0 || lastSeparator == canonicalPath.length() - 1) {
			return null;
		}
		return canonicalPath.substring(lastSeparator + 1);
	}

	/**
	 * Builds the identifier from a file name.
	 *
	 * @param baseName
	 *            the file name without directories
	 * @return the name without its trailing {@code .pdf}, with a {@code /} inserted before the first
	 *         digit unless that digit is the first character or the name has no digit at all
	 */
	private static String toArxivIdentifier(final String baseName) {
		// Only the trailing extension is removed; a ".pdf" occurring elsewhere in the name is part of the name.
		final String stem = baseName.endsWith(PDF_SUFFIX) ? baseName.substring(0, baseName.length() - PDF_SUFFIX.length()) : baseName;
		final Matcher matcher = FIRST_DIGIT.matcher(stem);
		if (matcher.find() && matcher.start() != 0) {
			final int firstDigit = matcher.start();
			return stem.substring(0, firstDigit) + "/" + stem.substring(firstDigit);
		}
		return stem;
	}

}
