package eu.dnetlib.data.collector.plugins.ariadneplus.ehri;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;

import com.ximpleware.*;
import eu.dnetlib.data.collector.ThreadSafeIterator;
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Iterator over EHRI records. For every identifier supplied by the wrapped iterator it builds the
 * URL {@code baseUrl/identifier/suffix}, downloads its content as UTF-8 text and returns it after
 * removing the {@code xmlns} attribute from the root {@code ead} element.
 * <p>
 * Download failures are tolerated: a failed record is logged and skipped, until more than
 * {@link #MAX_FAILED} downloads have failed in total, at which point iteration is aborted.
 * </p>
 *
 * Created by Alessia Bardi on 03/05/2017.
 *
 * @author Alessia Bardi
 */
public class EHRIIterator extends ThreadSafeIterator {

	private static final Log log = LogFactory.getLog(EHRIIterator.class);

	/** Number of failed downloads tolerated before the iteration is aborted. */
	private static final int MAX_FAILED = 100;

	/** Source of the record identifiers to download. */
	private Iterator<String> identifiers;
	/** First part of every record URL (joined to the identifier with a slash). */
	private String baseUrl;
	/** Last part of every record URL (joined to the identifier with a slash). */
	private String suffix;
	/** Total number of failed downloads so far; never reset after a successful download. */
	private int failed = 0;


	/**
	 * Creates an iterator that downloads {@code baseUrl/identifier/suffix} for each identifier.
	 *
	 * @param idIterator iterator over the record identifiers
	 * @param baseUrl    URL prefix placed before the identifier
	 * @param suffix     URL part placed after the identifier
	 */
	public EHRIIterator(final Iterator<String> idIterator, final String baseUrl, final String suffix) {
		this.identifiers = idIterator;
		this.baseUrl = baseUrl;
		this.suffix = suffix;
	}

	/**
	 * @return {@code true} if the wrapped identifier iterator has more identifiers
	 */
	@Override
	public boolean doHasNext() {
		return identifiers.hasNext();
	}

	/**
	 * Downloads the record of the next identifier and strips the default namespace from it.
	 * <p>
	 * If the download fails with an {@link IOException} the failure is logged and counted, and the
	 * record of the following identifier is returned instead. If there is no further identifier an
	 * empty string is returned.
	 * </p>
	 *
	 * @return the downloaded record without the default namespace declaration on {@code ead}, or
	 *         the empty string if the last available record could not be downloaded
	 * @throws CollectorServiceRuntimeException if more than {@link #MAX_FAILED} downloads have
	 *                                          failed, or if the namespace removal fails
	 */
	@Override
	public String doNext() {
		final String target = baseUrl + "/" + identifiers.next() + "/" + suffix;
		log.debug("Getting " + target);
		try {
			final URL url = new URL(target);
			final String record = IOUtils.toString(url, "UTF-8");
			return removeDefaultEADNamespace(record);
		} catch (IOException e) {
			log.error("Unable to get " + target, e);
			failed++;
			if (failed > MAX_FAILED) {
				// Keep the last I/O failure as cause so the abort can be diagnosed from the stack trace.
				throw new CollectorServiceRuntimeException(
						"Could not download more than " + MAX_FAILED + " documents from EHRI. Stopping.", e);
			}
			// Skip the failed record and try the following one. The recursion goes through the
			// inherited hasNext()/next(); its depth is bounded because the MAX_FAILED check above
			// aborts once the failure budget is exhausted.
			if (this.hasNext()) {
				return this.next();
			}
			// Nothing left to try: callers receive an empty record rather than an exception.
			return "";
		}
	}

	/**
	 * Removes the {@code xmlns} attribute from the root {@code ead} element of the given document.
	 * The text is encoded and decoded explicitly as UTF-8 so that the result does not depend on the
	 * platform default charset.
	 *
	 * @param xml the XML record to process
	 * @return the same document without the {@code xmlns} attribute on {@code /ead}
	 * @throws CollectorServiceRuntimeException if the document cannot be parsed or modified
	 */
	protected String removeDefaultEADNamespace(final String xml) {
		try {
			final VTDGen vg = new VTDGen();
			vg.setDoc(xml.getBytes(StandardCharsets.UTF_8));
			// Namespace awareness is turned off; presumably so that xmlns can be selected as a
			// plain attribute by the XPath below. NOTE(review): confirm against the VTD-XML docs.
			vg.parse(false);
			final VTDNav vn = vg.getNav();
			final AutoPilot ap = new AutoPilot(vn);
			final XMLModifier xm = new XMLModifier(vn);
			ap.selectXPath("/ead/@xmlns");
			while (ap.evalXPath() != -1) {
				xm.remove();
			}
			final ByteArrayOutputStream baos = new ByteArrayOutputStream();
			xm.output(baos);
			return new String(baos.toByteArray(), StandardCharsets.UTF_8);
		} catch (Exception e) {
			log.error("Cannot remove default namespace from ead element: " + xml, e);
			throw new CollectorServiceRuntimeException("Cannot remove default namespace from ead element", e);
		}
	}

	/**
	 * @return the iterator providing the record identifiers
	 */
	public Iterator<String> getIdentifiers() {
		return identifiers;
	}

	/**
	 * @param identifiers the iterator providing the record identifiers
	 */
	public void setIdentifiers(final Iterator<String> identifiers) {
		this.identifiers = identifiers;
	}

	/**
	 * @return the URL prefix placed before each identifier
	 */
	public String getBaseUrl() {
		return baseUrl;
	}

	/**
	 * @param baseUrl the URL prefix placed before each identifier
	 */
	public void setBaseUrl(final String baseUrl) {
		this.baseUrl = baseUrl;
	}

	/**
	 * @return the URL part placed after each identifier
	 */
	public String getSuffix() {
		return suffix;
	}

	/**
	 * @param suffix the URL part placed after each identifier
	 */
	public void setSuffix(final String suffix) {
		this.suffix = suffix;
	}
}
