package eu.dnetlib.data.mapreduce.hbase.index;

import java.io.IOException;
import java.util.List;
import java.util.Map.Entry;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;

import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory;
import eu.dnetlib.functionality.index.solr.feed.SolrServerPool;
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory;
import eu.dnetlib.miscutils.datetime.HumanTime;
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;

public class IndexFeedMapper extends Mapper<Text, Text, Text, Text> {

	private InputDocumentFactory documentFactory;

	private SolrServerPool serverPool;

	private String version;

	private String dsId;

	private int shutdownWaitTime = 10000;

	private int bufferFlushThreshold = 100;

	private ApplyXslt dmfToRecord;

	private List<SolrInputDocument> buffer;

	private boolean simulation = false;

	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {

		logConfiguration(context.getConfiguration());

		serverPool = new SolrServerPool(context.getConfiguration().get(JobParams.INDEX_SOLR_URL_LOCAL), context.getConfiguration().get(
				JobParams.INDEX_SOLR_URL_LIST), context.getConfiguration().get(JobParams.INDEX_SOLR_URL_ZK), context.getConfiguration().get(
				JobParams.INDEX_SOLR_COLLECTION), Boolean.parseBoolean(context.getConfiguration().get(JobParams.INDEX_LOCAL_FEEDING)));

		dsId = context.getConfiguration().get(JobParams.INDEX_DSID);
		shutdownWaitTime = Integer.parseInt(context.getConfiguration().get(JobParams.INDEX_SHUTDOWN_WAIT));
		bufferFlushThreshold = Integer.parseInt(context.getConfiguration().get(JobParams.INDEX_BUFFER_FLUSH_TRESHOLD));
		documentFactory = new StreamingInputDocumentFactory();
		version = InputDocumentFactory.getParsedDateField(context.getConfiguration().get(JobParams.INDEX_FEED_TIME));
		buffer = Lists.newArrayList();
		simulation = Boolean.parseBoolean(context.getConfiguration().get(JobParams.INDEX_FEED_SIMULATION_MODE));

		final String xslt = new String(Base64.decodeBase64(context.getConfiguration().get(JobParams.INDEX_XSLT)));

		System.out.print("got xslt: \n" + xslt + "\ngot version: " + version + "\nsimulation: " + simulation + "\nbuffer size: " + bufferFlushThreshold
				+ "\n\n");

		dmfToRecord = new ApplyXslt(xslt);
	}

	@Override
	protected void map(final Text key, final Text value, final Context context) throws IOException, InterruptedException {

		String indexRecord = "";
		SolrInputDocument doc = null;

		// if (!value.toString().contains("oaf:person")) {
		// return;
		// }

		try {
			long start = System.currentTimeMillis();
			indexRecord = dmfToRecord.evaluate(value.toString());
			doc = documentFactory.parseDocument(version, indexRecord.replaceAll("&#", "&amp;#"), dsId);
			long stop = System.currentTimeMillis() - start;

			System.out.println("parse " + doc.getField("__indexrecordidentifier").getValue() + " : " + HumanTime.exactly(stop));

			if (!doc.isEmpty()) {

				buffer.add(doc);
				if (buffer.size() >= bufferFlushThreshold) {
					doAdd(buffer, context);
					// Thread.sleep(100);
				}
			} else {
				context.getCounter("index", "skipped records").increment(1);
			}
		} catch (Throwable e) {
			context.getCounter("index", e.getClass().toString()).increment(1);
			context.write(key, printRottenRecord(context.getTaskAttemptID().toString(), value, indexRecord, doc));
			e.printStackTrace(System.err);
		}
	}

	private void doAdd(final List<SolrInputDocument> buffer, final Context context) throws SolrServerException, IOException {
		if (!simulation) {
			long start = System.currentTimeMillis();
			UpdateResponse rsp = serverPool.addAll(buffer);
			long stop = System.currentTimeMillis() - start;
			System.out.println("feed time for " + buffer.size() + " records : " + HumanTime.exactly(stop) + "\n");
			context.getCounter("index", "status code: " + rsp.getStatus()).increment(buffer.size());
		}
		buffer.clear();
	}

	@Override
	protected void cleanup(Context context) throws IOException, InterruptedException {
		super.cleanup(context);
		try {
			if (!buffer.isEmpty()) {
				doAdd(buffer, context);
			}
			System.out.println("\nwaiting " + shutdownWaitTime + "ms before shutdown");
			Thread.sleep(shutdownWaitTime);
			serverPool.shutdownAll();
		} catch (SolrServerException e) {
			System.err.println("couldn't shutdown server " + e.getMessage());
		}
	}

	private Text printRottenRecord(final String taskid, final Text value, final String indexRecord, final SolrInputDocument doc) {
		return new Text("\n**********************************\n" + "task: " + taskid + "\n"
				+ check("original", value.toString() + check("indexRecord", indexRecord) + check("solrDoc", doc)));
	}

	private String check(final String label, final Object value) {
		if (value != null && !value.toString().isEmpty()) { return "\n " + label + ":\n" + value + "\n"; }
		return "\n";
	}

	private void logConfiguration(Configuration conf) {
		System.out.println("job configutation #################");
		for (Entry<String, String> e : conf) {
			System.out.println("'" + e.getKey() + "' : '" + e.getValue() + "'");
		}
		System.out.println("end of job configutation #################\n");
	}

}
