package eu.dnetlib.data.mapreduce.hbase.index;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import org.dom4j.DocumentException;

import com.google.common.collect.Lists;
import com.googlecode.protobuf.format.JsonFormat;

import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
import eu.dnetlib.data.transform.SolrProtoMapper;
import eu.dnetlib.miscutils.datetime.HumanTime;

/**
 * Table mapper that reads protobuf-encoded {@link DNGF} records from one HBase column family, converts them to
 * {@link SolrInputDocument}s through a {@link SolrProtoMapper} and feeds them in batches to a {@link CloudSolrServer}.
 *
 * <p>
 * Documents are accumulated in an in-memory buffer and sent to Solr once {@code bufferFlushThreshold} documents are
 * queued; whatever is left is sent in {@link #cleanup(Context)}. Records that cannot be converted or fed are written to
 * the mapper output (row key, diagnostic text) and accounted in job counters.
 * </p>
 */
public class DedupIndexFeedMapper extends TableMapper<Text, Text> {

	private static final Log log = LogFactory.getLog(DedupIndexFeedMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * Output pattern for the version field. {@code HH} (0-23) is required: with {@code hh} (1-12) every time after noon
	 * would be emitted as a morning hour. The trailing {@code 'Z'} is a literal, no time zone conversion takes place.
	 */
	private static final String OUT_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'";

	/** Accepted input patterns, tried in this order by {@link #getParsedDateField(String)}. */
	private static final List<String> DATE_FORMATS = Arrays.asList("yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy");

	/** Maximum number of attempts made to send one buffer to Solr before the task is failed. */
	private static final int MAX_RETRIES = 10;

	private CloudSolrServer solrServer;

	private String dsId;

	private String version;

	/** Milliseconds to wait in {@link #cleanup(Context)} before shutting the Solr client down. */
	private int shutdownWaitTime = 10000;

	/** Number of buffered documents that triggers a feed. */
	private int bufferFlushThreshold = 100;

	private List<SolrInputDocument> buffer;

	/** Milliseconds to wait between two consecutive attempts (client initialization and feed). */
	private final int backoffTimeMs = 5000;

	/** When true nothing is sent to Solr, the buffer is simply discarded at every flush. */
	private boolean simulation = false;

	private String entityType = null;

	private String actionset = null;

	private SolrProtoMapper mapper = null;

	/**
	 * Reads the job parameters, builds the {@link SolrProtoMapper} and connects to Solr. The connection is retried
	 * indefinitely, sleeping {@code backoffTimeMs} between attempts, until a ping returns status 0.
	 *
	 * @throws IllegalArgumentException
	 *             when the index field definition cannot be parsed
	 * @throws IllegalStateException
	 *             when the feed time cannot be parsed
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {

		final Configuration conf = context.getConfiguration();
		logConfiguration(conf);

		// fall back on the field defaults instead of failing with a NumberFormatException when a key is not set
		shutdownWaitTime = conf.getInt(JobParams.INDEX_SHUTDOWN_WAIT, shutdownWaitTime);
		bufferFlushThreshold = conf.getInt(JobParams.INDEX_BUFFER_FLUSH_TRESHOLD, bufferFlushThreshold);
		dsId = conf.get(JobParams.INDEX_DSID);
		version = getParsedDateField(conf.get(JobParams.INDEX_FEED_TIME));
		buffer = Lists.newArrayList();
		simulation = Boolean.parseBoolean(conf.get(JobParams.INDEX_FEED_SIMULATION_MODE));
		entityType = conf.get("entityType");
		actionset = conf.get("actionset");

		final String fields = conf.get("index.fields");

		log.info("got fields: \n" + fields);
		log.info("got dsId: " + dsId);
		log.info("got version: " + version);
		log.info("simulation: " + simulation);
		log.info("entityType: " + entityType);
		log.info("actionset: " + actionset);
		log.info("buffer size: " + bufferFlushThreshold);

		try {
			mapper = new SolrProtoMapper(fields);
		} catch (final DocumentException e) {
			log.error("unable to parse fields: " + fields);
			throw new IllegalArgumentException(e);
		}

		final String baseURL = conf.get(JobParams.INDEX_SOLR_URL);
		log.info("solr server baseURL: " + baseURL);

		final String collection = conf.get(JobParams.INDEX_SOLR_COLLECTION);
		log.info("solr server collection: " + collection);

		while (true) {
			try {
				log.info("initializing solr server...");
				solrServer = new CloudSolrServer(baseURL);

				solrServer.connect();

				solrServer.setParallelUpdates(true);
				solrServer.setDefaultCollection(collection);

				final SolrPingResponse rsp = solrServer.ping();

				if (rsp.getStatus() != 0) {
					throw new SolrServerException("bad init status: " + rsp.getStatus());
				}
				break;

			} catch (final Throwable e) {
				if (solrServer != null) {
					solrServer.shutdown();
				}
				// the message may be null (e.g. NullPointerException): never use null as counter name
				final String reason = e.getMessage() != null ? e.getMessage() : e.getClass().getName();
				context.getCounter("index init", reason).increment(1);
				log.warn(String.format("failed to init solr client wait %dms", backoffTimeMs), e);
				Thread.sleep(backoffTimeMs);
			}
		}

		log.info("setup completed, solr server initialization done.");
	}

	/**
	 * Converts one HBase row to a Solr document and queues it; when the buffer reaches the threshold the whole buffer is
	 * sent to Solr.
	 *
	 * @throws IOException
	 *             when the protobuf body cannot be parsed or when the buffer could not be fed after
	 *             {@code MAX_RETRIES} attempts
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		final Map<byte[], byte[]> bMap = value.getFamilyMap(Bytes.toBytes(entityType));

		if (MapUtils.isEmpty(bMap) || !bMap.containsKey(HBaseTableDAO.cfMetadataByte())) {
			// the counter must be incremented, fetching it alone records nothing
			context.getCounter(entityType, "missing body").increment(1);
			return;
		}

		final DNGF oaf = DNGF.parseFrom(bMap.get(HBaseTableDAO.cfMetadataByte()));

		SolrInputDocument doc = null;
		try {
			doc = getDocument(oaf);
		} catch (final Throwable e) {
			handleError(key, new JsonFormat().printToString(oaf), context, doc, e);
			return;
		}

		// the document is queued exactly once; retries only re-send the buffer
		if (enqueue(context, doc)) {
			flushWithRetries(key, oaf, context, doc);
		}
	}

	/**
	 * Maps the given record to a Solr document and adds the {@code actionset} field to it.
	 */
	private SolrInputDocument getDocument(final DNGF oaf) throws DocumentException {
		final SolrInputDocument document = mapper.map(oaf, version, dsId, actionset);
		document.addField("actionset", actionset);
		return document;
	}

	/**
	 * Queues a document for the next feed, empty documents are counted as skipped and dropped.
	 *
	 * @return true when the buffer reached the flush threshold and must be sent to Solr
	 */
	private boolean enqueue(final Context context, final SolrInputDocument doc) {
		// NOTE(review): getDocument always adds the "actionset" field, so this branch looks unreachable - confirm
		if (doc.isEmpty()) {
			context.getCounter("index feed", "skipped records").increment(1);
			return false;
		}
		buffer.add(doc);
		return buffer.size() >= bufferFlushThreshold;
	}

	/**
	 * Sends the buffer to Solr, retrying up to {@code MAX_RETRIES} times and sleeping {@code backoffTimeMs} between
	 * attempts. Every failed attempt is reported through {@link #handleError}, using the record that triggered the flush.
	 *
	 * @throws IOException
	 *             when all the attempts failed; the last failure is attached as cause
	 */
	private void flushWithRetries(final ImmutableBytesWritable key, final DNGF oaf, final Context context, final SolrInputDocument doc)
			throws IOException, InterruptedException {
		Throwable lastFailure = null;
		for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) {
			try {
				doAdd(buffer, context);
				return;
			} catch (final Throwable e) {
				lastFailure = e;
				context.getCounter("index feed", "retries").increment(1);
				handleError(key, new JsonFormat().printToString(oaf), context, doc, e);
				if (attempt < MAX_RETRIES) {
					log.info(String.format("failed to feed documents, waiting %dms", backoffTimeMs));
					Thread.sleep(backoffTimeMs);
				}
			}
		}
		throw new IOException("too many retries: " + MAX_RETRIES, lastFailure);
	}

	/**
	 * Sends the given documents to Solr (unless in simulation mode) and empties the list on success. On failure the list
	 * is left untouched so that the caller can retry.
	 *
	 * @throws SolrServerException
	 *             when Solr answers with a non zero status
	 */
	private void doAdd(final List<SolrInputDocument> buffer, final Context context) throws SolrServerException, IOException {
		if (!simulation) {
			final long start = System.currentTimeMillis();
			final UpdateResponse rsp = solrServer.add(buffer);
			final long elapsed = System.currentTimeMillis() - start;
			log.info("feed time for " + buffer.size() + " records : " + HumanTime.exactly(elapsed) + "\n");

			final int status = rsp.getStatus();
			context.getCounter("index feed", "status code: " + status).increment(buffer.size());

			if (status != 0) {
				throw new SolrServerException("bad status: " + status);
			}
		}
		buffer.clear();
	}

	/**
	 * Feeds the documents still in the buffer, waits {@code shutdownWaitTime} ms and shuts the Solr client down. The
	 * client is shut down also when the last feed fails or the wait is interrupted.
	 *
	 * @throws IOException
	 *             when the remaining documents could not be fed: failing the task is preferred to silently losing them
	 */
	@Override
	protected void cleanup(final Context context) throws IOException, InterruptedException {
		super.cleanup(context);
		try {
			if (!buffer.isEmpty()) {
				doAdd(buffer, context);
			}
			log.info("\nwaiting " + shutdownWaitTime + "ms before shutdown");
			Thread.sleep(shutdownWaitTime);
		} catch (final SolrServerException e) {
			log.error("couldn't feed the last " + buffer.size() + " buffered documents", e);
			throw new IOException("couldn't feed the last buffered documents", e);
		} finally {
			if (solrServer != null) {
				solrServer.shutdown();
			}
		}
	}

	/**
	 * Returns a solr-compatible string representation of a date. The supported input patterns are tried in order and the
	 * first one that matches wins. Parsing is strict, so that e.g. {@code 15-03-2015} is not mistaken for a
	 * {@code yyyy-MM-dd} date with an out-of-range day.
	 *
	 * @param date
	 *            the date to convert
	 * @return the date formatted as {@code yyyy-MM-dd'T'HH:mm:ss'Z'}
	 * @throws IllegalStateException
	 *             when the date is null or matches none of the supported patterns
	 */
	public String getParsedDateField(final String date) {
		if (date != null) {
			for (final String formatString : DATE_FORMATS) {
				try {
					final SimpleDateFormat parser = new SimpleDateFormat(formatString);
					parser.setLenient(false);
					return new SimpleDateFormat(OUT_FORMAT).format(parser.parse(date));
				} catch (final ParseException ignored) {
					// not in this format, try the next one
				}
			}
		}
		throw new IllegalStateException("unable to parse date: " + date);
	}

	/**
	 * Accounts a failure in the "index feed" counter group (one counter per exception class), writes the offending
	 * record to the mapper output and logs the exception.
	 */
	private void handleError(final ImmutableBytesWritable key, final String value, final Context context, final SolrInputDocument doc, final Throwable e)
			throws IOException, InterruptedException {
		context.getCounter("index feed", e.getClass().getName()).increment(1);
		final Text rowKey = new Text(key.copyBytes());
		context.write(rowKey, printRottenRecord(context.getTaskAttemptID().toString(), value, doc));
		log.error("failed to index record " + rowKey, e);
	}

	/**
	 * Builds the diagnostic text for a failed record: task attempt id, original record and, when available, the Solr
	 * document. Each section is checked independently, so a missing value no longer breaks the report.
	 */
	private Text printRottenRecord(final String taskid, final String value, final SolrInputDocument doc) {
		return new Text("\n**********************************\n" + "task: " + taskid + "\n"
				+ check("original", value) + check("solrDoc", doc));
	}

	/**
	 * Renders a labelled section, or just a newline when the value is null or has an empty string representation.
	 */
	private String check(final String label, final Object value) {
		if ((value != null) && !value.toString().isEmpty()) {
			return "\n " + label + ":\n" + value + "\n";
		}
		return "\n";
	}

	/**
	 * Logs every key/value pair of the job configuration.
	 */
	private void logConfiguration(final Configuration conf) {
		log.info("job configutation #################");
		for (final Entry<String, String> e : conf) {
			log.info("'" + e.getKey() + "' : '" + e.getValue() + "'");
		}
		log.info("end of job configutation #################\n\n");
	}

}
