package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;

import eu.dnetlib.data.actionmanager.actions.AtomicActionSerialiser;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.rmi.data.hadoop.actionmanager.Agent;
import eu.dnetlib.rmi.data.hadoop.actionmanager.actions.ActionFactory;
import eu.dnetlib.rmi.data.hadoop.actionmanager.actions.AtomicAction;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;

import static eu.dnetlib.data.graph.utils.RelDescriptor.QUALIFIER_SEPARATOR;
import static eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO.*;

/**
 * Created by claudio on 28/04/16.
 *
 * <p>Table mapper that, for every target id obtained from the current row through
 * {@code getTargetIds(value, similarityCF)}, builds an {@link AtomicAction} and emits it as a
 * {@link Text} pair. The output key is {@code targetRowKey@targetColumnFamily@targetColumn} and the
 * output value is the JSON serialisation of the action.
 *
 * <p>The job configuration must provide the {@code rawSetId} and {@code similarityCF} parameters, plus
 * the dedup configuration stored under {@link JobParams#DEDUP_CONF}.
 */
public class DedupSimilarityToHdfsActionsMapper extends TableMapper<Text, Text> {

	private static final Log log = LogFactory.getLog(DedupSimilarityToHdfsActionsMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** Name of the configuration property holding the raw set id passed to the action factory. */
	private static final String RAW_SET = "rawSetId";

	/** Name of the configuration property holding the similarity column family. */
	private static final String SIMILARITY_CF = "similarityCF";

	private String rawSet = null;

	private String similarityCF = null;

	private DedupConfig dedupConf = null;

	private ActionFactory actionFactory = null;

	/** Output key instance, reused across all writes. */
	private Text keyOut;

	/** Output value instance, reused across all writes. */
	private Text valueOut;

	/**
	 * Reads the job parameters, loads the dedup configuration and allocates the reusable output objects.
	 *
	 * @param context the mapper context giving access to the job configuration
	 * @throws IOException when {@code rawSetId} or {@code similarityCF} is missing or blank
	 * @throws InterruptedException declared by the overridden method
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		rawSet = context.getConfiguration().get(RAW_SET);
		if (StringUtils.isBlank(rawSet)) {
			throw new IOException("Input parameter (" + RAW_SET + ") is missing or empty: '" + rawSet + "'");
		}
		log.info("raw set: '" + rawSet + "'");

		similarityCF = context.getConfiguration().get(SIMILARITY_CF);
		if (StringUtils.isBlank(similarityCF)) {
			throw new IOException("Input parameter (" + SIMILARITY_CF + ") is missing or empty: '" + similarityCF + "'");
		}
		log.info("similarityCF: '" + similarityCF + "'");

		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));

		log.info("wf conf: " + dedupConf.toString());

		actionFactory = new ActionFactory();

		keyOut = new Text();
		valueOut = new Text();
	}

	/**
	 * Emits one serialised atomic action per target id found in the given row.
	 *
	 * @param key the row key of the current row
	 * @param value the current row
	 * @param context the mapper context used to write the output and to update the counters
	 * @throws IOException when writing an output record fails
	 * @throws InterruptedException declared by the overridden method
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		final Agent agent = new Agent("dedup", "Deduplication", Agent.AGENT_TYPE.algo);

		// The row key is the same for every target id: decode it once, with an explicit charset so the
		// result does not depend on the platform default encoding.
		final String targetKey = new String(key.copyBytes(), StandardCharsets.UTF_8);

		try {
			getTargetIds(value, similarityCF).forEach(targetId -> {
					final String qualifier = similarityCF + QUALIFIER_SEPARATOR + targetId;
					final AtomicAction aa = actionFactory.createAtomicAction(rawSet, agent, targetKey, cfRels(), qualifier, null);

					keyOut.set(aa.getTargetRowKey() + "@" + aa.getTargetColumnFamily() + "@" + aa.getTargetColumn());
					valueOut.set(AtomicActionSerialiser.toJSON(aa));

					try {
						context.write(keyOut, valueOut);
						context.getCounter(dedupConf.getWf().getEntityType(), "similarity2actions").increment(1);
					} catch (IOException e) {
						// Checked exceptions cannot cross the lambda boundary: tunnel and unwrap below.
						throw new UncheckedIOException(e);
					} catch (InterruptedException e) {
						// Restore the interrupt status before aborting the task.
						Thread.currentThread().interrupt();
						throw new RuntimeException(e);
					}
				}
			);
		} catch (UncheckedIOException e) {
			// Surface the original I/O failure through the method's declared exception type.
			throw e.getCause();
		}
	}

}
