package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.pace.config.DedupConfig;

/**
 * HBase table mapper that turns the columns found in the similarity column family of each input row into
 * atomic actions, and emits the {@link Put} operations of those actions.
 *
 * <p>
 * Required job configuration parameters (read in {@link #setup(Context)}):
 * <ul>
 * <li>{@code rawSetId} - the raw set passed to the {@link ActionFactory} when creating each action;</li>
 * <li>{@code similarityCF} - the name of the column family to read from each row;</li>
 * <li>{@link JobParams#DEDUP_CONF} - the serialized {@link DedupConfig}, used here to name the counter group.</li>
 * </ul>
 */
public class DedupSimilarityToActionsMapper extends TableMapper<ImmutableBytesWritable, Put> {

	private static final Log log = LogFactory.getLog(DedupSimilarityToActionsMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** Name of the job parameter holding the raw set identifier. */
	private static final String RAW_SET = "rawSetId";

	/** Name of the job parameter holding the similarity column family name. */
	private static final String SIMILARITY_CF = "similarityCF";

	/** Name of the counter incremented once for every emitted Put. */
	private static final String COUNTER_NAME = "similarity2actions";

	private String rawSet = null;

	private String similarityCF = null;

	private DedupConfig dedupConf = null;

	private ActionFactory actionFactory = null;

	/**
	 * Reads and validates the job parameters, loads the dedup configuration and creates the action factory.
	 *
	 * @param context
	 *            the mapper context providing the job configuration
	 * @throws IOException
	 *             when the raw set or the similarity column family parameter is missing or blank
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		rawSet = context.getConfiguration().get(RAW_SET);
		if (StringUtils.isBlank(rawSet)) {
			throw new IOException("Input parameter (" + RAW_SET + ") is missing or empty: '" + rawSet + "'");
		}
		log.info("raw set: '" + rawSet + "'");

		similarityCF = context.getConfiguration().get(SIMILARITY_CF);
		if (StringUtils.isBlank(similarityCF)) {
			throw new IOException("Input parameter (" + SIMILARITY_CF + ") is missing or empty: '" + similarityCF + "'");
		}
		log.info("similarityCF: '" + similarityCF + "'");

		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));

		log.info("wf conf: " + dedupConf.toString());

		actionFactory = new ActionFactory();
	}

	/**
	 * Creates one atomic action per qualifier found in the similarity column family of the given row and writes
	 * every Put produced by that action, keyed by the action row key. Rows without any column in the similarity
	 * column family are skipped.
	 *
	 * @param key
	 *            the row key of the current row, used as the target key of the created actions
	 * @param value
	 *            the current row
	 * @param context
	 *            the mapper context receiving the Puts and the counter updates
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		final Map<byte[], byte[]> sMap = value.getFamilyMap(Bytes.toBytes(similarityCF));
		if (MapUtils.isEmpty(sMap)) {
			return;
		}

		final Agent agent = new Agent("dedup", "Deduplication", Agent.AGENT_TYPE.algo);

		// The row key is the same for every qualifier, so decode it once. Bytes.toString decodes as UTF-8,
		// matching Bytes.toBytes, instead of relying on the platform default charset.
		final String targetKey = Bytes.toString(key.copyBytes());

		for (final Entry<byte[], byte[]> similarity : sMap.entrySet()) {

			final String qualifier = Bytes.toString(similarity.getKey());
			final AtomicAction aa = actionFactory.createAtomicAction(rawSet, agent, targetKey, similarityCF, qualifier, null);

			// NOTE(review): the meaning of the four null arguments is defined by AtomicAction, not visible here.
			for (final Put put : aa.asPutOperations(null, null, null, null)) {

				context.write(new ImmutableBytesWritable(Bytes.toBytes(aa.getRowKey())), put);
				context.getCounter(dedupConf.getWf().getEntityType(), COUNTER_NAME).increment(1);
			}
		}

	}
}
