package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map;

import eu.dnetlib.data.graph.model.DNGFDecoder;
import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Iterables;

import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.pace.config.DedupConfig;

/**
 * HBase table mapper that, for every non-root row carrying both an entity body and a
 * "mergedIn" relationship, emits one record pairing the identifier found in the mergedIn
 * column family (used as output key) with the single original id of the decoded entity
 * (used as output value).
 *
 * Rows are skipped, and a dedicated counter is incremented, when:
 * <ul>
 * <li>{@code HBaseTableDAO.isRoot(rowkey)} is true ("root row skipped");</li>
 * <li>the entity column family is absent/empty or holds no body cell ("missing body");</li>
 * <li>the mergedIn column family is absent or empty ("missing mergedIn relationship").</li>
 * </ul>
 *
 * All counters are grouped under the entity type configured in the dedup workflow.
 */
public class DedupRootsToCsvMapper extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {

	/**
	 * logger.
	 */
	private static final Log log = LogFactory.getLog(DedupRootsToCsvMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** Charset used to decode the mergedIn qualifier into the root identifier. */
	private static final Charset UTF8 = Charset.forName("UTF-8");

	/** Dedup configuration, loaded once per task from the job configuration. */
	private DedupConfig dedupConf;

	/** Reusable output key holder (avoids allocating a writable per record). */
	private ImmutableBytesWritable key;

	/** Reusable output value holder (avoids allocating a writable per record). */
	private ImmutableBytesWritable value;

	/**
	 * Loads the dedup configuration from the job parameter {@code JobParams.DEDUP_CONF} and
	 * allocates the reusable output writables.
	 *
	 * @param context
	 *            the mapper context giving access to the job configuration
	 */
	@Override
	protected void setup(final Context context) {
		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
		System.out.println("dedup buildRoots mapper\nwf conf: " + dedupConf.toString());
		key = new ImmutableBytesWritable();
		value = new ImmutableBytesWritable();
	}

	/**
	 * Emits a (mergedIn id, original id) pair for the given row, or skips it while
	 * incrementing the matching counter when the row is a root, has no body, or has no
	 * mergedIn relationship.
	 *
	 * @param rowkey
	 *            the HBase row key
	 * @param result
	 *            the HBase row content
	 * @param context
	 *            the mapper context used to write output and update counters
	 * @throws IOException
	 *             propagated from {@code context.write}
	 * @throws InterruptedException
	 *             propagated from {@code context.write}
	 */
	@Override
	protected void map(final ImmutableBytesWritable rowkey, final Result result, final Context context) throws IOException, InterruptedException {

		final String entityType = dedupConf.getWf().getEntityType();

		if (HBaseTableDAO.isRoot(rowkey)) {
			context.getCounter(entityType, "root row skipped").increment(1);
			return;
		}

		// getFamilyMap may return null when the family is absent, so the body lookup must be
		// guarded; the row is unusable whenever the family is missing/empty OR the body cell
		// is missing (the previous '&&' check dereferenced a null map and let body-less rows through).
		final Map<byte[], byte[]> entityCf = result.getFamilyMap(Bytes.toBytes(entityType));
		final byte[] body = MapUtils.isEmpty(entityCf) ? null : entityCf.get(HBaseTableDAO.cfMetadataByte());
		if (body == null) {
			context.getCounter(entityType, "missing body").increment(1);
			return;
		}

		final Map<byte[], byte[]> mergedIn = result.getFamilyMap(HBaseTableDAO.getDedupQualifier_mergedInBytes(entityType));
		if (MapUtils.isEmpty(mergedIn)) {
			context.getCounter(entityType, "missing mergedIn relationship").increment(1);
			return;
		}

		// getOnlyElement throws if the row holds more than one mergedIn qualifier.
		final String rootId = new String(Iterables.getOnlyElement(mergedIn.keySet()), UTF8);

		key.set(Bytes.toBytes(rootId));
		// getOnlyElement throws unless the entity carries exactly one original id.
		value.set(Bytes.toBytes(Iterables.getOnlyElement(DNGFDecoder.decode(body).getEntity().getOriginalIdList())));
		context.write(key, value);
		context.getCounter(entityType, "root entity").increment(1);

	}

}
