package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO;
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.gt.Authors;
import eu.dnetlib.pace.model.gt.CoAuthors;
import eu.dnetlib.pace.model.gt.GTAuthor;
import eu.dnetlib.pace.model.gt.GTAuthorMapper;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Reducer that collapses a group of person (author) records into a single "root" record.
 * <p>
 * For every grouping key the reducer:
 * <ol>
 * <li>deserializes each incoming value from JSON into a {@link GTAuthor};</li>
 * <li>accumulates the authors (or, when present, the already merged authors) and the co-authors of the group;</li>
 * <li>emits a {@link Delete} for the row of every incoming author;</li>
 * <li>emits a single {@link Put} holding the merged author, stored under a row key derived from the merged author set.</li>
 * </ol>
 * Counters "deleted" and "out" are updated under the group named after the configured entity type.
 */
public class DedupFindRootsPersonReducer extends TableReducer<ImmutableBytesWritable, ImmutableBytesWritable, ImmutableBytesWritable> {

	/** Dedup workflow configuration, loaded from the job configuration in {@code setup}. */
	private DedupConfig dedupConf;

	/** Output key instance, re-used across all writes to avoid allocating one object per emitted mutation. */
	private ImmutableBytesWritable outKey;

	/**
	 * Loads the dedup configuration from the job parameter {@code JobParams.DEDUP_CONF} and allocates the shared output key.
	 *
	 * @param context
	 *            the reducer context giving access to the job configuration
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
		System.out.println("dedup findRoots reducer\nwf conf: " + dedupConf.toString());

		outKey = new ImmutableBytesWritable();
	}

	/**
	 * Merges all the authors of a group into one root author, deleting the rows of the originals.
	 *
	 * @param key
	 *            the grouping key (not used to build the output)
	 * @param values
	 *            JSON serialized {@link GTAuthor}s belonging to the group
	 * @param context
	 *            the reducer context used to emit the {@link Delete}/{@link Put} mutations and to update the counters
	 * @throws IOException
	 *             propagated from {@code context.write}
	 * @throws InterruptedException
	 *             propagated from {@code context.write}
	 */
	@Override
	protected void reduce(final ImmutableBytesWritable key, final Iterable<ImmutableBytesWritable> values, final Context context) throws IOException,
			InterruptedException {

		// used both as counter group and as column family of the root record
		final String entityType = dedupConf.getWf().getEntityType();

		final Authors aas = new Authors();
		final CoAuthors cas = new CoAuthors();

		for (final GTAuthor a : asGTA(values)) {
			// an author that is itself the result of a previous merge contributes its merged authors instead of itself
			if (a.hasMerged()) {
				aas.addAll(a.getMerged());
			} else {
				aas.add(a.getAuthor());
			}
			if (a.hasCoAuthors()) {
				cas.addAll(a.getCoAuthors());
			}

			// the original row is replaced by the root record emitted below
			final byte[] row = Bytes.toBytes(a.getId());
			outKey.set(row);
			context.write(outKey, new Delete(row));
			context.getCounter(entityType, "deleted").increment(1);
		}

		// NOTE: no isMergedIn/merges dedup relations are emitted between the originals and the root;
		// an earlier implementation of that step was present only as commented-out code.

		final String rootId = hashCodeString(aas);
		final GTAuthor gta = new GTAuthor(rootId, aas, cas, true);

		// NOTE(review): if the root id coincides with the id of one of the authors deleted above, the Delete and
		// the Put target the same row - confirm that the Put is not masked by the Delete in that case.
		final byte[] rootRow = Bytes.toBytes(gta.getId());
		final Put put = new Put(rootRow);
		put.setWriteToWAL(JobParams.WRITE_TO_WAL);
		put.add(Bytes.toBytes(entityType), HBaseTableDAO.cfMetadataByte(), toDNGFByteArray(gta));

		outKey.set(rootRow);
		context.write(outKey, put);

		context.getCounter(entityType, "out").increment(1);
	}

	/**
	 * Lazily converts the raw reducer values into {@link GTAuthor}s.
	 * <p>
	 * The payload is decoded as UTF-8 (via {@link Bytes#toString(byte[])}) rather than with the platform default charset, so
	 * that non-ASCII names are decoded the same way on every node.
	 *
	 * @param values
	 *            JSON serialized authors
	 * @return a lazy view over {@code values}; each element is parsed when it is iterated
	 */
	private Iterable<GTAuthor> asGTA(final Iterable<ImmutableBytesWritable> values) {

		return Iterables.transform(values, input -> GTAuthor.fromJson(Bytes.toString(input.copyBytes())));
	}

	/**
	 * Serializes the given author as a {@link DNGF} protocol buffer.
	 *
	 * @param gta
	 *            the author to serialize
	 * @return the bytes of the DNGF message produced by {@link GTAuthorMapper}
	 */
	public byte[] toDNGFByteArray(final GTAuthor gta) {
		final DNGF oaf = new GTAuthorMapper().map(gta);
		return oaf.toByteArray();
	}

	/**
	 * Builds the row key of the root record from the hash code of the merged author set.
	 * <p>
	 * NOTE(review): the key relies on {@code Authors.hashCode()}, a 32 bit value; distinct author sets may presumably collide
	 * on the same row key - confirm this is acceptable.
	 *
	 * @param ag
	 *            the merged authors
	 * @return the row key of the root record
	 */
	protected String hashCodeString(final Authors ag) {
		return getRowKey(String.valueOf(ag.hashCode()));
	}

	/**
	 * Builds a person identifier for the given suffix, using the fixed prefix "dedup_wf_001".
	 *
	 * @param s
	 *            the identifier suffix
	 * @return the identifier produced by {@code AbstractDNetXsltFunctions.oafId}
	 */
	protected String getRowKey(final String s) {
		return AbstractDNetXsltFunctions.oafId(Type.person.toString(), "dedup_wf_001", s);
	}

}
