package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.protobuf.InvalidProtocolBufferException;

import eu.dnetlib.data.mapreduce.util.DedupRootUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.mapreduce.util.OafEntityMerger;
import eu.dnetlib.data.mapreduce.util.OafImporter;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.pace.util.DedupConfig;
import eu.dnetlib.pace.util.DedupConfigLoader;

/**
 * Reducer of the dedup "build roots" step.
 * <p>
 * For every root key it receives, it collects the grouped entity payloads, merges them into a single root body and
 * writes that body under the root row. Relation payloads found in the same group are rewritten so that they point
 * from/to the root, and copies of the received relations are written flagged as deleted by inference.
 */
public class DedupBuildRootsReducer extends TableReducer<Text, ImmutableBytesWritable, ImmutableBytesWritable> {

	/** Direction of the relation produced by {@link #patchRelations(byte[], Oaf, OafPatch)}. */
	private enum OafPatch {
		rootToEntity, entityToRoot
	}

	/** Matches everything up to and including the last "::" of an identifier; compiled once, used per relation. */
	private static final Pattern ID_PREFIX = Pattern.compile("^.*\\:\\:");

	private DedupConfig dedupConf;

	/**
	 * Loads the dedup workflow configuration from the job property {@code dedup.wf.conf}.
	 */
	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		super.setup(context);
		dedupConf = DedupConfigLoader.load(context.getConfiguration().get("dedup.wf.conf"));
		System.out.println("dedup buildRoots reducer\nwf conf: " + dedupConf.toString());
	}

	/**
	 * Processes one group of values sharing the same key.
	 * <p>
	 * Non-root keys are counted as "aborted" and skipped. For root keys, entity values are merged into the root body,
	 * relation values are handed to {@link #handleRels(Context, byte[], Oaf, boolean)}.
	 *
	 * @param key
	 *            the row key of the group, expected to be a root identifier
	 * @param values
	 *            the serialized Oaf payloads of the group
	 * @param context
	 *            the reducer context used for output and counters
	 */
	@Override
	protected void reduce(Text key, Iterable<ImmutableBytesWritable> values, Context context) throws IOException, InterruptedException {

		final String rootId = key.toString();

		// ensures we're dealing with a root, otherwise returns
		if (!DedupRootUtils.isRoot(rootId)) {
			System.err.println("aborting DedupBuildRootsReducer, found non-root key: " + key);
			context.getCounter("DedupBuildRootsReducer", "aborted").increment(1);
			return;
		}

		final byte[] rowkey = Bytes.toBytes(rootId);
		final List<Oaf> entities = Lists.newArrayList();

		for (Oaf oaf : toOaf(values)) {
			switch (oaf.getKind()) {
			case entity:
				entities.add(oaf);
				break;
			case relation:
				handleRels(context, rowkey, oaf, true);
				break;
			default:
				break;
			}
		}

		// build and emit the root body
		// NOTE(review): entities may be empty if the group only carried relations - confirm how the merger copes with it
		final Oaf.Builder builder = new OafEntityMerger().mergeEntities(rootId, entities);
		context.getCounter(dedupConf.getEntityName() + " root group size", lpad(entities.size())).increment(1);
		emit(context, rowkey, dedupConf.getEntityName(), "body", builder.build().toByteArray(), "root");

		// TODO: remove the personResult hack applied in handleRels
	}

	/** Lazily converts the raw values into Oaf objects by means of {@link OafImporter}. */
	private Iterable<Oaf> toOaf(Iterable<ImmutableBytesWritable> values) {
		return Iterables.transform(values, new OafImporter());
	}

	/**
	 * Emits the rows derived from one relation of the group.
	 * <p>
	 * Unless the hack applies (see {@link #checkHack(String, String, Oaf)}), two patched relations are written:
	 * root -> entity under the root row and entity -> root under the row named after the patched relation source. In
	 * every case two copies of the received relation are written flagged as deleted by inference: one with source and
	 * target swapped, one in the received direction.
	 *
	 * @param context
	 *            the reducer context used for output and counters
	 * @param rowkey
	 *            the root row key (UTF-8 bytes of the root identifier)
	 * @param oaf
	 *            the relation to process
	 * @param hack
	 *            when true, the personResult check is evaluated and may suppress the patched relations
	 */
	private void handleRels(Context context, byte[] rowkey, Oaf oaf, boolean hack) throws IOException, InterruptedException, InvalidProtocolBufferException {

		// the rowkey was produced with Bytes.toBytes (UTF-8), hence it must be decoded with the same charset
		final String rootId = Bytes.toString(rowkey);

		if (hack && checkHack(rootId, dedupConf.getEntityName(), oaf)) {
			context.getCounter("hack", "personResult in").increment(1);
		} else {

			// emit relation from the root to the related entities
			OafDecoder decoder = rootToEntity(rowkey, oaf);
			emit(context, rowkey, decoder.relTypeName(), decoder.relTargetId(), decoder.toByteArray(), "[root -> entity]");

			// emit relation from the related entities to the root
			decoder = entityToRoot(rowkey, oaf);
			byte[] revKey = Bytes.toBytes(decoder.relSourceId());
			emit(context, revKey, decoder.relTypeName(), rootId, decoder.toByteArray(), "[entity -> root]");
		}

		// mark as deleted the received relation with source and target swapped; written under the swapped source row
		OafDecoder decoder = markDeleted(oaf, true);
		byte[] revKey = Bytes.toBytes(decoder.relSourceId());
		emit(context, revKey, decoder.relTypeName(), decoder.relTargetId(), decoder.toByteArray(), "mark deleted [dup -> entity]");

		// mark as deleted the received relation in its received direction; written under its source row
		decoder = markDeleted(oaf, false);
		revKey = Bytes.toBytes(decoder.relSourceId());
		emit(context, revKey, decoder.relTypeName(), decoder.relTargetId(), decoder.toByteArray(), "mark deleted [entity -> dup]");
	}

	/**
	 * Writes a single cell and increments the counter identified by (family, label).
	 *
	 * @param context
	 *            the reducer context used for output and counters
	 * @param rowkey
	 *            the row to write to
	 * @param family
	 *            the column family, also used as counter group
	 * @param qualifier
	 *            the column qualifier
	 * @param value
	 *            the cell value
	 * @param label
	 *            the counter name
	 */
	private void emit(Context context, byte[] rowkey, String family, String qualifier, byte[] value, String label) throws IOException, InterruptedException {
		Put put = new Put(rowkey).add(Bytes.toBytes(family), Bytes.toBytes(qualifier), value);
		context.write(new ImmutableBytesWritable(rowkey), put);
		context.getCounter(family, label).increment(1);
	}

	// /////////////////

	/**
	 * Tells whether the personResult hack applies to the given relation.
	 *
	 * @param root
	 *            the root identifier
	 * @param entityName
	 *            the name of the entity type being deduplicated
	 * @param oaf
	 *            the relation to check
	 * @return true when the entity type is {@code result}, the relation type is {@code personResult} and the part
	 *         following the last "::" of the root id differs from that of the relation source
	 */
	public boolean checkHack(String root, String entityName, Oaf oaf) {
		return entityName.equals(Type.result.toString())
				&& oaf.getRel().getRelType().toString().equals(RelType.personResult.toString())
				&& !md5matches(root, oaf.getRel().getSource());
	}

	/**
	 * Compares two identifiers after stripping everything up to and including their last "::".
	 * Presumably the remaining part is the md5 portion of the identifier - TODO confirm.
	 */
	private boolean md5matches(String id1, String id2) {
		final String suffix1 = ID_PREFIX.matcher(id1).replaceAll("");
		final String suffix2 = ID_PREFIX.matcher(id2).replaceAll("");
		return suffix1.equals(suffix2);
	}

	/** Builds the relation going from the root to the target of the given relation. */
	private OafDecoder rootToEntity(final byte[] rootRowkey, final Oaf rel) {
		return patchRelations(rootRowkey, rel, OafPatch.rootToEntity);
	}

	/** Builds the relation going from the target of the given relation to the root. */
	private OafDecoder entityToRoot(final byte[] rootRowkey, final Oaf rel) {
		return patchRelations(rootRowkey, rel, OafPatch.entityToRoot);
	}

	/** Builds a copy of the given relation flagged as deleted by inference, see {@link #deleteRelations(Oaf, boolean)}. */
	private OafDecoder markDeleted(final Oaf rel, boolean reverse) {
		return deleteRelations(rel, reverse);
	}

	/**
	 * Patches a copy of the relation so that it involves the root id, flagging it as inferred and not deleted.
	 *
	 * @param rootRowkey
	 *            the root row key (UTF-8 bytes of the root identifier)
	 * @param rel
	 *            the relation to copy and patch
	 * @param patchKind
	 *            rootToEntity replaces the source with the root id; entityToRoot moves the target to the source and
	 *            sets the root id as target
	 * @return a decoder wrapping the patched relation
	 */
	private OafDecoder patchRelations(final byte[] rootRowkey, Oaf rel, final OafPatch patchKind) {
		// decode with UTF-8, the same charset used to produce the rowkey
		final String rootId = Bytes.toString(rootRowkey);

		Oaf.Builder builder = Oaf.newBuilder(rel);
		builder.getDataInfoBuilder().setInferred(true).setDeletedbyinference(false);
		switch (patchKind) {
		case rootToEntity:
			builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots p:rootToEntity)");
			builder.getRelBuilder().setSource(rootId);
			break;

		case entityToRoot:
			builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots p:entityToRoot)");
			// the source must be set before the target is overwritten with the root id
			builder.getRelBuilder().setSource(builder.getRel().getTarget());
			builder.getRelBuilder().setTarget(rootId);
			break;

		default:
			break;
		}

		return OafDecoder.decode(builder.build());
	}

	/**
	 * Builds a copy of the relation flagged as deleted by inference.
	 *
	 * @param rel
	 *            the relation to copy
	 * @param reverse
	 *            when true, source and target of the copy are swapped
	 * @return a decoder wrapping the flagged relation
	 */
	private OafDecoder deleteRelations(Oaf rel, final boolean reverse) {
		Oaf.Builder builder = Oaf.newBuilder(rel);
		builder.getDataInfoBuilder().setInferenceprovenance("dedup (BuildRoots d: " + reverse + ")");
		builder.getDataInfoBuilder().setDeletedbyinference(true);

		if (reverse) {
			// swap source and target
			String tmp = builder.getRel().getSource();
			builder.getRelBuilder().setSource(builder.getRel().getTarget());
			builder.getRelBuilder().setTarget(tmp);
		}

		return OafDecoder.decode(builder.build());
	}

	/** Left-pads the number with spaces to a width of 5 characters; used to build counter names. */
	private String lpad(int s) {
		return StringUtils.leftPad(String.valueOf(s), 5);
	}

}
