package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

import eu.dnetlib.data.graph.utils.RelDescriptor;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.pace.config.DedupConfig;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;

import static eu.dnetlib.data.mapreduce.util.UpdateMerger.mergeBodyUpdates;
import static eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO.*;

/**
 * Table mapper that emits, keyed by root id, the serialized records needed to build dedup roots.
 *
 * <p>For each scanned row:
 * <ul>
 * <li>if the row has an {@code isMergedIn} target, the merged body of the row and every
 * relation returned by {@code rel(value, "isMergedIn", "merges", "isSimilarTo")} that is not
 * marked deleted by inference are emitted under that target id;</li>
 * <li>otherwise, if the row has {@code merges} targets and a stored body for the configured
 * entity type, that body is emitted under the row's own key.</li>
 * </ul>
 */
public class DedupBuildRootsMapper extends TableMapper<Text, ImmutableBytesWritable> {

	private static final Log log = LogFactory.getLog(DedupBuildRootsMapper.class);

	/** Dedup configuration loaded from the job configuration in {@link #setup}. */
	private DedupConfig dedupConf;

	/** Reusable output value wrapper; re-pointed at the current payload on every emit. */
	private ImmutableBytesWritable ibw;

	/**
	 * Loads the dedup configuration from the {@link JobParams#DEDUP_CONF} job property and
	 * allocates the reusable output value.
	 *
	 * @param context the mapper context providing the job configuration
	 */
	@Override
	protected void setup(final Context context) {
		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
		log.info("dedup buildRoots mapper\nwf conf: " + dedupConf.toString());

		ibw = new ImmutableBytesWritable();
	}

	/**
	 * Processes one table row, emitting its contribution (if any) to a root record.
	 *
	 * @param rowkey  the key of the current row
	 * @param value   the content of the current row
	 * @param context the mapper context used for counters and output
	 * @throws IOException          declared by the mapper contract
	 * @throws InterruptedException declared by the mapper contract
	 * @throws RuntimeException     if the row has more than one {@code isMergedIn} target, or if
	 *                              writing to the context fails
	 */
	@Override
	protected void map(final ImmutableBytesWritable rowkey, final Result value, final Context context) throws IOException, InterruptedException {
		final String entityType = dedupConf.getWf().getEntityType();
		final Type type = Type.valueOf(entityType);
		final List<String> mergedIn = getTargetIds(value, "isMergedIn");

		if ((mergedIn != null) && !mergedIn.isEmpty()) {

			if (mergedIn.size() > 1) {
				throw new RuntimeException("found more than one merged in relation for row key: " + rowKeyAsString(rowkey));
			}
			final Text rootId = new Text(mergedIn.get(0));

			context.getCounter(entityType, "merged").increment(1);

			final DNGF dngfMerged = mergeBodyUpdates(context, value.getFamilyMap(cfMetadataByte()), type);
			if (dngfMerged == null) {
				// Without a body nothing is emitted for this row, relations included.
				context.getCounter(entityType, "missing body").increment(1);
				log.warn("missing body in: " + rowKeyAsString(rowkey));
				return;
			}
			emit(context, rootId, dngfMerged.toByteArray());

			// Relations marked as deleted by inference are skipped.
			rel(value, "isMergedIn", "merges", "isSimilarTo").values().forEach(dngf -> {
				if (!isRelMarkedDeleted(dngf)) {
					emit(context, rootId, dngf.toByteArray());
				}
			});

		} else {
			final List<String> mergesRels = getTargetIds(value, "merges");
			if (mergesRels != null && !mergesRels.isEmpty()) {
				// The body is read from the column whose qualifier is the entity type name.
				final byte[] body = value.getValue(cfMetadataByte(), type.toString().getBytes(StandardCharsets.UTF_8));
				if (body != null) {
					context.getCounter(type.toString(), "root").increment(1);
					emit(context, new Text(rowkey.copyBytes()), body);
				}
			}
		}

	}

	/**
	 * Writes {@code value} to the context under {@code rootId}, reusing the shared output wrapper.
	 *
	 * <p>Checked exceptions are rethrown as {@link RuntimeException} so that this method can be
	 * called from within a lambda; the interrupt flag is restored before rethrowing an
	 * {@link InterruptedException}.
	 *
	 * @param context the mapper context to write to
	 * @param rootId  the output key
	 * @param value   the serialized payload
	 * @throws RuntimeException wrapping any {@link IOException} or {@link InterruptedException}
	 *                          raised by the write
	 */
	private void emit(final Context context, final Text rootId, final byte[] value) {
		ibw.set(value);
		try {
			context.write(rootId, ibw);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			throw new RuntimeException(e);
		}
	}

	/**
	 * Tells whether the given record is a relation flagged as deleted by inference.
	 *
	 * @param dngf the record to inspect
	 * @return {@code true} if the record kind is {@link Kind#relation} and its data info has the
	 *         deleted-by-inference flag set
	 */
	private boolean isRelMarkedDeleted(final DNGF dngf) {
		return dngf.getKind().equals(Kind.relation) && dngf.getDataInfo().getDeletedbyinference();
	}

	/**
	 * Decodes a row key as a UTF-8 string for use in diagnostics.
	 *
	 * @param rowkey the row key to decode
	 * @return the row key bytes decoded as UTF-8
	 */
	private static String rowKeyAsString(final ImmutableBytesWritable rowkey) {
		return new String(rowkey.copyBytes(), StandardCharsets.UTF_8);
	}

}
