package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import com.google.common.base.Joiner;

import eu.dnetlib.data.mapreduce.hbase.dedup.config.DedupConfig;
import eu.dnetlib.data.mapreduce.hbase.dedup.config.DedupConfigLoader;
import eu.dnetlib.data.mapreduce.util.DedupRootUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.pace.model.Person;

/**
 * Map step of the person deduplication job.
 *
 * <p>For every non-root row that carries a {@code body} cell in the column family named after the
 * configured entity, the serialized body is decoded, a person name is extracted from it and the
 * record is emitted keyed by {@code new Person(name).hash()}. The emitted value is the raw body.
 *
 * <p>Rows are accounted for through counters in the group named after the configured entity:
 * {@code "roots skipped"}, {@code "missing body"}, or the class of the exception raised while
 * decoding a record.
 */
public class DedupPersonMapper extends TableMapper<Text, ImmutableBytesWritable> {

	/** Qualifier of the cell holding the serialized entity; built once instead of once per record. */
	private static final byte[] BODY_QUALIFIER = Bytes.toBytes("body");

	/** Dedup workflow configuration, loaded in {@link #setup(Context)}. */
	private DedupConfig dedupConf;

	/**
	 * Loads the dedup configuration from the {@code dedup.wf.conf} job property.
	 *
	 * @param context the task context providing the job configuration
	 */
	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		dedupConf = DedupConfigLoader.load(context.getConfiguration().get("dedup.wf.conf"));
	}

	/**
	 * Emits {@code (person hash, body)} for the given row, or increments a counter when the row is
	 * skipped.
	 *
	 * @param keyIn   the row key
	 * @param result  the row content
	 * @param context the task context used for counters and output
	 * @throws IOException          if writing the output record fails
	 * @throws InterruptedException if writing the output record is interrupted
	 */
	@Override
	protected void map(ImmutableBytesWritable keyIn, Result result, Context context) throws IOException, InterruptedException {
		// Bytes.toString decodes as UTF-8, independently of the platform default charset.
		if (DedupRootUtils.isRoot(Bytes.toString(keyIn.copyBytes()))) {
			context.getCounter(dedupConf.getEntityName(), "roots skipped").increment(1);
			return;
		}

		byte[] body = result.getValue(dedupConf.getEntityNameBytes(), BODY_QUALIFIER);
		if (body == null) {
			context.getCounter(dedupConf.getEntityName(), "missing body").increment(1);
			return;
		}

		final Text outKey;
		try {
			outKey = new Text(new Person(getPersonName(OafDecoder.decode(body))).hash());
		} catch (Throwable e) {
			// Best effort: a record that cannot be decoded or hashed is logged, counted and skipped
			// so that a single bad row does not fail the whole task.
			System.out.println("GOT EX " + e);
			e.printStackTrace(System.err);
			context.getCounter(dedupConf.getEntityName(), e.getClass().toString()).increment(1);
			return;
		}

		// Deliberately outside the try block: a failed or interrupted write must propagate to the
		// framework instead of being counted as a bad record and silently dropped.
		context.write(outKey, new ImmutableBytesWritable(body));
	}

	/**
	 * Returns the name used to hash the person: the full name when it is non-empty, otherwise
	 * {@code "<second names joined by space>, <first name>"}.
	 *
	 * @param decoder the decoded entity
	 * @return the person name
	 */
	private String getPersonName(OafDecoder decoder) {
		String fullname = decoder.getEntity().getPerson().getMetadata().getFullname();
		if (isValid(fullname)) {
			return fullname;
		}

		// Only build the fallback when it is actually needed.
		String firstname = decoder.getEntity().getPerson().getMetadata().getFirstname();
		String secondnames = Joiner.on(" ").join(decoder.getEntity().getPerson().getMetadata().getSecondnamesList());
		return secondnames + ", " + firstname;
	}

	/**
	 * @param fullname the candidate name, possibly {@code null}
	 * @return {@code true} when the name is neither {@code null} nor empty
	 */
	private boolean isValid(String fullname) {
		return fullname != null && !fullname.isEmpty();
	}

}
