package eu.dnetlib.data.mapreduce.hbase.dedup.gt;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Maps;

import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.PersonProtos.Person.CoAuthor;
import eu.dnetlib.data.proto.PersonProtos.Person.CoAuthor.Builder;
import eu.dnetlib.data.proto.TypeProtos.Type;

/**
 * Table mapper that rewrites the co-author list stored in the body of each person row.
 *
 * <p>
 * During {@code setup} a lookup table is loaded from the file {@code part-r-00000} located under the directory named by the
 * {@code mapred.output.dir} configuration property. During {@code map} every co-author whose id is found in that lookup gets
 * its anchor id replaced by the looked-up value; the co-authors are then de-duplicated and the updated body is emitted as a
 * {@link Put} on the same row.
 * </p>
 */
public class CoAuthorUpdateMapper extends TableMapper<ImmutableBytesWritable, Put> {

	/**
	 * Lookup loaded in {@code setup}: the text before the first '=' of each input line, mapped to the text that follows it.
	 * Judging by its name it maps merged person ids to their anchor id (assumption, the file producer is not visible here).
	 */
	private Map<String, String> mergedToAnchor;

	/**
	 * Loads the lookup table from {@code <mapred.output.dir>/part-r-00000} and publishes its size in the counter
	 * {@code anchor / map size}.
	 *
	 * @param context
	 *            the task context, used to read the configuration and to set counters
	 * @throws IllegalArgumentException
	 *             when the {@code mapred.output.dir} property is missing or blank
	 * @throws IOException
	 *             when the file cannot be read or contains a non-blank line without a {@code key=value} pair
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {

		mergedToAnchor = Maps.newHashMap();

		// Validate the property itself: once the file name is appended the resulting string can never be blank.
		final String outputDir = context.getConfiguration().get("mapred.output.dir");
		if (StringUtils.isBlank(outputDir)) throw new IllegalArgumentException("missing 'mapred.output.dir'");

		final Path path = new Path(outputDir + "/part-r-00000");

		// FileSystem.get() hands out a cached instance shared with the rest of the task: it must not be closed here,
		// only the stream opened on it.
		final FileSystem fs = FileSystem.get(context.getConfiguration());
		final BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path), "UTF-8"));
		try {
			String line = br.readLine();
			while (line != null) {
				// blank lines (e.g. a trailing newline) carry no mapping
				if (!StringUtils.isBlank(line)) {
					final String[] split = line.split("=");
					if (split.length < 2) throw new IOException("malformed line in " + path + ": '" + line + "'");

					mergedToAnchor.put(split[0], split[1]);
				}
				line = br.readLine();
			}
		} finally {
			br.close();
		}

		context.getCounter("anchor", "map size").setValue(mergedToAnchor.size());
	}

	/**
	 * Updates the anchor ids of the co-authors of one person row and writes the new body back.
	 *
	 * <p>
	 * Counters in group {@code anchor}: {@code hit} (co-author id found in the lookup), {@code updated} (anchor id actually
	 * changed), {@code miss} (co-author id not in the lookup), {@code missing body} (row without a person body, skipped).
	 * </p>
	 *
	 * @param key
	 *            the row key, reused for the emitted {@link Put}
	 * @param value
	 *            the row content; the person body column is read from it
	 * @param context
	 *            the task context, used for counters and output
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		final byte[] body = value.getValue(Bytes.toBytes(Type.person.toString()), DedupUtils.BODY_B);

		// getValue() yields null when the row has no such column: nothing to decode or update.
		if (body == null) {
			context.getCounter("anchor", "missing body").increment(1);
			return;
		}

		final OafDecoder d = OafDecoder.decode(body);

		final Oaf.Builder oafBuilder = Oaf.newBuilder(d.getOaf());

		final List<Builder> coAuthors = oafBuilder.getEntityBuilder().getPersonBuilder().getCoauthorBuilderList();

		for (final Builder cb : coAuthors) {

			final String newAnchorId = mergedToAnchor.get(cb.getId());
			if (newAnchorId != null) {
				context.getCounter("anchor", "hit").increment(1);

				if (!cb.getAnchorId().equals(newAnchorId)) {
					cb.setAnchorId(newAnchorId);
					context.getCounter("anchor", "updated").increment(1);
				}
			} else {
				context.getCounter("anchor", "miss").increment(1);
			}
		}

		// De-duplicate: co-authors are keyed by anchor id when they have one, by their own id otherwise,
		// so entries sharing a key collapse into one (the last one built wins).
		final Map<String, CoAuthor> coAuthorSet = Maps.newHashMap();

		for (final Builder cb : coAuthors) {
			coAuthorSet.put(cb.hasAnchorId() ? cb.getAnchorId() : cb.getId(), cb.build());
		}

		// The messages were built above, so the builder list can now be replaced by the de-duplicated values.
		oafBuilder.getEntityBuilder().getPersonBuilder().clearCoauthor();
		oafBuilder.getEntityBuilder().getPersonBuilder().addAllCoauthor(coAuthorSet.values());

		final Put put = new Put(key.copyBytes());
		put.setWriteToWAL(false);
		put.add(Bytes.toBytes(Type.person.toString()), DedupUtils.BODY_B, oafBuilder.build().toByteArray());

		context.write(key, put);

	}

}
