package eu.dnetlib.data.mapreduce.hbase.actions2;

import java.io.IOException;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeSet;

import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;

import eu.dnetlib.miscutils.datetime.DateUtils;

/**
 * Table mapper that emits a {@link Delete} for every scanned row that does not belong to a "recent" raw set.
 * <p>
 * The raw sets of a row are read from the qualifiers of its {@code set} column family. A raw set is considered
 * recent when it is explicitly listed in the {@code latestRawSets} job property, or when the numeric suffix
 * following the last {@code _} of its name is greater than the date limit computed in {@link #setup(Context)}.
 * Rows with at least one recent raw set are left untouched; all the others are deleted.
 */
public class GarbageActionsMapper extends TableMapper<ImmutableBytesWritable, Delete> {

	/** Job configuration property holding the comma-separated list of raw sets that must always be preserved. */
	private static final String LATEST_RAW_SETS = "latestRawSets";

	/** Column family whose qualifiers are interpreted as the raw sets of a row. */
	private static final byte[] SET_FAMILY = Bytes.toBytes("set");

	/** Splits a raw set name on underscores; the last token is expected to be the numeric date. */
	private static final Splitter UNDERSCORE_SPLITTER = Splitter.on("_");

	/**
	 * Width of the "recent" window: 4 days, expressed in milliseconds. The leading {@code 4L} forces long
	 * arithmetic, so a larger window cannot silently overflow an int.
	 * NOTE(review): assumes both DateUtils.now() and the raw set suffix are epoch milliseconds - confirm.
	 */
	private static final long MAX_DATE_INTERVAL = 4L * 24 * 60 * 60 * 1000;

	/**
	 * Raw sets that are always preserved. byte[] has identity-based equals/hashCode, so the set is ordered with the
	 * HBase byte-array comparator in order to get content-based lookups.
	 */
	private final Set<byte[]> latestRawSets = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);

	/** Raw sets whose numeric suffix is greater than this value are considered recent. */
	private long dateLimit;

	/**
	 * Computes the date limit and loads the raw sets to preserve from the job configuration.
	 *
	 * @param context
	 *            the mapper context, used to access the job configuration
	 * @throws IOException
	 *             if the {@code latestRawSets} property is missing or contains no usable value
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {

		this.dateLimit = DateUtils.now() - MAX_DATE_INTERVAL;

		final String s = context.getConfiguration().get(LATEST_RAW_SETS);
		if (s != null) {
			for (final String set : Splitter.on(",").omitEmptyStrings().trimResults().split(s)) {
				latestRawSets.add(Bytes.toBytes(set));
			}
		}

		if (latestRawSets.isEmpty()) {
			throw new IOException("Input parameter (" + LATEST_RAW_SETS + ") is missing or empty: " + s);
		}
	}

	/**
	 * Emits a delete for the current row unless it belongs to a recent raw set, incrementing the
	 * {@code Actions / N. Deletes} counter for every emitted delete.
	 *
	 * @param key
	 *            the row key
	 * @param value
	 *            the scanned row
	 * @param context
	 *            the mapper context receiving the delete
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {
		if (isInARecentRawSet(value)) {
			return;
		}

		context.getCounter("Actions", "N. Deletes").increment(1);
		context.write(key, new Delete(key.copyBytes()));
	}

	/**
	 * Tells whether at least one qualifier of the {@code set} family of the row is a recent raw set.
	 *
	 * @param value
	 *            the scanned row
	 * @return true if the row belongs to a recent raw set, false otherwise (including rows without any set)
	 */
	private boolean isInARecentRawSet(final Result value) {
		final NavigableMap<byte[], byte[]> sets = value.getFamilyMap(SET_FAMILY);
		if (sets == null) {
			// Result.getFamilyMap returns null for an empty Result: handle it like a row without sets.
			return false;
		}
		for (final byte[] rawSet : sets.keySet()) {
			if (isRecentRawSet(rawSet)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tells whether a raw set must be preserved, either because it is explicitly listed among the latest raw sets or
	 * because its date suffix is newer than the date limit.
	 *
	 * @param rawSet
	 *            the raw set name, as bytes
	 * @return true if the raw set is recent
	 */
	private boolean isRecentRawSet(final byte[] rawSet) {
		if (latestRawSets.contains(rawSet)) {
			return true;
		}

		final String date = Iterables.getLast(UNDERSCORE_SPLITTER.split(Bytes.toString(rawSet)));
		try {
			return Long.parseLong(date) > this.dateLimit;
		} catch (final NumberFormatException e) {
			// The age of a set without a numeric suffix cannot be established. This job is destructive, so keep
			// the row instead of deleting it (or failing the whole task, as an uncaught exception would).
			return true;
		}
	}
}
