package eu.dnetlib.data.mapreduce.hbase.dedup.config;

import java.util.List;
import java.util.Set;

import org.apache.hadoop.hbase.util.Bytes;

import com.google.gson.Gson;

import eu.dnetlib.data.proto.TypeProtos.Type;

/**
 * Value holder for the parameters of a deduplication run: the entity type being processed, the field used for ordering in
 * the reducer, the column families touched by the relation redirection, the datasource prefixes to skip, the dedup run
 * subprefix and the similarity threshold.
 * <p>
 * {@link #toString()} renders the whole object as JSON through Gson.
 */
public class DedupConfig {

	/**
	 * Type of the entities handled by this configuration.
	 */
	private Type entityType;

	/**
	 * Name of the field by which values are sorted during the reducer phase.
	 */
	private String orderField;

	/**
	 * Column families taking part in the redirection of the relations.
	 */
	private List<String> rootBuilderFamilies;

	/**
	 * Datasource namespace prefixes excluded from the deduplication.
	 */
	private Set<String> skipList;

	/**
	 * Subprefix used when building the root id; makes multiple dedup runs possible.
	 */
	private String dedupRun;

	/**
	 * Threshold applied to the similarity.
	 */
	private double threshold;

	/**
	 * Creates a configuration from the given values. The collections are stored as passed in (no copy is made).
	 *
	 * @param entityType
	 *            the entity type
	 * @param orderField
	 *            the name of the field used for sorting in the reducer phase
	 * @param rootBuilderFamilies
	 *            the column families involved in the relations redirection
	 * @param dedupRun
	 *            the subprefix used to build the root id; every single quote character is stripped from it
	 * @param threshold
	 *            the similarity threshold
	 * @param skipList
	 *            the datasource namespace prefixes that won't be deduplicated
	 * @throws NullPointerException
	 *             if {@code dedupRun} is {@code null}
	 */
	public DedupConfig(Type entityType, String orderField, List<String> rootBuilderFamilies, String dedupRun, double threshold, Set<String> skipList) {
		this.entityType = entityType;
		this.orderField = orderField;
		this.rootBuilderFamilies = rootBuilderFamilies;
		this.skipList = skipList;
		this.dedupRun = stripSingleQuotes(dedupRun);
		this.threshold = threshold;
	}

	/**
	 * Removes every single quote character from the given string.
	 *
	 * @param value
	 *            the string to clean, must not be {@code null}
	 * @return the very same instance when it holds no single quote, otherwise a copy without single quotes
	 */
	private static String stripSingleQuotes(final String value) {
		if (value.indexOf('\'') < 0) {
			// nothing to strip: hand back the original instance
			return value;
		}
		return value.replace("'", "");
	}

	/**
	 * @return the entity type
	 */
	public Type getEntityType() {
		return entityType;
	}

	/**
	 * @return the string form of the entity type, as given by its {@code toString()}
	 */
	public String getEntityName() {
		final Type type = getEntityType();
		return type.toString();
	}

	/**
	 * @return the string form of the entity type converted to bytes via {@link Bytes#toBytes(String)}
	 */
	public byte[] getEntityNameBytes() {
		final Type type = getEntityType();
		final String name = type.toString();
		return Bytes.toBytes(name);
	}

	/**
	 * @return the numeric value of the entity type ({@code getNumber()}), formatted as a decimal string
	 */
	public String getEntityId() {
		final Type type = getEntityType();
		return Integer.toString(type.getNumber());
	}

	/**
	 * @return the name of the field used for sorting in the reducer phase
	 */
	public String getOrderField() {
		return orderField;
	}

	/**
	 * @return the column families involved in the relations redirection (the stored list itself, not a copy)
	 */
	public List<String> getRootBuilderFamilies() {
		return rootBuilderFamilies;
	}

	/**
	 * @return the dedup run subprefix, free of single quote characters when set through the constructor
	 */
	public String getDedupRun() {
		return dedupRun;
	}

	/**
	 * @return the similarity threshold
	 */
	public double getThreshold() {
		return threshold;
	}

	/**
	 * @return the datasource namespace prefixes that won't be deduplicated (the stored set itself, not a copy)
	 */
	public Set<String> getSkipList() {
		return skipList;
	}

	/**
	 * @return the JSON serialization of this configuration, produced by a fresh {@link Gson} instance
	 */
	@Override
	public String toString() {
		final Gson gson = new Gson();
		return gson.toJson(this);
	}

}
