package eu.dnetlib.data.hadoop.utils;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Base64;
import org.dom4j.Document;
import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Value;

/**
 * Builds HBase {@link Scan} objects (and their Base64 serialized form) from {@link ScanProperties}, and parses
 * {@link ScanProperties} out of an XML job description.
 */
public class ScanFactory {

	private static final Log log = LogFactory.getLog(ScanFactory.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * Charset used to turn family names, qualifiers and row prefixes into bytes. Fixed to UTF-8 so that the produced bytes do not
	 * depend on the JVM default charset of the host building the scan.
	 */
	private static final Charset UTF8 = Charset.forName("UTF-8");

	/** Number of versions to read when a FAMILY element sets readVersions="true". */
	@Value("${services.hadoop.hbase.maxversions}")
	private int hbaseMaxVersions;

	/**
	 * Builds the scan described by the given properties and serializes it.
	 *
	 * @param scanProperties
	 *            the scan settings (caching, families, filters, versions).
	 * @return the scan saved in a Base64 encoded string.
	 * @throws IOException
	 *             when writing the scan fails.
	 */
	public String getScan(final ScanProperties scanProperties) throws IOException {
		return convertScanToString(getScanObject(scanProperties));
	}

	/**
	 * Builds a {@link Scan} from the given properties: caching size, filter list, column families and, when the requested number of
	 * versions is positive, the max versions to read. Block caching is always disabled.
	 *
	 * @param scanProperties
	 *            the scan settings.
	 * @return the configured scan.
	 */
	public Scan getScanObject(final ScanProperties scanProperties) {
		final Scan scan = new Scan();

		scan.setCaching(scanProperties.getCaching());
		scan.setCacheBlocks(false); // don't set to true for MR jobs

		scan.setFilter(scanProperties.getFilterList());
		for (String family : scanProperties.getFamilies()) {
			scan.addFamily(family.getBytes(UTF8));
		}

		if (scanProperties.getVersions() > 0) {
			log.info(String.format("scanner to read %s versions from hbase", scanProperties.getVersions()));
			scan.setMaxVersions(scanProperties.getVersions());
		}

		return scan;
	}

	/**
	 * Parses the scan settings from the given document.
	 * <ul>
	 * <li>{@code //FILTERS/@operator} is passed to the {@link ScanProperties} constructor;</li>
	 * <li>{@code //SCAN/@caching}, when not blank, overrides the default caching;</li>
	 * <li>each {@code //SCAN/FAMILIES/FAMILY} contributes a family (from {@code @value}, or from {@code bbParams} using
	 * {@code @param} as key), an optional column prefix filter ({@code @qualifier}) and an optional request to read multiple
	 * versions ({@code @readVersions});</li>
	 * <li>each {@code //SCAN/FILTERS/FILTER} of type {@code prefix} contributes a row prefix filter.</li>
	 * </ul>
	 *
	 * @param doc
	 *            the document describing the scan.
	 * @param bbParams
	 *            parameters used to resolve FAMILY and FILTER values given by {@code @param} instead of {@code @value}.
	 * @return the parsed scan properties.
	 * @throws IllegalArgumentException
	 *             when a FAMILY or FILTER value cannot be resolved.
	 * @throws NumberFormatException
	 *             when {@code //SCAN/@caching} is not blank and is not an integer.
	 */
	public ScanProperties parseScanProperties(final Document doc, final Map<String, String> bbParams) {
		log.debug("setting job scanner");

		final ScanProperties scanProperties = new ScanProperties(doc.valueOf("//FILTERS/@operator"));

		final String caching = doc.valueOf("//SCAN/@caching");
		if (!StringUtils.isBlank(caching)) {
			log.info("overriding default scan caching with: " + caching);
			scanProperties.setCaching(Integer.parseInt(caching.trim()));
		}

		// qualifiers of all the families are OR-ed together in a single filter list
		final FilterList columnFilter = new FilterList(Operator.MUST_PASS_ONE);
		for (Object o : doc.selectNodes("//SCAN/FAMILIES/FAMILY")) {
			final Node node = (Node) o;
			String value = node.valueOf("./@value");
			if (StringUtils.isBlank(value)) {
				final String param = node.valueOf("./@param");
				value = bbParams.get(param);
				if (value == null) {
					// fail here with a clear message, rather than with a NullPointerException when the scan is built
					throw new IllegalArgumentException(String.format("missing value for Scan family, param: '%s'", param));
				}
			}
			final String qualifier = node.valueOf("./@qualifier");
			if (StringUtils.isNotBlank(qualifier)) {
				columnFilter.addFilter(new ColumnPrefixFilter(qualifier.trim().getBytes(UTF8)));
			}
			final String readVersions = node.valueOf("./@readVersions");
			if (StringUtils.isNotBlank(readVersions) && Boolean.parseBoolean(readVersions)) {
				scanProperties.setVersions(hbaseMaxVersions);
			}

			log.debug(String.format("scanner family: '%s', qualifier: '%s', readVersions: '%s'", value, qualifier, readVersions));
			scanProperties.getFamilies().add(value);
		}

		if (!columnFilter.getFilters().isEmpty()) {
			scanProperties.getFilterList().addFilter(columnFilter);
		}

		for (Object o : doc.selectNodes("//SCAN/FILTERS/FILTER")) {
			final Node node = (Node) o;
			final String filterType = node.valueOf("./@type");

			String value = node.valueOf("./@value");
			if ((value == null) || value.isEmpty()) {
				value = bbParams.get(node.valueOf("./@param"));
			}

			if (value == null) {
				throw new IllegalArgumentException("missing value for Scan filter [entityTypeId|entityType]");
			}

			if ("prefix".equals(filterType)) {
				log.debug("scanner prefix filter, value: " + value);
				scanProperties.getFilterList().addFilter(new PrefixFilter(value.getBytes(UTF8)));
			} else {
				// TODO add more filterType cases here
				log.warn(String.format("ignoring unsupported scan filter type: '%s'", filterType));
			}
		}
		return scanProperties;
	}

	/**
	 * Writes the given scan into a Base64 encoded string.
	 *
	 * @param scan
	 *            The scan to write out.
	 * @return The scan saved in a Base64 encoded string.
	 * @throws IOException
	 *             When writing the scan fails.
	 */
	private static String convertScanToString(final Scan scan) throws IOException {
		log.debug("serializing scan");
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
		final DataOutputStream dos = new DataOutputStream(out);
		scan.write(dos);
		dos.flush(); // make sure everything reached the underlying buffer before reading it
		return Base64.encodeBytes(out.toByteArray());
	}

}
