package eu.dnetlib.data.mapreduce.hbase.oai.utils;

import java.io.StringReader;
import java.util.List;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import com.google.common.base.Function;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;

/**
 * An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store.
 * 
 * The fields to extract are described by a list of {@link PublisherField}: each of them provides a field name and a set of xpaths (its
 * "sources") that are evaluated against the parsed record.
 * 
 * @author alessia
 * 
 */
public class RecordFieldsExtractor {

	private static final Log log = LogFactory.getLog(RecordFieldsExtractor.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * Maps an element of an xpath result to its text: null elements become the empty string, any other element is cast to {@link Node} and
	 * its text is returned. The function is stateless, hence it is shared instead of being re-created for each xpath evaluation.
	 */
	private static final Function<Object, String> NODE_TO_TEXT = new Function<Object, String>() {

		@Override
		public String apply(final Object obj) {
			if (obj == null) {
				return "";
			}
			return ((Node) obj).getText();
		}
	};

	/**
	 * List of the indices of the target store.
	 */
	private List<PublisherField> storeIndices;

	/**
	 * Creates an extractor without indices. Use {@link #setStoreIndices(List)} to configure the fields to extract.
	 */
	public RecordFieldsExtractor() {
		super();
	}

	/**
	 * Creates an extractor for the given indices.
	 * 
	 * @param storeIndices
	 *            the list of the indices of the target store.
	 */
	public RecordFieldsExtractor(final List<PublisherField> storeIndices) {
		super();
		this.storeIndices = storeIndices;
	}

	/**
	 * Parses the record and returns a map where a key is the name of an index, the value is the value in the record at the xpath specified
	 * in this.storeIndices.
	 * 
	 * @param record
	 *            the XML string to parse. Must not be null.
	 * @return a Multimap describing the values to be indexed for this record. The Multimap is empty if no indices have been set or no xpath
	 *         matches; <code>null</code> is returned if the record cannot be parsed as XML (the failure is logged).
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	public Multimap<String, String> extractFields(final String record) {
		// NOTE(review): the SAXReader is used with its default configuration. If records may come from untrusted sources,
		// consider disabling DTDs / external entities on the reader to prevent XXE -- to be confirmed against the data flow.
		final Document doc;
		try {
			doc = new SAXReader().read(new StringReader(record));
		} catch (DocumentException e) {
			// null is kept as the "unparsable record" signal for existing callers, but the failure is no longer silent.
			log.warn("Unable to parse the record, no fields extracted: " + e.getMessage(), e);
			return null;
		}

		final Multimap<String, String> recordProps = ArrayListMultimap.create();
		if (this.storeIndices == null) {
			// no indices configured (e.g. instance created with the no-arg constructor): nothing to extract
			return recordProps;
		}

		for (PublisherField field : this.storeIndices) {
			for (Entry<String, String> indexEntry : field.getSources().entries()) {
				// each xpath can return a list of nodes or strings, depending on the xpath
				final String xpath = indexEntry.getValue();
				final List xPathResult = doc.selectNodes(xpath);
				if ((xPathResult == null) || xPathResult.isEmpty()) {
					continue;
				}
				// the type of the first element decides how the whole result is handled
				if (containsStrings(xPathResult)) {
					recordProps.putAll(field.getFieldName(), xPathResult);
				} else if (containsNodes(xPathResult)) {
					recordProps.putAll(field.getFieldName(), Iterables.transform(xPathResult, NODE_TO_TEXT));
				}
				// results whose first element is neither a String nor a Node are ignored
			}
		}
		return recordProps;
	}

	/**
	 * Checks the first element of the given list.
	 * 
	 * @param objects
	 *            a non-empty list.
	 * @return true if the first element is a String.
	 */
	@SuppressWarnings("rawtypes")
	private boolean containsStrings(final List objects) {
		return objects.get(0) instanceof String;
	}

	/**
	 * Checks the first element of the given list.
	 * 
	 * @param objects
	 *            a non-empty list.
	 * @return true if the first element is a {@link Node}.
	 */
	@SuppressWarnings("rawtypes")
	private boolean containsNodes(final List objects) {
		return objects.get(0) instanceof Node;
	}

	public List<PublisherField> getStoreIndices() {
		return storeIndices;
	}

	public void setStoreIndices(final List<PublisherField> storeIndices) {
		this.storeIndices = storeIndices;
	}

}
