package eu.dnetlib.data.mdstore.plugins;

import java.io.StringReader;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;

import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
import eu.dnetlib.rmi.data.MDStoreServiceException;

/**
 * MDStore plugin that removes from a store every record whose XML body does not match a given XPath expression.
 *
 * <p>
 * The expression is read from the {@code xpath} entry of the plugin parameters. Each document of the store collection is
 * expected to expose an {@code id} field and a {@code body} field holding the XML record.
 * </p>
 */
public class FilterMdRecordsPlugin extends AbstractIstiMDStorePlugin {

	private static final Log log = LogFactory.getLog(FilterMdRecordsPlugin.class);

	/**
	 * Iterates over all the records of the store, deleting those for which the XPath selects no node.
	 *
	 * <p>
	 * Records that cannot be evaluated (missing {@code id}/{@code body}, or a body that is not well-formed XML) are left
	 * in the store, reported with a warning and counted as invalid. After the scan, {@code touch(store)} (inherited) is
	 * invoked.
	 * </p>
	 *
	 * @param store
	 *            the mdstore whose collection is filtered in place
	 * @param params
	 *            plugin parameters; must contain a non-blank {@code xpath} entry
	 * @throws MDStoreServiceException
	 *             if the {@code xpath} parameter is missing or blank
	 */
	@Override
	public final void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {

		final String xpath = params.get("xpath");

		// Fail fast with a meaningful error instead of an obscure runtime exception raised by dom4j on the first record.
		if (xpath == null || xpath.trim().isEmpty()) {
			throw new MDStoreServiceException("Missing or empty parameter: xpath");
		}

		final MongoCollection<DBObject> collPubs = store.getCollection();

		long valid = 0;
		long skipped = 0;
		long invalid = 0;

		// NOTE(review): the reader uses the default parser configuration; if record bodies can come from untrusted
		// sources, consider disabling external entities / DTD loading.
		final SAXReader reader = new SAXReader();
		for (final DBObject obj : collPubs.find()) {
			final Object id = obj.get("id");
			final Object body = obj.get("body");

			// A record without id or body cannot be evaluated (nor safely deleted by id): keep it and go on,
			// rather than aborting the whole scan with a NullPointerException.
			if (id == null || body == null) {
				log.warn("Skipping a mdstore record without id or body: " + id);
				invalid++;
				continue;
			}

			final String recordId = id.toString();
			try {
				final Document doc = reader.read(new StringReader(body.toString()));
				if (doc.selectNodes(xpath).isEmpty()) {
					collPubs.deleteOne(new BasicDBObject("id", recordId));
					skipped++;
				} else {
					valid++;
				}
			} catch (final DocumentException e) {
				// Unparsable records are kept in the store; the id is logged so that they can be inspected.
				log.warn("Problem parsing a mdstore record: " + recordId, e);
				invalid++;
			}
		}

		if (log.isDebugEnabled()) {
			log.debug("***** Filtering records *****");
			log.debug("*     xpath: " + xpath);
			log.debug("*     valid: " + valid);
			log.debug("*   skipped: " + skipped);
			log.debug("*   invalid: " + invalid);
			log.debug("*****************************");
		}
		touch(store);
	}

}
