/*
 * LuceneStatistics.java
 *
 * $Author: tsakas $
 * $Date: 2007/12/20 14:37:39 $
 * $Id: LuceneStatistics.java,v 1.1 2007/12/20 14:37:39 tsakas Exp $
 *
 * <pre>
 *             Copyright (c) : 2006 Fast Search & Transfer ASA
 *                             ALL RIGHTS RESERVED
 * </pre>
 */

package org.gcube.indexmanagement.lucenewrapper;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.gcube.common.core.utils.logging.GCUBELog;
import org.gcube.common.searchservice.searchlibrary.resultset.elements.ResultElementBLOBGeneric;
import org.gcube.common.searchservice.searchlibrary.rsclient.elements.RSResourceWSRFType;
import org.gcube.common.searchservice.searchlibrary.rswriter.RSBLOBWriter;
import org.gcube.indexmanagement.common.IndexException;
import org.gcube.indexmanagement.common.IndexStatistics;
import org.gcube.indexmanagement.common.ServiceContextContainer;
import org.gcube.indexmanagement.common.StatisticsFileMerger;
import org.gcube.indexmanagement.common.XMLProfileParser;

/**
 * Creates per-term statistics (document frequency and total occurrence count) from Lucene indexes.
 * 
 */
public class LuceneStatistics implements IndexStatistics {

    /** The class logger */
    static GCUBELog logger = new GCUBELog(LuceneStatistics.class);

    /** Name of the index field whose terms are included in the statistics */
    private static final String CONTENTS_FIELD = "_contents";

    /** Character encoding declared in, and used to serialize, the XML statistics */
    private static final String STATISTICS_ENCODING = "ISO-8859-1";

    /** The service context container handed to the statistics merger */
    private ServiceContextContainer serviceContextCont = null;

    /** The Lucene index reader of the index to create statistics for */
    private IndexReader reader;

    /** The path to the index to create statistics for */
    private String indexPath = null;

    /**
     * A constructor which stores the service context of the calling Service.
     * 
     * @param serviceContextCont - the service context container, later passed
     *            to {@link StatisticsFileMerger#merge} when merging statistics.
     */
    public LuceneStatistics(ServiceContextContainer serviceContextCont) {
        this.serviceContextCont = serviceContextCont;
    }

    /**
     * {@inheritDoc}
     */
    public void openIndex(String baseIndexDir, String indexName) throws IndexException {
        // The path is a plain concatenation: baseIndexDir is expected to end
        // with a path separator.
        indexPath = baseIndexDir + indexName;
        try {
            reader = IndexReader.open(indexPath);
        } catch (Exception e) {
            logger.error("Failed to open index.", e);
            throw new IndexException("Failed to open index.", e);
        }
    }

    /**
     * {@inheritDoc}
     */
    public void updateIndex() throws IndexException {
        try {
            // Closing the stale reader is best-effort: a failure is logged but
            // must not prevent the index from being reopened.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    logger.error("could not close reader while updating the Index: ", e);
                }
            }
            reader = IndexReader.open(indexPath);
        } catch (Exception e) {
            logger.error("Failed to update index.", e);
            throw new IndexException("Failed to update index.", e);
        }
    }

    /**
     * Creates a statistics file and stores it in a ResultSet. The statistics are
     * represented as an XML document with a doc frequency ({@code <d>}: number of
     * documents the term was found in) and term frequency ({@code <c>}: number of
     * times the term occurred in the whole collection) for each term in the
     * collection.
     * 
     * @return The EPR of the created statistics ResultSet.
     * @throws IndexException Unable to create the statistics file
     */
    public String createStatistics() throws IndexException {
        try {
            /* Produce the XML statistics in memory */
            ByteArrayOutputStream memorizedFile = new ByteArrayOutputStream();
            OutputStream out = new BufferedOutputStream(memorizedFile, 2048);
            try {
                createXMLStatistics(out);
            } finally {
                // Closing flushes the buffer into memorizedFile.
                out.close();
            }

            /* Create a BLOB resultset and populate it with the statistics (whole file in one RS record) */
            ByteArrayInputStream in = new ByteArrayInputStream(memorizedFile.toByteArray());
            RSBLOBWriter writer = RSBLOBWriter.getRSBLOBWriter();
            ResultElementBLOBGeneric blob = new ResultElementBLOBGeneric("foo", "bar", null, new BufferedInputStream(in));
            writer.addResults(blob);
            String statsRsEpr = writer.getRSLocator(new RSResourceWSRFType()).getLocator();
            writer.close();
            return statsRsEpr;
        } catch (Exception e) {
            logger.error("Error while producing index statistics.", e);
            throw new IndexException(e);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Note: the EPR of the statistics created for this index is appended to the
     * supplied list before merging, so the caller's list is modified.
     */
    public String createMergedStatistics(List<String> RS_EPRs) throws IndexException {
        try {
            RS_EPRs.add(createStatistics());
            return StatisticsFileMerger.merge(RS_EPRs, serviceContextCont);
        } catch (Exception e) {
            logger.error("Error while producing merged index statistics.", e);
            throw new IndexException("Failed to create merged statistics.", e);
        }
    }

    /**
     * A method used to write statistics to an OutputStream. The statistics are
     * represented as an XML document with a doc frequency ({@code <d>}: number of
     * documents the term was found in) and term frequency ({@code <c>}: number of
     * times the term occurred in the whole collection) for each term of the
     * {@code _contents} field. Terms of other fields are skipped.
     * 
     * @param out -
     *            The OutputStream to use when writing the XML statistics
     * @throws IndexException
     *             Unable to write statistics to the OutputStream
     */
    private void createXMLStatistics(OutputStream out) throws IndexException {
        try {
            String header = "<?xml version=\"1.0\" encoding=\"" + STATISTICS_ENCODING + "\"?>\n"
                    + "<statistics>\n"
                    + "<head>\n<numDocs>" + reader.numDocs() + "</numDocs>\n"
                    + "<time_created>" + new java.util.Date()
                    + "</time_created>\n</head>\n"
                    + "<body><terms>\n";
            writeEncoded(out, header);

            TermEnum te = reader.terms();
            try {
                while (te.next()) {
                    Term term = te.term();
                    // Compare by value: reference equality only works if the
                    // field name happens to be interned.
                    if (!CONTENTS_FIELD.equals(term.field())) {
                        continue;
                    }
                    String termElement = "  <term><t>"
                            + XMLProfileParser.escapeForXML(term.text())
                            + "</t><d>" + te.docFreq() + "</d><c>"
                            + countOccurrences(term) + "</c></term>\n";
                    writeEncoded(out, termElement);
                }
            } finally {
                te.close();
            }

            writeEncoded(out, "</terms></body>\n</statistics>\n");
        } catch (IOException e) {
            logger.error("Failed to create XML statistics.", e);
            throw new IndexException("Failed to create XML statistics.", e);
        }
    }

    /**
     * Sums the within-document frequencies of a term over all documents that
     * contain it.
     * 
     * @param term - the term to count
     * @return the total number of occurrences of the term in the index
     * @throws IOException if the postings cannot be read
     */
    private int countOccurrences(Term term) throws IOException {
        int total = 0;
        TermDocs td = reader.termDocs(term);
        try {
            while (td.next()) {
                total += td.freq();
            }
        } finally {
            td.close();
        }
        return total;
    }

    /**
     * Encodes the text with the statistics encoding and writes every resulting
     * byte. The byte count is taken from the encoded array, not from the
     * string length, because the two differ when the text contains surrogate
     * pairs (each pair is encoded as a single replacement byte).
     * 
     * @param out - the stream to write to
     * @param text - the text to encode and write
     * @throws IOException if encoding or writing fails
     */
    private static void writeEncoded(OutputStream out, String text) throws IOException {
        out.write(text.getBytes(STATISTICS_ENCODING));
    }

    /**
     * {@inheritDoc}
     * <p>
     * Closes the underlying index reader, if one is open. Calling this method
     * when no reader is open does nothing.
     */
    public void close() throws IndexException {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (Exception e) {
            logger.error("Failed to close index.", e);
            throw new IndexException("Failed to close index.", e);
        } finally {
            // Drop the reference either way so a failed close is not retried
            // on a reader in an unknown state.
            reader = null;
        }
    }
}
