package org.gcube.indexmanagement.common;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.gcube.common.core.utils.logging.GCUBELog;
import org.gcube.common.searchservice.searchlibrary.resultset.elements.ResultElementBLOBGeneric;
import org.gcube.common.searchservice.searchlibrary.rsclient.elements.RSLocator;
import org.gcube.common.searchservice.searchlibrary.rsclient.elements.RSResourceLocalType;
import org.gcube.common.searchservice.searchlibrary.rsclient.elements.RSResourceWSRFType;
import org.gcube.common.searchservice.searchlibrary.rsreader.RSBLOBIterator;
import org.gcube.common.searchservice.searchlibrary.rsreader.RSBLOBReader;
import org.gcube.common.searchservice.searchlibrary.rswriter.RSBLOBWriter;

/**
 * A class used to merge the statistics files of a partitioned index into a
 * single statistics file.
 * <p>
 * Each input file is scanned for a <code>&lt;numDocs&gt;</code> header line and
 * for term lines of the form
 * <code>&lt;term&gt;&lt;t&gt;TERM&lt;/t&gt;&lt;d&gt;DOCS&lt;/d&gt;&lt;c&gt;COUNT&lt;/c&gt;&lt;/term&gt;</code>.
 * The merged file sums <code>numDocs</code> over all inputs and, for every
 * distinct term, sums the document and term counts over all inputs that
 * contain the term.
 */
public class StatisticsFileMerger {

    /** Logger shared by the merger and its combiner */
    private static final GCUBELog logger = new GCUBELog(StatisticsFileMerger.class);

    /**
     * Character encoding of the merged statistics file (it is also the one
     * announced in the XML declaration written by the combiner).
     * NOTE(review): the input statistics files are assumed to use the same
     * encoding as the output - confirm against whatever produces them.
     */
    private static final String STATISTICS_CHARSET = "ISO-8859-1";

    /**
     * A method used to merge the statistics files of a partitioned index into a
     * single statistics file.
     * <p>
     * The merged content is produced by a background thread that feeds a pipe;
     * the read end of that pipe is handed to the output ResultSet as a BLOB.
     * Errors that occur while combining are logged by the combiner and end the
     * merged stream early; they are not reported through this method.
     * 
     * @param RS_EPRs
     *            <code>List</code> - a list of the statistics ResultSets to
     *            merge; the first non-null BLOB of each ResultSet is used
     * @param scc
     *            the service context container (not used by this method)
     * @return the locator of the ResultSet which holds the merged statistics
     *         file
     * @throws IndexException -
     *             an error occurred while merging the statistics files
     */
    public static String merge(List<String> RS_EPRs, ServiceContextContainer scc) throws IndexException {

        final List<BufferedReader> statFileInputs = new ArrayList<BufferedReader>();
        PipedInputStream combinedInput = null;
        boolean combinerStarted = false;

        try {
            logger.info("Statistics file merger called");
            logger.info("Fetching statistics resultsets...");

            /* Fetch the input resultsets and get their contents as readers. Each resultset is
             * expected to contain exactly one BLOB element, so only the first non-null one is used.
             */
            for (String epr : RS_EPRs) {
                RSBLOBIterator iterator = RSBLOBReader.getRSBLOBReader(new RSLocator(epr)).makeLocal(new RSResourceLocalType()).getRSIterator();
                while (iterator.hasNext()) {
                    ResultElementBLOBGeneric inputBlob = (ResultElementBLOBGeneric) iterator.next(ResultElementBLOBGeneric.class);
                    if (inputBlob != null) {
                        // Decode with an explicit charset instead of the platform default.
                        statFileInputs.add(new BufferedReader(new InputStreamReader(
                                new BufferedInputStream(inputBlob.getContentOfBLOB()), STATISTICS_CHARSET)));
                        break;
                    }
                }
            }

            logger.info("Combining statistics...");

            PipedOutputStream combinedOutput = new PipedOutputStream();
            combinedInput = new PipedInputStream(combinedOutput);
            final StatFileCombiner combiner = new StatFileCombiner(statFileInputs, combinedOutput);

            // A pipe must be written and read by different threads: its buffer is
            // small, so a writer on the reading thread blocks forever once it is full.
            Thread combinerThread = new Thread(new Runnable() {
                public void run() {
                    combiner.combine();
                }
            }, "StatisticsFileMerger-combiner");
            combinerThread.setDaemon(true);
            combinerThread.start();
            combinerStarted = true;

            logger.info("Creating output ResultSet...");

            RSBLOBWriter writer = RSBLOBWriter.getRSBLOBWriter();
            ResultElementBLOBGeneric blob = new ResultElementBLOBGeneric("foo", "bar", null, new BufferedInputStream(combinedInput));
            writer.addResults(blob);
            writer.close();
            return writer.getRSLocator(new RSResourceWSRFType()).getLocator();
        } catch (Exception e) {
            logger.error("Error while merging statistics.", e);
            // Closing the read end makes any pending or later write of the combiner
            // fail, so a running combiner thread terminates instead of blocking.
            closeQuietly(combinedInput);
            if (!combinerStarted) {
                // The combiner closes the inputs itself once it runs; if it was never
                // started they have to be released here.
                for (BufferedReader input : statFileInputs) {
                    closeQuietly(input);
                }
            }
            throw new IndexException(e);
        }
    }

    /**
     * Closes a resource, ignoring a <code>null</code> argument and any
     * <code>IOException</code> raised by the close itself (best-effort cleanup).
     * 
     * @param resource
     *            the resource to close; may be <code>null</code>
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // nothing useful can be done if cleanup fails
        }
    }

    /**
     * An inner class responsible for combining all the downloaded
     * statistics files into one.
     */
    private static class StatFileCombiner {

        /** Matches a (trimmed) term line: term text, document count, term count */
        private static final Pattern TERM_PATTERN = Pattern
                .compile("^<term><t>(.*)</t><d>(\\d*)</d><c>(\\d*)</c></term>$");

        /** Matches a (trimmed) numDocs header line */
        private static final Pattern DOCS_PATTERN = Pattern.compile("^<numDocs>(\\d*)</numDocs>$");

        /** The write end of the pipe; closed explicitly to signal end-of-stream */
        private final PipedOutputStream out;

        /** The writer to use to output the combined statistics file */
        private final BufferedWriter writer;

        /** A list of readers; one for each statistics file part to merge */
        private final List<BufferedReader> readers;

        /** All readers which still have a term waiting to be merged */
        private final List<BufferedReader> unfinishedReaders = new ArrayList<BufferedReader>();

        /** Maps every unfinished reader to the term it is currently positioned on */
        private final Map<BufferedReader, TermEntry> currentTerms = new HashMap<BufferedReader, TermEntry>();

        /**
         * Constructor
         * 
         * @param statFileReaders
         *            <code>List<BufferedReader></code> - a list of
         *            readers for each of the downloaded statistics files
         * @param out
         *            <code>PipedOutputStream</code> - the OutputStream to
         *            output the combined file through
         * @throws IOException
         *             if the output writer cannot be created
         */
        public StatFileCombiner(List<BufferedReader> statFileReaders, PipedOutputStream out) throws IOException {
            this.readers = statFileReaders;
            this.out = out;
            this.writer = new BufferedWriter(new OutputStreamWriter(out, STATISTICS_CHARSET));
        }

        /**
         * Combines the input statistics streams and writes the result to the
         * output stream. Errors are logged, not thrown. In every case the output
         * is flushed and closed (so that the reading side sees end-of-stream)
         * and all input readers are closed.
         */
        public void combine() {
            try {
                long numDocs = readHeaders();

                String header = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
                        + "<statistics>\n"
                        + "<head>\n<numDocs>"
                        + numDocs
                        + "</numDocs>\n"
                        + "<time_created>"
                        + new java.util.Date()
                        + "</time_created>\n</head>\n"
                        + "<body><terms>\n";
                writer.write(header);

                mergeTerms();

                String footer = "</terms></body>\n</statistics>\n";
                writer.write(footer);
            } catch (Exception e) {
                logger.error("Error while combining statistics.", e);
            } finally {
                // Closing the writer flushes the buffered output; closing the pipe
                // tells the reader that no more data will arrive.
                closeQuietly(writer);
                closeQuietly(out);
                for (BufferedReader reader : readers) {
                    closeQuietly(reader);
                }
            }
        }

        /**
         * Consumes the header of every input up to its first term line (or its
         * <code>&lt;/terms&gt;</code> line / end of input), summing the
         * <code>numDocs</code> values found on the way. Readers that have at
         * least one term are registered as unfinished, positioned on that term.
         * 
         * @return the sum of the numDocs values of all inputs
         * @throws IOException
         *             if reading from an input fails
         */
        private long readHeaders() throws IOException {
            long numDocs = 0;
            for (BufferedReader reader : readers) {
                String line = reader.readLine();
                while (line != null) {
                    String trimmed = line.trim();
                    if (trimmed.startsWith("<term>") || trimmed.startsWith("</terms>")) {
                        break;
                    }
                    if (trimmed.startsWith("<numDocs>")) {
                        numDocs += parseNumDocs(trimmed);
                    }
                    line = reader.readLine();
                }

                TermEntry first = nextTerm(reader, line);
                if (first != null) {
                    unfinishedReaders.add(reader);
                    currentTerms.put(reader, first);
                }
            }
            return numDocs;
        }

        /**
         * Extracts the document count from a trimmed numDocs line.
         * 
         * @param trimmedLine
         *            a trimmed line starting with <code>&lt;numDocs&gt;</code>
         * @return the parsed count, or 0 (after logging) if the line is malformed
         */
        private static long parseNumDocs(String trimmedLine) {
            Matcher m = DOCS_PATTERN.matcher(trimmedLine);
            if (m.find()) {
                try {
                    return Long.parseLong(m.group(1));
                } catch (NumberFormatException e) {
                    // empty or oversized number: reported below
                }
            }
            logger.error("Ignoring malformed numDocs line: " + trimmedLine);
            return 0;
        }

        /**
         * Repeatedly picks the smallest term (by <code>String.compareTo</code>)
         * among the current terms of all unfinished readers, writes it with the
         * counts summed over every reader positioned on that term, and advances
         * exactly those readers. The inputs must therefore be sorted in the
         * lexicographical order used by Java; otherwise equal terms of different
         * inputs are not merged.
         * 
         * @throws IOException
         *             if reading an input or writing the output fails
         */
        private void mergeTerms() throws IOException {
            List<BufferedReader> nextLineReaders = new ArrayList<BufferedReader>();

            while (!unfinishedReaders.isEmpty()) {
                String smallestTerm = null;
                long docCount = 0;
                long termCount = 0;
                nextLineReaders.clear();

                for (BufferedReader reader : unfinishedReaders) {
                    TermEntry entry = currentTerms.get(reader);
                    int comparison = smallestTerm == null ? -1 : entry.term.compareTo(smallestTerm);

                    if (comparison < 0) {
                        smallestTerm = entry.term;
                        docCount = entry.docCount;
                        termCount = entry.termCount;
                        nextLineReaders.clear();
                        nextLineReaders.add(reader);
                    } else if (comparison == 0) {
                        docCount += entry.docCount;
                        termCount += entry.termCount;
                        nextLineReaders.add(reader);
                    }
                }

                writer.write("  <term><t>" + smallestTerm + "</t><d>"
                        + docCount + "</d><c>" + termCount
                        + "</c></term>\n");

                // all the readers/files who had the last term, will need to
                // retrieve a new term; exhausted readers are retired right away
                // so that every round works on at least one real term
                for (BufferedReader reader : nextLineReaders) {
                    TermEntry next = nextTerm(reader, reader.readLine());
                    if (next == null) {
                        unfinishedReaders.remove(reader);
                        currentTerms.remove(reader);
                    } else {
                        currentTerms.put(reader, next);
                    }
                }
            }
        }

        /**
         * Returns the next well-formed term of a reader, starting with a line
         * that has already been read. A line that starts with
         * <code>&lt;term&gt;</code> but cannot be parsed is logged and skipped.
         * 
         * @param reader
         *            the reader to take further lines from
         * @param line
         *            the line the reader is currently positioned on; may be
         *            <code>null</code>
         * @return the parsed term, or <code>null</code> if the reader has reached
         *         the end of its input or a line which is not a term line
         * @throws IOException
         *             if reading from the input fails
         */
        private TermEntry nextTerm(BufferedReader reader, String line) throws IOException {
            while (line != null) {
                String trimmed = line.trim();
                if (!trimmed.startsWith("<term>")) {
                    return null;
                }
                Matcher m = TERM_PATTERN.matcher(trimmed);
                if (m.find()) {
                    try {
                        return new TermEntry(m.group(1), Long.parseLong(m.group(2)), Long.parseLong(m.group(3)));
                    } catch (NumberFormatException e) {
                        // empty or oversized number: reported below
                    }
                }
                logger.error("Skipping malformed statistics line: " + line);
                line = reader.readLine();
            }
            return null;
        }

        /**
         * The parsed content of one term line.
         */
        private static final class TermEntry {

            /** The term text, exactly as found between the t tags */
            private final String term;

            /** The value found between the d tags */
            private final long docCount;

            /** The value found between the c tags */
            private final long termCount;

            TermEntry(String term, long docCount, long termCount) {
                this.term = term;
                this.docCount = docCount;
                this.termCount = termCount;
            }
        }

    }
}