package eu.dnetlib.espas.data.harvest;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.Formatter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.log4j.Logger;

import eu.dnetlib.espas.data.harvest.csw.CSWGetRecordsRequest;

public class RecordsHarvesterTransImpl extends AbstractRecordsHarvester implements RecordsHarvester
{
   protected static final int MAX_QUEUE_SIZE = Integer.MAX_VALUE;
   private Logger logger = Logger.getLogger(RecordsHarvesterTransImpl.class);
   protected LinkedList<CSWGetRecordsRequest> cswGetRecordsRequests = null;
   protected int maxQueueSize = MAX_QUEUE_SIZE;
   protected int numOfHarvestedRecords = 0;
   protected BlockingQueue<String> recordsBlockingQueue = null;
   protected boolean isHarvestingCompleted = false;

   @Override
   public void run()
   {
      this.harvest();
   }

   public RecordsHarvesterTransImpl()
   {
      this.initializeGetRecords(MAX_QUEUE_SIZE, null);
   }

   public RecordsHarvesterTransImpl(LinkedList<CSWGetRecordsRequest> cswGetRecordsRequest)
   {
      this.initializeGetRecords(MAX_QUEUE_SIZE, cswGetRecordsRequest);
   }

   public RecordsHarvesterTransImpl(int queueSize, LinkedList<CSWGetRecordsRequest> cswGetRecordsRequest)
   {
      this.initializeGetRecords(queueSize, cswGetRecordsRequest);
   }

   public void initializeGetRecords(int queueSize, LinkedList<CSWGetRecordsRequest> cswGetRecordsRequests)
   {
      this.setMaxQueueSize(queueSize);
      this.numOfHarvestedRecords = 0;
      this.recordsBlockingQueue = new LinkedBlockingQueue<String>(this.maxQueueSize);
      this.setCSWGetRecordsRequest(cswGetRecordsRequests);
      this.setHarvestingCompleted(false);
   }

   @Override
   public void harvest() {
       try{
	   for(CSWGetRecordsRequest cswGetRecordsRequest : this.cswGetRecordsRequests) {
		   this.harvestEachRecordTypeIndividuallyInFIFO(cswGetRecordsRequest);
 		   this.numOfHarvestedRecords += cswGetRecordsRequest.getNumOfHarvestedRecords();
	   }
       }
       catch(Exception ex){
           logger.error("Untreated Exception while harvesting", ex);
       }
       finally{
	   this.isHarvestingCompleted = true;
       }
   }

   public void harvestEachRecordTypeIndividuallyInFIFO(CSWGetRecordsRequest cswGetRecordsRequest)
   {
      boolean isFirstGetRecordsRequest = true;
      // We do not have a "nextRecord" value, since we have not made any GetRecords
      // requests yet. Thus, we inject a non-zero value e.g. 1, so as to put the while
      // loop into play.
      cswGetRecordsRequest.getSearchResults().put(Record.SR_ATT_NUM_OF_NEXT_RECORD, "1");
      while(Integer.parseInt(cswGetRecordsRequest.getSearchResults().get(Record.SR_ATT_NUM_OF_NEXT_RECORD)) != 0)
      {  // Harvest the next "maxRecords" records (the default value is 10) starting from
         // the "startPosition" offset. If the "nextRecord" has the value zero, then all
         // the records have been returned.
         if(isFirstGetRecordsRequest)
         {  // This is the very first GetRecords request.
        	this.harvestTheNextCSWRecordsDocument(cswGetRecordsRequest);
            isFirstGetRecordsRequest = false;
            if(cswGetRecordsRequest.getNumOfHarvestedRecords() >= Integer.parseInt(cswGetRecordsRequest.getSearchResults().get(Record.SR_ATT_NUM_OF_RECORDS_MATCHED))) {
            	// Harvesting has been completed.
                logger.debug("Harvesting completed, expected[" +  cswGetRecordsRequest.getSearchResults().get(Record.SR_ATT_NUM_OF_RECORDS_MATCHED) + "] " +
                			 "actual[" + cswGetRecordsRequest.getNumOfHarvestedRecords() + "]");
                return;
            }
         }
         else
         {  // This is not the first GerRecords request, thus adjust appropriately the next "startPosition".
            cswGetRecordsRequest.setStartPosition(Integer.parseInt(cswGetRecordsRequest.getSearchResults().get(Record.SR_ATT_NUM_OF_NEXT_RECORD)));
            this.harvestTheNextCSWRecordsDocument(cswGetRecordsRequest);
            // Since, we do not want to rely on the proper implementation
            // of the CSW specification on the server side, we monitor the current number
            // of harvested records, so as to stop the harvesting, when the "numberOfRecordsMatched"
            // has been reached or even exceeded.
            if(cswGetRecordsRequest.getNumOfHarvestedRecords() >= Integer.parseInt(cswGetRecordsRequest.getSearchResults().get(Record.SR_ATT_NUM_OF_RECORDS_MATCHED))) {
            	// Harvesting has been completed.
                logger.debug("Harvesting completed, expected[" +  cswGetRecordsRequest.getSearchResults().get(Record.SR_ATT_NUM_OF_RECORDS_MATCHED) + "] " +
           		             "actual[" + cswGetRecordsRequest.getNumOfHarvestedRecords() + "]");
                return;
            }
         }
      }
   }

   public void harvestTheNextCSWRecordsDocument(CSWGetRecordsRequest cswGetRecordsRequest)
   {
      XMLInputFactory xmlInputFactory = null;
      XMLStreamReader xmlStreamReader = null;
      TransformerFactory transformerFactory = null;
      Transformer transformer = null;
      try
      {
         InputStream inputStream = cswGetRecordsRequest.getURL().openStream();
         xmlInputFactory = XMLInputFactory.newInstance();
         xmlStreamReader = xmlInputFactory.createXMLStreamReader(inputStream);
         transformerFactory = TransformerFactory.newInstance();
         transformerFactory.setAttribute("indent-number", 3);
         transformer = transformerFactory.newTransformer();
         transformer.setOutputProperty(OutputKeys.INDENT, "yes");
         logger.debug("CSW GET Records request " + cswGetRecordsRequest.toString());
         while(xmlStreamReader.hasNext())
         {
            xmlStreamReader.next();
            if(XMLStreamReader.START_ELEMENT == xmlStreamReader.getEventType() && Record.SS_ELEMENT_NAME.equalsIgnoreCase(xmlStreamReader.getLocalName()) && cswGetRecordsRequest.getSearchStatus().isEmpty()) {
            	// Set the <csw:SearchStatus timestamp="2012-05-21T17:53:43Z"/>, ONLY the first time.
            	// There is no need to re-read for each batch of records, that is included in the
            	// same CSW GetRecords Request.
            	cswGetRecordsRequest.setSearchStatus(xmlStreamReader);
            }
            else if(XMLStreamReader.START_ELEMENT == xmlStreamReader.getEventType() && Record.SR_ELEMENT_NAME.equalsIgnoreCase(xmlStreamReader.getLocalName()))
            {
               cswGetRecordsRequest.setSearchResults(xmlStreamReader);
            }
            else if(XMLStreamReader.START_ELEMENT == xmlStreamReader.getEventType() && Record.REC_ELEMENT_NAME.equalsIgnoreCase(xmlStreamReader.getLocalName()))
            {
               ByteArrayOutputStream baosRecord = new ByteArrayOutputStream();
               transformer.transform(new StAXSource(xmlStreamReader), new StreamResult(baosRecord));
               this.put(baosRecord.toString());
               cswGetRecordsRequest.incrementNumOfHarvestedRecordsByOne();
            }
         }
         inputStream.close();
      }
      catch(MalformedURLException e)
      {
         logger.error("The passed url " + cswGetRecordsRequest.getURLStr() + " is malformed.", e);
      }
      catch(IOException e)
      {
         logger.error("IO exception.", e);
      }
      catch(XMLStreamException e)
      {
         logger.error("XML stream exception.", e);
      }
      catch(TransformerConfigurationException e)
      {
         logger.error("Transformer configuration exception.", e);
      }
      catch(TransformerException e)
      {
         logger.error("Transformer exception.", e);
      }
   }

   @Override
   public String toString()
   {
	  String _toString = null;
      Formatter formatter = new Formatter();
      for(String record : this.recordsBlockingQueue)
      {
         formatter.format("%s\n", record);
      }
      logger.debug(formatter.toString());
      _toString = formatter.toString();
      formatter.close();
      return _toString;
   }

   public int getMaxQueueSize()
   {
      return maxQueueSize;
   }

   public void setMaxQueueSize(int maxQueueSize)
   {
      this.maxQueueSize = maxQueueSize;
   }

   public int getNumOfHarvestedRecords()
   {
      return numOfHarvestedRecords;
   }

   public void setNumOfHarvestedRecords(int numOfHarvestedRecords)
   {
      this.numOfHarvestedRecords = numOfHarvestedRecords;
   }

   @Override
   public BlockingQueue<String> getRecordsBlockingQueue()
   {
      return recordsBlockingQueue;
   }

   public void setRecordsBlockingQueue(BlockingQueue<String> records)
   {
      this.recordsBlockingQueue = records;
   }

   @Override
   public void put(String value)
   {
      try
      {
         logger.debug("Before put value: " + value);
         this.recordsBlockingQueue.put(value);
      }
      catch(InterruptedException e)
      {
         logger.error("Interrupted exception.", e);
      }
   }

   @Override
   public String take()
   {
      String value = null;
      try
      {
         while(null == value && this.hasNext())
         {
            logger.debug("Before take value");
            value = this.recordsBlockingQueue.poll(100, TimeUnit.MILLISECONDS);
            logger.debug("poll timed out");
         }
         return value;
      }
      catch(InterruptedException e)
      {
         logger.error("Interrupted exception.", e);
      }
      return null;
   }

   @Override
   public int size()
   {
      return this.recordsBlockingQueue.size();
   }

   @Override
   public boolean hasNext()
   {
      if(this.isHarvestingCompleted)
      {
         return this.recordsBlockingQueue.size() >= 1 ? true : false;
      }
      else
      {
         return true;
      }
   }

   @Override
   public String next()
   {
      return this.take();
   }

   @Override
   public void remove()
   {
      this.take();
   }

   // This implementation is safe as long as a ONLY one iterator call,
   // is active at any time. For the time being, this is not a problem
   // since this records harvester is expected to be used by ONLY one
   // consumer (client code).
   @Override
   public Iterator<String> iterator()
   {
      return this;
   }

   @Override
   public void setCSWGetRecordsRequest(LinkedList<CSWGetRecordsRequest> cswGetRecordsRequests)
   {
      this.cswGetRecordsRequests = cswGetRecordsRequests;
   }

   @Override
   public LinkedList<CSWGetRecordsRequest> getCSWGetRecordsRequest()
   {
      return this.cswGetRecordsRequests;
   }

   @Override
   public boolean isHarvestingCompleted()
   {
      return this.isHarvestingCompleted;
   }

   @Override
   public void setHarvestingCompleted(boolean isHarvestingCompleted)
   {
      this.isHarvestingCompleted = isHarvestingCompleted;
   }
}
