org.gcube.execution.textExtraction.job.utils
Class TextExtractionUtils

java.lang.Object
  extended by org.gcube.execution.textExtraction.job.utils.TextExtractionUtils

public class TextExtractionUtils
extends java.lang.Object


Constructor Summary
TextExtractionUtils()
           
 
Method Summary
static void deleteFileFromFTP(java.lang.String fileName, java.lang.String ftpHost, int ftpPort, java.lang.String ftpUser, java.lang.String ftpPassword, java.lang.String metadataFolderName)
           
static java.util.ArrayList<DocumentInfos> getListOfFailuresFromReport(java.lang.String rsLocator, java.util.ArrayList<DocumentInfos> allDocuments, java.lang.String ftpHost, java.lang.String repositoryId, java.lang.String randomUUIDString, java.lang.String query)
           
static java.util.ArrayList<java.lang.String> performOCRtoPDF_HTTPInput(java.util.ArrayList<org.gcube.application.framework.contentmanagement.util.DocumentInfos> failedDocuments, java.lang.String suggestedOCREpr, java.lang.String scope, java.lang.String ftpHost, java.lang.String ftpUser, java.lang.String ftpPassword, java.lang.String ftpPort, java.lang.String randomUUIDString2, java.lang.String repositoryId)
          Transforms a list of PDF documents to text, using OCR Service.
static void storeFileInFTP(java.lang.String randomUUIDString, java.lang.String ftpHost, int ftpPort, java.lang.String ftpUser, java.lang.String ftpPassword, java.lang.String metadataFolderName)
           
static java.lang.String transformPDFDocumentsToText(java.lang.String listLocation, java.lang.String ftpHost, java.lang.String ftpUser, java.lang.String ftpPort, java.lang.String ftpPassword, java.lang.String ftpDirectory, java.lang.String scope, java.lang.String suggestedDtsEpr)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

TextExtractionUtils

public TextExtractionUtils()
Method Detail

transformPDFDocumentsToText

public static java.lang.String transformPDFDocumentsToText(java.lang.String listLocation,
                                                           java.lang.String ftpHost,
                                                           java.lang.String ftpUser,
                                                           java.lang.String ftpPort,
                                                           java.lang.String ftpPassword,
                                                           java.lang.String ftpDirectory,
                                                           java.lang.String scope,
                                                           java.lang.String suggestedDtsEpr)
                                                    throws java.lang.Exception
Throws:
java.lang.Exception

getListOfFailuresFromReport

public static java.util.ArrayList<DocumentInfos> getListOfFailuresFromReport(java.lang.String rsLocator,
                                                                             java.util.ArrayList<DocumentInfos> allDocuments,
                                                                             java.lang.String ftpHost,
                                                                             java.lang.String repositoryId,
                                                                             java.lang.String randomUUIDString,
                                                                             java.lang.String query)
                                                                      throws ReadingRSException
Throws:
ReadingRSException

performOCRtoPDF_HTTPInput

public static java.util.ArrayList<java.lang.String> performOCRtoPDF_HTTPInput(java.util.ArrayList<org.gcube.application.framework.contentmanagement.util.DocumentInfos> failedDocuments,
                                                                              java.lang.String suggestedOCREpr,
                                                                              java.lang.String scope,
                                                                              java.lang.String ftpHost,
                                                                              java.lang.String ftpUser,
                                                                              java.lang.String ftpPassword,
                                                                              java.lang.String ftpPort,
                                                                              java.lang.String randomUUIDString2,
                                                                              java.lang.String repositoryId)
                                                                       throws ServiceEPRRetrievalException
Transforms a list of PDF documents to text, using OCR Service. It returns a list of the CM URIs of the output documents. It also copies the generated output to the collection given as a parameter.

Parameters:
failedDocuments - the list of documents to be transformed
outputCollectionId - the collection into which the output will be inserted
scope -
Returns:
the list of CM URIs of the transformed documents
Throws:
ServiceEPRRetrievalException
OCRException

storeFileInFTP

public static void storeFileInFTP(java.lang.String randomUUIDString,
                                  java.lang.String ftpHost,
                                  int ftpPort,
                                  java.lang.String ftpUser,
                                  java.lang.String ftpPassword,
                                  java.lang.String metadataFolderName)

deleteFileFromFTP

public static void deleteFileFromFTP(java.lang.String fileName,
                                     java.lang.String ftpHost,
                                     int ftpPort,
                                     java.lang.String ftpUser,
                                     java.lang.String ftpPassword,
                                     java.lang.String metadataFolderName)