org.gcube.execution.textExtraction.job.utils
Class TextExtractionUtils
java.lang.Object
org.gcube.execution.textExtraction.job.utils.TextExtractionUtils
public class TextExtractionUtils
- extends java.lang.Object
Method Summary |
static void |
deleteFileFromFTP(java.lang.String fileName,
java.lang.String ftpHost,
int ftpPort,
java.lang.String ftpUser,
java.lang.String ftpPassword,
java.lang.String metadataFolderName)
|
static java.util.ArrayList<DocumentInfos> |
getListOfFailuresFromReport(java.lang.String rsLocator,
java.util.ArrayList<DocumentInfos> allDocuments,
java.lang.String ftpHost,
java.lang.String repositoryId,
java.lang.String randomUUIDString,
java.lang.String query)
|
static java.util.ArrayList<java.lang.String> |
performOCRtoPDF_HTTPInput(java.util.ArrayList<org.gcube.application.framework.contentmanagement.util.DocumentInfos> failedDocuments,
java.lang.String suggestedOCREpr,
java.lang.String scope,
java.lang.String ftpHost,
java.lang.String ftpUser,
java.lang.String ftpPassword,
java.lang.String ftpPort,
java.lang.String randomUUIDString2,
java.lang.String repositoryId)
Transforms a list of PDF documents to text, using OCR Service. |
static void |
storeFileInFTP(java.lang.String randomUUIDString,
java.lang.String ftpHost,
int ftpPort,
java.lang.String ftpUser,
java.lang.String ftpPassword,
java.lang.String metadataFolderName)
|
static java.lang.String |
transformPDFDocumentsToText(java.lang.String listLocation,
java.lang.String ftpHost,
java.lang.String ftpUser,
java.lang.String ftpPort,
java.lang.String ftpPassword,
java.lang.String ftpDirectory,
java.lang.String scope,
java.lang.String suggestedDtsEpr)
|
Methods inherited from class java.lang.Object |
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
TextExtractionUtils
public TextExtractionUtils()
transformPDFDocumentsToText
public static java.lang.String transformPDFDocumentsToText(java.lang.String listLocation,
java.lang.String ftpHost,
java.lang.String ftpUser,
java.lang.String ftpPort,
java.lang.String ftpPassword,
java.lang.String ftpDirectory,
java.lang.String scope,
java.lang.String suggestedDtsEpr)
throws java.lang.Exception
- Throws:
java.lang.Exception
getListOfFailuresFromReport
public static java.util.ArrayList<DocumentInfos> getListOfFailuresFromReport(java.lang.String rsLocator,
java.util.ArrayList<DocumentInfos> allDocuments,
java.lang.String ftpHost,
java.lang.String repositoryId,
java.lang.String randomUUIDString,
java.lang.String query)
throws ReadingRSException
- Throws:
ReadingRSException
performOCRtoPDF_HTTPInput
public static java.util.ArrayList<java.lang.String> performOCRtoPDF_HTTPInput(java.util.ArrayList<org.gcube.application.framework.contentmanagement.util.DocumentInfos> failedDocuments,
java.lang.String suggestedOCREpr,
java.lang.String scope,
java.lang.String ftpHost,
java.lang.String ftpUser,
java.lang.String ftpPassword,
java.lang.String ftpPort,
java.lang.String randomUUIDString2,
java.lang.String repositoryId)
throws ServiceEPRRetrievalException
- Transforms a list of PDF documents to text, using OCR Service. It returns a list of the CM URIs of the output documents.
It also copies the generated output to the collection given as a parameter.
- Parameters:
failedDocuments - the list of documents to be transformed
outpuCollectionId - the collection to which the output will be inserted
scope -
- Returns:
- the list of CM URIs of the transformed documents
- Throws:
ServiceEPRRetrievalException
OCRException
storeFileInFTP
public static void storeFileInFTP(java.lang.String randomUUIDString,
java.lang.String ftpHost,
int ftpPort,
java.lang.String ftpUser,
java.lang.String ftpPassword,
java.lang.String metadataFolderName)
deleteFileFromFTP
public static void deleteFileFromFTP(java.lang.String fileName,
java.lang.String ftpHost,
int ftpPort,
java.lang.String ftpUser,
java.lang.String ftpPassword,
java.lang.String metadataFolderName)