package org.gcube.textextractor.extractors;

import au.com.bytecode.opencsv.CSVReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.codehaus.jackson.util.MinimalPrettyPrinter;
import org.gcube.semantic.annotator.AnnotationBase;
import org.gcube.semantic.annotator.utils.ANNOTATIONS;
import org.gcube.textextractor.entities.ExtractedEntity;
import org.gcube.textextractor.entities.ShortenCE4NameResponse;
import org.gcube.textextractor.helpers.ExtractorHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/smartfish-doc-processor-2.1.1-3.4.0.jar:org/gcube/textextractor/extractors/CSVExtractor.class */
public class CSVExtractor extends InformationExtractor {
    private static final Logger logger = LoggerFactory.getLogger(CSVExtractor.class);

    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public Map<String, String> extractFieldsFromFile(String str) throws Exception {
        long currentTimeMillis = System.currentTimeMillis();
        try {
            try {
                CSVReader cSVReader = new CSVReader(new FileReader(str));
                Throwable th = null;
                try {
                    try {
                        cSVReader.readNext();
                        HashMap hashMap = new HashMap();
                        hashMap.put("provenance", new HashSet());
                        hashMap.put("country", new HashSet());
                        hashMap.put("title", new HashSet());
                        hashMap.put("species_english_name", new HashSet());
                        hashMap.put("gear_used", new HashSet());
                        hashMap.put("type_of_vessel", new HashSet());
                        while (true) {
                            String[] readNext = cSVReader.readNext();
                            if (readNext == null) {
                                break;
                            }
                            gatherInfo(hashMap, "provenance", readNext[0]);
                            gatherInfo(hashMap, "country", readNext[1]);
                            gatherInfo(hashMap, "title", readNext[2]);
                            gatherInfo(hashMap, "gear_used", readNext[4]);
                            gatherInfo(hashMap, "type_of_vessel", readNext[5]);
                            hashMap.get("species_english_name").addAll(Arrays.asList(readNext[3].split("\\s*;\\s*")));
                        }
                        HashMap hashMap2 = new HashMap();
                        hashMap2.put("provenance", ExtractorHelper.covertToString(hashMap.get("provenance")));
                        hashMap2.put(ANNOTATIONS.getLocalName(ANNOTATIONS.COUNTRY), ExtractorHelper.covertToString(hashMap.get("country")));
                        hashMap2.put("title", ExtractorHelper.covertToString(hashMap.get("title")));
                        hashMap2.put("species_english_name", ExtractorHelper.covertToString(hashMap.get("species_english_name")));
                        hashMap2.put(ANNOTATIONS.getLocalName(ANNOTATIONS.GEAR), ExtractorHelper.covertToString(hashMap.get("gear_used")));
                        hashMap2.put(ANNOTATIONS.getLocalName(ANNOTATIONS.VESSEL), ExtractorHelper.covertToString(hashMap.get("type_of_vessel")));
                        if (cSVReader != null) {
                            if (0 != 0) {
                                try {
                                    cSVReader.close();
                                } catch (Throwable th2) {
                                    th.addSuppressed(th2);
                                }
                            } else {
                                cSVReader.close();
                            }
                        }
                        logger.info("time processing file : " + str + " : " + ((System.currentTimeMillis() - currentTimeMillis) / 1000.0d) + " secs");
                        return hashMap2;
                    } finally {
                    }
                } catch (Throwable th3) {
                    if (cSVReader != null) {
                        if (th != null) {
                            try {
                                cSVReader.close();
                            } catch (Throwable th4) {
                                th.addSuppressed(th4);
                            }
                        } else {
                            cSVReader.close();
                        }
                    }
                    throw th3;
                }
            } catch (Throwable th5) {
                logger.info("time processing file : " + str + " : " + ((System.currentTimeMillis() - currentTimeMillis) / 1000.0d) + " secs");
                throw th5;
            }
        } catch (Exception e) {
            logger.error("error while extracting fields from  : " + str, (Throwable) e);
            throw e;
        }
    }

    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public List<Map<String, String>> extractInfo(String str) throws FileNotFoundException {
        ArrayList arrayList = new ArrayList();
        int i = 0;
        for (String str2 : ExtractorHelper.getFilenames(str)) {
            i++;
            logger.info("Processing file : " + i + MinimalPrettyPrinter.DEFAULT_ROOT_VALUE_SEPARATOR + str2);
            try {
                Map<String, String> extractFieldsFromFile = extractFieldsFromFile(str2);
                long currentTimeMillis = System.currentTimeMillis();
                Map<String, String> enrichRecord = enrichRecord(extractFieldsFromFile, str2);
                logger.info("~> field enrichment time  : " + ((System.currentTimeMillis() - currentTimeMillis) / 1000.0d) + " secs");
                arrayList.add(enrichRecord);
            } catch (Exception e) {
                logger.error("error while extracting info from : " + str2 + " . will skip this file", (Throwable) e);
            }
        }
        return arrayList;
    }

    private void gatherInfo(Map<String, Set<String>> map, String str, String str2) {
        if (str2 == null || str2.trim().length() == 0) {
            return;
        }
        map.get(str).add(str2.trim());
    }

    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public Map<String, String> enrichRecord(Map<String, String> map, String str) {
        HashMap hashMap = new HashMap();
        HashMap hashMap2 = new HashMap();
        String[] split = str.split("/");
        String str2 = "http://smartfish.collection/statbase/" + split[split.length - 1].split("\\.")[0].toLowerCase();
        hashMap.put("documentID", str2);
        hashMap.putAll(map);
        ExtractorHelper.enrichSimpleField(map, hashMap, hashMap2, ANNOTATIONS.getLocalName(ANNOTATIONS.COUNTRY), new ExtractorHelper.QueryWrapperSimple() { // from class: org.gcube.textextractor.extractors.CSVExtractor.1
            @Override // org.gcube.textextractor.helpers.ExtractorHelper.QueryWrapperSimple
            public String doCall(ExtractedEntity extractedEntity) throws Exception {
                return ExtractorHelper.queryCountry(extractedEntity);
            }
        });
        try {
            if (map.get("species_english_name").trim().length() > 0) {
                String querySpecies = ExtractorHelper.querySpecies(ExtractorHelper.covertToStringList(map.get("species_english_name")));
                hashMap2.put("species_uris", ShortenCE4NameResponse.getURIFromJSON(querySpecies));
                hashMap.put("species_uris", querySpecies);
            }
        } catch (Exception e) {
            logger.warn("Error processing species : " + map.get("species_english_name"), (Throwable) e);
        }
        try {
            annotate(str2, hashMap2);
        } catch (FileNotFoundException e2) {
            logger.error("file : " + str + " not found", (Throwable) e2);
        }
        return hashMap;
    }

    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public String convertInfoToRowset(Map<String, String> map) {
        String str = map.get("documentID");
        map.remove("documentID");
        return ExtractorHelper.createRowseFromFields(str, InformationExtractor.collectionID, InformationExtractor.idxType, map.get("language"), map);
    }

    private void annotate(String str, Map<String, List<String>> map) throws FileNotFoundException {
        AnnotationBase annotationBase = AnnotationBase.getInstance();
        for (Map.Entry<String, List<String>> entry : map.entrySet()) {
            if (entry.getKey().equals(ANNOTATIONS.getLocalName(ANNOTATIONS.COUNTRY) + "_uris")) {
                Iterator<String> it = entry.getValue().iterator();
                while (it.hasNext()) {
                    annotationBase.STATBASE_country(str, it.next());
                }
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName(ANNOTATIONS.SPECIES) + "_uris")) {
                Iterator<String> it2 = entry.getValue().iterator();
                while (it2.hasNext()) {
                    annotationBase.STATBASE_species(str, it2.next());
                }
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName(ANNOTATIONS.GEAR) + "_uris")) {
                Iterator<String> it3 = entry.getValue().iterator();
                while (it3.hasNext()) {
                    annotationBase.STATBASE_gear(str, it3.next());
                }
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName(ANNOTATIONS.VESSEL) + "_uris")) {
                Iterator<String> it4 = entry.getValue().iterator();
                while (it4.hasNext()) {
                    annotationBase.STATBASE_vessel(str, it4.next());
                }
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName(ANNOTATIONS.WATER_AREA) + "_uris")) {
                Iterator<String> it5 = entry.getValue().iterator();
                while (it5.hasNext()) {
                    annotationBase.STATBASE_water_area(str, it5.next());
                }
            }
            if (entry.getKey().equals(ANNOTATIONS.getLocalName(ANNOTATIONS.LAND_AREA) + "_uris")) {
                Iterator<String> it6 = entry.getValue().iterator();
                while (it6.hasNext()) {
                    annotationBase.STATBASE_land_area(str, it6.next());
                }
            }
        }
    }
}
