package org.gcube.textextractor.extractors;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration;
import org.apache.commons.io.FilenameUtils;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.executable.MachineMetadata;
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.sax.BodyContentHandler;
import org.codehaus.jackson.util.MinimalPrettyPrinter;
import org.gcube.semantic.annotator.AnnotationBase;
import org.gcube.semantic.annotator.utils.ANNOTATIONS;
import org.gcube.textextractor.entities.ExtractedEntity;
import org.gcube.textextractor.entities.ShortenCE4NameResponse;
import org.gcube.textextractor.helpers.ExtractorHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/smartfish-doc-processor-2.1.1-3.10.0.jar:org/gcube/textextractor/extractors/HTMLExtractor.class */
public class HTMLExtractor extends InformationExtractor {
    // Class-wide SLF4J logger used by every method below for progress, timing and error messages.
    private static final Logger logger = LoggerFactory.getLogger(HTMLExtractor.class);

    /**
     * Ad-hoc entry point: extracts the fields of one hard-coded local HTML file and prints
     * the resulting map to standard output.
     *
     * @param strArr command-line arguments (ignored)
     * @throws Exception if the extraction fails
     */
    public static void main(String[] strArr) throws Exception {
        HTMLExtractor extractor = new HTMLExtractor();
        Map<String, String> extractedFields = extractor.extractFieldsFromFile("/home/alex/Downloads/271.html");
        System.out.println(extractedFields);
    }

    /**
     * Derives the combined "measure" entries from the individual control fields already present
     * in {@code map}.
     *
     * <p>The fishing-control, access-control and enforcement-method values are concatenated in that
     * order and stored under the {@code MEASURE} local name; the same is done with the {@code _fr}
     * variants, stored under the {@code measure_fr} local name. Parts that are absent (null) are
     * skipped. The first two parts are always followed by the separator, so a result can end with a
     * separator when the enforcement-method part is absent. A combination that ends up empty is not
     * stored.
     *
     * @param map field map that is read and updated in place
     */
    private static void makeMeaureField(Map<String, String> map) {
        String separator = TimeoutBehaviorConfiguration.DEFAULT_PROPERTY_SEPARATOR;

        String fishingControl = map.get(ANNOTATIONS.getLocalName(ANNOTATIONS.FISHING_CONTROL));
        String accessControl = map.get(ANNOTATIONS.getLocalName(ANNOTATIONS.ACCESS_CONTROL));
        String enforcementMethod = map.get(ANNOTATIONS.getLocalName(ANNOTATIONS.ENFORCEMENT_METHOD));
        String fishingControlFr = map.get(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#fishing_control_fr"));
        String accessControlFr = map.get(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#access_control_fr"));
        String enforcementMethodFr = map.get(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#enforcement_method_fr"));

        // Explicit null checks replace the former "call a method on it and swallow the NPE" idiom.
        StringBuilder measure = new StringBuilder();
        if (fishingControl != null) {
            measure.append(fishingControl).append(separator);
        }
        if (accessControl != null) {
            measure.append(accessControl).append(separator);
        }
        if (enforcementMethod != null) {
            measure.append(enforcementMethod);
        }

        StringBuilder measureFr = new StringBuilder();
        if (fishingControlFr != null) {
            measureFr.append(fishingControlFr).append(separator);
        }
        if (accessControlFr != null) {
            measureFr.append(accessControlFr).append(separator);
        }
        if (enforcementMethodFr != null) {
            measureFr.append(enforcementMethodFr);
        }

        if (measure.length() > 0) {
            map.put(ANNOTATIONS.getLocalName(ANNOTATIONS.MEASURE), measure.toString());
        }
        if (measureFr.length() > 0) {
            map.put(ANNOTATIONS.getLocalName("http://www.fao.org/figis/onto/smartfish/annotation.owl#measure_fr"), measureFr.toString());
        }
    }

    /**
     * Parses one HTML file and returns its basic fields plus the page-specific custom fields.
     *
     * <p>The body text is extracted with Tika's {@code HtmlParser}; the result map holds
     * {@code documentID}, {@code text}, {@code title}, {@code language} and {@code provenance},
     * followed by everything {@link #customFields(String)} produces. The total processing time is
     * logged whether the call succeeds or fails.
     *
     * @param str path of the HTML file to process
     * @return map of extracted field names to values
     * @throws Exception any failure while reading or parsing the file; it is logged before being rethrown
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public Map<String, String> extractFieldsFromFile(String str) throws Exception {
        logger.info("Processing file : " + str);
        long startMillis = System.currentTimeMillis();
        // try-with-resources: the stream used to be left open after parsing.
        try (FileInputStream htmlStream = new FileInputStream(str)) {
            BodyContentHandler bodyHandler = new BodyContentHandler();
            Metadata metadata = new Metadata();
            metadata.set("Content-Type", "text/html; charset=utf-8");
            new HtmlParser().parse(htmlStream, bodyHandler, metadata, new ParseContext());
            String text = ExtractorHelper.removeEmptyLines(bodyHandler.toString());

            Map<String, String> fields = new HashMap<>();
            fields.put("documentID", str);
            fields.put("text", text);
            fields.put("title", metadata.get("title"));
            fields.put("language", new LanguageIdentifier(text).getLanguage());
            fields.put("provenance", "WIOFish");

            long customStartMillis = System.currentTimeMillis();
            Map<String, String> custom = customFields(str);
            long customEndMillis = System.currentTimeMillis();
            if (custom == null) {
                // customFields signals a read failure with null; fail with a message instead of an NPE in putAll.
                throw new IOException("could not extract custom fields from : " + str);
            }
            fields.putAll(custom);
            logger.info("~> field extraction time  : " + ((customEndMillis - customStartMillis) / 1000.0d) + " secs");
            return fields;
        } catch (Exception e) {
            logger.error("error while extracting fields from  : " + str, e);
            throw e;
        } finally {
            logger.info("time processing file : " + str + " : " + ((System.currentTimeMillis() - startMillis) / 1000.0d) + " secs");
        }
    }

    /**
     * Extracts and enriches one record per file returned by {@code ExtractorHelper.getFilenames(str)}.
     *
     * <p>A file whose extraction or enrichment throws is logged and skipped. A file for which
     * {@link #enrichRecord(Map, String)} returns {@code null} is skipped as well, so the returned
     * list never contains null entries.
     *
     * @param str location handed to {@code ExtractorHelper.getFilenames}
     * @return the enriched records of all files that could be processed
     * @throws FileNotFoundException declared by the overridden method
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public List<Map<String, String>> extractInfo(String str) throws FileNotFoundException {
        List<Map<String, String>> records = new ArrayList<>();
        int fileIndex = 0;
        for (String filename : ExtractorHelper.getFilenames(str)) {
            fileIndex++;
            logger.info("Processing file : " + fileIndex + MinimalPrettyPrinter.DEFAULT_ROOT_VALUE_SEPARATOR + filename);
            try {
                Map<String, String> extracted = extractFieldsFromFile(filename);
                long enrichStartMillis = System.currentTimeMillis();
                Map<String, String> enriched = enrichRecord(extracted, filename);
                logger.info("~> field enrichment time  : " + ((System.currentTimeMillis() - enrichStartMillis) / 1000.0d) + " secs");
                if (enriched == null) {
                    // enrichRecord has a null-returning failure path; do not hand null records to callers.
                    logger.error("no enriched record produced for : " + filename + " . will skip this file");
                    continue;
                }
                records.add(enriched);
            } catch (Exception e) {
                logger.error("error while extracting info from : " + filename + " . will skip this file", e);
            }
        }
        return records;
    }

    /**
     * Builds an enriched copy of an extracted record and pushes the resolved URIs to the annotator.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>copy {@code map} and replace {@code documentID} with a URI derived from the file name
     *       (the lower-cased part after the last {@code '='});</li>
     *   <li>run the {@code ExtractorHelper} enrichment for every simple and list field;</li>
     *   <li>re-read the HTML and classify each species row as threatened / target / by-catch /
     *       discard from its status cell and tick images, then query URIs for those lists;</li>
     *   <li>call {@code annotate} with the collected URIs.</li>
     * </ol>
     *
     * <p>A failure inside the species step is logged and the remaining steps still run. A species
     * name found in a row but missing from {@code species_english_name} aborts the species step.
     *
     * @param map the extracted fields of one file; not modified by the code in this method
     * @param str path of the HTML file the record came from
     * @return the enriched copy, or {@code null} when the HTML file cannot be read
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public Map<String, String> enrichRecord(Map<String, String> map, String str) {
        Map<String, String> enriched = new HashMap<>();
        Map<String, List<String>> uris = new HashMap<>();
        String name = FilenameUtils.getName(str);
        String documentUri = "http://smartfish.collection/wiofish/" + name.substring(name.lastIndexOf("=") + 1).toLowerCase();
        enriched.putAll(map);
        enriched.put("documentID", documentUri);

        ExtractorHelper.enrichSimpleField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.COUNTRY), new ExtractorHelper.QueryWrapperSimple() {
            @Override
            public String doCall(ExtractedEntity extractedEntity) throws Exception {
                return ExtractorHelper.queryCountry(extractedEntity);
            }
        });
        ExtractorHelper.enrichSimpleField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.GEAR), new ExtractorHelper.QueryWrapperSimple() {
            @Override
            public String doCall(ExtractedEntity extractedEntity) throws Exception {
                return ExtractorHelper.queryGear(extractedEntity);
            }
        });
        ExtractorHelper.enrichSimpleField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.VESSEL), new ExtractorHelper.QueryWrapperSimple() {
            @Override
            public String doCall(ExtractedEntity extractedEntity) throws Exception {
                return ExtractorHelper.queryVessel(extractedEntity);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.MANAGEMENT), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryManagement(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.STATUS), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryExploitationStatus(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.ACCESS_CONTROL), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryAccessControl(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.FISHING_CONTROL), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryFishingControl(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.ENFORCEMENT_METHOD), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryEnforcementMethod(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.MEASURE), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryConservationMeasure(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.SECTOR), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.querySector(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.TECHNOLOGY_IN_USE), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryTechnologyInUse(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.OWNER_OF_ACCESS_RIGHT), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryOwnershipOfAccessRight(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.OTHER_INCOME_SOURCE), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryIncome(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.MARKET), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryMarkets(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.SEASONALITY), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.querySeasonality(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.POST_PROCESSING_METHOD), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryPostHarvestingProcess(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.DECISION_MAKER), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryRapresentativeForDecisionMaking(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.MANAGEMENT_INDICATOR), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryManagementIndicators(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.FINANCE_MGT_AUTHORITY), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryFinancingManagement(list);
            }
        });
        ExtractorHelper.enrichListField(map, enriched, uris, ANNOTATIONS.getLocalName(ANNOTATIONS.APPLICANT_FOR_ACCESS_RIGHT), new ExtractorHelper.QueryWrapperList() {
            @Override
            public String doCall(List<ExtractedEntity> list) throws Exception {
                return ExtractorHelper.queryAccessRightApplicant(list);
            }
        });

        try {
            // Read outside the catch-all species block so a read failure reaches the IOException handler below.
            String fileContent = ExtractorHelper.fileContent(str);
            try {
                List<ExtractedEntity> targetSpecies = new ArrayList<>();
                List<ExtractedEntity> bycatchSpecies = new ArrayList<>();
                List<ExtractedEntity> discardSpecies = new ArrayList<>();
                List<ExtractedEntity> threatenedSpecies = new ArrayList<>();
                long classifyStartMillis = System.currentTimeMillis();
                String speciesNames = map.get("species_english_name");
                if (speciesNames != null && speciesNames.trim().length() > 0) {
                    // Both patterns are compiled once here rather than once per species row.
                    Pattern speciesRowPattern = Pattern.compile("<tr id='species_row_.*?'>.*?EnglishNameblock.*?>(.*?)</td>(.*?)</tr>");
                    Pattern speciesFlagsPattern = Pattern.compile("<td align='center'>.*?</td><td align='center'>.*?</td><td align='center'>(.*?)<br></td>.*?<img src=images/(.*?).gif></td><td width=33% align='center'><img src=images/(.*?).gif></td><td width=33% align='center'><img src=images/(.*?).gif></td>");
                    Matcher rowMatcher = speciesRowPattern.matcher(fileContent);
                    List<ExtractedEntity> allSpecies = ExtractorHelper.covertToStringList(speciesNames);
                    while (rowMatcher.find()) {
                        String rowNames = rowMatcher.group(1).trim();
                        String rowRemainder = rowMatcher.group(2).trim();
                        if (rowNames.length() == 0) {
                            continue;
                        }
                        Matcher flagsMatcher = speciesFlagsPattern.matcher(rowRemainder);
                        if (!flagsMatcher.find()) {
                            continue;
                        }
                        // Group 1 is the status cell text; groups 2-4 are the image names of the three tick columns.
                        boolean threatened = !flagsMatcher.group(1).trim().equalsIgnoreCase("Not Applicable");
                        boolean target = flagsMatcher.group(2).trim().equalsIgnoreCase("tick_blue");
                        boolean bycatch = flagsMatcher.group(3).trim().equalsIgnoreCase("tick_blue");
                        boolean discard = flagsMatcher.group(4).trim().equalsIgnoreCase("tick_blue");
                        for (String rawName : rowNames.split("\\s*,\\s*")) {
                            String speciesName = rawName.trim();
                            if (!containsSpecies(allSpecies, speciesName)) {
                                // Aborts the species step; the catch below logs it through the logger.
                                throw new IllegalStateException("Error : " + speciesName + " not in allSpecies : " + rowNames);
                            }
                            if (threatened) {
                                threatenedSpecies.add(new ExtractedEntity(speciesName, ""));
                            }
                            if (target) {
                                targetSpecies.add(new ExtractedEntity(speciesName, ""));
                            }
                            if (bycatch) {
                                bycatchSpecies.add(new ExtractedEntity(speciesName, ""));
                            }
                            if (discard) {
                                discardSpecies.add(new ExtractedEntity(speciesName, ""));
                            }
                        }
                    }
                    enriched.put(ANNOTATIONS.getLocalName(ANNOTATIONS.BYCATCH), Joiner.on(", ").join(bycatchSpecies));
                    enriched.put(ANNOTATIONS.getLocalName(ANNOTATIONS.TARGET), Joiner.on(", ").join(targetSpecies));
                    enriched.put(ANNOTATIONS.getLocalName(ANNOTATIONS.THRETENED), Joiner.on(", ").join(threatenedSpecies));
                    enriched.put(ANNOTATIONS.getLocalName(ANNOTATIONS.DISCARD), Joiner.on(", ").join(discardSpecies));
                    logger.info("extracting threatened,target,by-catch,discard dur : " + ((System.currentTimeMillis() - classifyStartMillis) / 1000.0d) + " sec");

                    long queryStartMillis = System.currentTimeMillis();
                    String allSpeciesResponse = ExtractorHelper.querySpecies(ExtractorHelper.covertToStringList(map.get("species_english_name")));
                    logger.info("query species dur : " + ((System.currentTimeMillis() - queryStartMillis) / 1000.0d) + " sec");

                    queryStartMillis = System.currentTimeMillis();
                    if (threatenedSpecies.size() > 0) {
                        uris.put(ANNOTATIONS.getLocalName(ANNOTATIONS.THRETENED) + "_uris", ShortenCE4NameResponse.getURIFromJSON(ExtractorHelper.querySpecies(threatenedSpecies)));
                    }
                    logger.info("query threatened species dur : " + ((System.currentTimeMillis() - queryStartMillis) / 1000.0d) + " sec");

                    queryStartMillis = System.currentTimeMillis();
                    if (targetSpecies.size() > 0) {
                        uris.put(ANNOTATIONS.getLocalName(ANNOTATIONS.TARGET) + "_uris", ShortenCE4NameResponse.getURIFromJSON(ExtractorHelper.querySpecies(targetSpecies)));
                    }
                    logger.info("query target species dur : " + ((System.currentTimeMillis() - queryStartMillis) / 1000.0d) + " sec");

                    queryStartMillis = System.currentTimeMillis();
                    if (bycatchSpecies.size() > 0) {
                        uris.put(ANNOTATIONS.getLocalName(ANNOTATIONS.BYCATCH) + "_uris", ShortenCE4NameResponse.getURIFromJSON(ExtractorHelper.querySpecies(bycatchSpecies)));
                    }
                    logger.info("query by catch species dur : " + ((System.currentTimeMillis() - queryStartMillis) / 1000.0d) + " sec");

                    queryStartMillis = System.currentTimeMillis();
                    if (discardSpecies.size() > 0) {
                        uris.put(ANNOTATIONS.getLocalName(ANNOTATIONS.DISCARD) + "_uris", ShortenCE4NameResponse.getURIFromJSON(ExtractorHelper.querySpecies(discardSpecies)));
                    }
                    logger.info("query discard species dur : " + ((System.currentTimeMillis() - queryStartMillis) / 1000.0d) + " sec");

                    enriched.put(ANNOTATIONS.getLocalName(ANNOTATIONS.SPECIES) + "_uris", allSpeciesResponse);
                }
            } catch (Exception e) {
                // Best effort: a species failure must not prevent annotation of the other fields.
                logger.error("Error processing species : " + map.get("species_english_name"), e);
            }
            try {
                long annotateStartMillis = System.currentTimeMillis();
                annotate(documentUri, uris);
                logger.info("annotate dur : " + ((System.currentTimeMillis() - annotateStartMillis) / 1000.0d) + " sec");
            } catch (FileNotFoundException e) {
                logger.error("file : " + str + " not found", e);
            }
            return enriched;
        } catch (IOException e) {
            logger.error("error while getting html contents", e);
            return null;
        }
    }

    /**
     * Scrapes the page-specific fields out of the raw HTML of one file using regular expressions.
     *
     * <p>Besides the rows and tables handled by the {@code parse*} helpers, this method derives:
     * <ul>
     *   <li>the combined measure fields (via {@code makeMeaureField});</li>
     *   <li>the authority, from the first decision-maker row: first cell plus the part of the
     *       second cell before any {@code '/'};</li>
     *   <li>the seasonality, as the names of the months whose cell holds the blue tick image;</li>
     *   <li>the scientific and English species names of all species rows.</li>
     * </ul>
     *
     * @param str path of the HTML file
     * @return the extracted fields, or {@code null} when an {@code IOException} occurs (it is logged)
     */
    static Map<String, String> customFields(String str) {
        Map<String, String> fields = new HashMap<>();
        try {
            String fileContent = ExtractorHelper.fileContent(str);
            parseSimpleRow(ANNOTATIONS.getLocalName(ANNOTATIONS.COUNTRY), "<b>Reporting Area: </b>(.*?)</font>", fileContent, fields);
            parseSimpleRow("title", "<b>Fishery: </b>(.*?)</font>", fileContent, fields);
            parseSimpleRow("fishery_local_name", "<tr><td><b>Local name for this Fishery:</b></td><td>(.*?)</td></tr><tr>", fileContent, fields);
            parseSimpleRow(ANNOTATIONS.getLocalName(ANNOTATIONS.VESSEL), "<tr><td><b>Type of vessel</b></td><td>(.*?)</td></tr>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.APPLICANT_FOR_ACCESS_RIGHT), "<b>Who can apply for access rights</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.OWNER_OF_ACCESS_RIGHT), "<b>Ownership of access right</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.OTHER_INCOME_SOURCE), "<b>Alternative Incomes</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.MARKET), "<b>Markets</b></td></tr><tr><td>(.*?)</td></tr>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.POST_PROCESSING_METHOD), "<b>Post-harvest processing</b></td></tr><tr><td>(.*?)</td></tr>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.DECISION_MAKER), "<b>Representatives in decision making</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.GEAR), "<tr><td><b>Select gear used in this fishery</b></td><td>(.*?)<br></td></tr>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.STATUS), "<b>FAO Status:</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.MANAGEMENT_INDICATOR), "<td><b>Management Indicators</b>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.FINANCE_MGT_AUTHORITY), "<b>Financing management</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.ACCESS_CONTROL), "<b>Access controls used in management</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.FISHING_CONTROL), "<b>Fishing controls used in management</b></td><td>(.*?)</td>", fileContent, fields);
            parseTable(ANNOTATIONS.getLocalName(ANNOTATIONS.ENFORCEMENT_METHOD), "<b>Enforcement methods used</b></td><td>(.*?)</td>", fileContent, fields);
            parseTickTableMultipleLangs(ANNOTATIONS.getLocalName(ANNOTATIONS.SECTOR), "<b>Sector</b></td></tr><tr><td valign='top' colspan=2><table bgcolor='#eae8e8' class='table' cellspacing=1 width=100%><tr bgcolor='#ffffff'><td><table width=100% class='table' cellpadding=5><tr><td width=50% valign='top'>(.*?)</td><td valign='top' width=50%>", "<img src=images/tick_blue.gif>(.*?)<br>", fileContent, fields);
            parseTickTableMultipleLangs(ANNOTATIONS.getLocalName(ANNOTATIONS.TECHNOLOGY_IN_USE), "<a name='technology'>Technology Used</a>.*?</table>(.*?)</table>", "<img src=images/tick_blue.gif>(.*?)<br>", fileContent, fields);
            parseTickTableMultipleLangs(ANNOTATIONS.getLocalName(ANNOTATIONS.MANAGEMENT), "<b>Management Type</b>(.*?)</tr>", "<img src=images/tick_blue.gif>(.*?)<br>", fileContent, fields);
            makeMeaureField(fields);

            // Authority: first cell of the first decision-maker row plus the text before '/' in the second cell.
            Matcher authorityMatcher = Pattern.compile("<tr id='dmtbl_row_0'><td>(.*?)</td><td>(.*?)<br></td>").matcher(fileContent);
            if (authorityMatcher.find()) {
                String firstCell = authorityMatcher.group(1).trim();
                String secondCell = authorityMatcher.group(2).trim();
                // indexOf instead of split("/")[n]: a cell without '/' used to raise ArrayIndexOutOfBoundsException.
                int slashIndex = secondCell.indexOf('/');
                String beforeSlash = (slashIndex >= 0 ? secondCell.substring(0, slashIndex) : secondCell).trim();
                if (beforeSlash.length() > 0) {
                    fields.put(ANNOTATIONS.getLocalName(ANNOTATIONS.AUTHORITY), Joiner.on(", ").join(firstCell, beforeSlash));
                }
            }

            // Seasonality: one capture group per month cell, in calendar order.
            Matcher monthsMatcher = Pattern.compile("<tr><td><b>Jan</b></td><td><b>Feb</b></td><td><b>Mar</b></td><td><b>Apr</b></td><td><b>May</b></td><td><b>Jun</b></td><td><b>Jul</b></td><td><b>Aug</b></td><td><b>Sep</b></td><td><b>Oct</b></td><td><b>Nov</b></td><td><b>Dec</b></td></tr><tr><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td></tr><tr>").matcher(fileContent);
            if (monthsMatcher.find()) {
                String[] monthNames = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"};
                List<String> tickedMonths = Lists.newArrayList();
                for (int month = 1; month <= monthsMatcher.groupCount(); month++) {
                    // Inspect this month's own cell; the previous code always looked at group 1 (January).
                    if (monthsMatcher.group(month).trim().equalsIgnoreCase("<img src=images/tick_blue.gif>")) {
                        tickedMonths.add(monthNames[month - 1]);
                    }
                }
                if (tickedMonths.size() > 0) {
                    fields.put(ANNOTATIONS.getLocalName(ANNOTATIONS.SEASONALITY), Joiner.on(", ").join(tickedMonths));
                }
            }

            // Species rows: group 1 is stored as the scientific name, group 2 as the English name.
            Matcher speciesMatcher = Pattern.compile("<tr id='species_row_.*?'><td.*?>(.*?)</td>.*?EnglishNameblock.*?>(.*?)</td>").matcher(fileContent);
            List<String> scientificNames = Lists.newArrayList();
            List<String> englishNames = Lists.newArrayList();
            while (speciesMatcher.find()) {
                String scientificName = speciesMatcher.group(1).trim();
                String englishName = speciesMatcher.group(2).trim();
                if (scientificName.length() > 0) {
                    scientificNames.add(scientificName);
                }
                if (englishName.length() > 0) {
                    englishNames.add(englishName);
                }
            }
            if (scientificNames.size() > 0) {
                fields.put("species_scientific_name", Joiner.on(", ").join(scientificNames));
            }
            if (englishNames.size() > 0) {
                fields.put("species_english_name", Joiner.on(", ").join(englishNames));
            }
            return fields;
        } catch (IOException e) {
            logger.error("error while parsing the fields from : " + str, e);
            return null;
        }
    }

    /**
     * Serialises one record by delegating to {@code ExtractorHelper.createRowseFromFields}, passing
     * the record's {@code documentID} and {@code language} values together with the collection id
     * and index type defined on {@code InformationExtractor}.
     *
     * @param map the record to convert
     * @return whatever the helper produces for this record
     */
    @Override // org.gcube.textextractor.extractors.InformationExtractor
    public String convertInfoToRowset(Map<String, String> map) {
        String documentId = map.get("documentID");
        String language = map.get("language");
        return ExtractorHelper.createRowseFromFields(
                documentId,
                InformationExtractor.collectionID,
                InformationExtractor.idxType,
                language,
                map);
    }

    /**
     * Dispatches every URI list in {@code map} to the matching {@code AnnotationBase.WIOFISH_*} call.
     *
     * <p>Each key is compared against {@code ANNOTATIONS.getLocalName(X) + "_uris"} for the
     * annotation constants listed below. On a match, the corresponding annotator method is
     * invoked once per list element, with {@code str} as the first argument and the element as
     * the second. Keys matching none of the constants are reported through the class logger at
     * WARN level (previously printed to standard output).
     *
     * @param str value passed unchanged as the first argument of every annotator call
     * @param map annotation key to list of values; keys are expected to end in {@code "_uris"}
     * @throws FileNotFoundException kept from the original signature; presumably raised by the
     *     {@code AnnotationBase} calls, which are not visible here
     */
    private void annotate(String str, Map<String, List<String>> map) throws FileNotFoundException {
        AnnotationBase annotationBase = AnnotationBase.getInstance();
        Set<Map.Entry<String, List<String>>> entrySet = map.entrySet();
        for (Map.Entry<String, List<String>> entry : entrySet) {
            String key = entry.getKey();
            List<String> uris = entry.getValue();
            if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.COUNTRY) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_country(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.VESSEL) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_vessel(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.GEAR) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_gear(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.TARGET) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_targetSpecies(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.BYCATCH) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_bycatchSpecies(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.DISCARD) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_discardSpecies(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.THRETENED) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_thretenedSpecies(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.MANAGEMENT) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_management(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.SECTOR) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_sector(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.SEASONALITY) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_seasonality(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.STATUS) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_status(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.ACCESS_CONTROL) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_access_control(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.FISHING_CONTROL) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_fishing_control(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.ENFORCEMENT_METHOD) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_enforcement_method(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.APPLICANT_FOR_ACCESS_RIGHT) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_accessRightApplicant(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.OWNER_OF_ACCESS_RIGHT) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_ownershipOfAccessRight(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.OTHER_INCOME_SOURCE) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_alternativeIncomeSource(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.MARKET) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_market(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.POST_PROCESSING_METHOD) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_postHarvestProcessing(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.DECISION_MAKER) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_representativesInDecisionMaking(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.MANAGEMENT_INDICATOR) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_managementIndicator(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.FINANCE_MGT_AUTHORITY) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_financingManagement(str, uri);
                }
            } else if (key.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.TECHNOLOGY_IN_USE) + "_uris")) {
                for (String uri : uris) {
                    annotationBase.WIOFISH_technologiesInUse(str, uri);
                }
            } else {
                // Use the class logger rather than stdout so the diagnostic follows the
                // application's logging configuration like the other warnings in this class.
                logger.warn("=> annotation uri not found: {} all entries are : {}", key, entrySet);
            }
        }
    }

    /**
     * Stores capture group 1 of the first match of a regex in the field map.
     *
     * <p>Compiles {@code str2}, searches {@code str3} for its first match and, if one is found,
     * puts group 1 into {@code map} under {@code str} after removing every literal
     * {@code "<br>"} and trimming the result. Nothing is stored when the pattern does not match
     * or when group 1 did not take part in the match.
     *
     * @param str key under which the value is stored
     * @param str2 regular expression; must declare at least one capturing group
     * @param str3 text to search
     * @param map destination map, modified in place
     */
    static void parseSimpleRow(String str, String str2, String str3, Map<String, String> map) {
        Matcher matcher = Pattern.compile(str2).matcher(str3);
        if (!matcher.find()) {
            return;
        }
        String captured = matcher.group(1);
        if (captured == null) {
            // Group 1 is optional in the pattern and was not part of this match.
            return;
        }
        // Strip the markup first and trim afterwards, so whitespace that sat next to a
        // removed "<br>" does not survive at the ends of the stored value.
        map.put(str, captured.replace("<br>", "").trim());
    }

    /* JADX WARN: Code restructure failed: missing block: B:35:0x011c, code lost:
    
        r15 = com.google.common.collect.Lists.newArrayList();
        r16 = com.google.common.collect.Lists.newArrayList();
     */
    /* JADX WARN: Code restructure failed: missing block: B:46:0x00c9, code lost:
    
        r15 = com.google.common.collect.Lists.newArrayList();
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Decompiler placeholder: the original body could not be reconstructed, so this method
     * unconditionally throws {@link UnsupportedOperationException} and performs no parsing.
     *
     * <p>NOTE(review): judging by the name and the parameter list this is presumably the
     * multi-language counterpart of {@code parseTickTable} - TODO confirm against the original
     * class file and restore the real implementation before relying on this method.
     *
     * @param r5 first String argument (meaning not recoverable from the decompiled output)
     * @param r6 second String argument (meaning not recoverable from the decompiled output)
     * @param r7 third String argument (meaning not recoverable from the decompiled output)
     * @param r8 fourth String argument (meaning not recoverable from the decompiled output)
     * @param r9 String-to-String map argument (meaning not recoverable from the decompiled output)
     * @throws UnsupportedOperationException always
     */
    static void parseTickTableMultipleLangs(java.lang.String r5, java.lang.String r6, java.lang.String r7, java.lang.String r8, java.util.Map<java.lang.String, java.lang.String> r9) {
        /*
            Method dump skipped, instructions count: 404
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.gcube.textextractor.extractors.HTMLExtractor.parseTickTableMultipleLangs(java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.util.Map):void");
    }

    /**
     * Collects the entries of a table section into one comma-separated field.
     *
     * <p>The first match of {@code str2} in {@code str4} selects the section (group 1, trimmed).
     * Every match of {@code str3} inside that section contributes its trimmed group 1. If any
     * contribution equals {@code "None"} or {@code MachineMetadata.MACHINE_UNKNOWN}
     * (case-insensitively), the whole field is dropped and nothing is stored. Otherwise the
     * collected values are joined with {@code ", "} and stored under {@code str}, provided at
     * least one value was found.
     *
     * @param str key under which the joined values are stored
     * @param str2 regular expression selecting the section; group 1 is used
     * @param str3 regular expression selecting one entry inside the section; group 1 is used
     * @param str4 text to search
     * @param map destination map, modified in place
     */
    static void parseTickTable(String str, String str2, String str3, String str4, Map<String, String> map) {
        Matcher sectionMatcher = Pattern.compile(str2).matcher(str4);
        if (!sectionMatcher.find()) {
            return;
        }
        Matcher entryMatcher = Pattern.compile(str3).matcher(sectionMatcher.group(1).trim());
        List<String> values = Lists.newArrayList();
        while (entryMatcher.find()) {
            String value = entryMatcher.group(1).trim();
            boolean isPlaceholder = value.equalsIgnoreCase("None")
                    || value.equalsIgnoreCase(MachineMetadata.MACHINE_UNKNOWN);
            if (isPlaceholder) {
                // A placeholder entry invalidates the whole field: store nothing.
                return;
            }
            values.add(value);
        }
        if (!values.isEmpty()) {
            map.put(str, Joiner.on(", ").join(values));
        }
    }

    /**
     * Parses a section of {@code "first / second"} rows into two comma-separated fields.
     *
     * <p>Group 1 of the first match of {@code str2} in {@code str3} is split on {@code "<br>"}
     * into rows (trimmed, empty rows omitted). Each row is split on {@code " / "}; rows that do
     * not yield exactly two parts are logged at WARN level and skipped. When {@code str} equals
     * the local name of {@code ANNOTATIONS.STATUS}, every {@code "-"} is removed from both parts.
     * If the first part equals {@code "None"} or {@code MachineMetadata.MACHINE_UNKNOWN}
     * (case-insensitively), everything collected so far is discarded; otherwise both parts are
     * collected. Finally the first parts are stored under {@code str} and the second parts under
     * {@code str + "_fr"}, each joined with {@code ", "} and only when non-empty.
     *
     * <p>NOTE(review): unlike {@code parseTickTable}, a placeholder row does not stop the loop,
     * so rows after it are still collected - confirm whether that is intended.
     *
     * @param str key for the first column; the second column uses {@code str + "_fr"}
     * @param str2 regular expression selecting the section; group 1 is used
     * @param str3 text to search
     * @param map destination map, modified in place
     */
    static void parseTable(String str, String str2, String str3, Map<String, String> map) {
        Matcher sectionMatcher = Pattern.compile(str2).matcher(str3);
        if (!sectionMatcher.find()) {
            return;
        }
        Splitter rowSplitter = Splitter.on("<br>").trimResults().omitEmptyStrings();
        Splitter pairSplitter = Splitter.on(" / ").trimResults().omitEmptyStrings();
        List<String> firstColumn = Lists.newArrayList();
        List<String> secondColumn = Lists.newArrayList();
        for (String row : rowSplitter.splitToList(sectionMatcher.group(1))) {
            List<String> pair = pairSplitter.splitToList(row);
            if (pair.size() != 2) {
                logger.warn("bad data for " + str + " : " + pair + " for row : " + row);
                continue;
            }
            String first = pair.get(0);
            String second = pair.get(1);
            if (str.equals(ANNOTATIONS.getLocalName(ANNOTATIONS.STATUS))) {
                first = first.replace("-", "");
                second = second.replace("-", "");
            }
            boolean isPlaceholder = first.equalsIgnoreCase("None")
                    || first.equalsIgnoreCase(MachineMetadata.MACHINE_UNKNOWN);
            if (isPlaceholder) {
                // Drop everything gathered so far, including this row.
                firstColumn.clear();
                secondColumn.clear();
            } else {
                firstColumn.add(first);
                secondColumn.add(second);
            }
        }
        if (!firstColumn.isEmpty()) {
            map.put(str, Joiner.on(", ").join(firstColumn));
        }
        if (!secondColumn.isEmpty()) {
            map.put(str + "_fr", Joiner.on(", ").join(secondColumn));
        }
    }

    /**
     * Tells whether any entity in {@code list} has an {@code en_name} equal to {@code str},
     * ignoring case.
     *
     * @param list entities to scan
     * @param str name to look for
     * @return {@code true} on the first case-insensitive match, {@code false} if none matches
     */
    private boolean containsSpecies(List<ExtractedEntity> list, String str) {
        for (ExtractedEntity entity : list) {
            if (entity.en_name.equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }
}
