package gr.forth.ics.isl.xsearch.api;

import com.itextpdf.text.Annotation;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import gr.forth.ics.isl.textentitymining.Category;
import gr.forth.ics.isl.textentitymining.Entity;
import gr.forth.ics.isl.textentitymining.gate.GateEntityMiner;
import gr.forth.ics.isl.xsearch.resources.Resources;
import gr.forth.ics.isl.xsearch.util.HTMLTag;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.tika.metadata.DublinCore;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/* loaded from: input_file:WEB-INF/classes/gr/forth/ics/isl/xsearch/api/processdocument.class */
/**
 * Servlet that fetches the document behind a caller-supplied URL, runs the entity miner over
 * its text and writes the identified entities back as JSON, XML or tab-separated text.
 *
 * <p>Request parameters read by this servlet:
 * <ul>
 *   <li>the URL parameter (mandatory): address of the document to process;</li>
 *   <li>{@code categories} (optional): {@code ;}-separated category names; when absent,
 *       {@code Resources.MINING_ACCEPTED_CATEGORIES} is used;</li>
 *   <li>the format parameter (optional): {@code json} (default), {@code xml} or {@code csv}.</li>
 * </ul>
 *
 * <p>Invalid input and fetch failures are reported with HTTP 400.
 */
public class processdocument extends HttpServlet {
    private static final Logger LOGGER = Logger.getLogger(processdocument.class.getName());

    /**
     * Validates the request, downloads the document, mines it and writes the response.
     *
     * @param httpServletRequest the incoming request
     * @param httpServletResponse the response to write the result or an error to
     * @throws ServletException declared for compatibility with existing callers/overriders
     * @throws IOException if the request cannot be decoded or an error response cannot be sent
     * @throws ParserConfigurationException declared for compatibility; failures raised while
     *         building the XML output are caught and reported as HTTP 400 like any other failure
     * @throws TransformerConfigurationException declared for compatibility, see above
     */
    protected void processRequest(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws ServletException, IOException, ParserConfigurationException, TransformerConfigurationException {
        // NOTE(review): Annotation.URL / DublinCore.FORMAT look like decompiler-substituted
        // constants, presumably the literals "url" and "format" (see the error messages) - confirm.
        String rawUrl = trimmedParameter(httpServletRequest, Annotation.URL);
        if (rawUrl.isEmpty()) {
            httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "The value of the parameter 'url' is null or empty.");
            return;
        }

        // The charset name used to be misspelled ("urf-8"), which made every request fail with
        // UnsupportedEncodingException.
        // NOTE(review): the container normally decodes parameters already; this extra decode is
        // kept for clients that encode the URL twice - confirm it is still wanted.
        String decodedUrl;
        try {
            decodedUrl = URLDecoder.decode(rawUrl, "UTF-8");
        } catch (IllegalArgumentException e) {
            // Thrown for malformed %-escapes; that is a client error, not a server failure.
            httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "The value of the parameter 'url' is not a valid URL-encoded string.");
            return;
        }

        HashSet<String> categories = parseCategories(trimmedParameter(httpServletRequest, "categories"));
        if (!Resources.MINING_ALL_POSSIBLE_CATEGORIES.containsAll(categories)) {
            httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "One or more of the provided categories are not currently supported by the entity mining component.");
            return;
        }

        // Validate the output format before doing any network I/O so bad requests fail fast.
        String format = trimmedParameter(httpServletRequest, DublinCore.FORMAT);
        if (format.isEmpty()) {
            format = "json";
        } else if (!format.equalsIgnoreCase("json") && !format.equalsIgnoreCase("xml") && !format.equalsIgnoreCase("csv")) {
            httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "The value of the parameter 'format' is not valid. Valid values: {json, xml, csv}.");
            return;
        }

        try {
            URL url = new URL(decodedUrl);
            // The URL comes from an untrusted client: refuse schemes such as file: or jar: that
            // would let a caller read resources local to the server.
            if (!isHttpUrl(url)) {
                httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "Only http and https URLs are supported.");
                return;
            }

            String mediaType = probeMediaType(url);
            String content;
            if (mediaType.equalsIgnoreCase("application/pdf")) {
                LOGGER.log(Level.INFO, "# Reading PDF file!");
                try {
                    content = extractPdfText(url);
                } catch (Exception e) {
                    LOGGER.log(Level.WARNING, "*** ERROR READING PDF CONTENT: " + e.getMessage(), e);
                    httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "Error reading the contents of the PDF file. Please check the URL and/or the file.");
                    return;
                }
            } else if (mediaType.equalsIgnoreCase("application/msword")) {
                httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "MS Word files are not currently supported.");
                return;
            } else {
                // Everything else (including a missing Content-Type header) is read as markup.
                content = new HTMLTag(url, true).getSourceCode();
            }

            if (content == null) {
                httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "The content of the given URL is NULL. Please check the URL.");
                return;
            }
            if (content.isEmpty()) {
                httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "The content of the given URL is empty. Please check the URL.");
                return;
            }

            // Rewrites an XML declaration prefix into an opening html tag before mining.
            String textToMine = content.replace("<?xml ", "<html ");

            // Mining and output stay serialized on the servlet instance, as before.
            // NOTE(review): presumably the miner is not safe for concurrent use - confirm before
            // narrowing this lock.
            synchronized (this) {
                GateEntityMiner miner = new GateEntityMiner();
                miner.setAcceptedCategories(categories);
                miner.setTextToMine(textToMine);
                miner.findEntities();
                ArrayList<Category> entities = miner.getEntities();
                LOGGER.log(Level.INFO, "# Page mining was finished!");

                if (format.equalsIgnoreCase("csv")) {
                    writeCsv(httpServletResponse, entities);
                } else if (format.equalsIgnoreCase("xml")) {
                    writeXml(httpServletResponse, entities);
                } else {
                    writeJson(httpServletResponse, entities);
                }
                LOGGER.log(Level.INFO, "# PROCESS DOCUMENT - FINISHED!");
            }
        } catch (Exception e) {
            // Boundary catch: any failure while fetching, mining or rendering is reported to the
            // client with the historical message, but is no longer silently dropped.
            LOGGER.log(Level.WARNING, "Failed to process document at " + decodedUrl, e);
            // sendError() throws IllegalStateException once the response has been committed.
            if (!httpServletResponse.isCommitted()) {
                httpServletResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, "Problem connecting to the given URL. Please check the URL.");
            }
        }
    }

    /** Returns the named request parameter trimmed, or the empty string when it is absent. */
    private static String trimmedParameter(HttpServletRequest request, String name) {
        String value = request.getParameter(name);
        return value == null ? "" : value.trim();
    }

    /**
     * Builds the set of categories to mine: the configured defaults when {@code raw} is empty,
     * otherwise every {@code ;}-separated token of {@code raw}, trimmed.
     */
    private static HashSet<String> parseCategories(String raw) {
        HashSet<String> categories = new HashSet<String>();
        if (raw.isEmpty()) {
            for (String accepted : Resources.MINING_ACCEPTED_CATEGORIES) {
                categories.add(accepted);
            }
        } else {
            for (String token : raw.split(";")) {
                categories.add(token.trim());
            }
        }
        return categories;
    }

    /** Tells whether the URL uses the http or https scheme. */
    private static boolean isHttpUrl(URL url) {
        String protocol = url.getProtocol();
        return "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
    }

    /**
     * Connects to the URL and returns the media type from its Content-Type header, without any
     * parameters (e.g. {@code "application/pdf"} for {@code "application/pdf; charset=binary"}).
     *
     * @return the bare media type, or the empty string when the server sent no Content-Type
     * @throws IOException if the connection cannot be established
     */
    private static String probeMediaType(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        try {
            // Connect explicitly so an unreachable host surfaces as an IOException instead of
            // being indistinguishable from a missing header.
            connection.connect();
            String contentType = connection.getContentType();
            if (contentType == null) {
                return "";
            }
            int parametersStart = contentType.indexOf(';');
            if (parametersStart >= 0) {
                contentType = contentType.substring(0, parametersStart);
            }
            return contentType.trim();
        } finally {
            // Only the headers are needed here; release the connection instead of leaking it.
            if (connection instanceof java.net.HttpURLConnection) {
                ((java.net.HttpURLConnection) connection).disconnect();
            }
        }
    }

    /**
     * Reads the PDF at the URL and returns the text of all pages, one page after another, each
     * followed by a newline.
     *
     * @throws IOException if the PDF cannot be opened or a page cannot be read
     */
    private static String extractPdfText(URL url) throws IOException {
        PdfReader pdfReader = new PdfReader(url);
        try {
            StringBuilder text = new StringBuilder();
            int numberOfPages = pdfReader.getNumberOfPages();
            // PDF page numbers are 1-based.
            for (int page = 1; page <= numberOfPages; page++) {
                text.append(PdfTextExtractor.getTextFromPage(pdfReader, page)).append("\n");
            }
            return text.toString();
        } finally {
            // Close the reader even when a page fails to parse.
            pdfReader.close();
        }
    }

    /** Writes the entities as tab-separated lines preceded by a header line. */
    private static void writeCsv(HttpServletResponse response, ArrayList<Category> entities) throws IOException {
        // The content type (and with it the charset) must be set before getWriter(); set
        // afterwards, the charset is ignored and the writer falls back to the container default.
        response.setContentType("text/plain;charset=UTF-8");
        PrintWriter writer = response.getWriter();
        try {
            writer.println("\"ENTITY_NAME\"\t\"CATEGORY_NAME\"");
            for (Category category : entities) {
                for (Entity entity : category.getEntities()) {
                    writer.println(entity.getName() + "\t" + category.getName());
                }
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the entities as an XML document with root {@code identifiedEntities} and one
     * {@code entity} element (children {@code entityName}, {@code categoryName}) per entity.
     */
    private static void writeXml(HttpServletResponse response, ArrayList<Category> entities) throws IOException, ParserConfigurationException, TransformerConfigurationException {
        Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        Element root = document.createElement("identifiedEntities");
        document.appendChild(root);
        for (Category category : entities) {
            for (Entity entity : category.getEntities()) {
                Element entityElement = document.createElement("entity");
                Element nameElement = document.createElement("entityName");
                Element categoryElement = document.createElement("categoryName");
                nameElement.appendChild(document.createTextNode(entity.getName()));
                categoryElement.appendChild(document.createTextNode(category.getName()));
                entityElement.appendChild(nameElement);
                entityElement.appendChild(categoryElement);
                root.appendChild(entityElement);
            }
        }

        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        response.setContentType("application/xml;charset=UTF-8");
        PrintWriter writer = response.getWriter();
        try {
            transformer.transform(new DOMSource(document), new StreamResult(writer));
        } catch (TransformerException e) {
            // As before, a serialization failure is logged and the (possibly partial) output is
            // still flushed to the client.
            LOGGER.log(Level.SEVERE, "Failed to serialize the XML response", e);
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the entities as a JSON object whose {@code identifiedEntities} array holds one
     * {@code {categoryName, entityName}} object per entity.
     */
    @SuppressWarnings("unchecked") // json-simple's JSONObject/JSONArray are raw collections.
    private static void writeJson(HttpServletResponse response, ArrayList<Category> entities) throws IOException {
        JSONArray identified = new JSONArray();
        for (Category category : entities) {
            for (Entity entity : category.getEntities()) {
                JSONObject item = new JSONObject();
                item.put("categoryName", category.getName());
                item.put("entityName", entity.getName());
                identified.add(item);
            }
        }
        JSONObject payload = new JSONObject();
        payload.put("identifiedEntities", identified);

        response.setContentType("application/json;charset=UTF-8");
        PrintWriter writer = response.getWriter();
        try {
            writer.print(payload);
        } finally {
            writer.close();
        }
    }

    /**
     * Shared GET/POST entry point: delegates to {@link #processRequest} and turns the XML-related
     * checked exceptions into a logged HTTP 500 instead of an empty 200 response.
     */
    private void handle(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        try {
            processRequest(request, response);
        } catch (ParserConfigurationException e) {
            reportInternalError(response, e);
        } catch (TransformerConfigurationException e) {
            reportInternalError(response, e);
        }
    }

    /** Logs the failure and, if the response is still uncommitted, answers with HTTP 500. */
    private static void reportInternalError(HttpServletResponse response, Exception cause) throws IOException {
        LOGGER.log(Level.SEVERE, "Failed to build the XML response", cause);
        if (!response.isCommitted()) {
            response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }

    /** Handles HTTP GET by processing the request. */
    @Override
    protected void doGet(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws ServletException, IOException {
        handle(httpServletRequest, httpServletResponse);
    }

    /** Handles HTTP POST exactly like GET. */
    @Override
    protected void doPost(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws ServletException, IOException {
        handle(httpServletRequest, httpServletResponse);
    }

    /** Returns the servlet description string. */
    @Override
    public String getServletInfo() {
        return "Short description";
    }
}
