package net.matuschek.html;

import com.lowagie.text.html.HtmlTags;
import groovy.ui.text.StructuredSyntaxHandler;
import java.io.ByteArrayInputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.StringTokenizer;
import java.util.Vector;
import net.matuschek.util.AttribValuePair;
import org.apache.log4j.Category;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:WEB-INF/lib/jobo-1.4.0.jar:net/matuschek/html/HtmlDocument.class */
/**
 * An HTML document that is parsed into a DOM tree with JTidy so that the
 * links, image links and arbitrary elements it contains can be extracted.
 *
 * <p>The document is parsed and its links are collected when an instance is
 * constructed. Relative links are resolved against the {@code href} of a
 * {@code <base>} element once one has been encountered during the tree walk,
 * and against the document URL otherwise.
 */
public class HtmlDocument {
    /** URL of this document; relative links are resolved against it when no base URL is known. */
    private final URL url;
    /** Raw bytes of the document as handed to the parser. */
    private byte[] content;
    /** DOM tree produced by JTidy; {@code null} until the content has been parsed. */
    private Document domDoc;
    /** Logger named after the runtime class of this instance. */
    private final Category log;
    /** Charset name used when re-decoding link strings; {@code null} means ISO-8859-1. */
    private String encoding;
    /** URL taken from a {@code <base href>} element, or {@code null} if none was found. */
    private URL baseURL;
    /** Links collected by {@link #parse()}. */
    Vector<URL> links;

    /**
     * Initialises the fields shared by all public constructors.
     *
     * @param url the URL of the document
     */
    private HtmlDocument(URL url) {
        this.log = Category.getInstance(getClass().getName());
        this.url = url;
    }

    /**
     * Creates a document from raw bytes and parses it immediately.
     *
     * @param url  the URL of the document, used to resolve relative links
     * @param bArr the raw document content
     */
    public HtmlDocument(URL url, byte[] bArr) {
        this(url);
        this.content = bArr;
        parse();
    }

    /**
     * Creates a document from raw bytes with an explicit link encoding and
     * parses it immediately.
     *
     * @param url  the URL of the document, used to resolve relative links
     * @param bArr the raw document content
     * @param str  charset name used when re-decoding extracted link strings
     */
    public HtmlDocument(URL url, byte[] bArr, String str) {
        this(url);
        this.content = bArr;
        this.encoding = str;
        parse();
    }

    /**
     * Creates a document from a string and parses it immediately.
     *
     * <p>Each character is narrowed to its low 8 bits, so characters above
     * U+00FF are not preserved.
     *
     * @param url the URL of the document, used to resolve relative links
     * @param str the document content
     */
    public HtmlDocument(URL url, String str) {
        this(url);
        // Exactly one byte per character: no trailing NUL byte is appended,
        // matching what the byte[] constructors pass to the parser.
        int length = str.length();
        this.content = new byte[length];
        for (int i = 0; i < length; i++) {
            this.content[i] = (byte) str.charAt(i);
        }
        parse();
    }

    /**
     * Builds the DOM tree if necessary and collects all links of the document
     * into {@link #links}.
     */
    private void parse() {
        if (this.domDoc == null) {
            parseToDOM();
        }
        this.links = new Vector<>();
        extractLinks(this.domDoc.getDocumentElement(), this.links);
    }

    /**
     * Returns the links that were collected when the document was parsed.
     *
     * @return the internal vector of link URLs (not a copy)
     */
    public Vector<URL> getLinks() {
        return this.links;
    }

    /**
     * Walks the DOM tree and collects the {@code src} URLs of image elements.
     *
     * @return a new vector of {@link URL} objects, one per image link found
     */
    public Vector getImageLinks() {
        if (this.domDoc == null) {
            parseToDOM();
        }
        Vector<URL> vector = new Vector<>();
        extractImageLinks(this.domDoc.getDocumentElement(), vector);
        return vector;
    }

    /**
     * Walks the DOM tree and collects all elements with the given node name.
     *
     * @param str the node name to look for (compared case-sensitively)
     * @return a new vector of matching {@link Element} objects
     */
    public Vector getElements(String str) {
        if (this.domDoc == null) {
            parseToDOM();
        }
        Vector<Element> vector = new Vector<>();
        extractElements(this.domDoc.getDocumentElement(), str, vector);
        return vector;
    }

    /**
     * Recursively collects links from the given element and its descendants.
     *
     * <p>Handles anchors, frames, iframes, images, image-map areas, meta
     * refresh targets and body backgrounds. A {@code <base href>} element
     * updates the base URL used for links resolved after it.
     *
     * @param element the element to inspect; a {@code null} element is logged and skipped
     * @param vector  the vector that receives the resolved link URLs
     */
    protected void extractLinks(Element element, Vector<URL> vector) {
        if (element == null) {
            this.log.error("got a null element");
            return;
        }
        String lowerCase = element.getNodeName().toLowerCase();
        if (lowerCase.equals(HtmlTags.ANCHOR)) {
            addLink(element.getAttribute("href"), vector);
        } else if (lowerCase.equals("base")) {
            String baseHref = element.getAttribute("href");
            try {
                this.baseURL = new URL(baseHref);
                this.log.info("baseUR=" + this.baseURL);
            } catch (MalformedURLException e) {
                // Best effort: keep the previous base URL, but leave a trace.
                this.log.warn("ignoring malformed base href: " + baseHref);
            }
        } else if (lowerCase.equals("frame")) {
            addLink(element.getAttribute("src"), vector);
        } else if (lowerCase.equals("iframe")) {
            addLink(element.getAttribute("src"), vector);
        } else if (lowerCase.equals("image")) {
            addLink(element.getAttribute("src"), vector);
        } else if (lowerCase.equals(HtmlTags.IMAGE)) {
            addLink(element.getAttribute("src"), vector);
        } else if (lowerCase.equals("area")) {
            addLink(element.getAttribute("href"), vector);
        } else if (lowerCase.equals("meta")) {
            String httpEquiv = element.getAttribute("http-equiv");
            if (httpEquiv != null && httpEquiv.equalsIgnoreCase("refresh")) {
                String refreshContent = element.getAttribute("content");
                if (refreshContent == null) {
                    refreshContent = "";
                }
                // The content looks like "5; url=target": split on ';' and
                // pick the attribute/value pair named "url".
                StringTokenizer tokens = new StringTokenizer(refreshContent, ";");
                while (tokens.hasMoreTokens()) {
                    AttribValuePair pair = new AttribValuePair(tokens.nextToken().trim());
                    // Null-safe and case-insensitive so "URL=..." is found too.
                    if ("url".equalsIgnoreCase(pair.getAttrib())) {
                        addLink(pair.getValue(), vector);
                    }
                }
            }
        } else if (lowerCase.equals("body")) {
            String background = element.getAttribute(StructuredSyntaxHandler.BACKGROUND);
            // Only a present, non-empty background attribute is a link.
            if (background != null && !background.equals("")) {
                addLink(background, vector);
            }
        } else {
            this.log.info("Ignore tag name: " + lowerCase);
        }
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            if (childNodes.item(i) instanceof Element) {
                extractLinks((Element) childNodes.item(i), vector);
            }
        }
    }

    /**
     * Recursively collects the {@code src} links of image elements from the
     * given element and its descendants.
     *
     * @param element the element to inspect; a {@code null} element is logged and skipped
     * @param vector  the vector that receives the resolved image URLs
     */
    protected void extractImageLinks(Element element, Vector<URL> vector) {
        if (element == null) {
            this.log.error("got a null element");
            return;
        }
        // Lower-cased for consistency with extractLinks.
        String nodeName = element.getNodeName().toLowerCase();
        if (nodeName.equals(HtmlTags.IMAGE) || nodeName.equals("image")) {
            addLink(element.getAttribute("src"), vector);
        }
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            if (childNodes.item(i) instanceof Element) {
                extractImageLinks((Element) childNodes.item(i), vector);
            }
        }
    }

    /**
     * Recursively collects all elements whose node name equals the given name.
     *
     * @param element the element to inspect; a {@code null} element is logged and skipped
     * @param str     the node name to match (case-sensitive)
     * @param vector  the vector that receives the matching elements
     */
    protected void extractElements(Element element, String str, Vector<Element> vector) {
        if (element == null) {
            this.log.error("got a null element");
            return;
        }
        if (element.getNodeName().equals(str)) {
            vector.add(element);
        }
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            if (childNodes.item(i) instanceof Element) {
                extractElements((Element) childNodes.item(i), str, vector);
            }
        }
    }

    /**
     * Parses the raw content into a DOM tree using JTidy. Tag and attribute
     * names are not upper-cased, and parser messages are written to
     * {@code System.err}.
     */
    private void parseToDOM() {
        ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(this.content);
        Tidy tidy = new Tidy();
        tidy.setUpperCaseTags(false);
        tidy.setUpperCaseAttrs(false);
        tidy.setErrout(new PrintWriter(System.err));
        this.domDoc = tidy.parseDOM(byteArrayInputStream, null);
    }

    /**
     * Resolves a link string to a URL and appends it to the given vector.
     *
     * <p>{@code null} and empty strings are ignored, and any fragment
     * ({@code #...}) is stripped. The string is resolved against the base URL
     * if one is known, otherwise against the document URL. Links that cannot
     * be resolved are logged at debug level and dropped.
     *
     * @param str    the raw link text taken from an attribute
     * @param vector the vector that receives the resolved URL
     */
    private void addLink(String str, Vector<URL> vector) {
        if (str == null || str.equals("")) {
            return;
        }
        int indexOf = str.indexOf("#");
        if (indexOf >= 0) {
            str = str.substring(0, indexOf);
        }
        // NOTE(review): the link is encoded with the platform default charset
        // and decoded again with the document encoding; kept as is because the
        // intent cannot be confirmed from this file.
        String charsetName = this.encoding != null ? this.encoding : "ISO-8859-1";
        try {
            str = new String(str.getBytes(), charsetName);
        } catch (UnsupportedEncodingException e) {
            // Best effort: continue with the link text unchanged.
            this.log.warn("unsupported encoding " + charsetName + ", using link unchanged: " + str);
        }
        try {
            vector.add(this.baseURL != null ? new URL(this.baseURL, str) : new URL(this.url, str));
        } catch (Exception e3) {
            this.log.debug("error during link extraction: " + e3.getMessage() + " " + str);
        }
    }

    /**
     * Returns the base URL declared by the document.
     *
     * @return the URL from a {@code <base href>} element, or {@code null} if
     *         none was found or it was malformed
     */
    public URL getBaseURL() {
        return this.baseURL;
    }
}
