/*
 * Decompiled with CFR 0.152.
 */
package net.matuschek.html;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.StringTokenizer;
import java.util.Vector;
import net.matuschek.util.AttribValuePair;
import org.apache.log4j.Category;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * An HTML document identified by a URL, parsed into a DOM tree with JTidy.
 *
 * <p>The document is parsed in the constructor; the links found during that
 * pass are available through {@link #getLinks()}. Image links and elements of
 * a given tag name can be extracted afterwards from the same DOM tree.
 *
 * <p>Relative links are resolved against the {@code href} of a
 * {@code <base>} element once one has been encountered during traversal,
 * otherwise against the document URL.
 */
public class HtmlDocument {
    /** URL this document was loaded from; fallback context for relative links. */
    private URL url = null;
    /** Raw document bytes handed to the parser. */
    private byte[] content = null;
    /** DOM tree produced by {@link #parseToDOM()}; {@code null} until parsed. */
    private Document domDoc = null;
    private Category log = Category.getInstance((String)this.getClass().getName());
    /** Optional charset name used when re-decoding link strings; may be {@code null}. */
    private String encoding;
    /** Value of the {@code <base href>} element, if a well-formed one was seen. */
    private URL baseURL = null;
    /** Links collected by {@link #parse()}. */
    Vector<URL> links;

    private HtmlDocument(URL url) {
        this.url = url;
    }

    /**
     * Creates a document from raw bytes and parses it immediately.
     *
     * @param url     URL of the document, used to resolve relative links
     * @param content raw document bytes
     */
    public HtmlDocument(URL url, byte[] content) {
        this(url);
        this.content = content;
        this.parse();
    }

    /**
     * Creates a document from raw bytes and parses it immediately, using the
     * given encoding name when re-decoding extracted link strings.
     *
     * @param url         URL of the document, used to resolve relative links
     * @param content     raw document bytes
     * @param newEncoding charset name applied to extracted links; may be {@code null}
     */
    public HtmlDocument(URL url, byte[] content, String newEncoding) {
        this(url);
        this.content = content;
        this.encoding = newEncoding;
        this.parse();
    }

    /**
     * Creates a document from a string and parses it immediately.
     *
     * <p>Each character is narrowed to its low 8 bits, so characters above
     * U+00FF are not preserved.
     *
     * @param url        URL of the document, used to resolve relative links
     * @param contentStr document text
     */
    public HtmlDocument(URL url, String contentStr) {
        this(url);
        // Allocate exactly one byte per character; an extra slot would feed a
        // stray trailing NUL byte to the parser.
        int length = contentStr.length();
        this.content = new byte[length];
        for (int i = 0; i < length; ++i) {
            this.content[i] = (byte)contentStr.charAt(i);
        }
        this.parse();
    }

    /** Builds the DOM tree if necessary and collects all links into {@link #links}. */
    private void parse() {
        if (this.domDoc == null) {
            this.parseToDOM();
        }
        this.links = new Vector<URL>();
        this.extractLinks(this.domDoc.getDocumentElement(), this.links);
    }

    /**
     * Returns the links collected when the document was parsed.
     *
     * @return the internal link vector (not a copy)
     */
    public Vector<URL> getLinks() {
        return this.links;
    }

    /**
     * Collects the {@code src} targets of all {@code img} and {@code image}
     * elements in the document.
     *
     * @return a new vector of {@link URL} objects
     */
    public Vector getImageLinks() {
        if (this.domDoc == null) {
            this.parseToDOM();
        }
        Vector<URL> imageLinks = new Vector<URL>();
        this.extractImageLinks(this.domDoc.getDocumentElement(), imageLinks);
        return imageLinks;
    }

    /**
     * Collects all elements whose node name equals {@code type}.
     *
     * @param type node name to match (compared with {@code equals}, so case matters)
     * @return a new vector of {@link Element} objects
     */
    public Vector getElements(String type) {
        if (this.domDoc == null) {
            this.parseToDOM();
        }
        Vector<Element> elements = new Vector<Element>();
        this.extractElements(this.domDoc.getDocumentElement(), type, elements);
        return elements;
    }

    /**
     * Recursively walks {@code element} and its descendants, adding every
     * link-like attribute it recognises to {@code links}.
     *
     * <p>Handled tags: {@code a}/{@code area} (href), {@code frame},
     * {@code iframe}, {@code img}, {@code image} (src), {@code meta} with
     * {@code http-equiv="refresh"} (url part of content), {@code body}
     * (background). A {@code base} element updates the base URL used for
     * links resolved after it.
     *
     * @param element root of the subtree to scan; {@code null} is logged and ignored
     * @param links   vector that receives the resolved URLs
     */
    protected void extractLinks(Element element, Vector<URL> links) {
        if (element == null) {
            this.log.error((Object)"got a null element");
            return;
        }
        String name = HtmlDocument.lowerCaseName(element);
        if (name.equals("a") || name.equals("area")) {
            this.addLink(element.getAttribute("href"), links);
        } else if (name.equals("base")) {
            String href = element.getAttribute("href");
            try {
                this.baseURL = new URL(href);
                this.log.info((Object)("baseUR=" + this.baseURL));
            }
            catch (MalformedURLException e) {
                // Best effort: keep the previous base URL, but leave a trace.
                this.log.debug((Object)("ignoring malformed base href: " + href + " (" + e.getMessage() + ")"));
            }
        } else if (name.equals("frame") || name.equals("iframe")
                || name.equals("image") || name.equals("img")) {
            this.addLink(element.getAttribute("src"), links);
        } else if (name.equals("meta")) {
            this.extractMetaRefresh(element, links);
        } else if (name.equals("body")) {
            String background = element.getAttribute("background");
            if (background != null && background.length() > 0) {
                this.addLink(background, links);
            }
        } else {
            this.log.info((Object)("Ignore tag name: " + name));
        }
        NodeList childs = element.getChildNodes();
        for (int i = 0; i < childs.getLength(); ++i) {
            if (!(childs.item(i) instanceof Element)) continue;
            this.extractLinks((Element)childs.item(i), links);
        }
    }

    /**
     * Adds the target of a {@code <meta http-equiv="refresh">} element.
     * The content attribute is split on {@code ';'} and every part whose
     * attribute name is {@code url} (any case) contributes a link.
     */
    private void extractMetaRefresh(Element meta, Vector<URL> links) {
        String equiv = meta.getAttribute("http-equiv");
        if (equiv == null || !equiv.equalsIgnoreCase("refresh")) {
            return;
        }
        String refreshContent = meta.getAttribute("content");
        if (refreshContent == null) {
            refreshContent = "";
        }
        StringTokenizer st = new StringTokenizer(refreshContent, ";");
        while (st.hasMoreTokens()) {
            AttribValuePair av = new AttribValuePair(st.nextToken().trim());
            // Constant-first comparison: safe if getAttrib() yields null, and
            // accepts "URL=" as well as "url=".
            if ("url".equalsIgnoreCase(av.getAttrib())) {
                this.addLink(av.getValue(), links);
            }
        }
    }

    /**
     * Recursively collects the {@code src} of {@code img} and {@code image}
     * elements below (and including) {@code element}.
     *
     * @param element root of the subtree to scan; {@code null} is logged and ignored
     * @param links   vector that receives the resolved URLs
     */
    protected void extractImageLinks(Element element, Vector<URL> links) {
        if (element == null) {
            this.log.error((Object)"got a null element");
            return;
        }
        // Lower-cased for consistency with extractLinks.
        String name = HtmlDocument.lowerCaseName(element);
        if (name.equals("img") || name.equals("image")) {
            this.addLink(element.getAttribute("src"), links);
        }
        NodeList childs = element.getChildNodes();
        for (int i = 0; i < childs.getLength(); ++i) {
            if (!(childs.item(i) instanceof Element)) continue;
            this.extractImageLinks((Element)childs.item(i), links);
        }
    }

    /**
     * Recursively collects elements whose node name equals {@code type}.
     *
     * @param element     root of the subtree to scan; {@code null} is logged and ignored
     * @param type        node name to match exactly
     * @param elementList vector that receives the matching elements
     */
    protected void extractElements(Element element, String type, Vector<Element> elementList) {
        if (element == null) {
            this.log.error((Object)"got a null element");
            return;
        }
        String name = element.getNodeName();
        if (name.equals(type)) {
            elementList.add(element);
        }
        NodeList childs = element.getChildNodes();
        for (int i = 0; i < childs.getLength(); ++i) {
            if (!(childs.item(i) instanceof Element)) continue;
            this.extractElements((Element)childs.item(i), type, elementList);
        }
    }

    /** Parses {@link #content} with JTidy and stores the resulting DOM tree. */
    private void parseToDOM() {
        ByteArrayInputStream is = new ByteArrayInputStream(this.content);
        Tidy tidy = new Tidy();
        tidy.setUpperCaseTags(false);
        tidy.setUpperCaseAttrs(false);
        tidy.setErrout(new PrintWriter(System.err));
        this.domDoc = tidy.parseDOM((InputStream)is, null);
    }

    /**
     * Resolves {@code newURL} and appends it to {@code links}.
     *
     * <p>Null or empty values are ignored, and any fragment ({@code #...}) is
     * stripped first. The value is resolved against the base URL if one is
     * set, otherwise against the document URL. Values that cannot be turned
     * into a URL are logged at debug level and skipped.
     */
    private void addLink(String newURL, Vector<URL> links) {
        if (newURL == null || newURL.equals("")) {
            return;
        }
        int pos = newURL.indexOf("#");
        if (pos >= 0) {
            newURL = newURL.substring(0, pos);
        }
        // NOTE(review): getBytes() uses the platform default charset, so this
        // round trip is platform dependent; kept as-is because the encoding
        // the parser used for attribute values is not visible here.
        String charset = this.encoding != null ? this.encoding : "ISO-8859-1";
        try {
            newURL = new String(newURL.getBytes(), charset);
        }
        catch (UnsupportedEncodingException e) {
            // Keep the link text unchanged, but do not hide the problem.
            this.log.debug((Object)("unsupported encoding " + charset + ", link left undecoded: " + newURL));
        }
        try {
            URL context = this.baseURL != null ? this.baseURL : this.url;
            links.add(new URL(context, newURL));
        }
        catch (Exception e) {
            // Link extraction is best effort: a bad link must not abort the scan.
            this.log.debug((Object)("error during link extraction: " + e.getMessage() + " " + newURL));
        }
    }

    /**
     * Returns the element's node name in lower case, using a fixed locale so
     * the result does not depend on the platform default (e.g. Turkish
     * dotless i).
     */
    private static String lowerCaseName(Element element) {
        return element.getNodeName().toLowerCase(java.util.Locale.ENGLISH);
    }

    /**
     * Returns the base URL taken from the document's {@code <base>} element.
     *
     * @return the base URL, or {@code null} if none was found
     */
    public URL getBaseURL() {
        return this.baseURL;
    }
}

