/*
 * Decompiled with CFR 0.152.
 */
package net.matuschek.spider;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.StringTokenizer;
import java.util.Vector;
import net.matuschek.html.FormFiller;
import net.matuschek.html.HtmlDocument;
import net.matuschek.http.DocManagerException;
import net.matuschek.http.DownloadRuleSet;
import net.matuschek.http.ExtendedURL;
import net.matuschek.http.HttpDoc;
import net.matuschek.http.HttpDocManager;
import net.matuschek.http.HttpException;
import net.matuschek.http.HttpHeader;
import net.matuschek.http.HttpTool;
import net.matuschek.http.HttpToolCallback;
import net.matuschek.http.NTLMAuthorization;
import net.matuschek.http.cookie.CookieManager;
import net.matuschek.spider.DefaultRobotExceptionHandler;
import net.matuschek.spider.HashedMemoryTaskList;
import net.matuschek.spider.NoRobots;
import net.matuschek.spider.RobotExceptionHandler;
import net.matuschek.spider.RobotTask;
import net.matuschek.spider.TaskList;
import net.matuschek.spider.URLCheck;
import net.matuschek.spider.WebRobotCallback;
import net.matuschek.spider.docfilter.FilterChain;
import net.matuschek.spider.docfilter.FilterException;
import org.apache.log4j.Category;
import org.w3c.dom.Element;

public class WebRobot
implements Runnable,
Cloneable {
    // Robot identification. AGENT_NAME is handed to setAgentName() by the constructor.
    private static final String ROBOT_NAME = "JoBo";
    private static final String AGENT_NAME = "JoBo/1.4 (http://www.matuschek.net/jobo.html)";
    // Receives errors reported from retrieveURL() (malformed URLs, HTTP failures, doc-manager failures).
    protected RobotExceptionHandler exceptionHandler = new DefaultRobotExceptionHandler();
    private static final int DEFAULT_DEPTH = 10;
    // Entry point of the crawl; set via setStartURL()/setStart().
    protected URL startURL = null;
    // Host plus directory part of the start URL's path, computed in setStartURL().
    protected String startDir = "";
    // Depth budget given to the initial task; each followed link gets depth - 1.
    protected int maxDepth = 10;
    // When true, basicURLCheck() accepts every URL.
    protected boolean walkToOtherHosts = false;
    // Optional document cache/store; may be null.
    protected HttpDocManager docManager;
    // HTTP client used for all retrievals and for the robots.txt checker.
    protected HttpTool httpTool = new HttpTool();
    // log4j category named after the runtime class (assigned in the constructor).
    protected Category log;
    // Referer used for the very first task.
    protected String startReferer = "-";
    // robots.txt checker; (re)created by setAgentName().
    protected NoRobots robCheck;
    // Pending tasks and already-started tasks.
    protected TaskList todo = null;
    protected TaskList visited = null;
    // Value reported by getIgnoreRobotsTxt().
    protected boolean ignoreRobotsTxt = false;
    // Pause after each HTTP retrieval, in seconds (sleepNow() multiplies by 1000).
    protected int sleepTime = 1;
    protected FormFiller formFiller = new FormFiller();
    // URL strings that may be retrieved even if already present in the visited list.
    protected Vector visitMany = new Vector();
    // Optional progress listener; may be null.
    protected WebRobotCallback webRobotCallback = null;
    // Set by stopRobot(); ends the walkTree() loop.
    protected boolean stopIt = false;
    // Optional additional URL filter; may be null.
    protected URLCheck urlCheck = null;
    // Pause flag: walkTree() waits in one-second steps while this is true.
    protected boolean sleep;
    // Extra allowed "host + path" prefixes consulted by basicURLCheck().
    protected Vector allowedURLs = new Vector();
    // Scope switches evaluated by basicURLCheck().
    protected boolean allowWholeHost = true;
    // Maximum age (seconds) of a cached document before it is re-requested; negative means never re-request.
    protected long maxDocumentAge = -1L;
    protected boolean allowWholeDomain = true;
    // When true, hosts are also compared with a leading "www." removed.
    protected boolean flexibleHostCheck = false;
    // Optional filter chain applied to a document before it is processed/stored.
    protected FilterChain filters = null;
    // When false, the doc manager's cache is not consulted before retrieval.
    protected boolean allowCaching = true;
    // Enables the same-content (duplicate) detection in retrieveURL().
    protected boolean duplicateCheck = false;
    // Number of OutOfMemoryErrors handled so far; drives handleMemoryError().
    private int memoryLevel = 0;
    // Feature switches that handleMemoryError() turns off to save memory.
    protected boolean activatedNewTasks = true;
    protected boolean activatedUrlHistory = true;
    protected boolean activatedContentHistory = true;
    // 200 KiB reserve that handleMemoryError() drops at memory level 2.
    private byte[] memoryBuffer = new byte[204800];
    // NOTE(review): not used in the visible part of this file.
    protected int iteration = 0;
    private static final int DEFAULT_EXPECTED_DOCUMENT_COUNT = 50000;
    // Initial capacity hint for the task lists and content2UrlMap.
    protected int expectedDocumentCount = 50000;
    // Presumably maps a content key to the URL first seen with that content - confirm against
    // getContentVisitedURL()/setContentVisitedURL(), which are outside this view.
    protected HashMap content2UrlMap;
    // Retrieval statistics logged at the end of work().
    long countCache = 0L;
    long countWeb = 0L;
    long countNoRefresh = 0L;
    long countRefresh = 0L;
    // True once a non-empty handler list was passed to setFormHandlers().
    boolean hasFormHandlers = false;
    // NOTE(review): not used in the visible part of this file.
    protected Vector wasteParameters = new Vector();
    protected long startTime = System.currentTimeMillis();
    // Number of retries allowed for a failed retrieval before falling back to the cache.
    protected int maxRetries = 0;
    // Maximum age (seconds) of a cached document used as fallback after failed retries; negative means no limit.
    protected long expirationAge = -1L;

    public WebRobot(int expectedDocumentCount) {
        this.log = Category.getInstance((String)this.getClass().getName());
        this.content2UrlMap = new HashMap(expectedDocumentCount);
        this.registerVisitedList(new HashedMemoryTaskList(false, expectedDocumentCount));
        this.registerToDoList(new HashedMemoryTaskList(true, expectedDocumentCount));
        this.expectedDocumentCount = expectedDocumentCount;
        this.setAgentName(AGENT_NAME);
    }

    /**
     * Creates a robot sized for the default expected document count (50000).
     */
    public WebRobot() {
        this(DEFAULT_EXPECTED_DOCUMENT_COUNT);
    }

    /**
     * Installs the list that holds the tasks still to be processed.
     *
     * @param todo the task list to use as the to-do queue
     */
    public void registerToDoList(TaskList todo) {
        this.todo = todo;
    }

    /**
     * Installs the list that records the tasks already started.
     *
     * @param visited the task list to use as the visited history
     */
    public void registerVisitedList(TaskList visited) {
        this.visited = visited;
    }

    /**
     * @return the URL the crawl starts from, or {@code null} if none was set
     */
    public URL getStartURL() {
        return startURL;
    }

    /**
     * Sets the start URL and derives {@code startDir} from it: the host followed
     * by the directory part of the path (always ending in "/").
     *
     * @param startURL the URL the crawl starts from
     */
    public void setStartURL(URL startURL) {
        String path = startURL.getPath();
        this.startURL = startURL;
        String host = startURL.getHost();

        String directory;
        if (path.endsWith("/")) {
            // the path already names a directory
            directory = path;
        } else {
            int lastSlash = path.lastIndexOf("/");
            if (lastSlash < 0) {
                // no slash at all: fall back to the host root
                directory = "/";
            } else {
                // strip the file name, keep the trailing slash
                directory = path.substring(0, lastSlash + 1);
            }
        }
        this.startDir = host + directory;
    }

    /**
     * @return the depth budget assigned to the initial task
     */
    public int getMaxDepth() {
        return maxDepth;
    }

    /**
     * Sets the depth budget assigned to the initial task.
     *
     * @param maxDepth the maximum search depth
     */
    public void setMaxDepth(int maxDepth) {
        this.maxDepth = maxDepth;
    }

    /**
     * @return the bandwidth setting of the underlying {@code HttpTool}
     */
    public int getBandwidth() {
        return httpTool.getBandwidth();
    }

    /**
     * Forwards the bandwidth setting to the underlying {@code HttpTool}.
     *
     * @param bandwidth the value passed to {@code HttpTool.setBandwidth}
     */
    public void setBandwidth(int bandwidth) {
        httpTool.setBandwidth(bandwidth);
    }

    /**
     * @return {@code true} if the robot may follow links to any host
     */
    public boolean getWalkToOtherHosts() {
        return walkToOtherHosts;
    }

    /**
     * Controls whether the robot may follow links to any host; when {@code true},
     * {@code basicURLCheck} accepts every URL.
     *
     * @param walkToOtherHosts the new setting
     */
    public void setWalkToOtherHosts(boolean walkToOtherHosts) {
        this.walkToOtherHosts = walkToOtherHosts;
    }

    /**
     * @return {@code true} if every URL on the start URL's host is in scope
     */
    public boolean getAllowWholeHost() {
        return allowWholeHost;
    }

    /**
     * Controls whether every URL on the start URL's host is in scope.
     *
     * @param allowWholeHost the new setting
     */
    public void setAllowWholeHost(boolean allowWholeHost) {
        this.allowWholeHost = allowWholeHost;
    }

    /**
     * @return {@code true} if hosts in the start host's domain are in scope
     */
    public boolean getAllowWholeDomain() {
        return allowWholeDomain;
    }

    /**
     * Controls whether hosts in the start host's domain are in scope.
     *
     * @param allowWholeDomain the new setting
     */
    public void setAllowWholeDomain(boolean allowWholeDomain) {
        this.allowWholeDomain = allowWholeDomain;
    }

    /**
     * @return {@code true} if host names are also compared with a leading
     *         "www." removed
     */
    public boolean getFlexibleHostCheck() {
        return flexibleHostCheck;
    }

    /**
     * Controls whether host names are also compared with a leading "www." removed.
     *
     * @param flexibleHostCheck the new setting
     */
    public void setFlexibleHostCheck(boolean flexibleHostCheck) {
        this.flexibleHostCheck = flexibleHostCheck;
    }

    /**
     * @return {@code true} if the document manager's cache is consulted before
     *         a document is retrieved
     */
    public boolean getAllowCaching() {
        return allowCaching;
    }

    /**
     * Controls whether the document manager's cache is consulted before a
     * document is retrieved.
     *
     * @param allowCaching the new setting
     */
    public void setAllowCaching(boolean allowCaching) {
        this.allowCaching = allowCaching;
    }

    /**
     * @return the document manager, or {@code null} if none was set
     */
    public HttpDocManager getDocManager() {
        return docManager;
    }

    /**
     * Sets the document manager used for caching, processing and storing documents.
     *
     * @param docManager the document manager; may be {@code null}
     */
    public void setDocManager(HttpDocManager docManager) {
        this.docManager = docManager;
    }

    /**
     * Forwards the cookie manager to the underlying {@code HttpTool}.
     *
     * @param cm the cookie manager to use
     */
    public void setCookieManager(CookieManager cm) {
        httpTool.setCookieManager(cm);
    }

    /**
     * @return the cookie manager of the underlying {@code HttpTool}
     */
    public CookieManager getCookieManager() {
        return httpTool.getCookieManager();
    }

    /**
     * Forwards the download rule set to the underlying {@code HttpTool}.
     *
     * @param rules the rules to apply
     */
    public void setDownloadRuleSet(DownloadRuleSet rules) {
        httpTool.setDownloadRuleSet(rules);
    }

    /**
     * Installs an additional URL filter consulted by {@code isAllowed} and
     * {@code isProcessingAllowed}.
     *
     * @param check the filter; {@code null} disables the extra check
     */
    public void setURLCheck(URLCheck check) {
        urlCheck = check;
    }

    /**
     * Forwards the proxy description to the underlying {@code HttpTool}.
     *
     * @param proxyDescr proxy description as understood by {@code HttpTool.setProxy}
     * @throws HttpException if {@code HttpTool.setProxy} rejects the description
     */
    public void setProxy(String proxyDescr) throws HttpException {
        httpTool.setProxy(proxyDescr);
    }

    /**
     * @return the proxy setting of the underlying {@code HttpTool}
     */
    public String getProxy() {
        return httpTool.getProxy();
    }

    /**
     * @return the referer used for the initial task
     */
    public String getStartReferer() {
        return startReferer;
    }

    /**
     * Sets the referer used for the initial task.
     *
     * @param startReferer the referer string
     */
    public void setStartReferer(String startReferer) {
        this.startReferer = startReferer;
    }

    /**
     * Controls whether robots.txt rules are ignored.
     *
     * <p>The flag is stored in this robot (so {@link #getIgnoreRobotsTxt()}
     * reports the value that was set) and forwarded to the current robots.txt
     * checker.
     *
     * @param ignoreRobotsTxt {@code true} to ignore robots.txt
     */
    public void setIgnoreRobotsTxt(boolean ignoreRobotsTxt) {
        // Previously only the checker was updated, leaving the getter stuck at its initial value.
        this.ignoreRobotsTxt = ignoreRobotsTxt;
        this.robCheck.setIgnore(ignoreRobotsTxt);
    }

    /**
     * @return the pause after each HTTP retrieval, in seconds
     */
    public int getSleepTime() {
        return sleepTime;
    }

    /**
     * Sets the pause after each HTTP retrieval; {@code sleepNow} skips the pause
     * for values that are not positive.
     *
     * @param sleepTime pause in seconds
     */
    public void setSleepTime(int sleepTime) {
        this.sleepTime = sleepTime;
    }

    /**
     * Forwards the "from" address to the underlying {@code HttpTool}.
     *
     * @param fromAddress the address passed to {@code HttpTool.setFromAddress}
     */
    public void setFromAddress(String fromAddress) {
        httpTool.setFromAddress(fromAddress);
    }

    /**
     * Installs the form handlers used to fill in HTML forms and records whether
     * any handler is present.
     *
     * <p>The {@code hasFormHandlers} flag now always mirrors the list that was
     * passed in. Before, it could only ever be switched on, so form processing
     * stayed enabled after the handlers had been cleared.
     *
     * @param handlers the form handlers; {@code null} or empty disables form processing
     */
    public void setFormHandlers(Vector handlers) {
        this.formFiller.setFormHandlers(handlers);
        this.hasFormHandlers = handlers != null && !handlers.isEmpty();
    }

    /**
     * @return the form handlers held by the form filler
     */
    public Vector getFormHandlers() {
        return formFiller.getFormHandlers();
    }

    /**
     * @return the agent name of the underlying {@code HttpTool}, or {@code null}
     *         if there is no {@code HttpTool}
     */
    public String getAgentName() {
        return httpTool == null ? null : httpTool.getAgentName();
    }

    /**
     * Sets the agent name on the {@code HttpTool} and replaces the robots.txt
     * checker with a new one created for that name.
     *
     * @param name the agent name
     */
    public void setAgentName(String name) {
        httpTool.setAgentName(name);
        // a fresh checker is built for the new agent name
        robCheck = new NoRobots(name, httpTool);
    }

    /**
     * @return the timeout of the underlying {@code HttpTool}, or {@code -1} if
     *         there is no {@code HttpTool}
     */
    public int getTimeout() {
        return httpTool == null ? -1 : httpTool.getTimeout();
    }

    /**
     * Forwards the timeout to the underlying {@code HttpTool}.
     *
     * @param timeout the value passed to {@code HttpTool.setTimeout}
     */
    public void setTimeout(int timeout) {
        httpTool.setTimeout(timeout);
    }

    /**
     * @return the NTLM authorization of the underlying {@code HttpTool}, or
     *         {@code null} if there is no {@code HttpTool}
     */
    public NTLMAuthorization getNtlmAuthorization() {
        return httpTool == null ? null : httpTool.getNtlmAuthorization();
    }

    /**
     * Forwards the NTLM authorization to the underlying {@code HttpTool}.
     *
     * @param ntlmAuthorization the authorization to use
     */
    public void setNtlmAuthorization(NTLMAuthorization ntlmAuthorization) {
        httpTool.setNtlmAuthorization(ntlmAuthorization);
    }

    /**
     * @return the value of this robot's {@code ignoreRobotsTxt} field
     */
    public boolean getIgnoreRobotsTxt() {
        return ignoreRobotsTxt;
    }

    /**
     * @return the URL strings that may be retrieved more than once
     */
    public Vector getVisitMany() {
        return visitMany;
    }

    /**
     * Sets the URL strings that may be retrieved even when the task is already
     * in the visited list ({@code walkTree} compares against {@code URL.toString()}).
     *
     * @param visitMany the list of URL strings
     */
    public void setVisitMany(Vector visitMany) {
        this.visitMany = visitMany;
    }

    /**
     * Registers a callback on the underlying {@code HttpTool}.
     *
     * @param callback the callback passed to {@code HttpTool.setCallback}
     */
    public void setHttpToolCallback(HttpToolCallback callback) {
        httpTool.setCallback(callback);
    }

    /**
     * @return the progress callback, or {@code null} if none is registered
     */
    public WebRobotCallback getWebRobotCallback() {
        return webRobotCallback;
    }

    /**
     * Registers the callback that is notified about sleeping, queue size,
     * retrieved documents and completion.
     *
     * @param webRobotCallback the callback; may be {@code null}
     */
    public void setWebRobotCallback(WebRobotCallback webRobotCallback) {
        this.webRobotCallback = webRobotCallback;
    }

    /**
     * Pauses or resumes the crawl: while the flag is {@code true},
     * {@code walkTree} waits in one-second steps after the current task.
     *
     * @param sleep {@code true} to pause, {@code false} to resume
     */
    public void setSleep(boolean sleep) {
        this.sleep = sleep;
    }

    /**
     * @return the current value of the pause flag set by {@code setSleep}
     */
    public boolean isSleeping() {
        return sleep;
    }

    /**
     * Sets the additional allowed prefixes; {@code basicURLCheck} compares each
     * entry (a {@code String}) against "host + path" of a candidate URL.
     *
     * @param allowed the list of allowed prefixes
     */
    public void setAllowedURLs(Vector allowed) {
        allowedURLs = allowed;
    }

    /**
     * @return the additional allowed "host + path" prefixes
     */
    public Vector getAllowedURLs() {
        return allowedURLs;
    }

    /**
     * Forwards the cookie switch to the underlying {@code HttpTool}.
     *
     * @param enable the value passed to {@code HttpTool.setEnableCookies}
     */
    public void setEnableCookies(boolean enable) {
        httpTool.setEnableCookies(enable);
    }

    /**
     * @return the cookie switch of the underlying {@code HttpTool}
     */
    public boolean getEnableCookies() {
        return httpTool.getEnableCookies();
    }

    /**
     * Sets the age (in seconds) after which a cached document is re-requested;
     * with a negative value cached documents are never re-requested.
     *
     * @param maxAge maximum age in seconds
     */
    public void setMaxDocumentAge(long maxAge) {
        maxDocumentAge = maxAge;
    }

    /**
     * @return the maximum age of a cached document in seconds
     */
    public long getMaxDocumentAge() {
        return maxDocumentAge;
    }

    /**
     * Sets the filter chain applied to each document before it is handed to the
     * document manager.
     *
     * @param filters the filter chain; {@code null} means no filtering
     */
    public void setFilters(FilterChain filters) {
        this.filters = filters;
    }

    /**
     * Asks the underlying {@code HttpTool} to clear its cookies.
     */
    public void clearCookies() {
        httpTool.clearCookies();
    }

    /**
     * {@code Runnable} entry point; simply performs {@link #work()}.
     */
    public void run() {
        work();
    }

    /**
     * Runs one complete crawl: queues the start URL, walks the link tree,
     * clears the task lists and logs the retrieval statistics.
     */
    public void work() {
        // seed the queue with the start URL
        RobotTask firstTask = createRobotTask(startURL, maxDepth, startReferer);
        todo.add(firstTask);

        walkTree();
        cleanUp();

        String summary = "Documents retrieved by: Web=" + countWeb
                + " Cache=" + countCache
                + " Refresh=" + countRefresh
                + " NoRefresh=" + countNoRefresh;
        log.info(summary);
    }

    /**
     * Requests the crawl to stop; {@code walkTree} checks the flag before it
     * takes the next task.
     */
    public void stopRobot() {
        stopIt = true;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Main crawl loop: takes tasks from the to-do list until it is empty or
     * {@code stopRobot()} was called, retrieves each one, honours the pause
     * flag, reports the queue size and finally calls {@code finishThreads()}.
     */
    public void walkTree() {
        while (this.todo.size() > 0 && !this.stopIt) {
            RobotTask task;
            TaskList taskList = this.visited;
            // Dequeue and mark as visited while holding the monitor of the visited list.
            synchronized (taskList) {
                task = this.todo.removeFirst();
                // Skip tasks already started, unless their URL is listed in visitMany.
                if (this.visited.contains(task) && !this.visitMany.contains(task.getUrl().toString())) {
                    this.log.debug((Object)("already visited: " + task.getUrl()));
                    continue;
                }
                // URL history is switched off by handleMemoryError() at memory level 1.
                if (this.activatedUrlHistory) {
                    this.visited.add(task);
                }
            }
            // Retry the same task after an OutOfMemoryError as long as
            // handleMemoryError() returns instead of rethrowing.
            boolean repeat = true;
            while (repeat) {
                try {
                    this.retrieveURL(task);
                    repeat = false;
                }
                catch (OutOfMemoryError memoryError) {
                    this.handleMemoryError(memoryError);
                }
            }
            // Pause: poll the sleep flag once per second, notifying the callback on every pass.
            while (this.sleep) {
                if (this.webRobotCallback != null) {
                    this.webRobotCallback.webRobotSleeping(true);
                }
                try {
                    Thread.sleep(1000L);
                }
                // An interrupt only shortens the current wait; the loop re-checks the flag.
                catch (InterruptedException interruptedException) {}
            }
            if (this.webRobotCallback != null) {
                this.webRobotCallback.webRobotSleeping(false);
            }
            if (this.webRobotCallback != null) {
                this.webRobotCallback.webRobotUpdateQueueStatus(this.todo.size());
            }
            this.spawnThread();
        }
        // NOTE(review): finishThreads() (which also finishes the doc manager) only runs
        // when a callback is registered - confirm this is intended.
        if (this.webRobotCallback != null) {
            this.finishThreads();
        }
    }

    /**
     * Reacts to an {@code OutOfMemoryError} by giving up features step by step.
     * The first error drops the URL and content histories, the second stops
     * new tasks from being queued and releases the reserve buffer; any further
     * error is rethrown to the caller.
     *
     * @param memoryError the error that was caught
     * @throws OutOfMemoryError the given error, once the memory level is neither 1 nor 2
     */
    protected void handleMemoryError(OutOfMemoryError memoryError) throws OutOfMemoryError {
        memoryLevel = memoryLevel + 1;
        log.error("OutOfMemoryError level=" + memoryLevel + "! (visited=" + visited.size() + ", todo=" + todo.size() + ")");

        if (memoryLevel == 1) {
            // first stage: forget URL and content histories
            visited.clear();
            activatedUrlHistory = false;
            content2UrlMap.clear();
            activatedContentHistory = false;
            System.gc();
            return;
        }
        if (memoryLevel == 2) {
            // second stage: stop queueing and free the reserve buffer
            activatedNewTasks = false;
            memoryBuffer = null;
            System.gc();
            return;
        }
        // every level other than 3 also touches the reserve buffer if it still exists
        if (memoryLevel != 3 && memoryBuffer != null) {
            System.err.println(memoryBuffer[0]);
        }
        throw memoryError;
    }

    /**
     * Signals the end of the crawl: notifies the callback (if one is
     * registered) and lets the document manager (if any) finish its work.
     *
     * <p>The callback is now checked for {@code null}, so subclasses can call
     * this hook without a registered callback; before, that raised a
     * {@code NullPointerException} and skipped {@code docManager.finish()}.
     */
    protected void finishThreads() {
        if (this.webRobotCallback != null) {
            this.webRobotCallback.webRobotDone();
        }
        if (this.docManager != null) {
            this.docManager.finish();
        }
    }

    /**
     * Hook called by {@code walkTree} after each processed task. This
     * implementation does nothing.
     */
    protected synchronized void spawnThread() {
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Processes a single task: retrieves the document (from the cache, the
     * file system or the web), queues the links and forms it contains and
     * hands the document to the document manager.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>Reject {@code null} tasks, exhausted depth and URLs that
     *     {@code isAllowed} filters out; append "/" to URLs without a file part.</li>
     * <li>For parameterless requests with method {@code 1}, look the document up
     *     in the cache and decide, based on {@code maxDocumentAge}, whether a
     *     conditional re-request is needed.</li>
     * <li>Retrieve the document. On failure re-queue the task while retries are
     *     left, afterwards fall back to a cached copy not older than
     *     {@code expirationAge}.</li>
     * <li>If duplicate checking is on and the content is already known under
     *     another URL, queue that URL instead and stop.</li>
     * <li>For usable documents: queue links and filled-in forms, run the
     *     filters, process and store the document. Otherwise follow the redirect
     *     or report the error to the exception handler.</li>
     * </ol>
     *
     * <p>Changes compared with the previous version: the document manager is
     * checked for {@code null} before every use in the retry-fallback and
     * duplicate-check paths; the If-Modified-Since date is reset for every
     * task so that a date belonging to another document cannot leak into an
     * uncached request; the referer built for pages with user info keeps the
     * page's protocol instead of a hard-coded "http://"; a malformed redirect
     * location is logged instead of being dropped silently.
     *
     * @param task the task to process; {@code null} is ignored
     */
    public void retrieveURL(RobotTask task) {
        if (task == null) {
            this.log.debug((Object)"Empty task found, ignoring");
            return;
        }
        long now = System.currentTimeMillis();
        this.updateProgressInfo();
        URL u = task.getUrl();
        String urlString = u.toString();
        String referer = task.getReferer();
        int depth = task.getMaxDepth();
        if (depth < 0) {
            this.log.info((Object)"Max search depth reached");
            return;
        }
        if (!this.isAllowed(u)) {
            this.log.info((Object)("Url '" + u + "' filtered out."));
            return;
        }
        if (u.getFile().equals("")) {
            // "http://host" -> "http://host/"
            try {
                urlString = urlString + "/";
                u = new URL(urlString);
                task.setUrl(u);
            }
            catch (MalformedURLException e) {
                this.log.error((Object)("URL not well formed: " + e.toString()));
                this.exceptionHandler.handleException(this, u, e);
                return;
            }
        }
        this.log.info((Object)("retrieving " + urlString));
        this.httpTool.setReferer(referer);
        // Start every task without a conditional date; it is set below only when a
        // cached copy of THIS document has to be revalidated.
        this.httpTool.setIfModifiedSince(null);
        HttpDoc doc = null;
        Vector<URL> links = null;
        boolean cached = false;
        boolean reScan = true;
        if (this.docManager != null && this.allowCaching && task.getMethod() == 1 && task.getParamString() == null) {
            doc = this.docManager.retrieveFromCache(u);
            if (doc != null) {
                ++this.countCache;
                long lastRetrieved = doc.getDateAsMilliSeconds();
                double ageInSeconds = (now - lastRetrieved) / 1000L;
                if (ageInSeconds < 0.0) {
                    this.log.warn((Object)"DocumentAge < 0!");
                }
                // a negative maxDocumentAge means cached documents are never re-requested
                reScan = this.maxDocumentAge >= 0L && ageInSeconds > (double)this.maxDocumentAge;
                if (reScan) {
                    long lastModified = doc.getLastModifiedAsMilliSeconds();
                    this.httpTool.setIfModifiedSince(new Date(lastModified));
                }
            }
        }
        if (reScan) {
            HttpDoc newDoc;
            boolean error = false;
            try {
                if (u.getProtocol().equalsIgnoreCase("file")) {
                    newDoc = this.retrieveFileURL(u, this.httpTool.getIfModifiedSince());
                } else {
                    newDoc = this.httpTool.retrieveDocument(u, task.getMethod(), task.getParamString());
                    if (newDoc != null) {
                        newDoc.setDate(now);
                    }
                    this.sleepNow();
                }
                if (newDoc != null && !newDoc.isNotModified()) {
                    if (!newDoc.isOk() && !newDoc.isRedirect()) {
                        error = true;
                    }
                } else if (doc != null) {
                    // not modified (or nothing returned): keep the cached copy, refresh its date
                    doc.setDate(now);
                    doc.setCached(false);
                    newDoc = null;
                }
            }
            catch (HttpException hex) {
                this.log.debug((Object)("Retrieval of " + u + " failed: " + hex.getMessage()));
                error = true;
                newDoc = null;
            }
            if (error) {
                int retry = task.retry();
                if (retry <= this.maxRetries) {
                    // put the task back and allow it to be visited again
                    synchronized (this.visited) {
                        this.todo.add(task);
                        this.visited.remove(task);
                    }
                    this.log.info((Object)("Adding " + u + " for retry no. " + retry));
                    return;
                }
                // Retries exhausted: fall back to a cached copy, if a cache exists at all.
                doc = this.docManager != null ? this.docManager.retrieveFromCache(u) : null;
                if (doc == null) {
                    this.log.warn((Object)("Unsuccessfull retries for " + u));
                    return;
                }
                long ageInSeconds = (now - doc.getDateAsMilliSeconds()) / 1000L;
                if (this.expirationAge < 0L || ageInSeconds < this.expirationAge) {
                    newDoc = doc;
                    cached = true;
                    this.log.info((Object)("Cached document not expired: " + u));
                } else {
                    this.log.warn((Object)("Cached document expired: " + u));
                    this.docManager.removeDocument(u);
                    return;
                }
            }
            if (newDoc != null) {
                ++this.countWeb;
                doc = newDoc;
                links = null;
                ++this.countRefresh;
            } else {
                cached = true;
                ++this.countNoRefresh;
            }
        } else {
            cached = true;
            this.log.debug((Object)("Page " + u + " retrieved from cache"));
        }
        if (doc == null) {
            this.log.info((Object)("not downloaded " + u));
            return;
        }
        String duplicate = null;
        if (this.duplicateCheck) {
            duplicate = this.getContentVisitedURL(doc);
            if (duplicate != null) {
                this.log.info((Object)("URLs with same content found: " + urlString + " = " + duplicate));
            } else if (this.docManager != null) {
                try {
                    duplicate = this.docManager.findDuplicate(doc);
                    if (duplicate != null) {
                        this.log.info((Object)("URLs with same content found in cache: " + urlString + " = " + duplicate));
                    }
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (duplicate != null) {
                String pureDuplicate = WebRobot.removeParameters(duplicate);
                String pureUrl = WebRobot.removeParameters(urlString);
                if (!pureUrl.equals(pureDuplicate) && !cached && this.docManager != null) {
                    // store this URL too, reusing the links of the document it duplicates
                    try {
                        HttpDoc linksDoc = this.docManager.retrieveFromCache(new URL(duplicate));
                        if (linksDoc != null) {
                            doc.setLinks(linksDoc.getLinks());
                        }
                        this.docManager.storeDocument(doc);
                    }
                    catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                try {
                    RobotTask newTask = this.createRobotTask(new URL(duplicate), depth, referer);
                    if (!this.visited.contains(newTask)) {
                        this.addTask(newTask);
                    }
                }
                catch (MalformedURLException e) {
                    e.printStackTrace();
                }
                return;
            }
        }
        // From here on "duplicate" is always null: a known duplicate returned above.
        if (doc.isUnauthorized()) {
            this.log.info((Object)("got HTTP Unauthorized for URL " + u));
        }
        if (doc.isOk() || cached) {
            if (this.webRobotCallback != null) {
                int contentLength = 0;
                if (doc.getContent() != null) {
                    contentLength = doc.getContent().length;
                }
                this.webRobotCallback.webRobotRetrievedDoc(urlString, contentLength);
            }
            try {
                if (doc.isHTML() && depth > 0) {
                    // pick up an explicit charset from the Content-type header, if any
                    String charset = null;
                    HttpHeader contentTypeHeader = doc.getHeader("Content-type");
                    if (contentTypeHeader != null) {
                        String contentType = contentTypeHeader.getValue();
                        int index = contentType.toLowerCase().indexOf("charset=");
                        if (index > 0) {
                            charset = contentType.substring(index + 8);
                        }
                    }
                    HtmlDocument htmlDoc = charset != null
                            ? new HtmlDocument(u, doc.getContent(), charset)
                            : new HtmlDocument(u, doc.getContent());
                    if (links == null) {
                        links = htmlDoc.getLinks();
                        doc.setLinks(links);
                    }
                    // Referer for all links of this page; credentials are stripped, the
                    // page's own protocol is kept.
                    String myReferer = u.toString();
                    if (u.getUserInfo() != null) {
                        int endindex = myReferer.indexOf("@") + 1;
                        myReferer = u.getProtocol() + "://" + myReferer.substring(endindex);
                    }
                    HashSet<URL> checkedLinks = new HashSet<URL>();
                    for (int i = 0; i < links.size(); ++i) {
                        URL link = links.elementAt(i);
                        this.log.info((Object)("Link: " + link));
                        if (!checkedLinks.add(link)) {
                            // same link appeared earlier on this page
                            continue;
                        }
                        RobotTask newTask = this.createRobotTask(link, depth - 1, myReferer);
                        if (this.visited.contains(newTask)) {
                            continue;
                        }
                        if (newTask.urlString.endsWith(".jpg")) {
                            // .jpg links jump the queue
                            this.addTaskAtStart(newTask);
                            continue;
                        }
                        this.addTask(newTask);
                    }
                    if (this.hasFormHandlers) {
                        Vector forms = htmlDoc.getElements("form");
                        for (int i = 0; i < forms.size(); ++i) {
                            ExtendedURL eurl = this.formFiller.fillForm(u, (Element)forms.elementAt(i));
                            if (eurl == null) continue;
                            RobotTask newTask = this.createRobotTask(eurl.getURL(), depth - 1, u.toString());
                            newTask.setParamString(eurl.getParams());
                            newTask.setMethod(eurl.getRequestMethod());
                            this.addTask(newTask);
                        }
                    }
                }
            }
            catch (OutOfMemoryError e) {
                // must reach walkTree(), which drives handleMemoryError()
                throw e;
            }
            catch (Throwable e) {
                this.log.error((Object)("Unexpected error while extraction links from url '" + u + "':" + e));
                e.printStackTrace();
            }
            if (this.docManager != null) {
                try {
                    if (this.filters != null) {
                        doc = this.filters.process(doc);
                    } else {
                        this.log.debug((Object)"No filters defined");
                    }
                    if (this.isProcessingAllowed(doc)) {
                        this.docManager.processDocument(doc);
                    } else {
                        // keep the document as a placeholder but preserve its original MD5 header
                        String md5 = doc.getHeaderValue("Content-MD5");
                        doc.setContent("Not for indexing".getBytes());
                        doc.setHeaderValue("Content-MD5", md5);
                    }
                    try {
                        this.docManager.storeDocument(doc);
                    }
                    catch (Exception e) {
                        this.log.warn((Object)("could not store (not for indexing) " + urlString + ": " + e.getMessage()));
                    }
                    if (this.activatedContentHistory && duplicate == null) {
                        this.setContentVisitedURL(doc, urlString);
                    }
                }
                catch (DocManagerException e1) {
                    this.log.error((Object)("could not process document: " + e1.getMessage()));
                    this.exceptionHandler.handleException(this, u, e1);
                }
                catch (FilterException e2) {
                    this.log.error((Object)e2.getMessage());
                }
            }
        } else if (doc.isRedirect()) {
            String ref = doc.getLocation();
            this.log.info((Object)("Got redirect to " + ref));
            try {
                URL u2 = new URL(u, ref);
                RobotTask newTask = this.createRobotTask(u2, depth - 1, referer);
                this.addTaskAtStart(newTask);
            }
            catch (MalformedURLException e) {
                this.log.warn((Object)("Ignoring malformed redirect location '" + ref + "' for " + u + ": " + e.getMessage()));
            }
        } else if (doc.isNotFound()) {
            this.exceptionHandler.handleException(this, u, new HttpException("Document not found"));
        } else if (doc.isUnauthorized()) {
            this.exceptionHandler.handleException(this, u, new HttpException("No authorization for the document."));
        } else {
            this.exceptionHandler.handleException(this, u, new HttpException("Unknown document error (Http return code " + doc.getHttpCode() + ")."));
        }
    }

    /**
     * Hook called at the start of {@code retrieveURL} for every non-null task.
     * This implementation does nothing.
     */
    public void updateProgressInfo() {
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Pauses for {@code sleepTime} seconds (no pause if the value is not
     * positive) and tells the callback, if any, when the pause starts and ends.
     * The pause is taken while holding this robot's monitor.
     *
     * <p>The delay is computed in {@code long} arithmetic; the former
     * {@code int} multiplication overflowed for very large sleep times and
     * could hand a negative value to {@code Thread.sleep}.
     */
    public void sleepNow() {
        if (this.sleepTime <= 0) {
            return;
        }
        synchronized (this) {
            if (this.webRobotCallback != null) {
                this.webRobotCallback.webRobotSleeping(true);
            }
            try {
                Thread.sleep(this.sleepTime * 1000L);
            }
            catch (InterruptedException ignored) {
                // An interrupt only cuts the pause short; the crawl continues as before.
            }
            if (this.webRobotCallback != null) {
                this.webRobotCallback.webRobotSleeping(false);
            }
        }
    }

    /**
     * Builds an {@code HttpDoc} for a {@code file:} URL, mimicking HTTP status
     * codes: 404 if the file does not exist, 304 if it was not modified after
     * {@code ifModifiedSince}, 200 with the file content otherwise.
     *
     * @param url             the file URL
     * @param ifModifiedSince only read the file if it is newer than this date;
     *                        {@code null} means always read it
     * @return the document; in the 404 case only the status code (and possibly
     *         the content-type header) is set
     * @throws HttpException wrapping the message of any exception raised while
     *                       accessing the file
     */
    private HttpDoc retrieveFileURL(URL url, Date ifModifiedSince) throws HttpException {
        HttpDoc result = new HttpDoc();
        try {
            String host = url.getHost();
            String path = url.getFile();
            if (host != null && !host.equals("")) {
                // a host part turns the path into a "//host/path" location
                path = "//" + host + path;
            } else if (path.startsWith("\\") || path.startsWith("/")) {
                // local file: drop the single leading separator
                path = path.substring(1);
            }

            String mimeType = this.getMimeTypeForFilename(path);
            if (mimeType != null) {
                result.addHeader(new HttpHeader("content-type", mimeType));
            }

            File target = new File(path);
            if (!target.exists()) {
                result.setHttpCode("httpcode 404");
                return result;
            }

            long modified = target.lastModified();
            long threshold = 0L;
            if (ifModifiedSince != null) {
                threshold = ifModifiedSince.getTime();
            }
            if (modified > threshold) {
                byte[] bytes = this.readFileToByteArray(target);
                result.setContent(bytes);
                result.setHttpCode("httpcode 200");
            } else {
                result.setHttpCode("httpcode 304");
            }
            result.setLastModified(modified);
            result.setDate(System.currentTimeMillis());
            result.setURL(url);
            return result;
        }
        catch (Exception failure) {
            throw new HttpException(failure.getMessage());
        }
    }

    /**
     * Guesses the MIME type from the file name's extension.
     *
     * @param filename the file name to inspect
     * @return {@code "text/html"} for names ending in ".html" or ".htm"
     *         (case-sensitive), otherwise {@code null}
     */
    protected String getMimeTypeForFilename(String filename) {
        boolean looksLikeHtml = filename.endsWith(".html") || filename.endsWith(".htm");
        return looksLikeHtml ? "text/html" : null;
    }

    /**
     * Resets the robot after a crawl: clears the stop request and empties the
     * visited and to-do lists.
     */
    protected void cleanUp() {
        stopIt = false;
        visited.clear();
        todo.clear();
    }

    /**
     * Appends a task to the to-do list if {@code taskAddAllowed} accepts it and
     * queueing of new tasks is still active.
     *
     * @param task the task to queue
     */
    protected void addTask(RobotTask task) {
        if (!taskAddAllowed(task)) {
            return;
        }
        if (activatedNewTasks) {
            todo.add(task);
        }
    }

    /**
     * Puts a task at the front of the to-do list if {@code taskAddAllowed}
     * accepts it and queueing of new tasks is still active.
     *
     * @param task the task to queue
     */
    protected void addTaskAtStart(RobotTask task) {
        if (!taskAddAllowed(task)) {
            return;
        }
        if (activatedNewTasks) {
            todo.addAtStart(task);
        }
    }

    /**
     * Decides whether a task may be queued.
     *
     * @param task the candidate task
     * @return {@code false} for a {@code null} task, for a URL rejected by
     *         {@code isAllowed}, or for a task already in the to-do list;
     *         {@code true} otherwise
     */
    protected boolean taskAddAllowed(RobotTask task) {
        if (task == null) {
            log.info("Null task not allowed");
            return false;
        }
        // the to-do list is only consulted for URLs that passed the filter
        return isAllowed(task.getUrl()) && !todo.contains(task);
    }

    /**
     * Checks a URL against the scope rules, the optional {@code URLCheck} and
     * robots.txt, in that order.
     *
     * @param u the URL to check
     * @return {@code true} only if all three checks accept the URL
     */
    protected boolean isAllowed(URL u) {
        if (!basicURLCheck(u)) {
            return false;
        }
        if (urlCheck != null && !urlCheck.checkURL(u)) {
            log.debug("not allowed by URLCheck:" + u);
            return false;
        }
        if (!robCheck.ok(u)) {
            log.debug("not allowed by robots.txt:" + u);
            return false;
        }
        return true;
    }

    /**
     * Decides whether a retrieved document may be passed to the document
     * manager for processing.
     *
     * @param doc the retrieved document
     * @return {@code false} if the optional {@code URLCheck} or the download
     *         rule set of the {@code HttpTool} forbids processing, otherwise
     *         {@code true}
     */
    protected boolean isProcessingAllowed(HttpDoc doc) {
        URL docUrl = doc.getURL();
        boolean vetoedByUrlCheck = urlCheck != null && !urlCheck.checkURLForProcessing(docUrl);
        if (vetoedByUrlCheck) {
            log.debug("processing not allowed by URLCheck:" + docUrl);
            return false;
        }
        DownloadRuleSet rules = httpTool.getDownloadRuleSet();
        boolean vetoedByRules = rules != null && !rules.processAllowed(doc.getHttpHeaders());
        if (vetoedByRules) {
            log.debug("processing not allowed by DownloadRuleSet:" + docUrl);
            return false;
        }
        return true;
    }

    /**
     * Checks whether a URL lies inside the configured crawl scope.
     *
     * <p>A URL is accepted if any of the following holds, tested in this order:
     * walking to other hosts is enabled; "host + path" starts with the start
     * directory; the whole start host is allowed and the hosts are equal; the
     * flexible host check is on and the hosts are equal after removing a
     * leading "www."; the whole domain is allowed and the host is the start
     * host's domain or a sub-domain of it; "host + path" starts with one of the
     * additional allowed prefixes.
     *
     * <p>The domain test now respects label boundaries. The former plain suffix
     * comparison also accepted unrelated hosts that merely ended with the same
     * characters (e.g. "evilexample.com" for the domain "example.com").
     *
     * @param currURL the URL to check
     * @return {@code true} if the URL is in scope
     */
    protected boolean basicURLCheck(URL currURL) {
        String currURLStr = currURL.getHost() + currURL.getPath();
        String currHost = currURL.getHost().toLowerCase();
        String startHost = this.startURL.getHost().toLowerCase();
        if (this.walkToOtherHosts) {
            return true;
        }
        if (currURLStr.startsWith(this.startDir)) {
            return true;
        }
        if (this.allowWholeHost && currURL.getHost().equalsIgnoreCase(this.startURL.getHost())) {
            return true;
        }
        if (this.flexibleHostCheck && this.cutWWW(currHost).equalsIgnoreCase(this.cutWWW(startHost))) {
            return true;
        }
        if (this.allowWholeDomain) {
            String domain = this.getDomain(startHost);
            // match the domain itself or a host one or more labels below it
            if (currHost.equals(domain) || currHost.endsWith("." + domain)) {
                return true;
            }
        }
        for (int i = 0; i < this.allowedURLs.size(); ++i) {
            String prefix = (String)this.allowedURLs.elementAt(i);
            if (currURLStr.startsWith(prefix)) {
                return true;
            }
        }
        this.log.debug((Object)("URL " + currURLStr + " not allowed"));
        return false;
    }

    /**
     * Strips a leading "www." (matched case-insensitively) from a host name.
     *
     * @param hostname the host name to shorten
     * @return the host name without the first four characters if it starts
     *         with "www.", otherwise the unchanged host name
     */
    private String cutWWW(String hostname) {
        final boolean hasPrefix = hostname.toLowerCase().startsWith("www.");
        return hasPrefix ? hostname.substring(4) : hostname;
    }

    /**
     * Returns everything after the first dot of a host name.
     *
     * @param hostname the host name to inspect
     * @return the part following the first ".", or the unchanged host name
     *         if it contains no dot
     */
    private String getDomain(String hostname) {
        final int firstDot = hostname.indexOf(".");
        return firstDot >= 0 ? hostname.substring(firstDot + 1) : hostname;
    }

    /**
     * @return the exception handler currently held by this robot
     */
    public RobotExceptionHandler getExceptionHandler() {
        return exceptionHandler;
    }

    /**
     * Replaces the exception handler. A {@code null} argument is ignored,
     * so the previously set handler stays in place.
     *
     * @param newExceptionHandler the handler to use from now on
     */
    public void setExceptionHandler(RobotExceptionHandler newExceptionHandler) {
        if (newExceptionHandler == null) {
            return;
        }
        this.exceptionHandler = newExceptionHandler;
    }

    /**
     * String variant of {@code setStartURL}: parses the argument as a URL
     * and passes it on. If the string is not a valid URL, the stack trace
     * is printed and the start URL is left unchanged.
     *
     * @param startURL textual form of the start URL
     */
    public void setStart(String startURL) {
        try {
            final URL parsed = new URL(startURL);
            this.setStartURL(parsed);
        } catch (MalformedURLException malformed) {
            malformed.printStackTrace();
        }
    }

    /**
     * String variant of {@code getStartURL}.
     *
     * @return the external form of the start URL, or {@code null} if
     *         {@code getStartURL()} returns {@code null}
     */
    public String getStart() {
        final URL start = this.getStartURL();
        return start == null ? null : start.toExternalForm();
    }

    /**
     * Calls {@code finish()} on the collaborators of this robot, in the
     * order {@code httpTool}, {@code robCheck}, {@code docManager}.
     * Collaborators that are {@code null} are skipped.
     */
    public void finish() {
        if (httpTool != null) {
            httpTool.finish();
        }
        if (robCheck != null) {
            robCheck.finish();
        }
        if (docManager != null) {
            docManager.finish();
        }
    }

    /**
     * Developer helper: prints one {@code robot.<field> = <field>;}
     * assignment line for every non-final, non-static field declared in
     * {@code WebRobot}, padded to column 50 and followed by the field type
     * as a trailing comment.
     *
     * @param args ignored; a warning is printed to stderr if any are given
     */
    public static void main(String[] args) {
        if (args.length > 0) {
            System.err.println("Arguments will be ignored!");
        }
        final Field[] declared = WebRobot.class.getDeclaredFields();
        final StringBuffer line = new StringBuffer(60);
        for (int idx = 0; idx < declared.length; ++idx) {
            final Field field = declared[idx];
            final int mods = field.getModifiers();
            if (Modifier.isFinal(mods) || Modifier.isStatic(mods)) {
                continue;
            }
            final String name = field.getName();
            line.setLength(0);
            line.append("\t\trobot.").append(name).append(" = ").append(name).append(";");
            // Pad so that the type comments line up.
            while (line.length() < 50) {
                line.append(" ");
            }
            line.append("// (").append(field.getType().getName()).append(")");
            System.out.println(line.toString());
        }
    }

    /**
     * Looks up the URL string stored in {@code content2UrlMap} under the
     * content MD5 of the given document. The lookup is performed while
     * holding the monitor of the map.
     *
     * NOTE(review): the decompiler reported "Removed try catching itself -
     * possible behaviour change" for this method, so the original source
     * may have differed slightly around the synchronized block.
     *
     * @param doc the document whose content MD5 is used as key
     * @return the stored URL string, or {@code null} if the map has no
     *         value for that key
     */
    public String getContentVisitedURL(HttpDoc doc) {
        final String md5 = doc.getContentMD5();
        synchronized (this.content2UrlMap) {
            return (String)this.content2UrlMap.get(md5);
        }
    }

    /**
     * Stores a URL string in {@code content2UrlMap} under the content MD5
     * of the given document, replacing any previous entry for that key.
     * The update is performed while holding the monitor of the map.
     *
     * NOTE(review): the decompiler reported "Removed try catching itself -
     * possible behaviour change" for this method, so the original source
     * may have differed slightly around the synchronized block.
     *
     * @param doc the document whose content MD5 is used as key
     * @param url the URL string to remember for that content
     */
    public void setContentVisitedURL(HttpDoc doc, String url) {
        final String md5 = doc.getContentMD5();
        synchronized (this.content2UrlMap) {
            this.content2UrlMap.put(md5, url);
        }
    }

    /**
     * Creates a task for the given URL after passing the URL through
     * {@link #removeWasteParameters(URL)}.
     *
     * @param url          the URL of the task
     * @param maxDepth     passed unchanged to the {@code RobotTask} constructor
     * @param startReferer passed unchanged to the {@code RobotTask} constructor
     * @return the new task
     */
    private final RobotTask createRobotTask(URL url, int maxDepth, String startReferer) {
        final URL cleaned = this.removeWasteParameters(url);
        return new RobotTask(cleaned, maxDepth, startReferer);
    }

    /**
     * Replaces the list of waste parameters. The vector is stored by
     * reference, not copied.
     *
     * {@link #removeWasteParameters(URL)} hands this vector to
     * {@link #removeParametersFromString(String, Vector)}, which casts each
     * element to {@code String} and drops every query parameter whose text
     * starts with {@code element + "="}.
     *
     * @param wasteParameters parameter names to strip from URLs; may be
     *                        {@code null}, in which case nothing is stripped
     */
    public void setWasteParameters(Vector wasteParameters) {
        this.wasteParameters = wasteParameters;
    }

    /**
     * @return the vector of waste parameters currently held by this robot
     *         (the internal instance, not a copy); may be {@code null}
     */
    public Vector getWasteParameters() {
        return wasteParameters;
    }

    /**
     * Returns the given URL without the query parameters listed in
     * {@code wasteParameters}.
     *
     * The URL's external form is filtered by
     * {@link #removeParametersFromString(String, Vector)}. Only when the
     * filtered text differs from the original is a new {@code URL} built
     * from it.
     *
     * @param url the URL to clean
     * @return a new URL without the waste parameters, or the original
     *         {@code url} if nothing was removed or the filtered text could
     *         not be parsed (the stack trace is printed in that case)
     */
    public URL removeWasteParameters(URL url) {
        String urlString = url.toExternalForm();
        String newUrlString = WebRobot.removeParametersFromString(urlString, this.wasteParameters);
        // Compare by content. The former reference comparison only worked
        // because the helper returns the very same String when nothing changes.
        if (!urlString.equals(newUrlString)) {
            try {
                url = new URL(newUrlString);
            }
            catch (MalformedURLException ex) {
                ex.printStackTrace();
            }
        }
        return url;
    }

    /**
     * Removes unwanted query parameters from a URL string.
     *
     * The query part is the text between the first "?" and the first "#"
     * after it (or the end of the string). It is split at "&amp;"; every
     * piece that starts with {@code name + "="} for one of the given names
     * is dropped. The remaining pieces are joined again and the fragment
     * ("#..." part), if any, is appended unchanged.
     *
     * The input string itself (same reference) is returned when
     * {@code wasteParameters} is {@code null} or empty, when the string has
     * no "?" or starts with one, or when no parameter was dropped.
     *
     * @param urlString       the URL text to filter
     * @param wasteParameters names of the parameters to drop; every element
     *                        is cast to {@code String}
     * @return the filtered URL text, or {@code urlString} itself if nothing
     *         was removed
     */
    public static String removeParametersFromString(String urlString, Vector wasteParameters) {
        if (wasteParameters == null || wasteParameters.size() == 0) {
            return urlString;
        }
        final int queryMark = urlString.indexOf("?");
        if (queryMark <= 0) {
            return urlString;
        }

        // Separate the query from an optional trailing fragment.
        final int fragmentMark = urlString.indexOf("#", queryMark);
        final String query;
        final String fragment;
        if (fragmentMark >= 0) {
            query = urlString.substring(queryMark + 1, fragmentMark);
            fragment = urlString.substring(fragmentMark);
        } else {
            query = urlString.substring(queryMark + 1);
            fragment = null;
        }

        final StringBuffer rebuilt = new StringBuffer(urlString.substring(0, queryMark));
        final StringTokenizer pairs = new StringTokenizer(query, "&");
        String separator = "?";
        boolean droppedAny = false;
        while (pairs.hasMoreTokens()) {
            final String pair = pairs.nextToken();
            boolean waste = false;
            for (int i = 0; !waste && i < wasteParameters.size(); ++i) {
                final String name = (String)wasteParameters.elementAt(i);
                waste = pair.startsWith(name + "=");
            }
            if (waste) {
                droppedAny = true;
            } else {
                rebuilt.append(separator).append(pair);
                separator = "&";
            }
        }

        if (!droppedAny) {
            // Callers rely on getting the identical String back when nothing changed.
            return urlString;
        }
        if (fragment != null) {
            rebuilt.append(fragment);
        }
        return rebuilt.toString();
    }

    /**
     * Stores the given value in the {@code maxRetries} field. No range
     * check is performed.
     *
     * NOTE(review): presumably the number of retries for a failed retrieval;
     * the code using the field is not part of this section - confirm there.
     *
     * @param maxRetries the new value
     */
    public void setMaxRetries(int maxRetries) {
        this.maxRetries = maxRetries;
    }

    /**
     * @return the current value of the {@code maxRetries} field
     */
    public int getMaxRetries() {
        return maxRetries;
    }

    /**
     * Stores the given value in the {@code expirationAge} field. No range
     * check is performed.
     *
     * @param age the new expiration age (unit not visible here - TODO confirm)
     */
    public void setExpirationAge(long age) {
        expirationAge = age;
    }

    /**
     * @return the current value of the {@code expirationAge} field
     */
    public long getExpirationAge() {
        return expirationAge;
    }

    /**
     * Cuts a URL string at its first "?".
     *
     * @param url the URL text
     * @return the text before the first "?", or the unchanged text if it
     *         contains no "?"
     */
    private static final String removeParameters(String url) {
        final int queryMark = url.indexOf("?");
        if (queryMark < 0) {
            return url;
        }
        return url.substring(0, queryMark);
    }

    /**
     * Reads the content of a file into a byte array.
     *
     * The array is sized from {@code file.length()} and filled by reading
     * repeatedly, because a single {@code read} call is allowed to return
     * fewer bytes than requested. If the end of the file is reached before
     * the array is full (for example because the file shrank meanwhile),
     * only the bytes actually read are returned.
     *
     * NOTE(review): the decompiler reported "Removed try catching itself -
     * possible behaviour change" for this method, so the original source
     * may have differed slightly around the try/finally block.
     *
     * @param file the file to read
     * @return the bytes read from the file
     * @throws IOException if the file cannot be opened or read, or if it is
     *                     larger than {@code Integer.MAX_VALUE} bytes
     */
    protected byte[] readFileToByteArray(File file) throws IOException {
        final long length = file.length();
        if (length > Integer.MAX_VALUE) {
            // The (int) cast below would silently overflow for such files.
            throw new IOException("File too large to read into a byte array: " + file
                + " (" + length + " bytes)");
        }
        final byte[] buffer = new byte[(int)length];
        final FileInputStream in = new FileInputStream(file);
        try {
            int filled = 0;
            while (filled < buffer.length) {
                final int count = in.read(buffer, filled, buffer.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
            if (filled == buffer.length) {
                return buffer;
            }
            // Early end of file: do not hand out a zero-padded tail.
            final byte[] truncated = new byte[filled];
            System.arraycopy(buffer, 0, truncated, 0, filled);
            return truncated;
        }
        finally {
            try {
                in.close();
            }
            catch (IOException ignored) {
                // Best effort: a failure to close must not mask the result
                // or an exception from reading.
            }
        }
    }
}

