package com.rapidminer.operator.loganalysis;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.ListDataRowReader;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import marytts.signalproc.adaptation.BaselineAdaptationSet;
import opennlp.tools.namefind.NameFinderME;
import org.jdom.JDOMException;
import org.polliwog.WeblogException;
import org.polliwog.data.Hit;
import org.polliwog.data.LogEntry;
import org.polliwog.data.LogEntryFormat;

/* loaded from: input_file:WEB-INF/lib/rapidminer-plugintext-1.0.0.jar:com/rapidminer/operator/loganalysis/LogFileSource.class */
/**
 * Operator that reads web server log files and delivers them as an {@code ExampleSet}.
 *
 * <p>Every regular file in the configured log directory is read line by line. Each
 * non-empty line that does not start with {@code #} is parsed into a {@code Hit} using a
 * {@code LogEntryFormat} built from the configured format file. Hits that pass all
 * configured filters become one data row with the columns session, ip, agent, uri,
 * referer, time, os_name, browser and language (in that order).
 *
 * <p>Instances keep per-run state in fields (matchers, filters, session maps), so a single
 * instance must not run {@link #apply()} concurrently.
 */
public class LogFileSource extends Operator {
    /** Number of attributes (columns) of every generated data row. */
    private static final int NUM_ATTRIBUTES = 9;

    // Value type codes handed to AttributeFactory.createAttribute.
    // NOTE(review): presumably Ontology.NOMINAL (1) and Ontology.NUMERICAL (2) - confirm.
    private static final int NOMINAL_TYPE = 1;
    private static final int NUMERIC_TYPE = 2;

    /** Error code used for every "could not read file" style UserError in this operator. */
    private static final int ERROR_CANNOT_READ = 302;

    private static final String PARAMETER_CONFIG_FILE = "config_file";
    private static final String PARAMETER_LOG_DIR = "log_dir";
    private static final String PARAMETER_DNS_LOOKUP = "dns_lookup";
    private static final String PARAMETER_ROBOT_FILTER = "robot_filter";
    private static final String PARAMETER_FILETYPE_FILTER = "filetype_filter";
    private static final String PARAMETER_ONLY_HTTP_200 = "only_HTTP_200";
    private static final String PARAMETER_BROWSER_MATCHER = "browser_matcher";
    private static final String PARAMETER_OS_MATCHER = "os_matcher";
    private static final String PARAMETER_LANGUAGE_MATCHER = "language_matcher";
    private static final String PARAMETER_SESSION_TIMEOUT = "session_timeout";

    private Attribute ip;
    private Attribute agent;
    private Attribute uri;
    private Attribute time;
    private Attribute referer;
    private Attribute language;
    private Attribute browser;
    private Attribute os;
    private Attribute sessionId;
    private RegularExpressionMatcher osMatcher;
    private RegularExpressionMatcher browserMatcher;
    private RegularExpressionMatcher languageMatcher;
    private List<LogEntryFilter> filters;
    /** Maps a visitor key (hostname + ":" + user agent) to its current session number. */
    private Map<String, Integer> sessionMap;
    /** Maps a visitor key to the most recent hit seen for that visitor. */
    private Map<String, Hit> visitorMap;
    private int currentSession;
    private int sessionTimeout;
    private boolean reverseDNSLookup;
    /** Cache of reverse DNS results, keyed by the host string that was looked up. */
    Map<String, String> dnsMap;

    public LogFileSource(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.sessionMap = null;
        this.visitorMap = null;
        this.currentSession = 0;
        this.dnsMap = new HashMap<String, String>();
    }

    /**
     * Reads all log files of the configured directory and returns the accepted hits.
     *
     * @return a single-element array holding the created example set
     * @throws OperatorException if a parameter is missing, the format file, the robot filter
     *         file or the filetype filter cannot be read, or the log directory cannot be listed
     */
    @Override
    public IOObject[] apply() throws OperatorException {
        this.browserMatcher = createListMatcher(PARAMETER_BROWSER_MATCHER);
        this.osMatcher = createListMatcher(PARAMETER_OS_MATCHER);
        this.languageMatcher = createListMatcher(PARAMETER_LANGUAGE_MATCHER);
        this.filters = createFilters();
        this.reverseDNSLookup = getParameterAsBoolean(PARAMETER_DNS_LOOKUP);

        MemoryExampleTable table = new MemoryExampleTable(createAttributes());
        List<DataRow> rows = new LinkedList<DataRow>();
        File configFile = getParameterAsFile(PARAMETER_CONFIG_FILE);
        try {
            LogEntryFormat format = new LogEntryFormat(configFile, ".gz");
            // Session bookkeeping is reset so that repeated runs start numbering from scratch.
            this.currentSession = 0;
            this.sessionMap = new HashMap<String, Integer>();
            this.visitorMap = new HashMap<String, Hit>();
            this.sessionTimeout = getParameterAsInt(PARAMETER_SESSION_TIMEOUT);

            File logDir = getParameterAsFile(PARAMETER_LOG_DIR);
            File[] logFiles = logDir.listFiles(new FileFilter() {
                @Override
                public boolean accept(File candidate) {
                    return candidate.isFile();
                }
            });
            if (logFiles == null) {
                // File.listFiles returns null when the path is not a directory or on I/O error.
                throw new UserError(this, ERROR_CANNOT_READ, logDir.getAbsolutePath(), "not a readable directory");
            }
            for (File logFile : logFiles) {
                readLogFile(logFile, format, rows);
            }
            table.readExamples(new ListDataRowReader(rows.iterator()));
            return new IOObject[]{table.createExampleSet()};
        } catch (IOException e) {
            throw new UserError(this, ERROR_CANNOT_READ, configFile.getAbsolutePath(), e);
        } catch (JDOMException e) {
            throw new UserError(this, ERROR_CANNOT_READ, configFile.getAbsolutePath(), e);
        } catch (WeblogException e) {
            throw new UserError(this, ERROR_CANNOT_READ, configFile.getAbsolutePath(), e);
        }
    }

    /** Builds a matcher from the given list parameter, or returns null if it is not set. */
    private RegularExpressionMatcher createListMatcher(String parameterName) throws OperatorException {
        if (!isParameterSet(parameterName)) {
            return null;
        }
        return new RegularExpressionMatcher(getParameterList(parameterName));
    }

    /** Builds the list of hit filters selected by the robot, filetype and HTTP-200 parameters. */
    private List<LogEntryFilter> createFilters() throws OperatorException {
        List<LogEntryFilter> result = new LinkedList<LogEntryFilter>();

        if (isParameterSet(PARAMETER_ROBOT_FILTER)) {
            File robotFile = getParameterAsFile(PARAMETER_ROBOT_FILTER);
            FileReader robotReader = null;
            try {
                robotReader = new FileReader(robotFile);
                final RegularExpressionMatcher robotMatcher = new RegularExpressionMatcher(robotReader, false);
                result.add(new LogEntryFilter() {
                    @Override
                    public boolean accept(Hit hit) {
                        return !robotMatcher.isSubstringMatch(hit.getUserAgent());
                    }
                });
            } catch (IOException e) {
                throw new UserError(this, ERROR_CANNOT_READ, robotFile, e);
            } finally {
                // NOTE(review): assumes the matcher consumes the reader in its constructor
                // (it declares IOException there, its match methods do not) - confirm.
                closeQuietly(robotReader);
            }
        }

        if (isParameterSet(PARAMETER_FILETYPE_FILTER)) {
            String fileTypeExpressions = getParameterAsString(PARAMETER_FILETYPE_FILTER);
            try {
                final RegularExpressionMatcher fileTypeMatcher =
                        new RegularExpressionMatcher(new StringReader(fileTypeExpressions), false);
                result.add(new LogEntryFilter() {
                    @Override
                    public boolean accept(Hit hit) {
                        return !fileTypeMatcher.isSubstringMatch(hit.getRequestURI().getPath());
                    }
                });
            } catch (IOException e) {
                // The parameter is a plain string, so report its text rather than a file.
                throw new UserError(this, ERROR_CANNOT_READ, fileTypeExpressions, e);
            }
        }

        if (getParameterAsBoolean(PARAMETER_ONLY_HTTP_200)) {
            result.add(new LogEntryFilter() {
                @Override
                public boolean accept(Hit hit) {
                    return hit.getStatus() == 200;
                }
            });
        }
        return result;
    }

    /** Creates the nine output attributes and returns them in table column order. */
    private List<Attribute> createAttributes() {
        this.ip = AttributeFactory.createAttribute("ip", NOMINAL_TYPE);
        this.agent = AttributeFactory.createAttribute("agent", NOMINAL_TYPE);
        this.uri = AttributeFactory.createAttribute("uri", NOMINAL_TYPE);
        this.referer = AttributeFactory.createAttribute("referer", NOMINAL_TYPE);
        this.os = AttributeFactory.createAttribute("os_name", NOMINAL_TYPE);
        this.language = AttributeFactory.createAttribute("language", NOMINAL_TYPE);
        this.browser = AttributeFactory.createAttribute("browser", NOMINAL_TYPE);
        this.time = AttributeFactory.createAttribute("time", NUMERIC_TYPE);
        this.sessionId = AttributeFactory.createAttribute("session", NOMINAL_TYPE);

        List<Attribute> columns = new LinkedList<Attribute>();
        columns.add(this.sessionId);
        columns.add(this.ip);
        columns.add(this.agent);
        columns.add(this.uri);
        columns.add(this.referer);
        columns.add(this.time);
        columns.add(this.os);
        columns.add(this.browser);
        columns.add(this.language);
        return columns;
    }

    /**
     * Parses one log file and appends a data row for every hit accepted by all filters.
     * Problems with a single file are logged as warnings and never abort the whole run.
     */
    private void readLogFile(File logFile, LogEntryFormat format, List<DataRow> rows) {
        String path = String.valueOf(logFile.getAbsolutePath());
        int attemptedLines = 0;
        int unparseableLines = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(logFile));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String trimmed = line.trim();
                if (trimmed.length() == 0 || trimmed.startsWith("#")) {
                    continue;
                }
                attemptedLines++;
                LogEntry entry;
                try {
                    entry = format.createEntry(trimmed, Hit.class);
                } catch (WeblogException e) {
                    unparseableLines++;
                    continue;
                }
                if (entry == null) {
                    continue;
                }
                Hit hit = (Hit) entry;
                if (passesAllFilters(hit)) {
                    rows.add(processEntry(hit));
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which previously produced the same message.
            logWarning(path + ": Could not read this file. Ignoring it");
        } catch (RuntimeException e) {
            // Rows collected before the failure are kept; the remainder of the file is skipped.
            logWarning(path + ": Unexpected error while reading this file. Ignoring the rest of it: " + e);
        } finally {
            closeQuietly(reader);
        }
        // Reported once per file instead of once per line.
        if (unparseableLines > 0) {
            logWarning(path + ": Could not read " + unparseableLines + " lines out of " + attemptedLines);
        }
    }

    /** Returns true if every configured filter accepts the hit. */
    private boolean passesAllFilters(Hit hit) {
        for (LogEntryFilter filter : this.filters) {
            if (!filter.accept(hit)) {
                return false;
            }
        }
        return true;
    }

    /** Writes a warning to the log of the process this operator belongs to. */
    private void logWarning(String message) {
        getProcess().getLog().logWarning(message);
    }

    /** Closes the resource if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a reader fails.
        }
    }

    /**
     * Converts a hit into a data row and assigns it to a session.
     *
     * <p>A visitor is identified by hostname plus user agent. A new session starts when the
     * visitor has not been seen before or when the time since the visitor's previous hit
     * exceeds the session timeout.
     */
    private DataRow processEntry(Hit hit) {
        String visitorKey = String.valueOf(hit.getHostname()) + ":" + hit.getUserAgent();
        Hit previousHit = this.visitorMap.put(visitorKey, hit);

        // NOTE(review): getDate() presumably returns java.util.Date, making this difference
        // (and therefore session_timeout) milliseconds - confirm.
        boolean startsNewSession = previousHit == null
                || hit.getDate().getTime() - previousHit.getDate().getTime() > this.sessionTimeout;
        int session;
        if (startsNewSession) {
            this.currentSession++;
            session = this.currentSession;
            this.sessionMap.put(visitorKey, Integer.valueOf(session));
        } else {
            session = this.sessionMap.get(visitorKey).intValue();
        }

        DoubleArrayDataRow row = new DoubleArrayDataRow(new double[NUM_ATTRIBUTES]);
        String host = hit.getHostname();
        if (this.reverseDNSLookup) {
            host = reverseDNSLookUp(host);
        }
        setNominal(row, this.ip, host);
        setNominal(row, this.agent, hit.getUserAgent());
        setNominal(row, this.uri, hit.getRequestURI().toString());

        Object refererUri = hit.getRefererURI();
        if (refererUri != null) {
            setNominal(row, this.referer, refererUri.toString());
        } else {
            row.set(this.referer, Double.NaN);
        }

        // Timestamp divided by 60000 and truncated to int (minutes if getTime() is in ms).
        row.set(this.time, (int) (hit.getDate().getTime() / 60000));

        setMatchedLabel(row, this.browser, this.browserMatcher, hit.getUserAgent());
        setMatchedLabel(row, this.os, this.osMatcher, hit.getUserAgent());
        // NOTE(review): the language is matched against the user agent string as well.
        setMatchedLabel(row, this.language, this.languageMatcher, hit.getUserAgent());

        setNominal(row, this.sessionId, "s" + session);
        return row;
    }

    /** Stores a string value in the row via the attribute's string-to-index mapping. */
    private static void setNominal(DoubleArrayDataRow row, Attribute attribute, String value) {
        row.set(attribute, attribute.getMapping().mapString(value));
    }

    /**
     * Stores the matcher's label for the text, the fallback label if nothing matches,
     * or NaN if no matcher is configured.
     */
    private static void setMatchedLabel(DoubleArrayDataRow row, Attribute attribute,
            RegularExpressionMatcher matcher, String text) {
        if (matcher == null) {
            row.set(attribute, Double.NaN);
            return;
        }
        String label = matcher.getMatch(text);
        if (label == null) {
            label = NameFinderME.OTHER;
        }
        setNominal(row, attribute, label);
    }

    /**
     * Resolves a host string to a host name, caching the outcome.
     *
     * <p>Failed lookups are cached too (as the unchanged input), so an unresolvable host
     * triggers only one lookup per operator instance.
     *
     * @return the resolved host name, or the input itself if resolution fails
     */
    private String reverseDNSLookUp(String str) {
        String cached = this.dnsMap.get(str);
        if (cached != null) {
            return cached;
        }
        String resolved;
        try {
            resolved = InetAddress.getByName(str).getHostName();
        } catch (UnknownHostException e) {
            resolved = str;
        }
        this.dnsMap.put(str, resolved);
        return resolved;
    }

    /** This operator consumes no input objects. */
    @Override
    public Class<?>[] getInputClasses() {
        return new Class[0];
    }

    /** This operator delivers exactly one {@code ExampleSet}. */
    @Override
    public Class<?>[] getOutputClasses() {
        return new Class[]{ExampleSet.class};
    }

    /** Declares the parameters of this operator in addition to the inherited ones. */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterTypeFile configFile = new ParameterTypeFile(PARAMETER_CONFIG_FILE, "the format configuration file", "xml", false);
        configFile.setExpert(false);
        types.add(configFile);

        ParameterTypeDirectory logDir = new ParameterTypeDirectory(PARAMETER_LOG_DIR, "the directory containing the log files", false);
        logDir.setExpert(false);
        types.add(logDir);

        types.add(new ParameterTypeBoolean(PARAMETER_DNS_LOOKUP, "Perform reverse dns lookup on the client ip", false));
        types.add(new ParameterTypeFile(PARAMETER_ROBOT_FILTER, "file that contains regular expressions on user agents that should be filtered out. Each line must contain exactly one regular expression.", BaselineAdaptationSet.TEXT_EXTENSION_DEFAULT, true));
        types.add(new ParameterTypeString(PARAMETER_FILETYPE_FILTER, "file that contains regular expressions on files that should be filtered out. Each line must contain exactly one regular expression.", true));
        types.add(new ParameterTypeBoolean(PARAMETER_ONLY_HTTP_200, "Consider only entries with HTTP Response code 200", false));
        types.add(new ParameterTypeList(PARAMETER_BROWSER_MATCHER, "file that contains regular expressions to match browser types. Each line must contain exactly an expression of the form <name>:<regular expression>.", new ParameterTypeString("regular_expression", "matches browser types", false)));
        types.add(new ParameterTypeList(PARAMETER_OS_MATCHER, "file that contains regular expressions to match os types. Each line must contain exactly an expression of the form <name>:<regular expression>.", new ParameterTypeString("regular_expression", "matches os types", false)));
        types.add(new ParameterTypeList(PARAMETER_LANGUAGE_MATCHER, "file that contains regular expressions to match languages. Each line must contain exactly an expression of the form <name>:<regular expression>.", new ParameterTypeString("regular_expression", "matches_languages", false)));
        types.add(new ParameterTypeInt(PARAMETER_SESSION_TIMEOUT, "Time between two requests from the same source, such that the second request can be assumed to be a new session", 0, Integer.MAX_VALUE, 400000));
        return types;
    }
}
