/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.operator.preprocessing.filter;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.preprocessing.AbstractDataProcessing;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import java.util.List;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Data-processing operator that overwrites every attribute value of an example set with
 * the product of a term-frequency factor and an inverse-document-frequency factor.
 *
 * <p>Each example is treated as one document and each attribute returned by
 * {@code exampleSet.getAttributes()} as one term. For attribute {@code j} the inverse
 * document frequency is {@code ln(numberOfExamples / df_j)}, where {@code df_j} is the
 * number of examples whose value for that attribute is greater than 0.</p>
 *
 * <p>The input example set is modified in place and returned.</p>
 */
public class TFIDFFilter extends AbstractDataProcessing {
    /**
     * Key of the boolean parameter that decides whether each value is divided by the sum
     * of all attribute values of its example before being multiplied with the IDF factor.
     */
    public static final String PARAMETER_CALCULATE_TERM_FREQUENCIES = "calculate_term_frequencies";

    /**
     * Creates the operator.
     *
     * @param description operator description forwarded unchanged to the superclass
     */
    public TFIDFFilter(OperatorDescription description) {
        super(description);
    }

    /**
     * Replaces every attribute value of {@code exampleSet} by its TF-IDF weight.
     *
     * <p>Two passes are made over the data: the first collects the per-example value sum
     * and the per-attribute document frequency, the second writes the weights. Examples
     * whose value sum is exactly 0 get 0 for every attribute. Attributes that have no
     * value greater than 0 in any example get an IDF factor of 0 (instead of the infinite
     * factor a division by a zero document frequency would yield), so their weights do
     * not turn into NaN or infinities.</p>
     *
     * @param exampleSet the data to transform; modified in place
     * @return the same {@code exampleSet} instance
     * @throws OperatorException if the example set has no examples, has no attributes,
     *         or contains an attribute that is not numerical; may also be raised by
     *         {@code checkForStop()}, which is invoked once per example in both passes
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        this.validateInput(exampleSet);

        // Both sizes are loop invariant; read them once.
        int exampleCount = exampleSet.size();
        int attributeCount = exampleSet.getAttributes().size();

        // Pass 1: per-example value sums and per-attribute document frequencies.
        double[] termFrequencySum = new double[exampleCount];
        int[] documentFrequencies = new int[attributeCount];
        int exampleIndex = 0;
        for (Example example : exampleSet) {
            int attributeIndex = 0;
            for (Attribute attribute : exampleSet.getAttributes()) {
                double value = example.getValue(attribute);
                termFrequencySum[exampleIndex] += value;
                if (value > 0.0) {
                    documentFrequencies[attributeIndex]++;
                }
                ++attributeIndex;
            }
            ++exampleIndex;
            this.checkForStop();
        }

        double[] inverseDocumentFrequencies = new double[attributeCount];
        for (int j = 0; j < attributeCount; ++j) {
            inverseDocumentFrequencies[j] = TFIDFFilter.inverseDocumentFrequency(exampleCount, documentFrequencies[j]);
        }

        boolean calculateTermFrequencies = this.getParameterAsBoolean(PARAMETER_CALCULATE_TERM_FREQUENCIES);

        // Pass 2: overwrite every value with tf * idf.
        exampleIndex = 0;
        for (Example example : exampleSet) {
            double valueSum = termFrequencySum[exampleIndex];
            int attributeIndex = 0;
            for (Attribute attribute : exampleSet.getAttributes()) {
                double value = example.getValue(attribute);
                if (valueSum == 0.0) {
                    // Nothing to normalise by; the whole example is written as zeros.
                    example.setValue(attribute, 0.0);
                } else {
                    double tf = calculateTermFrequencies ? value / valueSum : value;
                    example.setValue(attribute, tf * inverseDocumentFrequencies[attributeIndex]);
                }
                ++attributeIndex;
            }
            ++exampleIndex;
            this.checkForStop();
        }
        return exampleSet;
    }

    /**
     * Extends the inherited parameter list by the non-expert boolean parameter
     * {@link #PARAMETER_CALCULATE_TERM_FREQUENCIES}, whose default is {@code true}.
     *
     * @return the list obtained from the superclass with the additional parameter appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterTypeBoolean type = new ParameterTypeBoolean(PARAMETER_CALCULATE_TERM_FREQUENCIES, "Indicates if term frequency values should be generated (must be done if input data is given as simple occurence counts).", true);
        type.setExpert(false);
        types.add(type);
        return types;
    }

    /**
     * Rejects input this operator cannot process: an example set without examples
     * (user error code 110), without attributes (code 106), or with at least one
     * attribute that is not numerical (code 104, reported for the first such attribute).
     */
    private void validateInput(ExampleSet exampleSet) throws OperatorException {
        if (exampleSet.size() < 1) {
            throw new UserError((Operator)this, 110, "1");
        }
        if (exampleSet.getAttributes().size() == 0) {
            throw new UserError((Operator)this, 106, new Object[0]);
        }
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (!attribute.isNumerical()) {
                throw new UserError((Operator)this, 104, this.getName(), attribute.getName());
            }
        }
    }

    /**
     * Computes {@code ln(exampleCount / documentFrequency)}, returning 0 when the
     * document frequency is 0 so that the later multiplication cannot produce
     * {@code 0 * Infinity = NaN}.
     */
    private static double inverseDocumentFrequency(int exampleCount, int documentFrequency) {
        if (documentFrequency == 0) {
            return 0.0;
        }
        return Math.log((double)exampleCount / (double)documentFrequency);
    }
}

