package com.rapidminer.operator;

import com.lowagie.text.ElementTags;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DoubleSparseArrayDataRow;
import com.rapidminer.example.table.ListDataRowReader;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.extraction.ExtractionException;
import com.rapidminer.operator.extraction.FeatureExtractor;
import com.rapidminer.tools.LoggingHandler;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTWordVector;
import edu.udo.cs.wvtool.wordlist.WVTWordList;
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:WEB-INF/lib/rapidminer-plugintext-1.0.0.jar:com/rapidminer/operator/ExampleTableOutputFilter.class */
/**
 * Output filter that turns word vectors into rows of an example table.
 *
 * <p>The filter works in one of two modes, chosen by the {@code exampleSet}
 * constructor argument:
 * <ul>
 *   <li><b>Create mode</b> ({@code exampleSet == null}): every call to
 *       {@link #write(WVTWordVector)} buffers a new sparse data row, and
 *       {@link #createExampleSet()} reads the buffered rows into a freshly
 *       created {@link MemoryExampleTable}.</li>
 *   <li><b>Extend mode</b> ({@code exampleSet != null}): one attribute per
 *       word is appended to the table behind the given example set, and
 *       {@link #write(WVTWordVector)} stores the word values in the existing
 *       example whose id value equals the document id.</li>
 * </ul>
 *
 * <p>NOTE(review): in extend mode the special attributes and the extractor
 * attributes are added to the table, but {@code write} only fills the word
 * attributes and never invokes the extractor. Confirm whether that is
 * intended.
 */
public class ExampleTableOutputFilter implements RapidMinerOutputFilter {
    /** Id type: the complete source name of the document is used as id. */
    public static final int ID_TYPE_LONG = 0;
    /** Id type: only the part of the source name after the last file separator is used as id. */
    public static final int ID_TYPE_SHORT = 1;
    /** Id type: a running number (starting at 1) is used as id. */
    public static final int ID_TYPE_NUMERICAL = 2;
    /** Display names of the id types, indexed by the {@code ID_TYPE_*} constants. */
    public static final String[] ID_TYPE_NAMES = {"long", "short", ElementTags.NUMBER};

    private MemoryExampleTable exampleTable;
    private final boolean useSpecialAttributes;
    private final int idType;
    /** One attribute per word of the word list, in word list order. */
    private Attribute[] wordAttributes;
    private Collection<Attribute> extractorAttributes;
    private final Attribute idAtt;
    private final Attribute labelAtt;
    private final FeatureExtractor extractor;
    private final LoggingHandler logger;
    private final boolean extendExampleSet;
    private ExampleSet exampleSetToExtend;
    /** Rows buffered by {@link #write(WVTWordVector)} in create mode. */
    private final List<DataRow> vectors = new LinkedList<DataRow>();
    /** Next value handed out for numerical ids. */
    private int counter = 1;
    // Value type 1 is presumably the nominal type: these attributes are only
    // ever filled through getMapping().mapString(...) - TODO confirm.
    private final Attribute sourceAtt = AttributeFactory.createAttribute("text_source", 1);
    private final Attribute typeAtt = AttributeFactory.createAttribute("content_type", 1);
    private final Attribute encodingAtt = AttributeFactory.createAttribute("content_encoding", 1);
    private final Attribute languageAtt = AttributeFactory.createAttribute("content_language", 1);
    /** Extend mode only: examples of the original example set, keyed by their id value as string. */
    private final Map<String, Example> myOwnPrivateIdExampleMap = new HashMap<String, Example>();

    /**
     * Creates the filter and prepares all attributes (and, in extend mode,
     * the id lookup map and the extended table).
     *
     * @param attribute        label attribute to clone in create mode, or
     *                         {@code null} for no label; not used in extend mode,
     *                         where the label of {@code exampleSet} is taken
     * @param wVTWordList      word list; one attribute is created per word
     * @param z                whether the source, content type, encoding and
     *                         language attributes are added
     * @param i                id type, one of the {@code ID_TYPE_*} constants
     * @param featureExtractor optional extractor contributing further
     *                         attributes, may be {@code null}; attributes whose
     *                         name is already taken are removed from the
     *                         collection returned by its {@code getAttributes()}
     * @param exampleSet       example set to extend, or {@code null} to build a
     *                         new table
     * @param loggingHandler   receiver of log messages and warnings
     * @throws IllegalArgumentException if {@code exampleSet} is given but has no
     *                                  id attribute
     * @throws ClassCastException       if {@code exampleSet} is given but its
     *                                  example table is not a
     *                                  {@link MemoryExampleTable}
     */
    public ExampleTableOutputFilter(Attribute attribute, WVTWordList wVTWordList, boolean z, int i, FeatureExtractor featureExtractor, ExampleSet exampleSet, LoggingHandler loggingHandler) {
        this.extendExampleSet = exampleSet != null;
        this.exampleSetToExtend = exampleSet;
        this.idType = i;
        this.logger = loggingHandler;
        this.useSpecialAttributes = z;
        this.extractor = featureExtractor;

        // Names that are already taken; used to detect clashes with words and extractor attributes.
        HashSet<String> usedNames = new HashSet<String>();
        // Attributes this filter adds to the (new or extended) table.
        List<Attribute> newAttributes = new LinkedList<Attribute>();

        final Attribute label;
        final Attribute id;
        if (this.extendExampleSet) {
            label = exampleSet.getAttributes().getLabel();
            id = exampleSet.getAttributes().getId();
            if (id == null) {
                // Fail with a clear message (and before the example set is modified)
                // instead of an anonymous NullPointerException further down.
                throw new IllegalArgumentException("The example set to extend does not have an id attribute, so documents cannot be matched to examples.");
            }
            if (!id.isNominal()) {
                loggingHandler.logWarning("The id attribute of the original example set is not nominal. This is likely to produce an error, please use nominal attributes to extend an example set.");
            }
            Iterator<Attribute> existingAttributes = exampleSet.getAttributes().allAttributes();
            while (existingAttributes.hasNext()) {
                usedNames.add(existingAttributes.next().getName());
            }
            for (Example example : exampleSet) {
                this.myOwnPrivateIdExampleMap.put(example.getValueAsString(id), example);
            }
        } else {
            label = attribute != null ? (Attribute) attribute.clone() : null;
            // Value type 3 for running-number ids, 1 for string ids
            // (presumably the integer and nominal value types - TODO confirm).
            id = AttributeFactory.createAttribute("id", this.idType == ID_TYPE_NUMERICAL ? 3 : 1);
            newAttributes.add(id);
        }
        this.labelAtt = label;
        this.idAtt = id;
        usedNames.add(id.getName());

        int numWords = wVTWordList.getNumWords();
        loggingHandler.log("Total number of words is " + numWords);
        this.wordAttributes = new Attribute[numWords];

        if (z) {
            newAttributes.add(this.sourceAtt);
            newAttributes.add(this.typeAtt);
            newAttributes.add(this.encodingAtt);
            newAttributes.add(this.languageAtt);
            usedNames.add(this.sourceAtt.getName());
            usedNames.add(this.typeAtt.getName());
            usedNames.add(this.encodingAtt.getName());
            usedNames.add(this.languageAtt.getName());
        }

        String labelName = "";
        if (label != null) {
            // Use the label's own name: the 'attribute' argument may be null in
            // extend mode, where the label is taken from the example set.
            labelName = label.getName();
            usedNames.add(labelName);
        }

        for (int wordIndex = 0; wordIndex < numWords; wordIndex++) {
            String word = wVTWordList.getWord(wordIndex);
            if (usedNames.contains(word)) {
                loggingHandler.logWarning("The original example example set already contains an attribute named \"" + word + "\". This is likely to cause trouble. Please rename the attribute in the original example set.");
            }
            String attributeName;
            if (word.equals(labelName)) {
                attributeName = labelName + "_";
                loggingHandler.logWarning("There is a term that equals the class attribute, renaming it");
            } else {
                attributeName = word;
            }
            usedNames.add(attributeName);
            // Value type 4: presumably the real-valued type - TODO confirm.
            Attribute wordAttribute = AttributeFactory.createAttribute(attributeName, 4);
            newAttributes.add(wordAttribute);
            this.wordAttributes[wordIndex] = wordAttribute;
        }

        if (featureExtractor != null) {
            this.extractorAttributes = featureExtractor.getAttributes();
            // NOTE(review): this removes clashing attributes from the collection the
            // extractor handed out; whether that is the extractor's own internal
            // collection cannot be told from here.
            Iterator<Attribute> extracted = this.extractorAttributes.iterator();
            while (extracted.hasNext()) {
                Attribute extractedAttribute = extracted.next();
                if (usedNames.contains(extractedAttribute.getName())) {
                    extracted.remove();
                    loggingHandler.log("The extractor added an attribute with a name that is already used by another attribute: [" + extractedAttribute.getName() + "]. This attribute is omitted. Please rename this attribute and rerun.");
                } else {
                    newAttributes.add(extractedAttribute);
                }
            }
        } else {
            this.extractorAttributes = new LinkedList<Attribute>();
        }

        if (this.extendExampleSet) {
            this.exampleTable = (MemoryExampleTable) exampleSet.getExampleTable();
            this.exampleTable.addAttributes(newAttributes);
            for (Attribute added : newAttributes) {
                exampleSet.getAttributes().addRegular(added);
            }
        } else {
            if (label != null) {
                newAttributes.add(label);
            }
            this.exampleTable = new MemoryExampleTable(newAttributes);
        }
    }

    /**
     * Stores one word vector.
     *
     * <p>In extend mode the values are written into the existing example whose
     * id equals the document id; if there is no such example a warning is
     * logged and the vector is dropped. In create mode a new sparse row holding
     * the id, the optional special attributes, the optional label, the word
     * values and the extractor values is buffered until
     * {@link #createExampleSet()} is called.
     *
     * <p>Infinite and NaN word values are stored as {@code 0.0}.
     *
     * @param wVTWordVector the word vector of one document
     */
    public void write(WVTWordVector wVTWordVector) {
        WVTDocumentInfo documentInfo = wVTWordVector.getDocumentInfo();
        String sourceName = documentInfo.getSourceName();
        String documentId = sourceName;
        if (this.idType == ID_TYPE_SHORT) {
            // Short ids keep only what follows the last platform file separator.
            documentId = sourceName.substring(sourceName.lastIndexOf(File.separator) + 1);
        }

        if (this.extendExampleSet) {
            Example example = this.myOwnPrivateIdExampleMap.get(documentId);
            if (example == null) {
                this.logger.logWarning("Did not find example with id " + documentId + ". Please check whether the original example set has the same nominal id type as selected in the operator.");
                return;
            }
            double[] values = wVTWordVector.getValues();
            for (int wordIndex = 0; wordIndex < values.length; wordIndex++) {
                example.setValue(this.wordAttributes[wordIndex], sanitize(values[wordIndex]));
            }
            return;
        }

        double[] values = wVTWordVector.getValues();
        DoubleSparseArrayDataRow row = new DoubleSparseArrayDataRow(values.length + this.extractorAttributes.size());
        if (this.idType == ID_TYPE_NUMERICAL) {
            row.set(this.idAtt, this.counter);
            this.counter++;
        } else {
            row.set(this.idAtt, this.idAtt.getMapping().mapString(documentId));
        }
        if (this.useSpecialAttributes) {
            row.set(this.sourceAtt, this.sourceAtt.getMapping().mapString(documentInfo.getSourceName()));
            row.set(this.typeAtt, this.typeAtt.getMapping().mapString(documentInfo.getContentType()));
            row.set(this.encodingAtt, this.encodingAtt.getMapping().mapString(documentInfo.getContentEncoding()));
            row.set(this.languageAtt, this.languageAtt.getMapping().mapString(documentInfo.getContentLanguage()));
        }
        if (this.labelAtt != null) {
            row.set(this.labelAtt, documentInfo.getClassValue());
        }
        for (int wordIndex = 0; wordIndex < values.length; wordIndex++) {
            row.set(this.wordAttributes[wordIndex], sanitize(values[wordIndex]));
        }
        try {
            if (this.extractor != null) {
                this.extractor.extract(documentInfo, row);
            }
        } catch (ExtractionException e) {
            // Best effort: the row is kept even if the extractor failed for this document.
            this.logger.logWarning("Could not extract any values for item '" + sourceName + "':" + e);
        }
        row.trim();
        this.vectors.add(row);
    }

    /**
     * Returns the resulting example set.
     *
     * <p>In extend mode this is the example set passed to the constructor. In
     * create mode the buffered rows are read into the example table and an
     * example set with the label and id attributes of this filter is created
     * from it.
     *
     * @return the extended or newly created example set
     * @throws OperatorException if the example table is no longer available
     *                           (it is set to {@code null} by {@link #cleanUp()})
     */
    @Override // com.rapidminer.operator.RapidMinerOutputFilter
    public ExampleSet createExampleSet() throws OperatorException {
        if (this.extendExampleSet) {
            return this.exampleSetToExtend;
        }
        if (this.exampleTable == null) {
            throw new OperatorException("WordVectorTool did not generate word vectors. Please check file names.");
        }
        this.exampleTable.readExamples(new ListDataRowReader(this.vectors.iterator()));
        return this.exampleTable.createExampleSet(this.labelAtt, null, this.idAtt);
    }

    /**
     * Drops the references to the table, the example set and the word
     * attributes and clears the buffered rows, the id lookup map and the
     * extractor attribute collection.
     */
    @Override // com.rapidminer.operator.RapidMinerOutputFilter
    public void cleanUp() {
        this.exampleTable = null;
        this.exampleSetToExtend = null;
        this.vectors.clear();
        this.myOwnPrivateIdExampleMap.clear();
        this.wordAttributes = null;
        this.extractorAttributes.clear();
    }

    /** Maps infinite and NaN values to {@code 0.0} and returns every other value unchanged. */
    private static double sanitize(double value) {
        if (Double.isInfinite(value) || Double.isNaN(value)) {
            return 0.0d;
        }
        return value;
    }
}
