package com.rapidminer.operator.io;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/* loaded from: input_file:WEB-INF/lib/rapidMiner-1.0.0.jar:com/rapidminer/operator/io/ArffExampleSource.class */
/**
 * Example source operator that reads an example set from a file in ARFF format.
 *
 * <p>The reader expects an {@code @relation} line, one or more {@code @attribute}
 * declarations and an {@code @data} section. Data rows may be written in dense form
 * (one value per declared attribute) or in sparse form ({@code {index value ...}}),
 * where every attribute that is not listed gets the value {@code "0"}.</p>
 *
 * <p>Attributes whose names equal the {@code label_attribute}, {@code id_attribute} or
 * {@code weight_attribute} parameters are handed to the example table as special
 * attributes. Rows can be sub-sampled via {@code sample_size} or {@code sample_ratio}.</p>
 */
public class ArffExampleSource extends AbstractExampleSource {
    public static final String PARAMETER_DATA_FILE = "data_file";
    public static final String PARAMETER_LABEL_ATTRIBUTE = "label_attribute";
    public static final String PARAMETER_ID_ATTRIBUTE = "id_attribute";
    public static final String PARAMETER_WEIGHT_ATTRIBUTE = "weight_attribute";
    public static final String PARAMETER_DATAMANAGEMENT = "datamanagement";
    public static final String PARAMETER_DECIMAL_POINT_CHARACTER = "decimal_point_character";
    public static final String PARAMETER_SAMPLE_RATIO = "sample_ratio";
    public static final String PARAMETER_SAMPLE_SIZE = "sample_size";
    public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

    /*
     * Value type codes handed to AttributeFactory.createAttribute. Each constant is named
     * after the ARFF type keyword (or the brace-enclosed value list) that selects it here.
     * NOTE(review): these presumably mirror the project's value type ontology - confirm.
     */
    private static final int VALUE_TYPE_NOMINAL = 1;
    private static final int VALUE_TYPE_NUMERIC = 2;
    private static final int VALUE_TYPE_INTEGER = 3;
    private static final int VALUE_TYPE_REAL = 4;
    private static final int VALUE_TYPE_STRING = 5;
    private static final int VALUE_TYPE_DATE = 10;

    /** User error code raised for every failure while reading the data file. */
    private static final int READ_ERROR_CODE = 302;

    /** Fallback decimal point; equals the declared default of the parameter. */
    private static final char DEFAULT_DECIMAL_POINT = '.';

    public ArffExampleSource(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Opens the file given by {@code data_file}, parses it as ARFF and returns the
     * resulting example set.
     *
     * @return the example set built from the rows that were read (and sampled)
     * @throws OperatorException a {@link UserError} with code 302 if the file is empty
     *         or any {@link IOException} occurs while reading or parsing it
     */
    @Override // com.rapidminer.operator.io.AbstractExampleSource
    public ExampleSet createExampleSet() throws OperatorException {
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(getParameterAsInputStream(PARAMETER_DATA_FILE), getEncoding()));
            ExampleSet result;
            boolean finished = false;
            try {
                result = readExampleSet(createTokenizer(reader));
                finished = true;
            } finally {
                if (finished) {
                    // Success path: a failing close is reported like any other read error.
                    reader.close();
                } else {
                    // Failure path: release the stream without masking the primary error.
                    closeQuietly(reader);
                }
            }
            return result;
        } catch (IOException e) {
            throw new UserError(this, READ_ERROR_CODE, getParameterAsString(PARAMETER_DATA_FILE), e.getMessage());
        }
    }

    /**
     * Parses header and data section from the tokenizer and builds the example set.
     *
     * @param tokenizer tokenizer positioned at the very beginning of the ARFF content
     * @return the example set with label, weight and id registered as special attributes
     * @throws IOException if the content does not follow the expected ARFF structure
     * @throws OperatorException if the file is empty or a parameter cannot be resolved
     */
    private ExampleSet readExampleSet(StreamTokenizer tokenizer) throws IOException, OperatorException {
        Tools.getFirstToken(tokenizer);
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            throw new UserError(this, READ_ERROR_CODE, getParameterAsString(PARAMETER_DATA_FILE), "file is empty");
        }
        if (!"@relation".equalsIgnoreCase(tokenizer.sval)) {
            throw new IOException("expected the keyword @relation in line " + tokenizer.lineno());
        }
        // The token after @relation (the relation name) is read but not used.
        Tools.getNextToken(tokenizer);
        Tools.getLastToken(tokenizer, false);
        Tools.getFirstToken(tokenizer);
        if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
            throw new IOException("unexpected end of file in line " + tokenizer.lineno() + ", attribute description expected...");
        }

        // Resolve the role names once instead of once per declared attribute.
        String labelName = getParameterAsString(PARAMETER_LABEL_ATTRIBUTE);
        String idName = getParameterAsString(PARAMETER_ID_ATTRIBUTE);
        String weightName = getParameterAsString(PARAMETER_WEIGHT_ATTRIBUTE);

        List<Attribute> declaredAttributes = new ArrayList<Attribute>();
        Attribute label = null;
        Attribute weight = null;
        Attribute id = null;
        while ("@attribute".equalsIgnoreCase(tokenizer.sval)) {
            Attribute declared = createAttribute(tokenizer);
            if (declared == null) {
                throw new IOException("Cannot read attribute information, maybe the value type is missing or a name containing spaces without quoting was used...");
            }
            declaredAttributes.add(declared);
            // Compare from the parameter side so neither a missing parameter nor a
            // missing attribute name can raise a NullPointerException.
            String name = declared.getName();
            if (labelName != null && labelName.equals(name)) {
                label = declared;
            } else if (idName != null && idName.equals(name)) {
                id = declared;
            } else if (weightName != null && weightName.equals(name)) {
                weight = declared;
            }
        }
        if (!"@data".equalsIgnoreCase(tokenizer.sval)) {
            throw new IOException("expected keyword '@data' in line " + tokenizer.lineno());
        }
        if (declaredAttributes.isEmpty()) {
            throw new IOException("no attributes were declared in the ARFF file, please declare attributes with the '@attribute' keyword.");
        }

        MemoryExampleTable table = new MemoryExampleTable(declaredAttributes);
        Attribute[] tableAttributes = table.getAttributes();
        DataRowFactory rowFactory = new DataRowFactory(getParameterAsInt(PARAMETER_DATAMANAGEMENT), resolveDecimalPoint());
        int sampleSize = getParameterAsInt(PARAMETER_SAMPLE_SIZE);
        double sampleRatio = getParameterAsDouble(PARAMETER_SAMPLE_RATIO);
        RandomGenerator random = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));

        boolean limited = sampleSize > -1;
        int rowsRead = 0;
        // The row limit is checked before parsing, so no row beyond the limit is read.
        while (!limited || rowsRead < sampleSize) {
            DataRow row = createDataRow(tokenizer, true, rowFactory, tableAttributes);
            if (row == null) {
                break;
            }
            rowsRead++;
            // The ratio (and the random generator) is only consulted when sample_size is -1.
            if (sampleSize != -1 || random.nextDouble() <= sampleRatio) {
                table.addDataRow(row);
            }
        }

        // Only roles that were actually found are registered; no null keys are passed on.
        HashMap<Attribute, String> specialAttributes = new HashMap<Attribute, String>();
        if (label != null) {
            specialAttributes.put(label, "label");
        }
        if (weight != null) {
            specialAttributes.put(weight, "weight");
        }
        if (id != null) {
            specialAttributes.put(id, "id");
        }
        return table.createExampleSet(specialAttributes);
    }

    /**
     * Returns the first character of the {@code decimal_point_character} parameter, or
     * {@code '.'} (the declared default) if the parameter is missing or empty.
     */
    private char resolveDecimalPoint() throws OperatorException {
        String configured = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER);
        if (configured == null || configured.length() == 0) {
            return DEFAULT_DECIMAL_POINT;
        }
        return configured.charAt(0);
    }

    /** Closes the reader and ignores a failure of the close operation itself. */
    private static void closeQuietly(Reader reader) {
        try {
            reader.close();
        } catch (IOException ignored) {
            // Another failure is already propagating; it is the one worth reporting.
        }
    }

    /**
     * Reads one {@code @attribute} declaration. On entry the current token is the
     * {@code @attribute} keyword; on return the current token is the first token of
     * the following non-consumed line.
     *
     * @param streamTokenizer tokenizer positioned on the {@code @attribute} keyword
     * @return the created attribute, or {@code null} if the type keyword is not one of
     *         {@code real}, {@code integer}, {@code numeric}, {@code string}, {@code date}
     * @throws IOException if a nominal value list is malformed or empty, or the file
     *         ends before the data section
     */
    private Attribute createAttribute(StreamTokenizer streamTokenizer) throws IOException {
        Attribute attribute = null;
        Tools.getNextToken(streamTokenizer);
        String attributeName = streamTokenizer.sval;
        Tools.getNextToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_WORD) {
            String typeKeyword = streamTokenizer.sval;
            if (typeKeyword.equalsIgnoreCase("real")) {
                attribute = AttributeFactory.createAttribute(attributeName, VALUE_TYPE_REAL);
            } else if (typeKeyword.equalsIgnoreCase("integer")) {
                attribute = AttributeFactory.createAttribute(attributeName, VALUE_TYPE_INTEGER);
            } else if (typeKeyword.equalsIgnoreCase("numeric")) {
                attribute = AttributeFactory.createAttribute(attributeName, VALUE_TYPE_NUMERIC);
            } else if (typeKeyword.equalsIgnoreCase("string")) {
                attribute = AttributeFactory.createAttribute(attributeName, VALUE_TYPE_STRING);
            } else if (typeKeyword.equalsIgnoreCase("date")) {
                attribute = AttributeFactory.createAttribute(attributeName, VALUE_TYPE_DATE);
            }
            Tools.waitForEOL(streamTokenizer);
        } else {
            // Anything that is not a word must be a brace-enclosed list of nominal values.
            attribute = AttributeFactory.createAttribute(attributeName, VALUE_TYPE_NOMINAL);
            streamTokenizer.pushBack();
            if (streamTokenizer.nextToken() != '{') {
                throw new IOException("{ expected at beginning of nominal values definition in line " + streamTokenizer.lineno());
            }
            while (streamTokenizer.nextToken() != '}') {
                // End of file must stop the loop as well, otherwise the tokenizer keeps
                // returning TT_EOF and the loop never terminates.
                if (streamTokenizer.ttype == StreamTokenizer.TT_EOL || streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
                    throw new IOException("} expected at end of the nominal values definition in line " + streamTokenizer.lineno());
                }
                attribute.getMapping().mapString(streamTokenizer.sval);
            }
            if (attribute.getMapping().size() == 0) {
                throw new IOException("empty definition of nominal values is not suggested in line " + streamTokenizer.lineno());
            }
        }
        // NOTE(review): presumably skips the rest of the line and moves to the first
        // token of the next non-empty line - confirm against Tools.
        Tools.getLastToken(streamTokenizer, false);
        Tools.getFirstToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
            throw new IOException("unexpected end of file before data section in line " + streamTokenizer.lineno());
        }
        return attribute;
    }

    /**
     * Reads the next data row, dispatching on whether it starts with an opening brace
     * (sparse form) or not (dense form).
     *
     * @param streamTokenizer tokenizer positioned before the row
     * @param z forwarded to the row readers; if {@code true} they call
     *        {@code Tools.getLastToken} after the row
     * @param dataRowFactory factory used to create the row
     * @param attributeArr attributes of the table, in column order
     * @return the row, or {@code null} if the end of the file was reached
     * @throws IOException if the row is malformed
     */
    private DataRow createDataRow(StreamTokenizer streamTokenizer, boolean z, DataRowFactory dataRowFactory, Attribute[] attributeArr) throws IOException {
        Tools.getFirstToken(streamTokenizer);
        if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
            return null;
        }
        if (streamTokenizer.ttype == '{') {
            return createDataRowFromSparse(streamTokenizer, z, dataRowFactory, attributeArr);
        }
        return createDataRowFromDense(streamTokenizer, z, dataRowFactory, attributeArr);
    }

    /**
     * Reads a dense row: exactly one value token per attribute. The first value is the
     * tokenizer's current token; the remaining ones are fetched with
     * {@code Tools.getNextToken}.
     *
     * @throws IOException if a token is neither a word nor the missing value marker
     */
    private DataRow createDataRowFromDense(StreamTokenizer streamTokenizer, boolean z, DataRowFactory dataRowFactory, Attribute[] attributeArr) throws IOException {
        String[] values = new String[attributeArr.length];
        for (int column = 0; column < attributeArr.length; column++) {
            if (column > 0) {
                Tools.getNextToken(streamTokenizer);
            }
            if (streamTokenizer.ttype == '?') {
                values[column] = "?";
            } else {
                if (streamTokenizer.ttype != StreamTokenizer.TT_WORD) {
                    throw new IOException("not a valid value '" + streamTokenizer.sval + "' in line " + streamTokenizer.lineno());
                }
                values[column] = streamTokenizer.sval;
            }
        }
        if (z) {
            Tools.getLastToken(streamTokenizer, true);
        }
        return dataRowFactory.create(values, attributeArr);
    }

    /**
     * Reads a sparse row of the form {@code {index value index value ...}}. The current
     * token is the opening brace. All attributes that are not listed keep the value
     * {@code "0"}.
     *
     * @throws IOException if the row ends before the closing brace, an index is not a
     *         number within the attribute range, or a value token is invalid
     */
    private DataRow createDataRowFromSparse(StreamTokenizer streamTokenizer, boolean z, DataRowFactory dataRowFactory, Attribute[] attributeArr) throws IOException {
        String[] values = new String[attributeArr.length];
        for (int column = 0; column < values.length; column++) {
            values[column] = "0";
        }
        while (streamTokenizer.nextToken() != StreamTokenizer.TT_EOL) {
            if (streamTokenizer.ttype == StreamTokenizer.TT_EOF) {
                throw new IOException("unexpedted end of file in line " + streamTokenizer.lineno());
            }
            if (streamTokenizer.ttype == '}') {
                if (z) {
                    Tools.getLastToken(streamTokenizer, true);
                }
                return dataRowFactory.create(values, attributeArr);
            }
            int column = parseSparseIndex(streamTokenizer, values.length);
            Tools.getNextToken(streamTokenizer);
            if (streamTokenizer.ttype == '?') {
                values[column] = "?";
            } else {
                if (streamTokenizer.ttype != StreamTokenizer.TT_WORD) {
                    throw new IOException("not a valid value '" + streamTokenizer.sval + "' in line " + streamTokenizer.lineno());
                }
                values[column] = streamTokenizer.sval;
            }
        }
        throw new IOException("unexpedted end of line " + streamTokenizer.lineno());
    }

    /**
     * Interprets the tokenizer's current token as the attribute index of a sparse entry.
     * Malformed or out-of-range indices are reported as {@link IOException} so they are
     * handled like every other format error instead of escaping as unchecked exceptions.
     *
     * @param streamTokenizer tokenizer whose current token is the index
     * @param attributeCount number of attributes; valid indices are {@code 0..attributeCount-1}
     * @return the parsed, range-checked index
     * @throws IOException if the token is not an integer or lies outside the valid range
     */
    private static int parseSparseIndex(StreamTokenizer streamTokenizer, int attributeCount) throws IOException {
        String token = streamTokenizer.sval;
        int index;
        try {
            index = Integer.parseInt(token);
        } catch (NumberFormatException e) {
            throw new IOException("not a valid attribute index '" + token + "' in line " + streamTokenizer.lineno(), e);
        }
        if (index < 0 || index >= attributeCount) {
            throw new IOException("attribute index " + index + " is out of range (0 to " + (attributeCount - 1) + ") in line " + streamTokenizer.lineno());
        }
        return index;
    }

    /**
     * Creates the tokenizer used for the whole file: control characters, blanks and
     * commas separate tokens, {@code %} starts a comment, single and double quotes
     * delimit quoted strings, braces are returned as single-character tokens and line
     * ends are reported. Number parsing is off because the syntax table is reset.
     */
    private StreamTokenizer createTokenizer(Reader reader) {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);
        tokenizer.resetSyntax();
        tokenizer.whitespaceChars(0, ' ');
        tokenizer.wordChars(' ' + 1, 255);
        // Must come after wordChars, which would otherwise classify ',' as a word character.
        tokenizer.whitespaceChars(',', ',');
        tokenizer.commentChar('%');
        tokenizer.quoteChar('"');
        tokenizer.quoteChar('\'');
        tokenizer.ordinaryChar('{');
        tokenizer.ordinaryChar('}');
        tokenizer.eolIsSignificant(true);
        return tokenizer;
    }

    /**
     * Extends the inherited parameter list with the file, role, data management,
     * decimal point and sampling parameters of this operator.
     */
    @Override // com.rapidminer.operator.Operator, com.rapidminer.parameter.ParameterHandler
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeFile(PARAMETER_DATA_FILE, "The path to the data file.", "arff", false));

        ParameterTypeString labelType = new ParameterTypeString(PARAMETER_LABEL_ATTRIBUTE, "The (case sensitive) name of the label attribute");
        labelType.setExpert(false);
        parameterTypes.add(labelType);
        parameterTypes.add(new ParameterTypeString(PARAMETER_ID_ATTRIBUTE, "The (case sensitive) name of the id attribute"));
        parameterTypes.add(new ParameterTypeString(PARAMETER_WEIGHT_ATTRIBUTE, "The (case sensitive) name of the weight attribute"));
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_DATAMANAGEMENT, "Determines, how the data is represented internally.", DataRowFactory.TYPE_NAMES, 0));
        parameterTypes.add(new ParameterTypeString(PARAMETER_DECIMAL_POINT_CHARACTER, "Character that is used as decimal point.", "."));

        ParameterTypeDouble ratioType = new ParameterTypeDouble(PARAMETER_SAMPLE_RATIO, "The fraction of the data set which should be read (1 = all; only used if sample_size = -1)", 0.0d, 1.0d, 1.0d);
        ratioType.setExpert(false);
        parameterTypes.add(ratioType);
        parameterTypes.add(new ParameterTypeInt(PARAMETER_SAMPLE_SIZE, "The exact number of samples which should be read (-1 = use sample ratio; if not -1, sample_ratio will not have any effect)", -1, Integer.MAX_VALUE, -1));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (only for permutation, -1: use global).", -1, Integer.MAX_VALUE, -1));
        return parameterTypes;
    }
}
