/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Turns a line-oriented sentence file into a stream of {@link DocumentSample}s.
 *
 * <p>The input is decoded with a charset chosen from the language code, read
 * line by line, and every {@code sentencesPerDocument} consecutive lines are
 * merged into one sample that is constructed from the language code and the
 * merged text. The first token of every line is dropped (NOTE(review): the
 * Leipzig sentence files presumably prefix each line with a sentence id --
 * confirm against the corpus format).
 */
public class LeipzigDoccatSampleStream extends FilterObjectStream<String, DocumentSample> {

    /**
     * Language code to charset name. Built once and never modified, so it can
     * be shared by all instances instead of being rebuilt per constructor call.
     */
    private static final Map<String, String> ENCODING_BY_LANGUAGE = createEncodingTable();

    private final String language;
    private final int sentencesPerDocument;

    /**
     * Creates a sample stream over the given input.
     *
     * @param language             language code; selects the input charset and is
     *                             passed to every produced {@link DocumentSample}
     * @param sentencesPerDocument maximum number of input lines merged into one
     *                             sample; a value of zero or less makes
     *                             {@link #read()} always return {@code null}
     * @param in                   the raw byte input to read lines from
     * @throws IOException          if no encoding is registered for {@code language}
     * @throws NullPointerException if {@code language} is {@code null}
     */
    LeipzigDoccatSampleStream(String language, int sentencesPerDocument, InputStream in) throws IOException {
        super(new PlainTextByLineStream(in, LeipzigDoccatSampleStream.mapLanguageToEncoding(language)));
        this.language = language;
        this.sentencesPerDocument = sentencesPerDocument;
    }

    /** Builds the immutable language-code to charset-name table. */
    private static Map<String, String> createEncodingTable() {
        Map<String, String> encodings = new HashMap<String, String>();
        encodings.put("cat", "ISO-8859-1");
        encodings.put("de", "ISO-8859-1");
        encodings.put("dk", "ISO-8859-1");
        encodings.put("ee", "ISO-8859-4");
        encodings.put("en", "ISO-8859-1");
        encodings.put("fi", "ISO-8859-1");
        encodings.put("fr", "ISO-8859-1");
        encodings.put("it", "ISO-8859-1");
        encodings.put("jp", "UTF-8");
        encodings.put("kr", "UTF-8");
        encodings.put("nl", "ISO-8859-1");
        encodings.put("no", "ISO-8859-1");
        encodings.put("se", "ISO-8859-1");
        encodings.put("sorb", "ISO-8859-2");
        encodings.put("tr", "ISO-8859-9");
        return Collections.unmodifiableMap(encodings);
    }

    /**
     * Looks up the charset name registered for a language code.
     *
     * @param language the language code, must not be {@code null}
     * @return the charset name for {@code language}
     * @throws IOException          if the language code is not in the table
     * @throws NullPointerException if {@code language} is {@code null}
     */
    private static String mapLanguageToEncoding(String language) throws IOException {
        if (language == null) {
            throw new NullPointerException("language parameter must not be null!");
        }
        String encoding = ENCODING_BY_LANGUAGE.get(language);
        if (encoding == null) {
            throw new IOException("Encoding for language " + language + " is not specified!");
        }
        return encoding;
    }

    /**
     * Reads up to {@code sentencesPerDocument} lines and merges them into one
     * sample. Each line is tokenized with {@link SimpleTokenizer}; all tokens
     * except the first are appended, each followed by a single space.
     *
     * <p>{@code null} is returned only when the underlying stream is exhausted
     * without producing any text (or when {@code sentencesPerDocument} is not
     * positive). A full batch whose lines contribute no text is skipped rather
     * than reported as end-of-stream, so the lines after it are not lost.
     *
     * @return the next sample, or {@code null} if there is none
     * @throws IOException if the underlying stream fails or a line yields no tokens
     */
    @Override
    public DocumentSample read() throws IOException {
        // With a non-positive batch size no line may be consumed at all.
        if (this.sentencesPerDocument <= 0) {
            return null;
        }

        StringBuilder sampleText = new StringBuilder();
        while (true) {
            String line = null;
            int linesRead = 0;
            while (linesRead < this.sentencesPerDocument && (line = this.samples.read()) != null) {
                String[] tokens = SimpleTokenizer.INSTANCE.tokenize(line);
                if (tokens.length == 0) {
                    throw new IOException("Empty lines are not allowed!");
                }
                // Index 0 is deliberately skipped: only the tokens after the
                // first one belong to the sample text.
                for (int i = 1; i < tokens.length; ++i) {
                    sampleText.append(tokens[i]).append(' ');
                }
                ++linesRead;
            }

            if (sampleText.length() > 0) {
                return new DocumentSample(this.language, sampleText.toString());
            }
            // line is null only when the source ran dry; otherwise the batch was
            // full but textless (single-token lines), so try the next batch
            // instead of signalling a premature end of stream.
            if (line == null) {
                return null;
            }
        }
    }
}

