package opennlp.tools.cmdline.tokenizer;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/* JADX WARN: Classes with same name are omitted:
  input_file:builds/deps.jar:marytts-server-5.0-jar-with-dependencies.jar:opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.class
  input_file:builds/deps.jar:opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.class
  input_file:marytts-server-5.0-jar-with-dependencies.jar:opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.class
  input_file:marytts-server-5.0-jar-with-dependencies.jar:opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.class
 */
/* loaded from: input_file:opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.class */
/**
 * Command-line tool that trains a tokenizer model from a training data file
 * and writes the resulting model to an output file.
 *
 * <p>The tool reads its settings from the argument array handed to
 * {@link #run(String[])}: the {@code -data} and {@code -model} parameters are
 * looked up here, everything else is delegated to {@code TrainingParameters}.
 */
public final class TokenizerTrainerTool implements CmdLineTool {

    /** Fewest command-line arguments {@link #run(String[])} accepts before it prints the help text. */
    private static final int MIN_ARGUMENT_COUNT = 6;

    /**
     * Returns the name under which this tool is addressed on the command line.
     *
     * @return the literal {@code "TokenizerTrainer"}
     */
    @Override
    public String getName() {
        return "TokenizerTrainer";
    }

    /**
     * Returns a one-line summary of what this tool does.
     *
     * @return the short description text
     */
    @Override
    public String getShortDescription() {
        return "trainer for the learnable tokenizer";
    }

    /**
     * Assembles the usage message: the invocation line (tool name, the parameter
     * usage reported by {@code TrainingParameters}, and the {@code -data} /
     * {@code -model} arguments) followed by the parameter description.
     *
     * @return the complete help text
     */
    @Override
    public String getHelp() {
        StringBuilder usage = new StringBuilder("Usage: opennlp ");
        usage.append(getName());
        usage.append(TrainingParameters.getParameterUsage());
        usage.append(" -data trainingData -model model\n");
        usage.append(TrainingParameters.getDescription());
        return usage.toString();
    }

    /**
     * Opens a file of token samples as an object stream.
     *
     * <p>The file is first passed to {@code CmdLineUtil.checkInputFile}, labelled
     * with {@code str + " Data"}; it is then read line by line in the given
     * charset and each line is handed to a {@link TokenSampleStream}.
     *
     * <p>Decompiler note: the original access level of this method was
     * package-private.
     *
     * @param str     label prefix used when the input file is checked
     * @param file    file containing the sample data
     * @param charset charset used to decode the file
     * @return a stream of token samples backed by {@code file}; the caller is
     *         responsible for closing it
     */
    public static ObjectStream<TokenSample> openSampleData(String str, File file, Charset charset) {
        CmdLineUtil.checkInputFile(str + " Data", file);

        PlainTextByLineStream lines =
                new PlainTextByLineStream(CmdLineUtil.openInFile(file).getChannel(), charset);
        return new TokenSampleStream(lines);
    }

    /**
     * Runs the training: validates the arguments, trains a tokenizer on the
     * {@code -data} file and writes the model to the {@code -model} file.
     *
     * <p>When fewer than six arguments are given, or when the parsed
     * {@code TrainingParameters} report themselves as invalid, the help text is
     * printed to standard output and a {@link TerminateToolException} created
     * with the value {@code 1} is thrown. An {@link IOException} raised while
     * training or writing the model is reported through
     * {@code CmdLineUtil.printTrainingIoError} and converted into a
     * {@link TerminateToolException} created with the value {@code -1}.
     *
     * @param strArr the command-line arguments for this tool
     * @throws TerminateToolException if the arguments are unusable or training
     *         fails with an I/O error
     */
    @Override
    public void run(String[] strArr) {
        if (strArr.length < MIN_ARGUMENT_COUNT) {
            System.out.println(getHelp());
            throw new TerminateToolException(1);
        }

        TrainingParameters params = new TrainingParameters(strArr);
        if (!params.isValid()) {
            System.out.println(getHelp());
            throw new TerminateToolException(1);
        }

        File trainingFile = new File(CmdLineUtil.getParameter("-data", strArr));
        File modelFile = new File(CmdLineUtil.getParameter("-model", strArr));
        CmdLineUtil.checkOutputFile("tokenizer model", modelFile);

        ObjectStream<TokenSample> samples =
                openSampleData("Training", trainingFile, params.getEncoding());
        try {
            CmdLineUtil.writeModel(
                    "tokenizer",
                    modelFile,
                    TokenizerME.train(
                            params.getLanguage(),
                            samples,
                            params.isAlphaNumericOptimizationEnabled(),
                            params.getCutoff(),
                            params.getNumberOfIterations()));
        } catch (IOException trainingFailure) {
            CmdLineUtil.printTrainingIoError(trainingFailure);
            throw new TerminateToolException(-1);
        } finally {
            // The sample stream is always closed, whether training succeeded or not.
            try {
                samples.close();
            } catch (IOException ignored) {
                // Best-effort cleanup: a failure to close the input stream is
                // deliberately not reported.
            }
        }
    }
}
