/*
 * Decompiled with CFR 0.152.
 */
package net.olivo.lc4j;

import cern.colt.GenericSorting;
import cern.colt.Swapper;
import cern.colt.function.IntComparator;
import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import net.olivo.lc4j.IncrementalInt2IntMap;
import net.olivo.lc4j.LanguageModel;

public class LanguageCategorization {
    private static final boolean DEBUG = false;
    private static final int BUFFER_SIZE = 16384;
    private int MAX_LANGUAGES = 10;
    private int NUM_CHARS_TO_EXAMINE = 1000;
    private int USE_TOPMOST_NGRAMS = 400;
    private float UNKNOWN_THRESHOLD = 1.01f;
    private String LANGUAGE_MODELS_DIR = "models/";
    private LanguageModel[] language = null;
    private String[] languageName = null;
    private ByteArrayList wordSeparators = new ByteArrayList();

    public LanguageCategorization() {
        this.wordSeparators.add((byte)32);
        this.wordSeparators.add((byte)9);
        this.wordSeparators.add((byte)13);
        this.wordSeparators.add((byte)10);
        this.wordSeparators.add((byte)48);
        this.wordSeparators.add((byte)49);
        this.wordSeparators.add((byte)50);
        this.wordSeparators.add((byte)51);
        this.wordSeparators.add((byte)52);
        this.wordSeparators.add((byte)53);
        this.wordSeparators.add((byte)54);
        this.wordSeparators.add((byte)55);
        this.wordSeparators.add((byte)56);
        this.wordSeparators.add((byte)57);
    }

    public void setMaxLanguages(int maxLanguages) {
        this.MAX_LANGUAGES = maxLanguages;
    }

    public void setNumCharsToExamine(int numCharsToExamine) {
        this.NUM_CHARS_TO_EXAMINE = numCharsToExamine;
    }

    public void setUseTopmostNgrams(int useTopmostNgrams) {
        this.USE_TOPMOST_NGRAMS = useTopmostNgrams;
    }

    public void setUnknownThreshold(float unknownThreshold) {
        this.UNKNOWN_THRESHOLD = unknownThreshold;
    }

    public void setLanguageModelsDir(String languageModelsDir) {
        this.LANGUAGE_MODELS_DIR = languageModelsDir;
    }

    public int getMaxLanguages() {
        return this.MAX_LANGUAGES;
    }

    public int getNumCharsToExamine() {
        return this.NUM_CHARS_TO_EXAMINE;
    }

    public int getUseTopmostNgrams() {
        return this.USE_TOPMOST_NGRAMS;
    }

    public float getUnknownThreshold() {
        return this.UNKNOWN_THRESHOLD;
    }

    public String getLanguageModelsDir() {
        return this.LANGUAGE_MODELS_DIR;
    }

    public LanguageModel createLanguageModel(ByteArrayList input) {
        long startTime = System.currentTimeMillis();
        IncrementalInt2IntMap hash = new IncrementalInt2IntMap();
        LanguageModel languageModel = new LanguageModel();
        input.add((byte)32);
        ByteArrayList word = new ByteArrayList();
        word.add((byte)95);
        int m = this.NUM_CHARS_TO_EXAMINE > 0 ? Math.min(input.size(), this.NUM_CHARS_TO_EXAMINE) : input.size();
        for (int i = 0; i < m; ++i) {
            int wordLength;
            byte b = input.getByte(i);
            if (this.wordSeparators.indexOf(b) == -1) {
                word.add(b);
                continue;
            }
            word.add((byte)95);
            int length = wordLength = word.size();
            for (int k = 0; k < wordLength; ++k) {
                byte[] x = word.elements();
                if (length > 4) {
                    hash.inc(new ByteArrayList(x, k, 5).hashCode(), 1);
                }
                if (length > 3) {
                    hash.inc(new ByteArrayList(x, k, 4).hashCode(), 1);
                }
                if (length > 2) {
                    hash.inc(new ByteArrayList(x, k, 3).hashCode(), 1);
                }
                if (length > 1) {
                    hash.inc(new ByteArrayList(x, k, 2).hashCode(), 1);
                }
                hash.inc(new ByteArrayList(x, k, 1).hashCode(), 1);
                --length;
            }
            word.clear();
            word.add((byte)95);
        }
        int[] ngrams = hash.getOrderedKeysByScore();
        int n = this.USE_TOPMOST_NGRAMS > 0 ? Math.min(ngrams.length, this.USE_TOPMOST_NGRAMS) : ngrams.length;
        for (int k = 0; k < n; ++k) {
            try {
                languageModel.add(ngrams[k], hash.get(ngrams[k]));
                continue;
            }
            catch (IllegalArgumentException e) {
                System.err.println(e);
                System.err.println("WARNING: resulting language-model will be very likely invalid!");
                break;
            }
        }
        long endTime = System.currentTimeMillis();
        System.err.println("time taken to create language-model from input: " + (double)(endTime - startTime) / 1000.0 + "s");
        return languageModel;
    }

    public int calcDistance(LanguageModel lang1, LanguageModel lang2) {
        int distance = 0;
        int n = lang1.size();
        for (int i = 0; i < n; ++i) {
            int val = lang1.getNgram(i);
            int x = lang2.getPos(val);
            if (x != -1) {
                distance += Math.abs(x - i);
                continue;
            }
            distance += this.USE_TOPMOST_NGRAMS;
        }
        return distance;
    }

    public void loadLanguages(String path) throws IOException, FileNotFoundException {
        if (this.language == null) {
            long startTime = System.currentTimeMillis();
            File[] files = new File(path).listFiles();
            int n = files.length;
            this.language = new LanguageModel[n];
            this.languageName = new String[n];
            if (n == 0) {
                System.err.println("WARNING: no language-model files were found in the specified path (" + path + "). Please check.");
            }
            for (int i = 0; i < n; ++i) {
                this.language[i] = new LanguageModel();
                this.languageName[i] = files[i].getName();
                DataInputStream dis = new DataInputStream((InputStream)new FastBufferedInputStream((InputStream)new FileInputStream(files[i]), 16384));
                for (int k = 0; k < this.USE_TOPMOST_NGRAMS; ++k) {
                    try {
                        int input = dis.readInt();
                        int ngramFreq = dis.readInt();
                        this.language[i].add(input, ngramFreq);
                        continue;
                    }
                    catch (EOFException e) {
                        break;
                    }
                    catch (IllegalArgumentException e) {
                        System.err.println(e);
                        break;
                    }
                }
                dis.close();
            }
            long endTime = System.currentTimeMillis();
            System.err.println("time taken to load all available language-models: " + (double)(endTime - startTime) / 1000.0 + "s");
        }
        if (this.language == null || this.language.length == 0) {
            System.err.println("No language-model loaded.");
            return;
        }
    }

    public List findLanguage(ByteArrayList input) {
        ArrayList<String> ret = new ArrayList<String>();
        LanguageModel inputLM = this.createLanguageModel(input);
        try {
            this.loadLanguages(this.LANGUAGE_MODELS_DIR);
        }
        catch (Exception e) {
            System.err.println("An exception was thrown when trying to load languages. Returning null.");
            e.printStackTrace(System.err);
            return null;
        }
        long startTime = System.currentTimeMillis();
        int n = this.language.length;
        final int[] prob = new int[n];
        final int[] langIndex = new int[n];
        for (int i = 0; i < n; ++i) {
            prob[i] = this.calcDistance(inputLM, this.language[i]);
            langIndex[i] = i;
        }
        IntComparator comp = new IntComparator(){

            public int compare(int i, int j) {
                if (prob[i] > prob[j]) {
                    return 1;
                }
                if (prob[i] < prob[j]) {
                    return -1;
                }
                return 0;
            }
        };
        Swapper swapper = new Swapper(){

            public void swap(int i, int j) {
                int t = prob[i];
                prob[i] = prob[j];
                prob[j] = t;
                int u = langIndex[i];
                langIndex[i] = langIndex[j];
                langIndex[j] = u;
            }
        };
        GenericSorting.mergeSort((int)0, (int)n, (IntComparator)comp, (Swapper)swapper);
        int maxProb = prob[0];
        int countAnswers = 0;
        for (int i = 0; i < n && ((float)prob[i] < this.UNKNOWN_THRESHOLD * (float)maxProb || prob[i] == 0); ++i) {
            ++countAnswers;
            ret.add(this.languageName[langIndex[i]]);
        }
        if (countAnswers > this.MAX_LANGUAGES) {
            ret.clear();
            ret.add("UNKNOWN");
        }
        long endTime = System.currentTimeMillis();
        System.err.println("time taken to effectively determine the language: " + (double)(endTime - startTime) / 1000.0 + "s");
        return ret;
    }

    public static void main(String[] args) throws IOException {
        int ch;
        boolean createNewLanguage = false;
        String newLanguageName = null;
        LanguageCategorization lc = new LanguageCategorization();
        LongOpt[] longopts = new LongOpt[]{new LongOpt("help", 0, null, 104), new LongOpt("max-languages", 1, null, 109), new LongOpt("num-chars-to-examine", 1, null, 110), new LongOpt("use-topmost-ngrams", 1, null, 116), new LongOpt("unknown-threshold", 1, null, 117), new LongOpt("languageModel-dir", 1, null, 100), new LongOpt("create-new-languageModel", 0, null, 99)};
        Getopt g = new Getopt("LanguageCategorization", args, "m:n:u:n:t:d:ch", longopts);
        g.setOpterr(true);
        block12: while ((ch = g.getopt()) != -1) {
            switch (ch) {
                case 104: {
                    System.err.println("Usage: LanguageCategorization [OPTIONS]");
                    System.err.println("Determines the language in which stdin text is written.");
                    System.err.println("");
                    System.err.println("Optional arguments:");
                    System.err.println("  -m, --max-languages            the maximum number of languages to be determined (default: " + lc.getMaxLanguages() + ")");
                    System.err.println("  -n, --num-chars-to-examine     the number of characters to examine in the input (default: " + lc.getNumCharsToExamine() + ")");
                    System.err.println("  -t, --use-topmost-ngrams       forces the usage of n-grams up to this length (default: any length)");
                    System.err.println("  -u, --unknown-threshold        determines how much worse result must be in order not to be mentioned as an alternative (default: " + lc.getUnknownThreshold() + ")");
                    System.err.println("  -d, --languageModel-dir        use the given folder as the directory where to store/retrieve language-model files");
                    System.err.println("  -c, --create-new-languageModel creates a new language-model using the input text. The argument value is used as the name for the new language. Output goes to stdout");
                    System.err.println("");
                    System.err.println("Help:");
                    System.err.println("  -h, --help                     print this help screen");
                    System.err.println("");
                    return;
                }
                case 109: {
                    lc.setMaxLanguages(Integer.parseInt(g.getOptarg()));
                    continue block12;
                }
                case 110: {
                    lc.setNumCharsToExamine(Integer.parseInt(g.getOptarg()));
                    continue block12;
                }
                case 116: {
                    lc.setUseTopmostNgrams(Integer.parseInt(g.getOptarg()));
                    continue block12;
                }
                case 117: {
                    lc.setUnknownThreshold(Float.parseFloat(g.getOptarg()));
                    continue block12;
                }
                case 100: {
                    lc.setLanguageModelsDir(g.getOptarg());
                    continue block12;
                }
                case 99: {
                    createNewLanguage = true;
                    newLanguageName = g.getOptarg();
                    continue block12;
                }
                case 63: {
                    return;
                }
            }
        }
        ByteArrayList input = new ByteArrayList();
        DataInputStream dis = new DataInputStream((InputStream)new FastBufferedInputStream(System.in, 16384));
        try {
            while (true) {
                input.add(dis.readByte());
            }
        }
        catch (EOFException e) {
            dis.close();
            if (createNewLanguage) {
                LanguageModel languageModel = lc.createLanguageModel(input);
                DataOutputStream dos = new DataOutputStream((OutputStream)new FastBufferedOutputStream((OutputStream)System.out, 16384));
                for (int i = 0; i < languageModel.size(); ++i) {
                    int ngram = languageModel.getNgram(i);
                    int freq = languageModel.getFreq(i);
                    dos.writeInt(ngram);
                    dos.writeInt(freq);
                }
                dos.close();
            } else {
                System.out.println("probable language(s): " + lc.findLanguage(input));
            }
            return;
        }
    }
}

