/*
 * Decompiled with CFR 0.152.
 */
package net.olivo.lc4j;

import cern.colt.list.IntArrayList;
import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import net.olivo.lc4j.LanguageCategorization;
import net.olivo.lc4j.LanguageModel;

/**
 * Command-line tool that clusters the files of a directory bottom-up.
 *
 * <p>One language model is built per file. The two closest clusters are then
 * merged repeatedly (their language models are merged as well) until a single
 * cluster is left. The cluster layout after every merge is written to standard
 * output; progress and diagnostics go to standard error.
 */
public class AgglomerativeClustering {
    /** Compile-time switch for the verbose progress trace written to standard error. */
    private static final boolean DEBUG = true;
    /** Size in bytes of the read buffer used while loading the input files. */
    private static final int BUFFER_SIZE = 16384;

    /**
     * Entry point: parses the command line, loads one language model per
     * regular file found in the given directory and runs the clustering.
     *
     * @param args command-line arguments; the first non-option argument is the
     *             directory whose files are clustered
     * @throws IOException if one of the input files cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("The path where the files to be clustered are must be specified on the command line.");
            System.err.println("See help for more details on usage.");
            return;
        }
        LongOpt[] longopts = new LongOpt[]{new LongOpt("help", LongOpt.NO_ARGUMENT, null, 'h')};
        Getopt g = new Getopt("AgglomerativeClustering", args, "h", longopts);
        g.setOpterr(true);
        int ch;
        while ((ch = g.getopt()) != -1) {
            switch (ch) {
                case 'h': {
                    printUsage();
                    return;
                }
                case '?': {
                    // Unknown option: Getopt has already reported it (opterr is enabled).
                    return;
                }
                default: {
                    break;
                }
            }
        }
        // Take the path from the first non-option argument rather than from
        // args[0], which may be an option or the "--" separator.
        int pathIndex = g.getOptind();
        if (pathIndex >= args.length) {
            System.err.println("The path where the files to be clustered are must be specified on the command line.");
            System.err.println("See help for more details on usage.");
            return;
        }
        String path = args[pathIndex];

        File[] files = listRegularFiles(path);
        if (files == null) {
            System.err.println("not a readable directory: " + path);
            return;
        }

        LanguageCategorization lc = new LanguageCategorization();
        System.err.println("loading language models from files in " + path);
        int n = files.length;
        LanguageModel[] lm = new LanguageModel[n];
        for (int i = 0; i < n; ++i) {
            lm[i] = lc.createLanguageModel(readFile(files[i]));
        }
        System.err.println("all language-models loaded");

        cluster(lc, lm, files);
    }

    /** Prints the usage screen to standard error. */
    private static void printUsage() {
        System.err.println("Usage: AgglomerativeClustering [OPTIONS] path");
        System.err.println("Cluster the files found in the given path.");
        System.err.println("");
        System.err.println("Required arguments:");
        System.err.println("  path                           the path where the files to be clustered are");
        System.err.println("");
        System.err.println("Help:");
        System.err.println("  -h, --help                     print this help screen");
        System.err.println("");
    }

    /**
     * Returns the regular files directly inside {@code path}, or {@code null}
     * when {@code path} cannot be listed (it does not exist, is not a
     * directory, or an I/O error occurred). Sub-directories are skipped
     * because they cannot be opened as input files.
     */
    private static File[] listRegularFiles(String path) {
        File[] entries = new File(path).listFiles();
        if (entries == null) {
            return null;
        }
        int count = 0;
        for (int i = 0; i < entries.length; ++i) {
            if (entries[i].isFile()) {
                entries[count++] = entries[i];
            }
        }
        return Arrays.copyOf(entries, count);
    }

    /**
     * Reads the complete content of {@code file} into a byte list. The
     * underlying stream is closed even when reading fails.
     */
    private static ByteArrayList readFile(File file) throws IOException {
        ByteArrayList content = new ByteArrayList();
        FileInputStream raw = new FileInputStream(file);
        try {
            InputStream in = new FastBufferedInputStream((InputStream)raw, BUFFER_SIZE);
            int b;
            while ((b = in.read()) != -1) {
                content.add((byte)b);
            }
        }
        finally {
            raw.close();
        }
        return content;
    }

    /**
     * Runs the agglomerative clustering and prints the clusters after each merge.
     *
     * <p>Distances are kept only in the upper triangle of the matrix
     * ({@code distance[i][j]} with {@code i < j}). A cluster is alive while its
     * entry in {@code lm} is non-null; after a merge the surviving cluster
     * keeps the lower... index {@code minI} and the absorbed one is set to null.
     *
     * @param lc    categorizer used to compute distances between models
     * @param lm    one language model per file; modified in place
     * @param files the input files, index-aligned with {@code lm}
     */
    private static void cluster(LanguageCategorization lc, LanguageModel[] lm, File[] files) {
        int n = lm.length;
        int useTopmostNgrams = lc.getUseTopmostNgrams();
        int[][] distance = new int[n][n];
        IntArrayList[] clusters = new IntArrayList[n];
        for (int i = 0; i < n; ++i) {
            clusters[i] = new IntArrayList();
            clusters[i].add(i);
        }
        if (n < 2) {
            // Nothing to merge.
            return;
        }
        for (int i = 0; i < n; ++i) {
            for (int j = i + 1; j < n; ++j) {
                distance[i][j] = lc.calcDistance(lm[i], lm[j]);
                if (DEBUG) {
                    System.err.println("initializing distance <" + i + "," + j + ">: " + distance[i][j]);
                }
            }
        }
        System.err.println("all distances initialized");

        for (int numClusters = n; numClusters > 1; --numClusters) {
            long startTime = System.currentTimeMillis();

            // Find the closest pair of live clusters. A distance of zero
            // (identical models) is a valid candidate.
            int minI = -1;
            int minJ = -1;
            int minDistance = Integer.MAX_VALUE;
            for (int i = 0; i < n; ++i) {
                if (lm[i] == null) {
                    continue;
                }
                for (int j = i + 1; j < n; ++j) {
                    if (lm[j] == null) {
                        continue;
                    }
                    if (minI < 0 || distance[i][j] < minDistance) {
                        minDistance = distance[i][j];
                        minI = i;
                        minJ = j;
                    }
                }
            }
            if (minI < 0) {
                // Fewer than two live models (e.g. a model could not be created).
                System.err.println("no pair of clusters left to merge");
                return;
            }
            if (DEBUG) {
                System.err.println("minimal distance found between <" + minI + "," + minJ + ">: " + minDistance);
            }

            lm[minI] = LanguageModel.merge(lm[minI], lm[minJ], useTopmostNgrams);
            clusters[minI].addAllOf(clusters[minJ]);
            lm[minJ] = null;
            clusters[minJ].clear();

            // Refresh every distance that involves the merged cluster. The
            // value must be stored in the upper triangle, because that is the
            // only part of the matrix the search above reads. The argument
            // order (lower index first) mirrors the initialization.
            for (int k = 0; k < n; ++k) {
                if (lm[k] == null || k == minI) {
                    continue;
                }
                if (k < minI) {
                    distance[k][minI] = lc.calcDistance(lm[k], lm[minI]);
                } else {
                    distance[minI][k] = lc.calcDistance(lm[minI], lm[k]);
                }
            }

            long endTime = System.currentTimeMillis();
            int step = n - numClusters + 1;
            System.err.println("step " + step + ": merging clusters <" + minI + "," + minJ + ">");
            System.err.println("time taken: " + (double)(endTime - startTime) / 1000.0 + "s");
            System.out.println("step " + step + " - " + (numClusters - 1) + " clusters left:");
            printClusters(clusters, files);
            System.out.println("***");
        }
    }

    /**
     * Prints every non-empty cluster on its own tab-indented line, listing the
     * names of the files it contains.
     */
    private static void printClusters(IntArrayList[] clusters, File[] files) {
        for (int i = 0; i < clusters.length; ++i) {
            if (clusters[i].size() == 0) {
                continue;
            }
            System.out.print("\t");
            for (int j = 0; j < clusters[i].size(); ++j) {
                System.out.print("<" + files[clusters[i].get(j)].getName() + "> ");
            }
            System.out.println("");
        }
    }
}

