/*
 * Decompiled with CFR 0.152.
 */
package gr.forth.ics.isl.stellaclustering.lexicalanalyzer;

import gr.forth.ics.isl.stellaclustering.lexicalanalyzer.LexicalAnalyzerProperties;
import gr.forth.ics.isl.stellaclustering.stemmer.Stemmer;
import gr.forth.ics.isl.stellaclustering.util.Pair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Accumulates validated terms together with a per-term weight sum and,
 * optionally, the offsets at which each term was appended to an internal text
 * buffer.
 *
 * <p>Terms are filtered according to the supplied
 * {@link LexicalAnalyzerProperties} and, when the properties request it, stemmed
 * before being used as map keys. The class keeps plain mutable state and does no
 * synchronisation of its own.
 */
public class FileTerms {
    /** Term -> (accumulated weight, offsets recorded for the term). */
    private final HashMap<String, Pair<Float, ArrayList<Integer>>> wordsMap;
    /** Filtering / stemming configuration consulted on every call. */
    private final LexicalAnalyzerProperties properties;
    /** Largest accumulated weight seen so far; used as the divisor in {@link #normalizeTF()}. */
    private float maxTF = 0.0f;
    /** Number of terms that were actually counted by {@link #addToMap(String, int)}. */
    private int numberOfWords = 0;
    /** Accepted (lower-cased, unstemmed) terms, each followed by a single space. */
    private final StringBuffer sb;
    /** Matches any decimal digit anywhere in a term. */
    Pattern patternTermsWithDigits = Pattern.compile("\\d");
    /** Matches any ASCII punctuation character anywhere in a term. */
    Pattern patternTermsWithPunct = Pattern.compile("\\p{Punct}");

    /**
     * Creates an empty term collection.
     *
     * @param properties configuration that drives term validation and stemming
     */
    public FileTerms(LexicalAnalyzerProperties properties) {
        this.wordsMap = new HashMap<String, Pair<Float, ArrayList<Integer>>>();
        this.properties = properties;
        this.sb = new StringBuffer();
    }

    /**
     * Decides whether a term passes every filter enabled in the properties.
     *
     * <p>The length bounds are exclusive: a term is rejected when its length is
     * less than or equal to {@code getMinWord()} or greater than or equal to
     * {@code getMaxWord()}. The "starts with number" and "Greek" filters inspect
     * only the first character of the term.
     *
     * @param str candidate term, may be {@code null}
     * @return {@code true} if the term may be added, {@code false} otherwise
     */
    private boolean validTermToAdd(String str) {
        if (str == null || str.length() == 0) {
            return false;
        }
        int length = str.length();
        if (length <= this.properties.getMinWord() || length >= this.properties.getMaxWord()) {
            return false;
        }
        if (this.properties.getBlockStopWords() && this.properties.isStopWord(str)) {
            return false;
        }
        char first = str.charAt(0);
        if (this.properties.getBlockTermsStartingWithNumbers() && Character.isDigit(first)) {
            return false;
        }
        if (this.properties.getBlockTermsWithPunctuation() && this.patternTermsWithPunct.matcher(str).find()) {
            return false;
        }
        if (this.properties.getBlockTermsWithNumbers() && this.patternTermsWithDigits.matcher(str).find()) {
            return false;
        }
        // U+0370..U+03FF is the Unicode "Greek and Coptic" block.
        if (this.properties.getBlockGreekTerms() && first >= '\u0370' && first <= '\u03ff') {
            return false;
        }
        return !(this.properties.getBlockTermsWithManySameCharacters() && this.hasManySameCharacters(str));
    }

    /**
     * Returns the live term map (not a copy).
     *
     * @return map from term to its accumulated weight and recorded offsets
     */
    public HashMap<String, Pair<Float, ArrayList<Integer>>> getWordsMap() {
        return this.wordsMap;
    }

    /**
     * Returns the text built from all accepted terms.
     *
     * @return the accepted lower-cased, unstemmed terms, each followed by one space
     */
    public String getText() {
        return this.sb.toString();
    }

    /**
     * Validates a term and, if accepted, adds {@code weight} to its accumulated
     * weight.
     *
     * <p>The term is lower-cased (locale-independently) before validation. An
     * accepted term is appended to the internal text before stemming, so a term
     * whose stem turns out to be empty still appears in {@link #getText()} but is
     * neither counted nor stored in the map.
     *
     * @param string raw term; {@code null} and empty strings are ignored
     * @param weight amount added to the term's accumulated weight
     */
    public void addToMap(String string, int weight) {
        if (string == null || string.length() == 0) {
            return;
        }
        // Locale.ROOT keeps lower-casing independent of the JVM default locale.
        String lowered = string.toLowerCase(Locale.ROOT);
        if (!this.validTermToAdd(lowered)) {
            return;
        }
        // sb.length() avoids copying the whole buffer just to measure it.
        int offset = this.sb.length() + 1;
        this.sb.append(lowered).append(' ');

        String term = this.stemAndRecord(lowered);
        if (term == null) {
            return;
        }

        Pair<Float, ArrayList<Integer>> existing = this.wordsMap.get(term);
        ++this.numberOfWords;

        float previousTf = 0.0f;
        ArrayList<Integer> positions;
        if (existing == null) {
            positions = new ArrayList<Integer>();
        } else {
            previousTf = existing.getFirst().floatValue();
            positions = existing.getSecond();
        }

        float updatedTf = previousTf + (float) weight;
        if (updatedTf > this.maxTF) {
            this.maxTF = updatedTf;
        }
        if (this.properties.getStoreTermPos()) {
            positions.add(offset);
        }
        Pair<Float, ArrayList<Integer>> updated = Pair.from(Float.valueOf(updatedTf), positions);
        this.wordsMap.put(term, updated);
    }

    /**
     * Returns how many terms have been counted so far.
     *
     * @return number of terms that passed validation and stemming in
     *         {@link #addToMap(String, int)}
     */
    public int getNumberOfWords() {
        return this.numberOfWords;
    }

    /**
     * Prints every entry of {@code map} to standard output, one line per term, in
     * the form {@code term<TAB>TF: value<TAB>Positions:p1,p2,}. The positions are
     * listed only when the properties say that term positions are stored.
     *
     * @param map map to print; {@code null} prints nothing
     */
    public void printMap(Map<String, Pair<Float, ArrayList<Integer>>> map) {
        if (map == null) {
            return;
        }
        for (Map.Entry<String, Pair<Float, ArrayList<Integer>>> entry : map.entrySet()) {
            Pair<Float, ArrayList<Integer>> pair = entry.getValue();
            Float tf = pair.getFirst();
            ArrayList<Integer> positions = pair.getSecond();

            StringBuilder line = new StringBuilder();
            line.append(entry.getKey()).append("\t");
            line.append("TF: ").append(tf).append("\tPositions:");
            if (this.properties.getStoreTermPos()) {
                for (Integer position : positions) {
                    line.append(position).append(",");
                }
            }
            System.out.println(line);
        }
    }

    /**
     * Tells whether {@code str} contains a run of identical consecutive
     * characters whose length reaches {@code getMaxSameCharacters()}.
     *
     * <p>A run is only detected at the moment it grows to exactly the configured
     * limit, so a limit below 2 never reports a match.
     *
     * @param str term to inspect, may be {@code null}
     * @return {@code true} if such a run exists
     */
    private boolean hasManySameCharacters(String str) {
        // The empty check protects charAt(0) when the configured limit is <= 0.
        if (str == null || str.isEmpty() || str.length() < this.properties.getMaxSameCharacters()) {
            return false;
        }
        int run = 1;
        char previous = str.charAt(0);
        for (int i = 1; i < str.length(); ++i) {
            char current = str.charAt(i);
            if (current == previous) {
                if (++run == this.properties.getMaxSameCharacters()) {
                    return true;
                }
            } else {
                previous = current;
                run = 1;
            }
        }
        return false;
    }

    /**
     * Divides every accumulated weight by the largest accumulated weight seen so
     * far, so that the largest value becomes (approximately) 1.
     *
     * <p>Nothing happens when the map is empty or the recorded maximum is not
     * positive; this avoids producing NaN or infinite values from a division by
     * zero. After normalising, the recorded maximum is reset to 1 so that calling
     * this method again does not rescale the values a second time.
     */
    public void normalizeTF() {
        if (this.wordsMap.isEmpty() || !(this.maxTF > 0.0f)) {
            return;
        }
        float inverseOfTf = 1.0f / this.maxTF;
        for (Pair<Float, ArrayList<Integer>> pair : this.wordsMap.values()) {
            float scaled = pair.getFirst().floatValue() * inverseOfTf;
            pair.setFirst(Float.valueOf(scaled));
        }
        this.maxTF = 1.0f;
    }

    /**
     * Increments, in the properties' unstemmed-words table, the counter of the
     * original form {@code stringOrig} under the stem {@code string}.
     *
     * @param stringOrig the term before stemming
     * @param string     the stem produced for {@code stringOrig}
     */
    private void addUnstemmed(String stringOrig, String string) {
        TreeMap<String, TreeMap<String, Integer>> unstemmedWords = this.properties.getUnstemmedWords();
        TreeMap<String, Integer> variants = unstemmedWords.get(string);
        if (variants == null) {
            variants = new TreeMap<String, Integer>();
            unstemmedWords.put(string, variants);
        }
        Integer count = variants.get(stringOrig);
        variants.put(stringOrig, Integer.valueOf(count == null ? 1 : count.intValue() + 1));
    }

    /**
     * Applies the stemmer when the properties enable it, records the unstemmed
     * form when requested, and rejects terms whose final form is empty.
     *
     * @param token an already validated, non-null term
     * @return the term to use as a map key, or {@code null} if it ended up empty
     */
    private String stemAndRecord(String token) {
        String term = token;
        if (this.properties.getUseStemmer()) {
            term = Stemmer.Stem(token);
            if (this.properties.getHoldBestUnstemmed()) {
                this.addUnstemmed(token, term);
            }
        }
        if (term.isEmpty()) {
            System.out.println("STEMMER ERROR! Trying to insert stemmed empty string. Unstemmed: " + token);
            return null;
        }
        return term;
    }

    /**
     * Validates {@code token} and returns the form that would be used as a map
     * key, without touching the term map, the text buffer or the word counter.
     * Unlike {@link #addToMap(String, int)}, the token is not lower-cased first.
     *
     * @param token candidate term, may be {@code null}
     * @return the (optionally stemmed) term, or {@code null} if the token is
     *         rejected or its stem is empty
     */
    public String getValidToken(String token) {
        if (!this.validTermToAdd(token)) {
            return null;
        }
        return this.stemAndRecord(token);
    }
}

