/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.tagger.maxent;

import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.tagger.maxent.DataWordTag;
import edu.stanford.nlp.tagger.maxent.GlobalHolder;
import edu.stanford.nlp.tagger.maxent.History;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.StringTokenizer;

/**
 * Reads a file of delimiter-tagged tokens (for example {@code word_TAG}) and
 * records one {@link DataWordTag} per token.
 *
 * <p>Every line of the input file is treated as one sentence. Tokens are
 * separated by whitespace, and each token is split into word and tag at the
 * first occurrence of the delimiter. An end-of-sentence marker
 * ({@link #eosWord} / {@link #eosTag}) is appended after the tokens of each
 * line, so an empty line still contributes one element.
 *
 * <p>While reading, every word/tag pair is also registered with
 * {@code GlobalHolder.pairs}, {@code GlobalHolder.tags} and
 * {@code GlobalHolder.dict}.
 */
public class ReadDataTagged {
    /** Path of the tagged file to read. */
    private String filename = null;
    /** One entry per token (including end-of-sentence markers), in file order. */
    private ArrayList<DataWordTag> v = new ArrayList<DataWordTag>();
    int wordIPos;
    /** Number of lines (sentences) read so far. */
    int numSentences = 0;
    /** Total number of elements read, counting one end-of-sentence marker per line. */
    int numElements = 0;
    static String eosWord = "EOS";
    static String eosTag = "EOS";

    /**
     * Reads the whole file immediately.
     *
     * @param filename  path of the tagged file, read as UTF-8
     * @param delimiter string separating a word from its tag inside a token
     * @throws Exception if the file cannot be opened or read, or (as a
     *                   {@link RuntimeException}) if a token lacks the delimiter
     */
    public ReadDataTagged(String filename, String delimiter) throws Exception {
        this.filename = filename;
        this.init(delimiter);
    }

    /**
     * Drops the reference to the stored elements so they can be collected.
     * {@link #get(int)} must not be called afterwards.
     */
    public void release() {
        this.v = null;
    }

    /**
     * Returns the element stored at the given position.
     *
     * @param index zero-based position, in the order the tokens were read
     * @return the element recorded for that position
     */
    public DataWordTag get(int index) {
        return this.v.get(index);
    }

    /**
     * Parses the file line by line and fills {@link #v} and the
     * {@code GlobalHolder} structures.
     *
     * @param delimiter string separating a word from its tag inside a token
     * @throws Exception if the file cannot be opened or read, or (as a
     *                   {@link RuntimeException}) if a token lacks the delimiter
     */
    private void init(String delimiter) throws Exception {
        ArrayList<String> words = new ArrayList<String>();
        ArrayList<String> tags = new ArrayList<String>();
        // Position of the first element of the current sentence across all sentences read.
        int prevPos = 0;
        int maxLen = Integer.MIN_VALUE;
        int minLen = Integer.MAX_VALUE;
        BufferedReader in = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(this.filename), "UTF-8"));
        try {
            String s;
            while ((s = in.readLine()) != null) {
                // Number of real tokens on this line (the end-of-sentence marker is not counted).
                int endPos = 0;
                StringTokenizer st = new StringTokenizer(s);
                while (st.hasMoreTokens()) {
                    String token = st.nextToken();
                    int indexUnd = token.indexOf(delimiter);
                    if (indexUnd < 0) {
                        throw new RuntimeException("can't find delimiter \"" + delimiter + "\" in word \"" + token + "\" line " + this.numSentences);
                    }
                    words.add(token.substring(0, indexUnd));
                    // Skip the whole delimiter, not just one character, so that
                    // multi-character delimiters do not leak into the tag.
                    tags.add(token.substring(indexUnd + delimiter.length()));
                    ++endPos;
                }
                if (endPos > maxLen) {
                    maxLen = endPos;
                }
                if (endPos < minLen) {
                    minLen = endPos;
                }
                words.add(eosWord);
                tags.add(eosTag);
                this.numElements = this.numElements + endPos + 1;
                for (int i = 0; i < endPos + 1; ++i) {
                    History h = new History(prevPos, prevPos + endPos, prevPos + i);
                    String tag = tags.get(i);
                    String word = words.get(i);
                    WordTag wT = new WordTag();
                    wT.setWord(word);
                    wT.setTag(tag);
                    GlobalHolder.pairs.add(wT);
                    int y = GlobalHolder.tags.add(tag);
                    this.v.add(new DataWordTag(h, y));
                    GlobalHolder.dict.add(word, tag);
                }
                ++this.numSentences;
                prevPos += endPos + 1;
                words.clear();
                tags.clear();
                if (this.numSentences % 100000 == 0) {
                    System.err.println("[still reading] read " + this.numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words");
                }
            }
        } finally {
            // Close the reader even when parsing fails, so the file handle is not leaked.
            in.close();
        }
        System.err.println("read " + this.numSentences + " sentences, min " + minLen + " words, max " + maxLen + " words");
    }

    /**
     * Returns the total number of elements read, including one
     * end-of-sentence marker per line.
     *
     * @return the element count
     */
    public int getSize() {
        return this.numElements;
    }

    /** Intentionally empty entry point. */
    public static void main(String[] args) {
    }
}

