/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.share.mccallum.ner;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import java.util.regex.Pattern;

/**
 * Pipe that turns one sentence of four-column, space-separated text
 * ({@code word tag phrase label}, one token per line) into a
 * {@link TokenSequence} of feature-bearing tokens (data) and a parallel
 * {@link LabelSequence} (target).
 *
 * <p>An empty line inside the sentence text is emitted as a boundary token whose
 * word is {@code -<S>-} and whose label is {@code O}.
 *
 * <p>Labels of the form {@code I-X} are rewritten to {@code B-X} whenever the
 * preceding input label does not carry the same {@code X} suffix, so every run of
 * same-typed labels starts with a {@code B-} label in the output.
 */
public class ConllNer2003Sentence2TokenSequence
extends Pipe {
    /** Word suffixes that can be emitted as spelling features. */
    static final String[] endings = new String[]{"ing", "ed", "ogy", "s", "ly", "ion", "tion", "ity", "ies"};
    /** One whole-word pattern per entry of {@link #endings}; filled in the static initializer. */
    static Pattern[] endingPatterns = new Pattern[endings.length];
    /**
     * Feature names indexed {@code [k][j][i]}: {@code k == 1} puts a {@code "-"} in
     * front of the offset {@code j} (0..2), and {@code i} indexes {@link #endings}.
     * For example {@code [0][0][0]} is {@code "W0=<ENDing>"} and {@code [1][2][0]} is
     * {@code "W-2=<ENDing>"}. Only {@code [0][0][i]} is used by this class.
     */
    static final String[][][] endingNames = new String[2][3][endings.length];

    /** Word and label used for the boundary token produced by an empty input line. */
    private static final String BOUNDARY = "-<S>-";
    private static final String OUTSIDE_LABEL = "O";

    /** Matches labels of the form {@code I-...}. */
    private static final Pattern INSIDE_LABEL = Pattern.compile("I-.*");

    /**
     * Digit/word collapse rules, tried in order; the first pattern that matches the
     * whole word wins and the word is replaced by the entry at the same index of
     * {@link #COLLAPSE_REPLACEMENTS}. Order matters: the generic digits rule must stay
     * after the year rules.
     */
    private static final Pattern[] COLLAPSE_PATTERNS = new Pattern[]{
        Pattern.compile("19\\d\\d"),
        Pattern.compile("19\\d\\ds"),
        Pattern.compile("19\\d\\d-\\d+"),
        // Digits, a literal backslash, a slash, one digit (e.g. "1\/2").
        Pattern.compile("\\d+\\\\/\\d"),
        Pattern.compile("\\d[\\d,\\.]*"),
        Pattern.compile("19\\d\\d-\\d\\d-\\d\\d"),
        Pattern.compile(".*-led"),
        Pattern.compile(".*-sponsored"),
    };
    private static final String[] COLLAPSE_REPLACEMENTS = new String[]{
        "<YEAR>",
        "<YEARDECADE>",
        "<YEARSPAN>",
        "<FRACTION>",
        "<DIGITS>",
        "<DATELINEDATE>",
        "<LED>",
        "<LED>",
    };

    static {
        // Build the shared lookup tables exactly once, at class-load time, so they never
        // depend on a constructor having run and are not rewritten on every instantiation.
        for (int i = 0; i < endings.length; ++i) {
            endingPatterns[i] = Pattern.compile(".*" + endings[i] + "$");
            for (int j = 0; j < 3; ++j) {
                for (int k = 0; k < 2; ++k) {
                    endingNames[k][j][i] = "W" + (k == 1 ? "-" : "") + j + "=<END" + endings[i] + ">";
                }
            }
        }
    }

    /** When true, the processed {@code word label} lines are stored as the instance source. */
    boolean saveSource;
    /** Not consulted by this class. */
    boolean doConjunctions;
    /** When true, adds a {@code T=<tag>} feature to every token. */
    boolean doTags;
    /** When true, adds a {@code P=<phrase>} feature to every token. */
    boolean doPhrases;
    /** When true, adds a word-ending feature for every entry of {@link #endings} the word ends with. */
    boolean doSpelling;
    /** When true, years, digit strings, fractions and similar are collapsed to placeholder words. */
    boolean doDigitCollapses;
    /** When true, words are lower-cased before tokens are created. */
    boolean doDowncasing;

    /**
     * Creates a pipe with tag features, phrase features and digit collapsing enabled,
     * and source saving, conjunctions, spelling features and downcasing disabled.
     */
    public ConllNer2003Sentence2TokenSequence() {
        super(null, new LabelAlphabet());
        this.saveSource = false;
        this.doConjunctions = false;
        this.doTags = true;
        this.doPhrases = true;
        this.doSpelling = false;
        this.doDigitCollapses = true;
        this.doDowncasing = false;
    }

    /**
     * Creates a pipe with or without the extra features.
     *
     * @param extraFeatures if {@code true}, same configuration as the no-argument
     *        constructor; if {@code false}, tags, phrases, spelling, conjunctions and
     *        digit collapsing are all disabled and downcasing is enabled
     */
    public ConllNer2003Sentence2TokenSequence(boolean extraFeatures) {
        this();
        if (!extraFeatures) {
            this.doTags = false;
            this.doPhrases = false;
            this.doSpelling = false;
            this.doConjunctions = false;
            this.doDigitCollapses = false;
            this.doDowncasing = true;
        }
    }

    /**
     * Converts the sentence text held in {@code carrier} into token and label sequences.
     *
     * <p>The carrier's data must be a {@code String} with one token per
     * {@code '\n'}-separated line. Each non-empty line must consist of exactly four
     * fields separated by single spaces: word, tag, phrase and label. On return the
     * carrier's data is the {@link TokenSequence}, its target is the
     * {@link LabelSequence}, and, if {@code saveSource} is set, its source is a
     * {@code StringBuffer} of {@code "word label\n"} lines using the processed word
     * and the emitted label.
     *
     * @param carrier the instance to transform in place
     * @return the same {@code carrier}
     * @throws IllegalStateException if a non-empty line does not have exactly four fields
     */
    @Override
    public Instance pipe(Instance carrier) {
        String sentenceLines = (String)carrier.getData();
        String[] lines = sentenceLines.split("\n");
        TokenSequence data = new TokenSequence(lines.length);
        LabelSequence target = new LabelSequence((LabelAlphabet)this.getTargetAlphabet(), lines.length);
        StringBuffer source = this.saveSource ? new StringBuffer() : null;
        // Sentinel that can never share a type suffix with a real label.
        String prevLabel = "NOLABEL";
        for (String line : lines) {
            String word;
            String tag;
            String phrase;
            String label;
            if (line.length() != 0) {
                String[] features = line.split(" ");
                if (features.length != 4) {
                    throw new IllegalStateException("Line \"" + line + "\" doesn't have four elements");
                }
                word = features[0];
                tag = features[1];
                phrase = features[2];
                label = features[3];
            } else {
                word = BOUNDARY;
                tag = BOUNDARY;
                phrase = BOUNDARY;
                label = OUTSIDE_LABEL;
            }
            if (this.doDigitCollapses) {
                word = collapseWord(word);
            }
            if (this.doDowncasing) {
                // Locale-independent so feature strings do not vary with the JVM default locale.
                word = word.toLowerCase(java.util.Locale.ROOT);
            }
            Token token = new Token(word);
            if (this.doSpelling) {
                for (int j = 0; j < endings.length; ++j) {
                    if (endingPatterns[j].matcher(word).matches()) {
                        token.setFeatureValue(endingNames[0][0][j], 1.0);
                    }
                }
            }
            if (this.doTags) {
                token.setFeatureValue("T=" + tag, 1.0);
            }
            if (this.doPhrases) {
                token.setFeatureValue("P=" + phrase, 1.0);
            }
            String emittedLabel = toBeginLabelIfNeeded(prevLabel, label);
            // The comparison for the next token uses the label as it appeared in the input.
            prevLabel = label;
            data.add(token);
            target.add(emittedLabel);
            if (source != null) {
                source.append(word);
                source.append(" ");
                source.append(emittedLabel);
                source.append("\n");
            }
        }
        carrier.setData(data);
        carrier.setTarget(target);
        if (source != null) {
            carrier.setSource(source);
        }
        return carrier;
    }

    /** Returns the placeholder for the first collapse rule matching the whole word, or the word itself. */
    private static String collapseWord(String word) {
        for (int i = 0; i < COLLAPSE_PATTERNS.length; ++i) {
            if (COLLAPSE_PATTERNS[i].matcher(word).matches()) {
                return COLLAPSE_REPLACEMENTS[i];
            }
        }
        return word;
    }

    /**
     * Rewrites an {@code I-X} label to {@code B-X} when the previous input label does not
     * end in the same {@code X} (text after the first two characters); other labels are
     * returned unchanged.
     */
    private static String toBeginLabelIfNeeded(String prevLabel, String label) {
        if (!INSIDE_LABEL.matcher(label).matches()) {
            return label;
        }
        boolean continuesPrevious = prevLabel.length() >= 3
                && prevLabel.substring(2).equals(label.substring(2));
        return continuesPrevious ? label : "B" + label.substring(1);
    }
}

