/*
 * Decompiled with CFR 0.152.
 */
package weka.classifiers.bayes;

import java.util.Enumeration;
import weka.classifiers.AbstractClassifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.SpecialFunctions;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Multinomial Naive Bayes classifier over non-negative numeric attributes
 * (typically word counts) with a nominal class.
 *
 * <p>Training accumulates weighted per-class word counts with add-one
 * (Laplace) smoothing and stores them as log-probabilities; class priors are
 * smoothed the same way. Prediction sums {@code count * log P(word | class)}
 * over the stored values of an instance and combines the result with the
 * class priors.
 */
public class NaiveBayesMultinomial
        extends AbstractClassifier
        implements WeightedInstancesHandler, TechnicalInformationHandler {

    /** Serialization version identifier. */
    static final long serialVersionUID = 5932177440181257085L;

    /**
     * After training: natural log of the smoothed probability of each
     * attribute given each class, indexed {@code [class][attribute]}.
     */
    protected double[][] m_probOfWordGivenClass;

    /** After training: smoothed prior probability of each class (not in log space). */
    protected double[] m_probOfClass;

    /** Number of attributes in the training data, class attribute included. */
    protected int m_numAttributes;

    /** Number of class labels in the training data. */
    protected int m_numClasses;

    /** Cache of {@code ln(n!)} indexed by {@code n}; seeded with ln(0!) = ln(1!) = 0. */
    protected double[] m_lnFactorialCache = new double[]{0.0, 0.0};

    /** Header-only copy of the training data, used for attribute and class names. */
    protected Instances m_headerInfo;

    /**
     * Returns a description of this classifier, including its technical reference.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Class for building and using a multinomial Naive Bayes classifier. For more information see,\n\n" + this.getTechnicalInformation().toString() + "\n\nThe core equation for this classifier:\n\nP[Ci|D] = (P[D|Ci] x P[Ci]) / P[D] (Bayes rule)\n\nwhere Ci is class i and D is a document.";
    }

    /**
     * Returns the bibliographic reference for the algorithm.
     *
     * @return the technical information describing the source publication
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Andrew Mccallum and Kamal Nigam");
        result.setValue(TechnicalInformation.Field.YEAR, "1998");
        result.setValue(TechnicalInformation.Field.TITLE, "A Comparison of Event Models for Naive Bayes Text Classification");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "AAAI-98 Workshop on 'Learning for Text Categorization'");
        return result;
    }

    /**
     * Returns the capabilities of this classifier: numeric attributes, a
     * nominal class, and missing class values. Everything else is disabled.
     *
     * @return the capabilities of this classifier
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        return result;
    }

    /**
     * Trains the classifier on the given data.
     *
     * <p>The model is assembled in local variables and only stored in the
     * fields once training has fully succeeded, so a failed call leaves any
     * previously built model untouched.
     *
     * @param instances the training data; it is copied, and rows with a
     *                  missing class are removed from the copy
     * @throws Exception if the data fails the capabilities test or a weighted
     *                   attribute value is negative
     */
    @Override
    public void buildClassifier(Instances instances) throws Exception {
        this.getCapabilities().testWithFail(instances);
        // Work on a copy so that dropping rows does not affect the caller's data.
        instances = new Instances(instances);
        instances.deleteWithMissingClass();

        int numClasses = instances.numClasses();
        int numAttributes = instances.numAttributes();

        // Start every count at 1 (Laplace smoothing) so that a word unseen for
        // a class during training does not get zero probability.
        double[][] wordGivenClass = new double[numClasses][numAttributes];
        for (int c = 0; c < numClasses; ++c) {
            for (int att = 0; att < numAttributes; ++att) {
                wordGivenClass[c][att] = 1.0;
            }
        }

        double[] docsPerClass = new double[numClasses];
        double[] wordsPerClass = new double[numClasses];
        Enumeration<Instance> enumInsts = instances.enumerateInstances();
        while (enumInsts.hasMoreElements()) {
            Instance instance = enumInsts.nextElement();
            int classIndex = (int) instance.value(instance.classIndex());
            docsPerClass[classIndex] += instance.weight();
            for (int a = 0; a < instance.numValues(); ++a) {
                if (instance.index(a) == instance.classIndex() || instance.isMissingSparse(a)) {
                    continue;
                }
                double numOccurences = instance.valueSparse(a) * instance.weight();
                if (numOccurences < 0.0) {
                    throw new Exception("Numeric attribute values must all be greater or equal to zero.");
                }
                wordsPerClass[classIndex] += numOccurences;
                wordGivenClass[classIndex][instance.index(a)] += numOccurences;
            }
        }

        // Normalise the counts and keep them as logs. The denominator adds one
        // smoothing count per attribute other than the class attribute.
        for (int c = 0; c < numClasses; ++c) {
            double denominator = wordsPerClass[c] + (double) numAttributes - 1.0;
            for (int v = 0; v < numAttributes; ++v) {
                wordGivenClass[c][v] = Math.log(wordGivenClass[c][v] / denominator);
            }
        }

        // Class priors, smoothed with one extra document per class.
        double numDocs = instances.sumOfWeights() + (double) numClasses;
        double[] probOfClass = new double[numClasses];
        for (int h = 0; h < numClasses; ++h) {
            probOfClass[h] = (docsPerClass[h] + 1.0) / numDocs;
        }

        // Commit the finished model in one go.
        this.m_headerInfo = new Instances(instances, 0);
        this.m_numClasses = numClasses;
        this.m_numAttributes = numAttributes;
        this.m_probOfWordGivenClass = wordGivenClass;
        this.m_probOfClass = probOfClass;
    }

    /**
     * Computes the class membership probabilities for the given instance.
     *
     * @param instance the instance to classify
     * @return an array with one probability per class
     * @throws IllegalStateException if no model has been built yet
     * @throws Exception if the distribution cannot be computed
     */
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {
        // Fail fast with a clear message when no model has been trained.
        if (this.m_probOfClass == null || this.m_probOfWordGivenClass == null) {
            throw new IllegalStateException("No model built yet: call buildClassifier first.");
        }

        double[] logDocGivenClass = new double[this.m_numClasses];
        for (int h = 0; h < this.m_numClasses; ++h) {
            logDocGivenClass[h] = this.probOfDocGivenClass(instance, h);
        }

        // Subtract the largest log-likelihood before exponentiating so that at
        // least one term is exp(0) = 1 and the values do not all underflow.
        double max = logDocGivenClass[Utils.maxIndex(logDocGivenClass)];
        double[] probOfClassGivenDoc = new double[this.m_numClasses];
        double probOfDoc = 0.0;
        for (int i = 0; i < this.m_numClasses; ++i) {
            probOfClassGivenDoc[i] = Math.exp(logDocGivenClass[i] - max) * this.m_probOfClass[i];
            probOfDoc += probOfClassGivenDoc[i];
        }
        Utils.normalize(probOfClassGivenDoc, probOfDoc);
        return probOfClassGivenDoc;
    }

    /**
     * Returns the sum of {@code count * log P(word | class)} over the stored
     * values of the instance. The multinomial coefficient is left out; it does
     * not depend on the class.
     *
     * <p>The class attribute and missing values are skipped, mirroring what
     * {@link #buildClassifier(Instances)} does during training; a missing
     * value would otherwise turn the whole sum into NaN.
     *
     * @param inst       the instance (document) to score
     * @param classIndex index of the class to score against
     * @return the log-likelihood of the instance under the class, up to a
     *         class-independent constant
     */
    private double probOfDocGivenClass(Instance inst, int classIndex) {
        double[] logProbOfWord = this.m_probOfWordGivenClass[classIndex];
        double answer = 0.0;
        for (int i = 0; i < inst.numValues(); ++i) {
            if (inst.index(i) == inst.classIndex() || inst.isMissingSparse(i)) {
                continue;
            }
            double freqOfWordInDoc = inst.valueSparse(i);
            answer += freqOfWordInDoc * logProbOfWord[inst.index(i)];
        }
        return answer;
    }

    /**
     * Returns the natural log of {@code n!}.
     *
     * <p>Non-negative arguments are served from a cache that is grown on
     * demand using {@code ln(i!) = ln((i-1)!) + ln(i)}. Negative arguments are
     * passed straight to {@link SpecialFunctions#lnFactorial}. The cache is
     * replaced without any synchronization.
     *
     * @param n the argument of the factorial
     * @return {@code ln(n!)}
     */
    public double lnFactorial(int n) {
        if (n < 0) {
            return SpecialFunctions.lnFactorial(n);
        }
        if (this.m_lnFactorialCache.length <= n) {
            double[] tmp = new double[n + 1];
            System.arraycopy(this.m_lnFactorialCache, 0, tmp, 0, this.m_lnFactorialCache.length);
            for (int i = this.m_lnFactorialCache.length; i < tmp.length; ++i) {
                tmp[i] = tmp[i - 1] + Math.log(i);
            }
            this.m_lnFactorialCache = tmp;
        }
        return this.m_lnFactorialCache[n];
    }

    /**
     * Returns a textual description of the model: the class priors followed
     * by a table of word probabilities (converted back from logs), one row
     * per non-class attribute and one column per class.
     *
     * @return the model description
     */
    @Override
    public String toString() {
        // A local accumulator needs no synchronization, hence StringBuilder.
        StringBuilder result = new StringBuilder("The independent probability of a class\n--------------------------------------\n");
        for (int c = 0; c < this.m_numClasses; ++c) {
            result.append(this.m_headerInfo.classAttribute().value(c)).append("\t").append(Double.toString(this.m_probOfClass[c])).append("\n");
        }
        result.append("\nThe probability of a word given the class\n-----------------------------------------\n\t");
        for (int c = 0; c < this.m_numClasses; ++c) {
            result.append(this.m_headerInfo.classAttribute().value(c)).append("\t");
        }
        result.append("\n");
        for (int w = 0; w < this.m_numAttributes; ++w) {
            if (w == this.m_headerInfo.classIndex()) {
                continue;
            }
            result.append(this.m_headerInfo.attribute(w).name()).append("\t");
            for (int c = 0; c < this.m_numClasses; ++c) {
                result.append(Double.toString(Math.exp(this.m_probOfWordGivenClass[c][w]))).append("\t");
            }
            result.append("\n");
        }
        return result.toString();
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 11301 $");
    }

    /**
     * Command-line entry point; hands the arguments to {@code runClassifier}.
     *
     * @param argv the command-line options
     */
    public static void main(String[] argv) {
        NaiveBayesMultinomial.runClassifier(new NaiveBayesMultinomial(), argv);
    }
}

