/*
 * Decompiled with CFR 0.152.
 */
package weka.clusterers;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.clusterers.RandomizableClusterer;
import weka.clusterers.UpdateableClusterer;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.experiment.Stats;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;

public class Cobweb
extends RandomizableClusterer
implements Drawable,
TechnicalInformationHandler,
UpdateableClusterer {
    /** Serialization version identifier for this class. */
    static final long serialVersionUID = 928406656495092318L;
    /** The constant 1 / (2 * sqrt(pi)); numerator of the numeric-attribute terms in the category utility. */
    protected static final double m_normal = 1.0 / (2.0 * Math.sqrt(Math.PI));
    /** Acuity: lower bound applied to the standard deviation of numeric attributes. */
    protected double m_acuity = 1.0;
    /** Cutoff: category utility threshold below which a node's children are discarded. */
    protected double m_cutoff = 0.01 * m_normal;
    /** Root of the Cobweb tree; null until the first instance has been added. */
    protected CNode m_cobwebTree = null;
    /** Number of nodes numbered by the last cluster-number assignment; -1 until determined. */
    protected int m_numberOfClusters = -1;
    /** True while the assigned cluster numbers are up to date with the tree. */
    protected boolean m_numberOfClustersDetermined = false;
    /** Number of split operations performed since the last call to buildClusterer. */
    protected int m_numberSplits;
    /** Number of merge operations performed since the last call to buildClusterer. */
    protected int m_numberMerges;
    /** Whether graph output embeds the instance data of each node. */
    protected boolean m_saveInstances = false;

    public Cobweb() {
        this.m_SeedDefault = 42;
        this.setSeed(this.m_SeedDefault);
    }

    /**
     * Returns a description of this clusterer followed by its literature references.
     *
     * @return the descriptive text
     */
    public String globalInfo() {
        String description =
            "Class implementing the Cobweb and Classit clustering algorithms.\n\n"
            + "Note: the application of node operators (merging, splitting etc.) in "
            + "terms of ordering and priority differs (and is somewhat ambiguous) "
            + "between the original Cobweb and Classit papers. This algorithm always "
            + "compares the best host, adding a new leaf, merging the two best hosts, "
            + "and splitting the best host when considering where to place a new "
            + "instance.\n\n"
            + "For more information see:\n\n";
        return description + getTechnicalInformation().toString();
    }

    /**
     * Builds the bibliographic record for this algorithm: the 1987 Machine
     * Learning article, with the 1990 Artificial Intelligence article attached
     * as an additional entry.
     *
     * @return the primary reference, carrying the additional one
     */
    public TechnicalInformation getTechnicalInformation() {
        // Primary reference.
        TechnicalInformation cobwebPaper = new TechnicalInformation(TechnicalInformation.Type.ARTICLE);
        cobwebPaper.setValue(TechnicalInformation.Field.AUTHOR, "D. Fisher");
        cobwebPaper.setValue(TechnicalInformation.Field.YEAR, "1987");
        cobwebPaper.setValue(TechnicalInformation.Field.TITLE, "Knowledge acquisition via incremental conceptual clustering");
        cobwebPaper.setValue(TechnicalInformation.Field.JOURNAL, "Machine Learning");
        cobwebPaper.setValue(TechnicalInformation.Field.VOLUME, "2");
        cobwebPaper.setValue(TechnicalInformation.Field.NUMBER, "2");
        cobwebPaper.setValue(TechnicalInformation.Field.PAGES, "139-172");

        // Additional reference, attached to the primary one.
        TechnicalInformation conceptFormationPaper = cobwebPaper.add(TechnicalInformation.Type.ARTICLE);
        conceptFormationPaper.setValue(TechnicalInformation.Field.AUTHOR, "J. H. Gennari and P. Langley and D. Fisher");
        conceptFormationPaper.setValue(TechnicalInformation.Field.YEAR, "1990");
        conceptFormationPaper.setValue(TechnicalInformation.Field.TITLE, "Models of incremental concept formation");
        conceptFormationPaper.setValue(TechnicalInformation.Field.JOURNAL, "Artificial Intelligence");
        conceptFormationPaper.setValue(TechnicalInformation.Field.VOLUME, "40");
        conceptFormationPaper.setValue(TechnicalInformation.Field.PAGES, "11-61");

        return cobwebPaper;
    }

    /**
     * Describes the data this clusterer accepts: no class attribute; nominal,
     * numeric and date attributes; missing values; and a minimum of zero
     * instances.
     *
     * @return the capabilities of this clusterer
     */
    public Capabilities getCapabilities() {
        Capabilities caps = super.getCapabilities();
        caps.disableAll();

        Capabilities.Capability[] supported = {
            Capabilities.Capability.NO_CLASS,
            Capabilities.Capability.NOMINAL_ATTRIBUTES,
            Capabilities.Capability.NUMERIC_ATTRIBUTES,
            Capabilities.Capability.DATE_ATTRIBUTES,
            Capabilities.Capability.MISSING_VALUES,
        };
        for (Capabilities.Capability capability : supported) {
            caps.enable(capability);
        }

        caps.setMinimumNumberInstances(0);
        return caps;
    }

    /**
     * Builds the clusterer from scratch: resets the tree and the counters,
     * checks the data against the capabilities, then feeds a randomized copy
     * of the data to {@link #updateClusterer(Instance)} one instance at a time.
     *
     * @param data the training instances (not modified; a copy is shuffled)
     * @throws Exception if the data fails the capabilities test or an update fails
     */
    public void buildClusterer(Instances data) throws Exception {
        m_numberOfClusters = -1;
        m_cobwebTree = null;
        m_numberSplits = 0;
        m_numberMerges = 0;

        getCapabilities().testWithFail(data);

        // Work on a shuffled copy so the caller's dataset is left untouched.
        Instances shuffled = new Instances(data);
        shuffled.randomize(new Random(getSeed()));

        int index = 0;
        while (index < shuffled.numInstances()) {
            updateClusterer(shuffled.instance(index));
            index++;
        }

        updateFinished();
    }

    /**
     * Signals that a batch of incremental updates is complete; brings the
     * cluster numbering up to date.
     */
    public void updateFinished() {
        determineNumberOfClusters();
    }

    /**
     * Assigns an instance to a cluster by descending the tree without
     * changing its structure.
     *
     * <p>At each internal node the instance is temporarily counted in the
     * node's statistics, the best child is looked up with the structure
     * frozen, and the statistics are rolled back again. The descent stops at
     * a leaf, or at the node where no existing child is a better host than a
     * fresh leaf would be.
     *
     * @param instance the instance to classify
     * @return the cluster number of the node the descent ended at
     * @throws IllegalStateException if no tree has been built yet
     * @throws Exception if evaluating a node fails
     */
    public int clusterInstance(Instance instance) throws Exception {
        // Fail with a clear message instead of a bare NullPointerException.
        if (m_cobwebTree == null) {
            throw new IllegalStateException("Cobweb hasn't been built yet!");
        }
        determineNumberOfClusters();

        CNode host = m_cobwebTree;
        while (host.m_children != null) {
            CNode next;
            host.updateStats(instance, false);
            try {
                next = host.findHost(instance, true);
            } finally {
                // Always undo the temporary update so a failed lookup cannot
                // leave the instance counted in this node's statistics.
                host.updateStats(instance, true);
            }
            if (next == null) {
                break;
            }
            host = next;
        }
        return host.m_clusterNum;
    }

    /**
     * Numbers the nodes of the tree and records how many there are, unless
     * the numbering is already current or no tree exists. If the numbering
     * fails, the stack trace is printed and the count is recorded as 0.
     */
    protected void determineNumberOfClusters() {
        if (m_numberOfClustersDetermined || m_cobwebTree == null) {
            return;
        }

        // Single-element array used as a mutable counter by the recursive numbering.
        int[] counter = new int[1];
        try {
            m_cobwebTree.assignClusterNums(counter);
        }
        catch (Exception e) {
            e.printStackTrace();
            counter[0] = 0;
        }

        m_numberOfClusters = counter[0];
        m_numberOfClustersDetermined = true;
    }

    /**
     * Returns the number of clusters, refreshing the numbering first if needed.
     *
     * @return the number of numbered nodes, or -1 if none has been determined
     */
    public int numberOfClusters() {
        determineNumberOfClusters();
        int clusters = m_numberOfClusters;
        return clusters;
    }

    /**
     * Incorporates one instance into the tree, creating the root if this is
     * the first instance, and marks the cluster numbering as stale.
     *
     * @param newInstance the instance to add
     * @throws Exception if the instance cannot be added
     */
    public void updateClusterer(Instance newInstance) throws Exception {
        m_numberOfClustersDetermined = false;

        if (m_cobwebTree != null) {
            m_cobwebTree.addInstance(newInstance);
            return;
        }
        // First instance: it becomes the root.
        m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance);
    }

    /**
     * Adds an instance to the clusterer; delegates to
     * {@link #updateClusterer(Instance)}.
     *
     * @param newInstance the instance to add
     * @throws Exception if the instance cannot be added
     */
    public void addInstance(Instance newInstance) throws Exception {
        updateClusterer(newInstance);
    }

    /**
     * Lists the command-line options: acuity (-A) and cutoff (-C), followed
     * by the options of the superclass.
     *
     * @return an enumeration of {@link Option} objects
     */
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.add(new Option("\tAcuity.\n\t(default=1.0)", "A", 1, "-A <acuity>"));
        options.add(new Option("\tCutoff.\n\t(default=0.002)", "C", 1, "-C <cutoff>"));

        for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements();) {
            options.add((Option) inherited.nextElement());
        }
        return options.elements();
    }

    /**
     * Parses the acuity (-A) and cutoff (-C) options, falling back to the
     * defaults when an option is absent, then hands the remaining options to
     * the superclass.
     *
     * @param options the option array
     * @throws Exception if an option cannot be read or parsed
     */
    public void setOptions(String[] options) throws Exception {
        String acuityArg = Utils.getOption('A', options);
        if (acuityArg.length() == 0) {
            m_acuity = 1.0;
        } else {
            setAcuity(Double.parseDouble(acuityArg));
        }

        String cutoffArg = Utils.getOption('C', options);
        if (cutoffArg.length() == 0) {
            m_cutoff = 0.01 * m_normal;
        } else {
            setCutoff(Double.parseDouble(cutoffArg));
        }

        super.setOptions(options);
    }

    /**
     * Returns the tip text for the acuity property.
     *
     * @return the tip text
     */
    public String acuityTipText() {
        final String tip = "set the minimum standard deviation for numeric attributes";
        return tip;
    }

    /**
     * Sets the acuity, the lower bound used for numeric standard deviations.
     *
     * @param a the new acuity
     */
    public void setAcuity(double a) {
        m_acuity = a;
    }

    /**
     * Returns the current acuity.
     *
     * @return the acuity
     */
    public double getAcuity() {
        return m_acuity;
    }

    /**
     * Returns the tip text for the cutoff property.
     *
     * @return the tip text
     */
    public String cutoffTipText() {
        final String tip = "set the category utility threshold by which to prune nodes";
        return tip;
    }

    /**
     * Sets the cutoff, the category utility threshold used for pruning.
     *
     * @param c the new cutoff
     */
    public void setCutoff(double c) {
        m_cutoff = c;
    }

    /**
     * Returns the current cutoff.
     *
     * @return the cutoff
     */
    public double getCutoff() {
        return m_cutoff;
    }

    /**
     * Returns the tip text for the save-instance-data property.
     *
     * @return the tip text
     */
    public String saveInstanceDataTipText() {
        final String tip = "save instance information for visualization purposes";
        return tip;
    }

    /**
     * Tells whether instance data is embedded in the graph output.
     *
     * @return true if instance data is saved
     */
    public boolean getSaveInstanceData() {
        return m_saveInstances;
    }

    /**
     * Sets whether instance data is embedded in the graph output.
     *
     * @param newsaveInstances true to save instance data
     */
    public void setSaveInstanceData(boolean newsaveInstances) {
        m_saveInstances = newsaveInstances;
    }

    /**
     * Returns the current settings as an option array: -A and -C with their
     * values, followed by the options of the superclass.
     *
     * @return the option array
     */
    public String[] getOptions() {
        Vector<String> settings = new Vector<String>();

        settings.add("-A");
        settings.add(String.valueOf(m_acuity));
        settings.add("-C");
        settings.add(String.valueOf(m_cutoff));

        for (String inherited : super.getOptions()) {
            settings.add(inherited);
        }

        String[] asArray = new String[settings.size()];
        return settings.toArray(asArray);
    }

    /**
     * Returns a textual description of the clusterer: merge, split and
     * cluster counts followed by a dump of the tree, or a notice if no tree
     * exists yet.
     *
     * @return the description
     */
    public String toString() {
        if (m_cobwebTree == null) {
            return "Cobweb hasn't been built yet!";
        }

        // Dump the tree first; this also refreshes the cluster numbering.
        StringBuffer tree = new StringBuffer();
        m_cobwebTree.dumpTree(0, tree);

        StringBuilder summary = new StringBuilder();
        summary.append("Number of merges: ").append(m_numberMerges);
        summary.append("\nNumber of splits: ").append(m_numberSplits);
        summary.append("\nNumber of clusters: ").append(numberOfClusters());
        summary.append("\n").append(tree.toString()).append("\n\n");
        return summary.toString();
    }

    /**
     * Returns the type code of the graph this clusterer produces.
     *
     * @return the graph type code, 1
     */
    public int graphType() {
        // NOTE(review): presumably the inlined value of a Drawable constant — confirm.
        final int graphKind = 1;
        return graphKind;
    }

    /**
     * Renders the tree as a "digraph CobwebTree" description.
     *
     * <p>Cluster numbers are refreshed before rendering because they are used
     * as node identifiers; after incremental updates the stored numbers may be
     * stale or still unassigned.
     *
     * @return the graph description
     * @throws IllegalStateException if no tree has been built yet
     * @throws Exception if a node cannot be rendered
     */
    public String graph() throws Exception {
        // Fail with a clear message instead of a bare NullPointerException.
        if (m_cobwebTree == null) {
            throw new IllegalStateException("Cobweb hasn't been built yet!");
        }
        // Node ids come from the cluster numbers, so make sure they are current.
        determineNumberOfClusters();

        StringBuffer text = new StringBuffer();
        text.append("digraph CobwebTree {\n");
        m_cobwebTree.graphTree(text);
        text.append("}\n");
        return text.toString();
    }

    /**
     * Returns the revision string of this class.
     *
     * @return the extracted revision
     */
    public String getRevision() {
        final String revisionTag = "$Revision: 5538 $";
        return RevisionUtils.extract(revisionTag);
    }

    /**
     * Command-line entry point; runs a new Cobweb clusterer with the given arguments.
     *
     * @param argv the command-line options
     */
    public static void main(String[] argv) {
        Cobweb clusterer = new Cobweb();
        Cobweb.runClusterer(clusterer, argv);
    }

    private class CNode
    implements Serializable,
    RevisionHandler {
        /** Serialization version identifier for tree nodes. */
        static final long serialVersionUID = 3452097436933325631L;
        /** Per-attribute statistics of the instances counted at this node; allocated lazily by updateStats. */
        private AttributeStats[] m_attStats;
        /** Number of attributes covered by the statistics. */
        private int m_numAttributes;
        /** Instances held at this node; null for a node that has not been given any yet. */
        protected Instances m_clusterInstances = null;
        /** Child nodes, or null when this node is a leaf. */
        private FastVector m_children = null;
        /** Sum of the weights of the instances currently counted in the statistics. */
        private double m_totalInstances = 0.0;
        /** Cluster number given by assignClusterNums; -1 until assigned. */
        private int m_clusterNum = -1;

        /**
         * Creates an empty node with no instances, statistics or children.
         *
         * @param numAttributes the number of attributes the node tracks
         */
        public CNode(int numAttributes) {
            m_numAttributes = numAttributes;
        }

        public CNode(int numAttributes, Instance leafInstance) {
            this(numAttributes);
            if (this.m_clusterInstances == null) {
                this.m_clusterInstances = new Instances(leafInstance.dataset(), 1);
            }
            this.m_clusterInstances.add(leafInstance);
            this.updateStats(leafInstance, false);
        }

        /**
         * Adds an instance to the subtree rooted at this node.
         *
         * <p>An empty node simply takes the instance. A leaf is expanded into
         * two children (its previous content and the new instance) and is
         * collapsed again if the resulting category utility is below the
         * cutoff. An internal node asks {@code findHost} for the node to
         * continue with and recurses into it.
         *
         * @param newInstance the instance to add
         * @throws Exception if the category utility cannot be computed
         */
        protected void addInstance(Instance newInstance) throws Exception {
            // Case 1: empty node, just store the instance.
            if (m_clusterInstances == null) {
                m_clusterInstances = new Instances(newInstance.dataset(), 1);
                m_clusterInstances.add(newInstance);
                updateStats(newInstance, false);
                return;
            }

            // Case 2: internal node, delegate to the best host (if any).
            if (m_children != null) {
                CNode host = findHost(newInstance, false);
                if (host != null) {
                    host.addInstance(newInstance);
                }
                return;
            }

            // Case 3: leaf. Move the current content into one child ...
            CNode previousContent = new CNode(m_numAttributes, m_clusterInstances.instance(0));
            for (int k = 1; k < m_clusterInstances.numInstances(); ++k) {
                Instance existing = m_clusterInstances.instance(k);
                previousContent.m_clusterInstances.add(existing);
                previousContent.updateStats(existing, false);
            }
            // ... and the new instance into another.
            m_children = new FastVector();
            m_children.addElement(previousContent);
            m_children.addElement(new CNode(m_numAttributes, newInstance));

            m_clusterInstances.add(newInstance);
            updateStats(newInstance, false);

            // Not worth the split: go back to being a leaf.
            if (categoryUtility() < Cobweb.this.m_cutoff) {
                m_children = null;
            }
        }

        /**
         * Computes, for every child, this node's category utility if the
         * instance were placed in that child. Each child's statistics are
         * restored after its score has been taken.
         *
         * @param newInstance the instance being placed
         * @return one score per child, in child order
         * @throws Exception if the category utility cannot be computed
         */
        private double[] cuScoresForChildren(Instance newInstance) throws Exception {
            final int childCount = m_children.size();
            double[] scores = new double[childCount];

            int idx = 0;
            while (idx < childCount) {
                CNode candidate = (CNode) m_children.elementAt(idx);
                candidate.updateStats(newInstance, false);   // tentatively add
                scores[idx] = categoryUtility();
                candidate.updateStats(newInstance, true);    // and take it back
                idx++;
            }
            return scores;
        }

        /**
         * Scores the option of replacing two children by a single merged node
         * that hosts the new instance.
         *
         * <p>{@code merged} is populated with both children and their
         * instances and keeps them afterwards; only the new instance is
         * removed from its statistics again. This node's child list is
         * restored, with {@code a} and {@code b} re-appended at the end.
         *
         * @param merged the empty node to fill with both children
         * @param a the first child to merge
         * @param b the second child to merge
         * @param newInstance the instance being placed
         * @return this node's category utility with the merge in place
         * @throws Exception if the category utility cannot be computed
         */
        private double cuScoreForBestTwoMerged(CNode merged, CNode a, CNode b, Instance newInstance) throws Exception {
            // Populate the merged node.
            merged.m_clusterInstances = new Instances(m_clusterInstances, 1);
            merged.addChildNode(a);
            merged.addChildNode(b);
            merged.updateStats(newInstance, false);

            // Temporarily swap the two children for the merged node.
            int position = m_children.indexOf(a);
            m_children.removeElementAt(position);
            position = m_children.indexOf(b);
            m_children.removeElementAt(position);
            m_children.addElement(merged);

            final double score = categoryUtility();

            // Undo: take the instance out again and put the children back.
            merged.updateStats(newInstance, true);
            position = m_children.indexOf(merged);
            m_children.removeElementAt(position);
            m_children.addElement(a);
            m_children.addElement(b);

            return score;
        }

        /**
         * Decides where a new instance should go below this node by comparing
         * the category utility of four options: the best existing child, a
         * new leaf, merging the two best children, and splitting the best
         * child (promoting its children to this node).
         *
         * <p>With {@code structureFrozen} set, only the first two options are
         * compared and nothing is added to the tree.
         *
         * @param newInstance the instance being placed
         * @param structureFrozen true to look up the best child only
         * @return with the structure frozen: the best existing child, or null
         *         when no child scores higher than a new leaf. Otherwise: the
         *         child to continue with (an existing child, the merged node,
         *         or a new empty child), this node itself after a split, or
         *         null when the best score is below the cutoff and the
         *         children have been discarded
         * @throws Exception if a category utility cannot be computed
         */
        private CNode findHost(Instance newInstance, boolean structureFrozen) throws Exception {
            // When the tree may change, count the instance at this node up front.
            if (!structureFrozen) {
                this.updateStats(newInstance, false);
            }
            // Score of placing the instance in each existing child.
            double[] categoryUtils = this.cuScoresForChildren(newInstance);
            // Option "new leaf": attach a trial leaf, score it, detach it again.
            CNode newLeaf = new CNode(this.m_numAttributes, newInstance);
            this.m_children.addElement(newLeaf);
            double bestHostCU = this.categoryUtility();
            CNode finalBestHost = newLeaf;
            this.m_children.removeElementAt(this.m_children.size() - 1);
            // Pick the two highest-scoring children.
            // NOTE(review): both indices start at 0 and secondBest only moves when a
            // score exceeds categoryUtils[secondBest]; if child 0 holds the maximum,
            // secondBest stays 0, a == b below, and the merge option is skipped.
            // Confirm whether that is intended.
            int best = 0;
            int secondBest = 0;
            for (int i = 0; i < categoryUtils.length; ++i) {
                if (!(categoryUtils[i] > categoryUtils[secondBest])) continue;
                if (categoryUtils[i] > categoryUtils[best]) {
                    secondBest = best;
                    best = i;
                    continue;
                }
                secondBest = i;
            }
            CNode a = (CNode)this.m_children.elementAt(best);
            CNode b = (CNode)this.m_children.elementAt(secondBest);
            // Option "best existing child" wins only if strictly better than a new leaf.
            if (categoryUtils[best] > bestHostCU) {
                bestHostCU = categoryUtils[best];
                finalBestHost = a;
            }
            // Lookup only: report the best child, or null if the trial leaf is still the winner.
            if (structureFrozen) {
                if (finalBestHost == newLeaf) {
                    return null;
                }
                return finalBestHost;
            }
            // Option "merge the two best children".
            double mergedCU = -1.7976931348623157E308;
            CNode merged = new CNode(this.m_numAttributes);
            if (a != b && (mergedCU = this.cuScoreForBestTwoMerged(merged, a, b, newInstance)) > bestHostCU) {
                bestHostCU = mergedCU;
                finalBestHost = merged;
            }
            // Option "split the best child": only possible if it has children of its own.
            double splitCU = -1.7976931348623157E308;
            double splitBestChildCU = -1.7976931348623157E308;
            double splitPlusNewLeafCU = -1.7976931348623157E308;
            double splitPlusMergeBestTwoCU = -1.7976931348623157E308;
            if (a.m_children != null) {
                int i;
                // Trial child list: every child except a, followed by a's children.
                FastVector tempChildren = new FastVector();
                for (i = 0; i < this.m_children.size(); ++i) {
                    CNode existingChild = (CNode)this.m_children.elementAt(i);
                    if (existingChild == a) continue;
                    tempChildren.addElement(existingChild);
                }
                for (i = 0; i < a.m_children.size(); ++i) {
                    CNode promotedChild = (CNode)a.m_children.elementAt(i);
                    tempChildren.addElement(promotedChild);
                }
                // Split + new leaf: score with the trial leaf attached, then detach it.
                tempChildren.addElement(newLeaf);
                FastVector saveStatusQuo = this.m_children;
                this.m_children = tempChildren;
                splitPlusNewLeafCU = this.categoryUtility();
                tempChildren.removeElementAt(tempChildren.size() - 1);
                // Split + best child: rescore every child of the trial list.
                categoryUtils = this.cuScoresForChildren(newInstance);
                best = 0;
                secondBest = 0;
                for (int i2 = 0; i2 < categoryUtils.length; ++i2) {
                    if (!(categoryUtils[i2] > categoryUtils[secondBest])) continue;
                    if (categoryUtils[i2] > categoryUtils[best]) {
                        secondBest = best;
                        best = i2;
                        continue;
                    }
                    secondBest = i2;
                }
                CNode sa = (CNode)this.m_children.elementAt(best);
                CNode sb = (CNode)this.m_children.elementAt(secondBest);
                splitBestChildCU = categoryUtils[best];
                // Split + merge of the two best children of the trial list.
                CNode mergedSplitChildren = new CNode(this.m_numAttributes);
                if (sa != sb) {
                    splitPlusMergeBestTwoCU = this.cuScoreForBestTwoMerged(mergedSplitChildren, sa, sb, newInstance);
                }
                // The split is worth the best of its three variants.
                splitCU = splitBestChildCU > splitPlusNewLeafCU ? splitBestChildCU : splitPlusNewLeafCU;
                // NOTE(review): local d is never read; the file header says this source was decompiled.
                double d = splitCU = splitCU > splitPlusMergeBestTwoCU ? splitCU : splitPlusMergeBestTwoCU;
                if (splitCU > bestHostCU) {
                    // Keep the trial child list; returning this node signals the split.
                    bestHostCU = splitCU;
                    finalBestHost = this;
                } else {
                    // Split rejected: restore the original child list.
                    this.m_children = saveStatusQuo;
                }
            }
            // Unless a split was chosen, the instance is stored at this node now.
            if (finalBestHost != this) {
                this.m_clusterInstances.add(newInstance);
            } else {
                ++Cobweb.this.m_numberSplits;
            }
            // Make the merge permanent.
            if (finalBestHost == merged) {
                ++Cobweb.this.m_numberMerges;
                this.m_children.removeElementAt(this.m_children.indexOf(a));
                this.m_children.removeElementAt(this.m_children.indexOf(b));
                this.m_children.addElement(merged);
            }
            // New leaf chosen: attach a fresh, empty child and return it in place of the trial leaf.
            if (finalBestHost == newLeaf) {
                finalBestHost = new CNode(this.m_numAttributes);
                this.m_children.addElement(finalBestHost);
            }
            // Below the cutoff: drop the children; null tells the caller there is no host.
            if (bestHostCU < Cobweb.this.m_cutoff) {
                if (finalBestHost == this) {
                    // The split branch above did not store the instance; do it here.
                    this.m_clusterInstances.add(newInstance);
                }
                this.m_children = null;
                finalBestHost = null;
            }
            // After a split this node is returned; undo the statistics update made at the top.
            if (finalBestHost == this) {
                this.updateStats(newInstance, true);
            }
            return finalBestHost;
        }

        /**
         * Attaches a child to this node and absorbs the child's instances
         * into this node's instance list and statistics.
         *
         * @param child the node to attach
         */
        protected void addChildNode(CNode child) {
            Instances incoming = child.m_clusterInstances;
            int pos = 0;
            while (pos < incoming.numInstances()) {
                Instance absorbed = incoming.instance(pos);
                m_clusterInstances.add(absorbed);
                updateStats(absorbed, false);
                pos++;
            }

            // Create the child list on first use.
            if (m_children == null) {
                m_children = new FastVector();
            }
            m_children.addElement(child);
        }

        /**
         * Computes the category utility of this node: the per-child
         * contributions summed and divided by the number of children.
         *
         * @return the category utility
         * @throws Exception if this node has no children
         */
        protected double categoryUtility() throws Exception {
            if (m_children == null) {
                throw new Exception("categoryUtility: No children!");
            }

            double accumulated = 0.0;
            int idx = 0;
            while (idx < m_children.size()) {
                accumulated += categoryUtilityChild((CNode) m_children.elementAt(idx));
                idx++;
            }
            return accumulated / (double) m_children.size();
        }

        /**
         * Computes one child's contribution to this node's category utility.
         *
         * <p>Nominal attributes contribute the difference of squared value
         * probabilities (child minus parent); other attributes contribute the
         * difference of {@code m_normal} over the standard deviation (child
         * minus parent). The sum is weighted by the child's share of this
         * node's total instance weight.
         *
         * @param child the child to score
         * @return the weighted contribution of the child
         * @throws Exception if a probability or standard deviation cannot be obtained
         */
        protected double categoryUtilityChild(CNode child) throws Exception {
            double sum = 0.0;
            for (int att = 0; att < m_numAttributes; ++att) {
                if (!m_clusterInstances.attribute(att).isNominal()) {
                    sum += m_normal / child.getStandardDev(att) - m_normal / this.getStandardDev(att);
                } else {
                    for (int val = 0; val < m_clusterInstances.attribute(att).numValues(); ++val) {
                        double childProb = child.getProbability(att, val);
                        double parentProb = this.getProbability(att, val);
                        sum += childProb * childProb - parentProb * parentProb;
                    }
                }
            }
            return child.m_totalInstances / m_totalInstances * sum;
        }

        /**
         * Returns the relative frequency of a nominal value at this node.
         *
         * @param attIndex the index of the nominal attribute
         * @param valueIndex the index of the value
         * @return the value's count divided by the attribute's total count,
         *         or 0 when the total count is not positive
         * @throws Exception if the attribute is not nominal
         */
        protected double getProbability(int attIndex, int valueIndex) throws Exception {
            boolean nominal = m_clusterInstances.attribute(attIndex).isNominal();
            if (!nominal) {
                throw new Exception("getProbability: attribute is not nominal");
            }

            AttributeStats stats = m_attStats[attIndex];
            if (stats.totalCount > 0) {
                return (double) stats.nominalCounts[valueIndex] / (double) stats.totalCount;
            }
            return 0.0;
        }

        /**
         * Returns the standard deviation of a numeric attribute at this node,
         * never less than the acuity. A NaN or infinite deviation is replaced
         * by the acuity.
         *
         * @param attIndex the index of the numeric attribute
         * @return the bounded standard deviation
         * @throws Exception if the attribute is not numeric
         */
        protected double getStandardDev(int attIndex) throws Exception {
            boolean numeric = m_clusterInstances.attribute(attIndex).isNumeric();
            if (!numeric) {
                throw new Exception("getStandardDev: attribute is not numeric");
            }

            Stats numeric_stats = m_attStats[attIndex].numericStats;
            numeric_stats.calculateDerived();
            double deviation = numeric_stats.stdDev;

            boolean usable = !Double.isNaN(deviation) && !Double.isInfinite(deviation);
            if (usable) {
                return Math.max(Cobweb.this.m_acuity, deviation);
            }
            return Cobweb.this.m_acuity;
        }

        /**
         * Adds an instance to, or removes it from, this node's statistics.
         *
         * <p>The statistics array is allocated on first use. Missing values
         * are skipped. Nominal counts are kept as ints, so the weighted
         * update is truncated back to an int.
         *
         * @param updateInstance the instance to count
         * @param delete true to remove the instance's contribution, false to add it
         */
        protected void updateStats(Instance updateInstance, boolean delete) {
            // Lazily create one statistics holder per attribute.
            if (m_attStats == null) {
                m_attStats = new AttributeStats[m_numAttributes];
                for (int att = 0; att < m_numAttributes; ++att) {
                    AttributeStats holder = new AttributeStats();
                    m_attStats[att] = holder;
                    if (m_clusterInstances.attribute(att).isNominal()) {
                        holder.nominalCounts = new int[m_clusterInstances.attribute(att).numValues()];
                    } else {
                        holder.numericStats = new Stats();
                    }
                }
            }

            for (int att = 0; att < m_numAttributes; ++att) {
                if (updateInstance.isMissing(att)) {
                    continue;
                }
                double value = updateInstance.value(att);
                AttributeStats holder = m_attStats[att];

                if (!m_clusterInstances.attribute(att).isNominal()) {
                    if (delete) {
                        holder.numericStats.subtract(value, updateInstance.weight());
                    } else {
                        holder.numericStats.add(value, updateInstance.weight());
                    }
                } else {
                    int slot = (int) value;
                    double change = delete ? -1.0 * updateInstance.weight() : updateInstance.weight();
                    holder.nominalCounts[slot] = (int) ((double) holder.nominalCounts[slot] + change);
                    holder.totalCount = (int) ((double) holder.totalCount + change);
                }
            }

            m_totalInstances += delete ? -1.0 * updateInstance.weight() : updateInstance.weight();
        }

        /**
         * Numbers this node and then its descendants, depth first, using
         * {@code cl_num[0]} as the running counter.
         *
         * @param cl_num single-element array holding the next free cluster number
         * @throws Exception if a node has a child list with fewer than two children
         */
        private void assignClusterNums(int[] cl_num) throws Exception {
            final boolean internal = m_children != null;
            if (internal && m_children.size() < 2) {
                throw new Exception("assignClusterNums: tree not built correctly!");
            }

            m_clusterNum = cl_num[0]++;

            if (!internal) {
                return;
            }
            for (int idx = 0; idx < m_children.size(); ++idx) {
                ((CNode) m_children.elementAt(idx)).assignClusterNums(cl_num);
            }
        }

        /**
         * Appends a textual dump of the subtree rooted at this node. A leaf
         * is written as one indented "leaf" line; an internal node writes a
         * "node" line before each of its children.
         *
         * @param depth the depth of this node; 0 also refreshes the cluster numbering
         * @param text the buffer to append to
         */
        protected void dumpTree(int depth, StringBuffer text) {
            if (depth == 0) {
                Cobweb.this.determineNumberOfClusters();
            }

            // Every line starts with a newline followed by one bar per level.
            StringBuffer prefixBuilder = new StringBuffer("\n");
            for (int level = 0; level < depth; ++level) {
                prefixBuilder.append("|   ");
            }
            String linePrefix = prefixBuilder.toString();

            if (m_children == null) {
                text.append(linePrefix);
                text.append("leaf " + m_clusterNum + " [" + m_clusterInstances.numInstances() + "]");
                return;
            }

            for (int idx = 0; idx < m_children.size(); ++idx) {
                text.append(linePrefix);
                text.append("node " + m_clusterNum + " [" + m_clusterInstances.numInstances() + "]");
                CNode child = (CNode) m_children.elementAt(idx);
                child.dumpTree(depth + 1, text);
            }
        }

        /**
         * Returns the instances of this node as text. For an internal node
         * the children's instances are pooled and an extra nominal "Cluster"
         * attribute records which child each instance came from.
         *
         * @return the textual form of the instances
         * @throws Exception if the filter adding the cluster attribute fails
         */
        protected String dumpData() throws Exception {
            if (m_children == null) {
                return m_clusterInstances.toString();
            }

            // Pool the children's instances, child by child, in a scratch node.
            CNode scratch = new CNode(m_numAttributes);
            scratch.m_clusterInstances = new Instances(m_clusterInstances, 1);
            for (int c = 0; c < m_children.size(); ++c) {
                scratch.addChildNode((CNode) m_children.elementAt(c));
            }
            Instances pooled = scratch.m_clusterInstances;

            // One label per child: C<clusterNum>, comma separated.
            StringBuilder labelList = new StringBuilder();
            for (int c = 0; c < m_children.size(); ++c) {
                if (c > 0) {
                    labelList.append(",");
                }
                CNode child = (CNode) m_children.elementAt(c);
                labelList.append("C").append(child.m_clusterNum);
            }

            Add addClusterAttribute = new Add();
            addClusterAttribute.setAttributeName("Cluster");
            addClusterAttribute.setNominalLabels(labelList.toString());
            addClusterAttribute.setInputFormat(pooled);
            Instances labelled = Filter.useFilter(pooled, addClusterAttribute);
            labelled.setRelationName("Cluster " + m_clusterNum);

            // Rows follow the pooling order, so walk the children again to label them.
            int row = 0;
            for (int c = 0; c < m_children.size(); ++c) {
                CNode child = (CNode) m_children.elementAt(c);
                int members = child.m_clusterInstances.numInstances();
                for (int m = 0; m < members; ++m) {
                    labelled.instance(row).setValue(m_numAttributes, (double) c);
                    row++;
                }
            }
            return labelled.toString();
        }

        /**
         * Appends the graph description of the subtree rooted at this node:
         * the node's own declaration, then one edge per child, then each
         * child's subtree.
         *
         * @param text the buffer to append to
         * @throws Exception if the instance data of a node cannot be dumped
         */
        protected void graphTree(StringBuffer text) throws Exception {
            final boolean leaf = m_children == null;

            // Assemble the declaration separately so the buffer is only touched once it is complete.
            StringBuilder declaration = new StringBuilder();
            declaration.append("N").append(m_clusterNum);
            declaration.append(" [label=\"").append(leaf ? "leaf " : "node ").append(m_clusterNum);
            declaration.append(" ").append(" (").append(m_clusterInstances.numInstances()).append(")\" ");
            if (leaf) {
                declaration.append("shape=box style=filled ");
            }
            if (Cobweb.this.m_saveInstances) {
                declaration.append("data =\n").append(dumpData()).append("\n,\n");
            }
            declaration.append("]\n");
            text.append(declaration.toString());

            if (leaf) {
                return;
            }

            // All edges first ...
            for (int c = 0; c < m_children.size(); ++c) {
                CNode child = (CNode) m_children.elementAt(c);
                text.append("N" + m_clusterNum + "->" + "N" + child.m_clusterNum + "\n");
            }
            // ... then the subtrees.
            for (int c = 0; c < m_children.size(); ++c) {
                CNode child = (CNode) m_children.elementAt(c);
                child.graphTree(text);
            }
        }

        /**
         * Returns the revision string of this class.
         *
         * @return the extracted revision
         */
        public String getRevision() {
            final String revisionTag = "$Revision: 5538 $";
            return RevisionUtils.extract(revisionTag);
        }
    }
}

