/*
 * Decompiled with CFR 0.152.
 */
package weka.clusterers;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.clusterers.RandomizableClusterer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/**
 * Clusterer that picks its cluster centres by farthest-first traversal: the
 * first centre is a randomly chosen training instance, and every further
 * centre is the not-yet-chosen instance whose distance to its nearest
 * already-chosen centre is largest. New instances are assigned to the
 * nearest centre.
 *
 * <p>Missing values are replaced with a {@link ReplaceMissingValues} filter
 * before distances are computed. Distances are computed on values that are
 * range-normalised per attribute using the min/max seen during training.
 */
public class FarthestFirst
extends RandomizableClusterer
implements TechnicalInformationHandler {
    /** Serialization version identifier. */
    static final long serialVersionUID = 7499838100631329509L;
    /**
     * Filtered training data while building; reduced to a header-only copy
     * (zero rows) at the end of {@link #buildClusterer(Instances)}. Used by the
     * distance code for attribute metadata.
     */
    protected Instances m_instances;
    /** Filter used to replace missing values in training and test instances. */
    protected ReplaceMissingValues m_ReplaceMissingFilter;
    /** Requested number of clusters; clamped to the training-set size on build. */
    protected int m_NumClusters = 2;
    /** The selected cluster centres; {@code null} until a model has been built. */
    protected Instances m_ClusterCentroids;
    /** Per-attribute minimum seen in the training data ({@code NaN} until set). */
    private double[] m_Min;
    /** Per-attribute maximum seen in the training data ({@code NaN} until set). */
    private double[] m_Max;

    /**
     * Returns a textual description of this clusterer, including the
     * technical references.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Cluster data using the FarthestFirst algorithm.\n\nFor more information see:\n\n"
            + this.getTechnicalInformation().toString()
            + "\n\n"
            + "Notes:\n"
            + "- works as a fast simple approximate clusterer\n"
            + "- modelled after SimpleKMeans, might be a useful initializer for it";
    }

    /**
     * Builds the bibliographic references for this algorithm: a journal
     * article plus one additional in-proceedings entry attached to it.
     *
     * @return the technical information object
     */
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.ARTICLE);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Hochbaum and Shmoys");
        result.setValue(TechnicalInformation.Field.YEAR, "1985");
        result.setValue(TechnicalInformation.Field.TITLE, "A best possible heuristic for the k-center problem");
        result.setValue(TechnicalInformation.Field.JOURNAL, "Mathematics of Operations Research");
        result.setValue(TechnicalInformation.Field.VOLUME, "10");
        result.setValue(TechnicalInformation.Field.NUMBER, "2");
        result.setValue(TechnicalInformation.Field.PAGES, "180-184");

        TechnicalInformation additional = result.add(TechnicalInformation.Type.INPROCEEDINGS);
        additional.setValue(TechnicalInformation.Field.AUTHOR, "Sanjoy Dasgupta");
        additional.setValue(TechnicalInformation.Field.TITLE, "Performance Guarantees for Hierarchical Clustering");
        additional.setValue(TechnicalInformation.Field.BOOKTITLE, "15th Annual Conference on Computational Learning Theory");
        additional.setValue(TechnicalInformation.Field.YEAR, "2002");
        additional.setValue(TechnicalInformation.Field.PAGES, "351-363");
        additional.setValue(TechnicalInformation.Field.PUBLISHER, "Springer");
        return result;
    }

    /**
     * Declares what kind of data this clusterer accepts: no class attribute,
     * nominal/numeric/date attributes, and missing values.
     *
     * @return the capabilities, starting from the superclass's with everything
     *         disabled and only the above re-enabled
     */
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capabilities.Capability.NO_CLASS);
        result.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES);
        result.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        result.enable(Capabilities.Capability.DATE_ATTRIBUTES);
        result.enable(Capabilities.Capability.MISSING_VALUES);
        return result;
    }

    /**
     * Builds the clusterer from the given data.
     *
     * <p>Side effect: if more clusters were requested than there are training
     * instances, {@code m_NumClusters} is lowered to the number of instances.
     *
     * @param data the training instances
     * @throws IllegalArgumentException if no instances remain to cluster
     * @throws Exception if the capability test or the missing-value filter fails
     */
    public void buildClusterer(Instances data) throws Exception {
        this.getCapabilities().testWithFail(data);

        this.m_ReplaceMissingFilter = new ReplaceMissingValues();
        this.m_ReplaceMissingFilter.setInputFormat(data);
        this.m_instances = Filter.useFilter(data, this.m_ReplaceMissingFilter);

        int n = this.m_instances.numInstances();
        if (n == 0) {
            // Random.nextInt(0) would fail below with an opaque "bound must be
            // positive" message; fail early with the same exception type instead.
            throw new IllegalArgumentException("Cannot build FarthestFirst clusterer: no training instances");
        }

        this.initMinMax(this.m_instances);

        // Cannot pick more distinct centres than there are instances.
        if (this.m_NumClusters > n) {
            this.m_NumClusters = n;
        }
        this.m_ClusterCentroids = new Instances(this.m_instances, this.m_NumClusters);

        Random r = new Random(this.getSeed());
        boolean[] selected = new boolean[n];
        double[] minDistance = new double[n];
        for (int i = 0; i < n; ++i) {
            minDistance[i] = Double.MAX_VALUE;
        }

        // Seed the traversal with a random instance.
        int firstI = r.nextInt(n);
        this.m_ClusterCentroids.add(this.m_instances.instance(firstI));
        selected[firstI] = true;
        this.updateMinDistance(minDistance, selected, this.m_instances, this.m_instances.instance(firstI));

        // Repeatedly add the instance farthest from all centres chosen so far.
        for (int i = 1; i < this.m_NumClusters; ++i) {
            int nextI = this.farthestAway(minDistance, selected);
            this.m_ClusterCentroids.add(this.m_instances.instance(nextI));
            selected[nextI] = true;
            this.updateMinDistance(minDistance, selected, this.m_instances, this.m_instances.instance(nextI));
        }

        // Keep only the header; the rows are no longer needed.
        this.m_instances = new Instances(this.m_instances, 0);
    }

    /**
     * Lowers each unselected instance's recorded nearest-centre distance if
     * the given centre is closer than what was recorded so far.
     *
     * @param minDistance per-instance distance to the nearest centre; updated in place
     * @param selected    flags marking instances already chosen as centres (skipped)
     * @param data        the instances the two arrays are indexed by
     * @param center      the newly added centre
     */
    protected void updateMinDistance(double[] minDistance, boolean[] selected, Instances data, Instance center) {
        for (int i = 0; i < selected.length; ++i) {
            if (selected[i]) {
                continue;
            }
            double d = this.distance(center, data.instance(i));
            if (d < minDistance[i]) {
                minDistance[i] = d;
            }
        }
    }

    /**
     * Finds the unselected instance with the largest recorded distance. Ties
     * are resolved in favour of the lowest index.
     *
     * @param minDistance per-instance distance to the nearest centre
     * @param selected    flags marking instances already chosen as centres
     * @return index of the farthest unselected instance, or -1 if every
     *         instance is already selected
     */
    protected int farthestAway(double[] minDistance, boolean[] selected) {
        double maxDistance = -1.0;
        int maxI = -1;
        for (int i = 0; i < selected.length; ++i) {
            if (!selected[i] && maxDistance < minDistance[i]) {
                maxDistance = minDistance[i];
                maxI = i;
            }
        }
        return maxI;
    }

    /**
     * Resets the per-attribute min/max arrays to {@code NaN} and then folds
     * every instance of the given data into them.
     *
     * @param data the data to compute attribute ranges from
     */
    protected void initMinMax(Instances data) {
        int numAttributes = data.numAttributes();
        this.m_Min = new double[numAttributes];
        this.m_Max = new double[numAttributes];
        for (int i = 0; i < numAttributes; ++i) {
            this.m_Min[i] = Double.NaN;
            this.m_Max[i] = Double.NaN;
        }
        for (int i = 0; i < data.numInstances(); ++i) {
            this.updateMinMax(data.instance(i));
        }
    }

    /**
     * Widens the per-attribute min/max ranges so they include the given
     * instance's values.
     *
     * @param instance the instance to incorporate
     */
    private void updateMinMax(Instance instance) {
        for (int j = 0; j < instance.numAttributes(); ++j) {
            double value = instance.value(j);
            if (Double.isNaN(this.m_Min[j])) {
                // First value seen for this attribute initialises both bounds.
                this.m_Min[j] = value;
                this.m_Max[j] = value;
            } else if (value < this.m_Min[j]) {
                this.m_Min[j] = value;
            } else if (value > this.m_Max[j]) {
                this.m_Max[j] = value;
            }
        }
    }

    /**
     * Assigns an instance (already passed through the missing-value filter)
     * to its nearest centre. Ties go to the lowest cluster index.
     *
     * @param instance the preprocessed instance
     * @return index of the nearest centre, or 0 if there is no centre to compare with
     */
    protected int clusterProcessedInstance(Instance instance) {
        double minDist = Double.MAX_VALUE;
        int bestCluster = 0;
        int usable = this.usableCentroidCount();
        for (int i = 0; i < usable; ++i) {
            double dist = this.distance(instance, this.m_ClusterCentroids.instance(i));
            if (dist < minDist) {
                minDist = dist;
                bestCluster = i;
            }
        }
        return bestCluster;
    }

    /**
     * Number of centres that may safely be indexed: the configured cluster
     * count, but never more than the centres actually stored. This guards
     * against {@code m_NumClusters} having been raised after the model was built.
     */
    private int usableCentroidCount() {
        return Math.min(this.m_NumClusters, this.m_ClusterCentroids.numInstances());
    }

    /**
     * Classifies a given instance: replaces its missing values with the
     * trained filter and returns the index of the nearest centre.
     *
     * @param instance the instance to assign to a cluster
     * @return the cluster index
     * @throws IllegalStateException if no model has been built yet
     * @throws Exception if the filter rejects the instance
     */
    public int clusterInstance(Instance instance) throws Exception {
        if (this.m_ReplaceMissingFilter == null || this.m_ClusterCentroids == null) {
            throw new IllegalStateException("No clusterer built yet!");
        }
        this.m_ReplaceMissingFilter.input(instance);
        this.m_ReplaceMissingFilter.batchFinished();
        Instance inst = this.m_ReplaceMissingFilter.output();
        return this.clusterProcessedInstance(inst);
    }

    /**
     * Computes the distance between two instances: the square root of the sum
     * of squared per-attribute differences divided by the number of
     * attributes. The two instances are walked in parallel by their stored
     * value positions; an attribute that is stored in only one of them is
     * compared against 0.0. The class attribute is skipped.
     *
     * @param first  the first instance
     * @param second the second instance
     * @return the distance between the two instances
     */
    protected double distance(Instance first, Instance second) {
        double distance = 0.0;
        int numAttributes = this.m_instances.numAttributes();
        int classIndex = this.m_instances.classIndex();
        int p1 = 0;
        int p2 = 0;
        while (p1 < first.numValues() || p2 < second.numValues()) {
            // An exhausted side gets an index past every real attribute so the
            // other side is always the one consumed next.
            int firstI = p1 >= first.numValues() ? numAttributes : first.index(p1);
            int secondI = p2 >= second.numValues() ? numAttributes : second.index(p2);
            if (firstI == classIndex) {
                ++p1;
                continue;
            }
            if (secondI == classIndex) {
                ++p2;
                continue;
            }
            double diff;
            if (firstI == secondI) {
                diff = this.difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
                ++p1;
                ++p2;
            } else if (firstI > secondI) {
                // Attribute stored only in 'second'.
                diff = this.difference(secondI, 0.0, second.valueSparse(p2));
                ++p2;
            } else {
                // Attribute stored only in 'first'.
                diff = this.difference(firstI, first.valueSparse(p1), 0.0);
                ++p1;
            }
            distance += diff * diff;
        }
        return Math.sqrt(distance / (double)numAttributes);
    }

    /**
     * Computes the difference between two values of one attribute.
     *
     * <p>NOTE(review): the type codes 1 and 0 presumably correspond to
     * Attribute.NOMINAL and Attribute.NUMERIC - confirm against weka.core.Attribute.
     * Any other attribute type (which would include date attributes, if they
     * report their own type code) contributes 0.0.
     *
     * @param index the attribute's index
     * @param val1  the first value
     * @param val2  the second value
     * @return the difference: 0 or 1 for type code 1, a normalised (possibly
     *         negative) difference for type code 0, and 0.0 otherwise
     */
    protected double difference(int index, double val1, double val2) {
        switch (this.m_instances.attribute(index).type()) {
            case 1: {
                // Categorical comparison: any missing value or mismatch counts as 1.
                if (Utils.isMissingValue(val1) || Utils.isMissingValue(val2) || (int)val1 != (int)val2) {
                    return 1.0;
                }
                return 0.0;
            }
            case 0: {
                boolean missing1 = Utils.isMissingValue(val1);
                boolean missing2 = Utils.isMissingValue(val2);
                if (missing1 || missing2) {
                    if (missing1 && missing2) {
                        return 1.0;
                    }
                    // Exactly one value is missing: use the larger of the
                    // present value's normalised form and its complement.
                    double diff = missing2 ? this.norm(val1, index) : this.norm(val2, index);
                    if (diff < 0.5) {
                        diff = 1.0 - diff;
                    }
                    return diff;
                }
                return this.norm(val1, index) - this.norm(val2, index);
            }
        }
        return 0.0;
    }

    /**
     * Normalises a value by the min/max range recorded for its attribute.
     *
     * @param x the value to normalise
     * @param i the attribute's index
     * @return {@code (x - min) / (max - min)}, or 0.0 if no range was recorded
     *         or min and max are equal according to {@code Utils.eq}
     */
    protected double norm(double x, int i) {
        if (Double.isNaN(this.m_Min[i]) || Utils.eq(this.m_Max[i], this.m_Min[i])) {
            return 0.0;
        }
        return (x - this.m_Min[i]) / (this.m_Max[i] - this.m_Min[i]);
    }

    /**
     * Returns the configured number of clusters.
     *
     * @return the number of clusters
     * @throws Exception declared by the signature; not thrown by this implementation
     */
    public int numberOfClusters() throws Exception {
        return this.m_NumClusters;
    }

    /**
     * Lists the available options: {@code -N <num>} followed by the
     * superclass's options.
     *
     * @return an enumeration of {@link Option} objects
     */
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.addElement(new Option("\tnumber of clusters. (default = 2).", "N", 1, "-N <num>"));
        Enumeration en = super.listOptions();
        while (en.hasMoreElements()) {
            result.addElement((Option)en.nextElement());
        }
        return result.elements();
    }

    /**
     * Returns the tip text for the number-of-clusters property.
     *
     * @return the tip text
     */
    public String numClustersTipText() {
        return "set number of clusters";
    }

    /**
     * Sets the number of clusters to generate.
     *
     * @param n the number of clusters; must be strictly positive
     * @throws Exception if {@code n} is zero or negative
     */
    public void setNumClusters(int n) throws Exception {
        // The message has always demanded "> 0"; zero must be rejected too.
        if (n <= 0) {
            throw new Exception("Number of clusters must be > 0");
        }
        this.m_NumClusters = n;
    }

    /**
     * Returns the configured number of clusters.
     *
     * @return the number of clusters
     */
    public int getNumClusters() {
        return this.m_NumClusters;
    }

    /**
     * Parses the given options. Handles {@code -N <num>} here and passes the
     * options on to the superclass.
     *
     * @param options the option strings
     * @throws Exception if an option value is invalid
     */
    public void setOptions(String[] options) throws Exception {
        String optionString = Utils.getOption('N', options);
        if (optionString.length() != 0) {
            this.setNumClusters(Integer.parseInt(optionString));
        }
        super.setOptions(options);
    }

    /**
     * Returns the current settings: {@code -N <num>} followed by the
     * superclass's options.
     *
     * @return the option strings
     */
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        result.add("-N");
        result.add("" + this.getNumClusters());
        String[] options = super.getOptions();
        for (int i = 0; i < options.length; ++i) {
            result.add(options[i]);
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns a description of the built model, listing each centre's
     * attribute values (labels for nominal attributes, raw values otherwise).
     *
     * @return the model description, or a placeholder message if no model has
     *         been built yet
     */
    public String toString() {
        if (this.m_ClusterCentroids == null) {
            return "No clusterer built yet!";
        }
        StringBuilder temp = new StringBuilder();
        temp.append("\n FarthestFirst\n==============\n");
        temp.append("\nCluster centroids:\n");
        int usable = this.usableCentroidCount();
        int numAttributes = this.m_ClusterCentroids.numAttributes();
        for (int i = 0; i < usable; ++i) {
            Instance centroid = this.m_ClusterCentroids.instance(i);
            temp.append("\nCluster ").append(i).append("\n\t");
            for (int j = 0; j < numAttributes; ++j) {
                temp.append(" ");
                if (this.m_ClusterCentroids.attribute(j).isNominal()) {
                    temp.append(this.m_ClusterCentroids.attribute(j).value((int)centroid.value(j)));
                } else {
                    temp.append(centroid.value(j));
                }
            }
        }
        temp.append("\n\n");
        return temp.toString();
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8034 $");
    }

    /**
     * Command-line entry point; delegates to {@code runClusterer}.
     *
     * @param argv the command-line options
     */
    public static void main(String[] argv) {
        FarthestFirst.runClusterer(new FarthestFirst(), argv);
    }
}

