// ClusterEvaluator.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is not the original file distributed by the Apache Software Foundation
 * It has been modified by the Hipparchus project
 */

package org.hipparchus.clustering.evaluation;

import java.util.List;

import org.hipparchus.clustering.CentroidCluster;
import org.hipparchus.clustering.Cluster;
import org.hipparchus.clustering.Clusterable;
import org.hipparchus.clustering.DoublePoint;
import org.hipparchus.clustering.distance.DistanceMeasure;
import org.hipparchus.clustering.distance.EuclideanDistance;

/**
 * Common superclass of the evaluators that assign a quality score
 * to the outcome of a clustering.
 * <p>
 * Besides the abstract {@link #score(List)} entry point, it offers helpers
 * to subclasses for measuring distances with the configured
 * {@link DistanceMeasure} and for obtaining the centroid of a cluster.
 *
 * @param <T> type of the clustered points
 */
public abstract class ClusterEvaluator<T extends Clusterable> {

    /** Distance measure applied by {@link #distance(Clusterable, Clusterable)}. */
    private final DistanceMeasure distanceMeasure;

    /**
     * Builds an evaluator relying on an {@link EuclideanDistance}
     * as distance measure.
     */
    public ClusterEvaluator() {
        this(new EuclideanDistance());
    }

    /**
     * Builds an evaluator relying on the supplied distance measure.
     * @param measure the distance measure to use
     */
    public ClusterEvaluator(final DistanceMeasure measure) {
        this.distanceMeasure = measure;
    }

    /**
     * Evaluates the given clusters and reports the resulting score.
     * @param clusters the clusters to evaluate
     * @return the computed score
     */
    public abstract double score(List<? extends Cluster<T>> clusters);

    /**
     * Tells whether this evaluator ranks the first score above the second one.
     * <p>
     * The default implementation considers the strictly smaller score to be
     * the better one. Evaluators whose scores follow a different ordering
     * shall override this method.
     *
     * @param score1 the first score
     * @param score2 the second score
     * @return {@code true} if the first score is considered to be better, {@code false} otherwise
     */
    public boolean isBetterScore(double score1, double score2) {
        final boolean firstIsSmaller = score1 < score2;
        return firstIsSmaller;
    }

    /**
     * Measures how far apart two {@link Clusterable} instances are,
     * using the {@link DistanceMeasure} this evaluator was built with.
     *
     * @param p1 the first clusterable
     * @param p2 the second clusterable
     * @return the distance between the two clusterables
     */
    protected double distance(final Clusterable p1, final Clusterable p2) {
        final double[] first = p1.getPoint();
        final double[] second = p2.getPoint();
        return distanceMeasure.compute(first, second);
    }

    /**
     * Provides the centroid of a cluster.
     * <p>
     * A non-empty {@link CentroidCluster} simply reports its own center;
     * for any other non-empty cluster the centroid is the coordinate-wise
     * arithmetic mean of its points.
     *
     * @param cluster the cluster
     * @return the centroid of the cluster,
     * or {@code null} if the cluster does not contain any points
     */
    protected Clusterable centroidOf(final Cluster<T> cluster) {
        final List<T> members = cluster.getPoints();
        if (members.isEmpty()) {
            // the emptiness check comes first: an empty cluster has no centroid,
            // whatever its concrete type
            return null;
        }

        if (cluster instanceof CentroidCluster) {
            // the center is already known, nothing to compute
            return ((CentroidCluster<T>) cluster).getCenter();
        }

        return new DoublePoint(meanCoordinates(members));
    }

    /**
     * Averages, coordinate by coordinate, the points of a non-empty list.
     * <p>
     * The number of coordinates is taken from the first point of the list.
     *
     * @param members the points to average, must contain at least one element
     * @return the coordinate-wise arithmetic mean of the points
     */
    private static double[] meanCoordinates(final List<? extends Clusterable> members) {
        final double[] mean = new double[members.get(0).getPoint().length];

        // accumulate the coordinate sums first ...
        for (final Clusterable member : members) {
            final double[] coordinates = member.getPoint();
            for (int k = 0; k < mean.length; k++) {
                mean[k] += coordinates[k];
            }
        }

        // ... then turn each sum into an average
        final int count = members.size();
        for (int k = 0; k < mean.length; k++) {
            mean[k] /= count;
        }
        return mean;
    }

}