ClusterEvaluator.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */

  21. package org.hipparchus.clustering.evaluation;

  22. import java.util.List;

  23. import org.hipparchus.clustering.CentroidCluster;
  24. import org.hipparchus.clustering.Cluster;
  25. import org.hipparchus.clustering.Clusterable;
  26. import org.hipparchus.clustering.DoublePoint;
  27. import org.hipparchus.clustering.distance.DistanceMeasure;
  28. import org.hipparchus.clustering.distance.EuclideanDistance;

  29. /**
  30.  * Base class for cluster evaluation methods.
  31.  *
  32.  * @param <T> type of the clustered points
  33.  */
  34. public abstract class ClusterEvaluator<T extends Clusterable> {

  35.     /** The distance measure to use when evaluating the cluster. */
  36.     private final DistanceMeasure measure;

  37.     /**
  38.      * Creates a new cluster evaluator with an {@link EuclideanDistance}
  39.      * as distance measure.
  40.      */
  41.     public ClusterEvaluator() {
  42.         this(new EuclideanDistance());
  43.     }

  44.     /**
  45.      * Creates a new cluster evaluator with the given distance measure.
  46.      * @param measure the distance measure to use
  47.      */
  48.     public ClusterEvaluator(final DistanceMeasure measure) {
  49.         this.measure = measure;
  50.     }

  51.     /**
  52.      * Computes the evaluation score for the given list of clusters.
  53.      * @param clusters the clusters to evaluate
  54.      * @return the computed score
  55.      */
  56.     public abstract double score(List<? extends Cluster<T>> clusters);

  57.     /**
  58.      * Returns whether the first evaluation score is considered to be better
  59.      * than the second one by this evaluator.
  60.      * <p>
  61.      * Specific implementations shall override this method if the returned scores
  62.      * do not follow the same ordering, i.e. smaller score is better.
  63.      *
  64.      * @param score1 the first score
  65.      * @param score2 the second score
  66.      * @return {@code true} if the first score is considered to be better, {@code false} otherwise
  67.      */
  68.     public boolean isBetterScore(double score1, double score2) {
  69.         return score1 < score2;
  70.     }

  71.     /**
  72.      * Calculates the distance between two {@link Clusterable} instances
  73.      * with the configured {@link DistanceMeasure}.
  74.      *
  75.      * @param p1 the first clusterable
  76.      * @param p2 the second clusterable
  77.      * @return the distance between the two clusterables
  78.      */
  79.     protected double distance(final Clusterable p1, final Clusterable p2) {
  80.         return measure.compute(p1.getPoint(), p2.getPoint());
  81.     }

  82.     /**
  83.      * Computes the centroid for a cluster.
  84.      *
  85.      * @param cluster the cluster
  86.      * @return the computed centroid for the cluster,
  87.      * or {@code null} if the cluster does not contain any points
  88.      */
  89.     protected Clusterable centroidOf(final Cluster<T> cluster) {
  90.         final List<T> points = cluster.getPoints();
  91.         if (points.isEmpty()) {
  92.             return null;
  93.         }

  94.         // in case the cluster is of type CentroidCluster, no need to compute the centroid
  95.         if (cluster instanceof CentroidCluster) {
  96.             return ((CentroidCluster<T>) cluster).getCenter();
  97.         }

  98.         final int dimension = points.get(0).getPoint().length;
  99.         final double[] centroid = new double[dimension];
  100.         for (final T p : points) {
  101.             final double[] point = p.getPoint();
  102.             for (int i = 0; i < centroid.length; i++) {
  103.                 centroid[i] += point[i];
  104.             }
  105.         }
  106.         for (int i = 0; i < centroid.length; i++) {
  107.             centroid[i] /= points.size();
  108.         }
  109.         return new DoublePoint(centroid);
  110.     }

  111. }