MultivariateNormalMixtureExpectationMaximization.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is not the original file distributed by the Apache Software Foundation
* It has been modified by the Hipparchus project
*/
package org.hipparchus.stat.fitting;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.hipparchus.distribution.multivariate.MixtureMultivariateNormalDistribution;
import org.hipparchus.distribution.multivariate.MultivariateNormalDistribution;
import org.hipparchus.exception.LocalizedCoreFormats;
import org.hipparchus.exception.MathIllegalArgumentException;
import org.hipparchus.exception.MathIllegalStateException;
import org.hipparchus.linear.Array2DRowRealMatrix;
import org.hipparchus.linear.RealMatrix;
import org.hipparchus.stat.correlation.Covariance;
import org.hipparchus.util.FastMath;
import org.hipparchus.util.MathArrays;
import org.hipparchus.util.Pair;
/**
* Expectation-Maximization algorithm for fitting the parameters of
* multivariate normal mixture model distributions.
*
* This implementation is pure original code based on <a
* href="https://www.ee.washington.edu/techsite/papers/documents/UWEETR-2010-0002.pdf">
* EM Demystified: An Expectation-Maximization Tutorial</a> by Yihua Chen and Maya R. Gupta,
* Department of Electrical Engineering, University of Washington, Seattle, WA 98195.
* It was verified using external tools like <a
* href="http://cran.r-project.org/web/packages/mixtools/index.html">CRAN Mixtools</a>
* (see the JUnit test cases) but it is <strong>not</strong> based on Mixtools code at all.
* The discussion of the origin of this class can be seen in the comments of the <a
* href="https://issues.apache.org/jira/browse/MATH-817">MATH-817</a> JIRA issue.
*/
public class MultivariateNormalMixtureExpectationMaximization {
/** Default maximum number of iterations allowed per fitting process. */
private static final int DEFAULT_MAX_ITERATIONS = 1000;
/** Default convergence threshold for fitting. */
private static final double DEFAULT_THRESHOLD = 1E-5;
/** The data to fit. */
private final double[][] data;
/** The model fit against the data. */
private MixtureMultivariateNormalDistribution fittedModel;
/** The log likelihood of the data given the fitted model. */
private double logLikelihood;
/**
* Creates an object to fit a multivariate normal mixture model to data.
*
* @param data Data to use in fitting procedure
* @throws MathIllegalArgumentException if data has no rows
* @throws MathIllegalArgumentException if rows of data have different numbers
* of columns
* @throws MathIllegalArgumentException if the number of columns in the data is
* less than 2
*/
public MultivariateNormalMixtureExpectationMaximization(double[][] data)
throws MathIllegalArgumentException {
if (data.length < 1) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
data.length, 1);
}
this.data = new double[data.length][data[0].length];
for (int i = 0; i < data.length; i++) {
if (data[i].length != data[0].length) {
// Jagged arrays not allowed
throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
data[i].length, data[0].length);
}
if (data[i].length < 2) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
data[i].length, 2, true);
}
this.data[i] = data[i].clone();
}
}
/**
* Fit a mixture model to the data supplied to the constructor.
*
* The quality of the fit depends on the concavity of the data provided to
* the constructor and the initial mixture provided to this function. If the
* data has many local optima, multiple runs of the fitting function with
* different initial mixtures may be required to find the optimal solution.
* If a MathIllegalArgumentException is encountered, it is possible that another
* initialization would work.
*
* @param initialMixture Model containing initial values of weights and
* multivariate normals
* @param maxIterations Maximum iterations allowed for fit
* @param threshold Convergence threshold computed as difference in
* logLikelihoods between successive iterations
* @throws MathIllegalArgumentException if any component's covariance matrix is
* singular during fitting
* @throws MathIllegalArgumentException if numComponents is less than one
* or threshold is less than Double.MIN_VALUE
* @throws MathIllegalArgumentException if initialMixture mean vector and data
* number of columns are not equal
*/
public void fit(final MixtureMultivariateNormalDistribution initialMixture,
final int maxIterations,
final double threshold)
throws MathIllegalArgumentException {
if (maxIterations < 1) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
maxIterations, 1);
}
if (threshold < Double.MIN_VALUE) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
threshold, Double.MIN_VALUE);
}
final int n = data.length;
// Number of data columns. Jagged data already rejected in constructor,
// so we can assume the lengths of each row are equal.
final int numCols = data[0].length;
final int k = initialMixture.getComponents().size();
final int numMeanColumns
= initialMixture.getComponents().get(0).getSecond().getMeans().length;
if (numMeanColumns != numCols) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
numMeanColumns, numCols);
}
double previousLogLikelihood = 0d;
logLikelihood = Double.NEGATIVE_INFINITY;
// Initialize model to fit to initial mixture.
fittedModel = new MixtureMultivariateNormalDistribution(initialMixture.getComponents());
for (int numIterations = 0;
numIterations < maxIterations && FastMath.abs(previousLogLikelihood - logLikelihood) > threshold;
++numIterations) {
previousLogLikelihood = logLikelihood;
double sumLogLikelihood = 0d;
// Mixture components
final List<Pair<Double, MultivariateNormalDistribution>> components
= fittedModel.getComponents();
// Weight and distribution of each component
final double[] weights = new double[k];
final MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[k];
for (int j = 0; j < k; j++) {
weights[j] = components.get(j).getFirst();
mvns[j] = components.get(j).getSecond();
}
// E-step: compute the data dependent parameters of the expectation
// function.
// The percentage of row's total density between a row and a
// component
final double[][] gamma = new double[n][k];
// Sum of gamma for each component
final double[] gammaSums = new double[k];
// Sum of gamma times its row for each each component
final double[][] gammaDataProdSums = new double[k][numCols];
for (int i = 0; i < n; i++) {
final double rowDensity = fittedModel.density(data[i]);
sumLogLikelihood += FastMath.log(rowDensity);
for (int j = 0; j < k; j++) {
gamma[i][j] = weights[j] * mvns[j].density(data[i]) / rowDensity;
gammaSums[j] += gamma[i][j];
for (int col = 0; col < numCols; col++) {
gammaDataProdSums[j][col] += gamma[i][j] * data[i][col];
}
}
}
logLikelihood = sumLogLikelihood / n;
// M-step: compute the new parameters based on the expectation
// function.
final double[] newWeights = new double[k];
final double[][] newMeans = new double[k][numCols];
for (int j = 0; j < k; j++) {
newWeights[j] = gammaSums[j] / n;
for (int col = 0; col < numCols; col++) {
newMeans[j][col] = gammaDataProdSums[j][col] / gammaSums[j];
}
}
// Compute new covariance matrices
final RealMatrix[] newCovMats = new RealMatrix[k];
for (int j = 0; j < k; j++) {
newCovMats[j] = new Array2DRowRealMatrix(numCols, numCols);
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < k; j++) {
final RealMatrix vec
= new Array2DRowRealMatrix(MathArrays.ebeSubtract(data[i], newMeans[j]));
final RealMatrix dataCov
= vec.multiplyTransposed(vec).scalarMultiply(gamma[i][j]);
newCovMats[j] = newCovMats[j].add(dataCov);
}
}
// Converting to arrays for use by fitted model
final double[][][] newCovMatArrays = new double[k][numCols][numCols];
for (int j = 0; j < k; j++) {
newCovMats[j] = newCovMats[j].scalarMultiply(1d / gammaSums[j]);
newCovMatArrays[j] = newCovMats[j].getData();
}
// Update current model
fittedModel = new MixtureMultivariateNormalDistribution(newWeights,
newMeans,
newCovMatArrays);
}
if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
// Did not converge before the maximum number of iterations
throw new MathIllegalStateException(LocalizedCoreFormats.CONVERGENCE_FAILED);
}
}
/**
* Fit a mixture model to the data supplied to the constructor.
*
* The quality of the fit depends on the concavity of the data provided to
* the constructor and the initial mixture provided to this function. If the
* data has many local optima, multiple runs of the fitting function with
* different initial mixtures may be required to find the optimal solution.
* If a MathIllegalArgumentException is encountered, it is possible that another
* initialization would work.
*
* @param initialMixture Model containing initial values of weights and
* multivariate normals
* @throws MathIllegalArgumentException if any component's covariance matrix is
* singular during fitting
* @throws MathIllegalArgumentException if numComponents is less than one or
* threshold is less than Double.MIN_VALUE
*/
public void fit(MixtureMultivariateNormalDistribution initialMixture)
throws MathIllegalArgumentException {
fit(initialMixture, DEFAULT_MAX_ITERATIONS, DEFAULT_THRESHOLD);
}
/**
* Helper method to create a multivariate normal mixture model which can be
* used to initialize {@link #fit(MixtureMultivariateNormalDistribution)}.
*
* This method uses the data supplied to the constructor to try to determine
* a good mixture model at which to start the fit, but it is not guaranteed
* to supply a model which will find the optimal solution or even converge.
*
* @param data Data to estimate distribution
* @param numComponents Number of components for estimated mixture
* @return Multivariate normal mixture model estimated from the data
* @throws MathIllegalArgumentException if {@code numComponents} is greater
* than the number of data rows.
* @throws MathIllegalArgumentException if {@code numComponents < 2}.
* @throws MathIllegalArgumentException if data has less than 2 rows
* @throws MathIllegalArgumentException if rows of data have different numbers
* of columns
*/
public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
final int numComponents)
throws MathIllegalArgumentException {
if (data.length < 2) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
data.length, 2);
}
if (numComponents < 2) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
numComponents, 2);
}
if (numComponents > data.length) {
throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_LARGE,
numComponents, data.length);
}
final int numRows = data.length;
final int numCols = data[0].length;
// sort the data
final DataRow[] sortedData = new DataRow[numRows];
for (int i = 0; i < numRows; i++) {
sortedData[i] = new DataRow(data[i]);
}
Arrays.sort(sortedData);
// uniform weight for each bin
final double weight = 1d / numComponents;
// components of mixture model to be created
final List<Pair<Double, MultivariateNormalDistribution>> components = new ArrayList<>(numComponents);
// create a component based on data in each bin
for (int binIndex = 0; binIndex < numComponents; binIndex++) {
// minimum index (inclusive) from sorted data for this bin
final int minIndex = (binIndex * numRows) / numComponents;
// maximum index (exclusive) from sorted data for this bin
final int maxIndex = ((binIndex + 1) * numRows) / numComponents;
// number of data records that will be in this bin
final int numBinRows = maxIndex - minIndex;
// data for this bin
final double[][] binData = new double[numBinRows][numCols];
// mean of each column for the data in the this bin
final double[] columnMeans = new double[numCols];
// populate bin and create component
for (int i = minIndex; i < maxIndex; i++) {
final int iBin = i - minIndex;
for (int j = 0; j < numCols; j++) {
final double val = sortedData[i].getRow()[j];
columnMeans[j] += val;
binData[iBin][j] = val;
}
}
MathArrays.scaleInPlace(1d / numBinRows, columnMeans);
// covariance matrix for this bin
final double[][] covMat
= new Covariance(binData).getCovarianceMatrix().getData();
final MultivariateNormalDistribution mvn
= new MultivariateNormalDistribution(columnMeans, covMat);
components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));
}
return new MixtureMultivariateNormalDistribution(components);
}
/**
* Gets the log likelihood of the data under the fitted model.
*
* @return Log likelihood of data or zero of no data has been fit
*/
public double getLogLikelihood() {
return logLikelihood;
}
/**
* Gets the fitted model.
*
* @return fitted model or {@code null} if no fit has been performed yet.
*/
public MixtureMultivariateNormalDistribution getFittedModel() {
return new MixtureMultivariateNormalDistribution(fittedModel.getComponents());
}
/**
* Class used for sorting user-supplied data.
*/
private static class DataRow implements Comparable<DataRow> {
/** One data row. */
private final double[] row;
/** Mean of the data row. */
private Double mean;
/**
* Create a data row.
* @param data Data to use for the row, a reference to the data is stored
*/
DataRow(final double[] data) {
// Store reference.
row = data; // NOPMD - storing a reference to the array is intentional and documented here
// Compute mean.
mean = 0d;
for (int i = 0; i < data.length; i++) {
mean += data[i];
}
mean /= data.length;
}
/**
* Compare two data rows.
* @param other The other row
* @return int for sorting
*/
@Override
public int compareTo(final DataRow other) {
return mean.compareTo(other.mean);
}
/** {@inheritDoc} */
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other instanceof DataRow) {
return MathArrays.equals(row, ((DataRow) other).row);
}
return false;
}
/** {@inheritDoc} */
@Override
public int hashCode() {
return Arrays.hashCode(row);
}
/**
* Get a data row.
* @return data row array (a reference to the stored array is returned)
*/
public double[] getRow() {
return row; // NOPMD - returning a reference to an internal array is documented here
}
}
}