// MultivariateSummaryStatistics.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is not the original file distributed by the Apache Software Foundation
* It has been modified by the Hipparchus project
*/
package org.hipparchus.stat.descriptive;
import java.io.Serializable;
import java.util.Arrays;
import org.hipparchus.exception.LocalizedCoreFormats;
import org.hipparchus.exception.MathIllegalArgumentException;
import org.hipparchus.linear.RealMatrix;
import org.hipparchus.stat.descriptive.moment.GeometricMean;
import org.hipparchus.stat.descriptive.moment.Mean;
import org.hipparchus.stat.descriptive.rank.Max;
import org.hipparchus.stat.descriptive.rank.Min;
import org.hipparchus.stat.descriptive.summary.Sum;
import org.hipparchus.stat.descriptive.summary.SumOfLogs;
import org.hipparchus.stat.descriptive.summary.SumOfSquares;
import org.hipparchus.stat.descriptive.vector.VectorialCovariance;
import org.hipparchus.stat.descriptive.vector.VectorialStorelessStatistic;
import org.hipparchus.util.FastMath;
import org.hipparchus.util.MathArrays;
import org.hipparchus.util.MathUtils;
/**
* Computes summary statistics for a stream of n-tuples added using the
* {@link #addValue(double[]) addValue} method. The data values are not stored
* in memory, so this class can be used to compute statistics for very large
* n-tuple streams.
* <p>
* To compute statistics for a stream of n-tuples, construct a
* {@link MultivariateSummaryStatistics} instance with dimension n and then use
* {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
* methods where Xxx is a statistic return an array of <code>double</code>
* values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element
* is the value of the given statistic for data range consisting of the i<sup>th</sup>
* element of each of the input n-tuples. For example, if <code>addValue</code> is
* called with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
* <code>getSum</code> will return a three-element array with values {0+3+6, 1+4+7, 2+5+8}
* <p>
* Note: This class is not thread-safe.
*/
public class MultivariateSummaryStatistics
implements StatisticalMultivariateSummary, Serializable {
/** Serialization UID */
private static final long serialVersionUID = 20160424L;
/** Dimension of the data. */
private final int k;
/** Sum statistic implementation */
private final StorelessMultivariateStatistic sumImpl;
/** Sum of squares statistic implementation */
private final StorelessMultivariateStatistic sumSqImpl;
/** Minimum statistic implementation */
private final StorelessMultivariateStatistic minImpl;
/** Maximum statistic implementation */
private final StorelessMultivariateStatistic maxImpl;
/** Sum of log statistic implementation */
private final StorelessMultivariateStatistic sumLogImpl;
/** Geometric mean statistic implementation */
private final StorelessMultivariateStatistic geoMeanImpl;
/** Mean statistic implementation */
private final StorelessMultivariateStatistic meanImpl;
/** Covariance statistic implementation */
private final VectorialCovariance covarianceImpl;
/** Count of values that have been added */
private long n;
/**
* Construct a MultivariateSummaryStatistics instance for the given
* dimension. The returned instance will compute the unbiased sample
* covariance.
* <p>
* The returned instance is <b>not</b> thread-safe.
*
* @param dimension dimension of the data
*/
public MultivariateSummaryStatistics(int dimension) {
this(dimension, true);
}
/**
* Construct a MultivariateSummaryStatistics instance for the given
* dimension.
* <p>
* The returned instance is <b>not</b> thread-safe.
*
* @param dimension dimension of the data
* @param covarianceBiasCorrection if true, the returned instance will compute
* the unbiased sample covariance, otherwise the population covariance
*/
public MultivariateSummaryStatistics(int dimension, boolean covarianceBiasCorrection) {
this.k = dimension;
sumImpl = new VectorialStorelessStatistic(k, new Sum());
sumSqImpl = new VectorialStorelessStatistic(k, new SumOfSquares());
minImpl = new VectorialStorelessStatistic(k, new Min());
maxImpl = new VectorialStorelessStatistic(k, new Max());
sumLogImpl = new VectorialStorelessStatistic(k, new SumOfLogs());
geoMeanImpl = new VectorialStorelessStatistic(k, new GeometricMean());
meanImpl = new VectorialStorelessStatistic(k, new Mean());
covarianceImpl = new VectorialCovariance(k, covarianceBiasCorrection);
}
/**
* Add an n-tuple to the data
*
* @param value the n-tuple to add
* @throws MathIllegalArgumentException if the array is null or the length
* of the array does not match the one used at construction
*/
public void addValue(double[] value) throws MathIllegalArgumentException {
MathUtils.checkNotNull(value, LocalizedCoreFormats.INPUT_ARRAY);
MathUtils.checkDimension(value.length, k);
sumImpl.increment(value);
sumSqImpl.increment(value);
minImpl.increment(value);
maxImpl.increment(value);
sumLogImpl.increment(value);
geoMeanImpl.increment(value);
meanImpl.increment(value);
covarianceImpl.increment(value);
n++;
}
/**
* Resets all statistics and storage.
*/
public void clear() {
this.n = 0;
minImpl.clear();
maxImpl.clear();
sumImpl.clear();
sumLogImpl.clear();
sumSqImpl.clear();
geoMeanImpl.clear();
meanImpl.clear();
covarianceImpl.clear();
}
/** {@inheritDoc} **/
@Override
public int getDimension() {
return k;
}
/** {@inheritDoc} **/
@Override
public long getN() {
return n;
}
/** {@inheritDoc} **/
@Override
public double[] getSum() {
return sumImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public double[] getSumSq() {
return sumSqImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public double[] getSumLog() {
return sumLogImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public double[] getMean() {
return meanImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public RealMatrix getCovariance() {
return covarianceImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public double[] getMax() {
return maxImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public double[] getMin() {
return minImpl.getResult();
}
/** {@inheritDoc} **/
@Override
public double[] getGeometricMean() {
return geoMeanImpl.getResult();
}
/**
* Returns an array whose i<sup>th</sup> entry is the standard deviation of the
* i<sup>th</sup> entries of the arrays that have been added using
* {@link #addValue(double[])}
*
* @return the array of component standard deviations
*/
@Override
public double[] getStandardDeviation() {
double[] stdDev = new double[k];
if (getN() < 1) {
Arrays.fill(stdDev, Double.NaN);
} else if (getN() < 2) {
Arrays.fill(stdDev, 0.0);
} else {
RealMatrix matrix = getCovariance();
for (int i = 0; i < k; ++i) {
stdDev[i] = FastMath.sqrt(matrix.getEntry(i, i));
}
}
return stdDev;
}
/**
* Generates a text report displaying
* summary statistics from values that
* have been added.
* @return String with line feeds displaying statistics
*/
@Override
public String toString() {
final String separator = ", ";
final String suffix = System.getProperty("line.separator");
StringBuilder outBuffer = new StringBuilder(200); // the size is just a wild guess
outBuffer.append("MultivariateSummaryStatistics:").append(suffix).
append("n: ").append(getN()).append(suffix);
append(outBuffer, getMin(), "min: ", separator, suffix);
append(outBuffer, getMax(), "max: ", separator, suffix);
append(outBuffer, getMean(), "mean: ", separator, suffix);
append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
outBuffer.append("covariance: ").append(getCovariance().toString()).append(suffix);
return outBuffer.toString();
}
/**
* Append a text representation of an array to a buffer.
* @param buffer buffer to fill
* @param data data array
* @param prefix text prefix
* @param separator elements separator
* @param suffix text suffix
*/
private void append(StringBuilder buffer, double[] data,
String prefix, String separator, String suffix) {
buffer.append(prefix);
for (int i = 0; i < data.length; ++i) {
if (i > 0) {
buffer.append(separator);
}
buffer.append(data[i]);
}
buffer.append(suffix);
}
/**
* Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
* instance and all statistics have the same values as this.
* @param object the object to test equality against.
* @return true if object equals this
*/
@Override
public boolean equals(Object object) {
if (object == this) {
return true;
}
if (!(object instanceof MultivariateSummaryStatistics)) {
return false;
}
MultivariateSummaryStatistics other = (MultivariateSummaryStatistics) object;
return other.getN() == getN() &&
MathArrays.equalsIncludingNaN(other.getGeometricMean(), getGeometricMean()) &&
MathArrays.equalsIncludingNaN(other.getMax(), getMax()) &&
MathArrays.equalsIncludingNaN(other.getMean(), getMean()) &&
MathArrays.equalsIncludingNaN(other.getMin(), getMin()) &&
MathArrays.equalsIncludingNaN(other.getSum(), getSum()) &&
MathArrays.equalsIncludingNaN(other.getSumSq(), getSumSq()) &&
MathArrays.equalsIncludingNaN(other.getSumLog(), getSumLog()) &&
other.getCovariance().equals(getCovariance());
}
/**
* Returns hash code based on values of statistics
*
* @return hash code
*/
@Override
public int hashCode() {
int result = 31 + MathUtils.hash(getN());
result = result * 31 + MathUtils.hash(getGeometricMean());
result = result * 31 + MathUtils.hash(getMax());
result = result * 31 + MathUtils.hash(getMean());
result = result * 31 + MathUtils.hash(getMin());
result = result * 31 + MathUtils.hash(getSum());
result = result * 31 + MathUtils.hash(getSumSq());
result = result * 31 + MathUtils.hash(getSumLog());
result = result * 31 + getCovariance().hashCode();
return result;
}
}