DescriptiveStatistics.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /*
- * This is not the original file distributed by the Apache Software Foundation
- * It has been modified by the Hipparchus project
- */
- package org.hipparchus.stat.descriptive;
- import java.io.Serializable;
- import java.util.Arrays;
- import java.util.function.DoubleConsumer;
- import org.hipparchus.exception.LocalizedCoreFormats;
- import org.hipparchus.exception.MathIllegalArgumentException;
- import org.hipparchus.exception.MathIllegalStateException;
- import org.hipparchus.stat.descriptive.moment.GeometricMean;
- import org.hipparchus.stat.descriptive.moment.Kurtosis;
- import org.hipparchus.stat.descriptive.moment.Mean;
- import org.hipparchus.stat.descriptive.moment.Skewness;
- import org.hipparchus.stat.descriptive.moment.Variance;
- import org.hipparchus.stat.descriptive.rank.Max;
- import org.hipparchus.stat.descriptive.rank.Min;
- import org.hipparchus.stat.descriptive.rank.Percentile;
- import org.hipparchus.stat.descriptive.summary.Sum;
- import org.hipparchus.stat.descriptive.summary.SumOfSquares;
- import org.hipparchus.util.FastMath;
- import org.hipparchus.util.MathUtils;
- import org.hipparchus.util.ResizableDoubleArray;
- /**
- * Maintains a dataset of values of a single variable and computes descriptive
- * statistics based on stored data.
- * <p>
- * The {@link #getWindowSize() windowSize} property sets a limit on the number
- * of values that can be stored in the dataset. The default value, INFINITE_WINDOW,
- * puts no limit on the size of the dataset. This value should be used with
- * caution, as the backing store will grow without bound in this case.
- * <p>
- * For very large datasets, {@link StreamingStatistics}, which does not store
- * the dataset, should be used instead of this class. If <code>windowSize</code>
- * is not INFINITE_WINDOW and more values are added than can be stored in the
- * dataset, new values are added in a "rolling" manner, with new values replacing
- * the "oldest" values in the dataset.
- * <p>
- * Note: this class is not threadsafe.
- */
- public class DescriptiveStatistics
- implements StatisticalSummary, DoubleConsumer, Serializable {
- /**
- * Represents an infinite window size. When the {@link #getWindowSize()}
- * returns this value, there is no limit to the number of data values
- * that can be stored in the dataset.
- */
- protected static final int INFINITE_WINDOW = -1;
- /** Serialization UID */
- private static final long serialVersionUID = 20160411L;
- /** The statistic used to calculate the population variance - fixed. */
- private static final UnivariateStatistic POPULATION_VARIANCE = new Variance(false);
- /** Maximum statistic implementation. */
- private final UnivariateStatistic maxImpl;
- /** Minimum statistic implementation. */
- private final UnivariateStatistic minImpl;
- /** Sum statistic implementation. */
- private final UnivariateStatistic sumImpl;
- /** Sum of squares statistic implementation. */
- private final UnivariateStatistic sumOfSquaresImpl;
- /** Mean statistic implementation. */
- private final UnivariateStatistic meanImpl;
- /** Variance statistic implementation. */
- private final UnivariateStatistic varianceImpl;
- /** Geometric mean statistic implementation. */
- private final UnivariateStatistic geometricMeanImpl;
- /** Kurtosis statistic implementation. */
- private final UnivariateStatistic kurtosisImpl;
- /** Skewness statistic implementation. */
- private final UnivariateStatistic skewnessImpl;
- /** Percentile statistic implementation. */
- private final Percentile percentileImpl;
- /** holds the window size. */
- private int windowSize;
- /** Stored data values. */
- private final ResizableDoubleArray eDA;
- /**
- * Construct a DescriptiveStatistics instance with an infinite window.
- */
- public DescriptiveStatistics() {
- this(INFINITE_WINDOW);
- }
- /**
- * Construct a DescriptiveStatistics instance with the specified window.
- *
- * @param size the window size.
- * @throws MathIllegalArgumentException if window size is less than 1 but
- * not equal to {@link #INFINITE_WINDOW}
- */
- public DescriptiveStatistics(int size) throws MathIllegalArgumentException {
- this(size, false, null);
- }
- /**
- * Construct a DescriptiveStatistics instance with an infinite window
- * and the initial data values in double[] initialDoubleArray.
- *
- * @param initialDoubleArray the initial double[].
- * @throws org.hipparchus.exception.NullArgumentException if the input array is null
- */
- public DescriptiveStatistics(double[] initialDoubleArray) {
- this(INFINITE_WINDOW, true, initialDoubleArray);
- }
- /**
- * Copy constructor.
- * <p>
- * Construct a new DescriptiveStatistics instance that
- * is a copy of original.
- *
- * @param original DescriptiveStatistics instance to copy
- * @throws org.hipparchus.exception.NullArgumentException if original is null
- */
- protected DescriptiveStatistics(DescriptiveStatistics original) {
- MathUtils.checkNotNull(original);
- // Copy data and window size
- this.windowSize = original.windowSize;
- this.eDA = original.eDA.copy();
- // Copy implementations
- this.maxImpl = original.maxImpl.copy();
- this.minImpl = original.minImpl.copy();
- this.meanImpl = original.meanImpl.copy();
- this.sumImpl = original.sumImpl.copy();
- this.sumOfSquaresImpl = original.sumOfSquaresImpl.copy();
- this.varianceImpl = original.varianceImpl.copy();
- this.geometricMeanImpl = original.geometricMeanImpl.copy();
- this.kurtosisImpl = original.kurtosisImpl.copy();
- this.skewnessImpl = original.skewnessImpl.copy();
- this.percentileImpl = original.percentileImpl.copy();
- }
- /**
- * Construct a DescriptiveStatistics instance with the specified window.
- *
- * @param windowSize the window size
- * @param hasInitialValues if initial values have been provided
- * @param initialValues the initial values
- * @throws org.hipparchus.exception.NullArgumentException if initialValues is null
- * @throws MathIllegalArgumentException if window size is less than 1 but
- * not equal to {@link #INFINITE_WINDOW}
- */
- DescriptiveStatistics(int windowSize, boolean hasInitialValues, double[] initialValues) {
- if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
- throw new MathIllegalArgumentException(
- LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
- }
- if (hasInitialValues) {
- MathUtils.checkNotNull(initialValues, LocalizedCoreFormats.INPUT_ARRAY);
- }
- this.windowSize = windowSize;
- int initialCapacity = this.windowSize < 0 ? 100 : this.windowSize;
- this.eDA = hasInitialValues ?
- new ResizableDoubleArray(initialValues) :
- new ResizableDoubleArray(initialCapacity);
- maxImpl = new Max();
- minImpl = new Min();
- sumImpl = new Sum();
- sumOfSquaresImpl = new SumOfSquares();
- meanImpl = new Mean();
- varianceImpl = new Variance();
- geometricMeanImpl = new GeometricMean();
- kurtosisImpl = new Kurtosis();
- skewnessImpl = new Skewness();
- percentileImpl = new Percentile();
- }
- /**
- * Returns a copy of this DescriptiveStatistics instance with the same internal state.
- *
- * @return a copy of this
- */
- public DescriptiveStatistics copy() {
- return new DescriptiveStatistics(this);
- }
- /**
- * Adds the value to the dataset. If the dataset is at the maximum size
- * (i.e., the number of stored elements equals the currently configured
- * windowSize), the first (oldest) element in the dataset is discarded
- * to make room for the new value.
- *
- * @param v the value to be added
- */
- public void addValue(double v) {
- if (windowSize != INFINITE_WINDOW) {
- if (getN() == windowSize) {
- eDA.addElementRolling(v);
- } else if (getN() < windowSize) {
- eDA.addElement(v);
- }
- } else {
- eDA.addElement(v);
- }
- }
- /** {@inheritDoc} */
- @Override
- public void accept(double v) {
- addValue(v);
- }
- /**
- * Resets all statistics and storage.
- */
- public void clear() {
- eDA.clear();
- }
- /**
- * Removes the most recent value from the dataset.
- *
- * @throws MathIllegalStateException if there are no elements stored
- */
- public void removeMostRecentValue() throws MathIllegalStateException {
- try {
- eDA.discardMostRecentElements(1);
- } catch (MathIllegalArgumentException ex) {
- throw new MathIllegalStateException(ex, LocalizedCoreFormats.NO_DATA);
- }
- }
- /**
- * Replaces the most recently stored value with the given value.
- * There must be at least one element stored to call this method.
- *
- * @param v the value to replace the most recent stored value
- * @return replaced value
- * @throws MathIllegalStateException if there are no elements stored
- */
- public double replaceMostRecentValue(double v) throws MathIllegalStateException {
- return eDA.substituteMostRecentElement(v);
- }
- /**
- * Apply the given statistic to the data associated with this set of statistics.
- * @param stat the statistic to apply
- * @return the computed value of the statistic.
- */
- public double apply(UnivariateStatistic stat) {
- // No try-catch or advertised exception here because arguments
- // are guaranteed valid.
- return eDA.compute(stat);
- }
- /** {@inheritDoc} */
- @Override
- public double getMean() {
- return apply(meanImpl);
- }
- /**
- * Returns the geometric mean of the available values.
- * <p>
- * See {@link GeometricMean} for details on the computing algorithm.
- *
- * @see <a href="http://www.xycoon.com/geometric_mean.htm">
- * Geometric mean</a>
- *
- * @return The geometricMean, Double.NaN if no values have been added,
- * or if any negative values have been added.
- */
- public double getGeometricMean() {
- return apply(geometricMeanImpl);
- }
- /**
- * Returns the standard deviation of the available values.
- * @return The standard deviation, Double.NaN if no values have been added
- * or 0.0 for a single value set.
- */
- @Override
- public double getStandardDeviation() {
- double stdDev = Double.NaN;
- if (getN() > 0) {
- if (getN() > 1) {
- stdDev = FastMath.sqrt(getVariance());
- } else {
- stdDev = 0.0;
- }
- }
- return stdDev;
- }
- /**
- * Returns the quadratic mean of the available values.
- *
- * @see <a href="http://mathworld.wolfram.com/Root-Mean-Square.html">
- * Root Mean Square</a>
- *
- * @return The quadratic mean or {@code Double.NaN} if no values
- * have been added.
- */
- public double getQuadraticMean() {
- final long n = getN();
- return n > 0 ? FastMath.sqrt(getSumOfSquares() / n) : Double.NaN;
- }
- /** {@inheritDoc} */
- @Override
- public double getVariance() {
- return apply(varianceImpl);
- }
- /**
- * Returns the population variance of the available values.
- *
- * @see <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
- * Population variance</a>
- *
- * @return The population variance, Double.NaN if no values have been added,
- * or 0.0 for a single value set.
- */
- public double getPopulationVariance() {
- return apply(POPULATION_VARIANCE);
- }
- /**
- * Returns the skewness of the available values. Skewness is a
- * measure of the asymmetry of a given distribution.
- *
- * @return The skewness, Double.NaN if less than 3 values have been added.
- */
- public double getSkewness() {
- return apply(skewnessImpl);
- }
- /**
- * Returns the Kurtosis of the available values. Kurtosis is a
- * measure of the "peakedness" of a distribution.
- *
- * @return The kurtosis, Double.NaN if less than 4 values have been added.
- */
- public double getKurtosis() {
- return apply(kurtosisImpl);
- }
- /** {@inheritDoc} */
- @Override
- public double getMax() {
- return apply(maxImpl);
- }
- /** {@inheritDoc} */
- @Override
- public double getMin() {
- return apply(minImpl);
- }
- /** {@inheritDoc} */
- @Override
- public double getSum() {
- return apply(sumImpl);
- }
- /**
- * Returns the sum of the squares of the available values.
- * @return The sum of the squares or Double.NaN if no
- * values have been added.
- */
- public double getSumOfSquares() {
- return apply(sumOfSquaresImpl);
- }
- /**
- * Returns an estimate for the pth percentile of the stored values.
- * <p>
- * The implementation provided here follows the first estimation procedure presented
- * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
- * </p><p>
- * <strong>Preconditions</strong>:<ul>
- * <li><code>0 < p ≤ 100</code> (otherwise an
- * <code>MathIllegalArgumentException</code> is thrown)</li>
- * <li>at least one value must be stored (returns <code>Double.NaN
- * </code> otherwise)</li>
- * </ul>
- *
- * @param p the requested percentile (scaled from 0 - 100)
- * @return An estimate for the pth percentile of the stored data
- * @throws MathIllegalArgumentException if p is not a valid quantile
- */
- public double getPercentile(final double p)
- throws MathIllegalArgumentException {
- percentileImpl.setQuantile(p);
- return apply(percentileImpl);
- }
- /** {@inheritDoc} */
- @Override
- public long getN() {
- return eDA.getNumElements();
- }
- /**
- * Returns the maximum number of values that can be stored in the
- * dataset, or INFINITE_WINDOW (-1) if there is no limit.
- *
- * @return The current window size or -1 if its Infinite.
- */
- public int getWindowSize() {
- return windowSize;
- }
- /**
- * WindowSize controls the number of values that contribute to the
- * reported statistics. For example, if windowSize is set to 3 and the
- * values {1,2,3,4,5} have been added <strong> in that order</strong> then
- * the <i>available values</i> are {3,4,5} and all reported statistics will
- * be based on these values. If {@code windowSize} is decreased as a result
- * of this call and there are more than the new value of elements in the
- * current dataset, values from the front of the array are discarded to
- * reduce the dataset to {@code windowSize} elements.
- *
- * @param windowSize sets the size of the window.
- * @throws MathIllegalArgumentException if window size is less than 1 but
- * not equal to {@link #INFINITE_WINDOW}
- */
- public void setWindowSize(int windowSize)
- throws MathIllegalArgumentException {
- if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
- throw new MathIllegalArgumentException(
- LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
- }
- this.windowSize = windowSize;
- // We need to check to see if we need to discard elements
- // from the front of the array. If the windowSize is less than
- // the current number of elements.
- if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
- eDA.discardFrontElements(eDA.getNumElements() - windowSize);
- }
- }
- /**
- * Returns the current set of values in an array of double primitives.
- * The order of addition is preserved. The returned array is a fresh
- * copy of the underlying data -- i.e., it is not a reference to the
- * stored data.
- *
- * @return the current set of numbers in the order in which they
- * were added to this set
- */
- public double[] getValues() {
- return eDA.getElements();
- }
- /**
- * Returns the current set of values in an array of double primitives,
- * sorted in ascending order. The returned array is a fresh
- * copy of the underlying data -- i.e., it is not a reference to the
- * stored data.
- * @return returns the current set of
- * numbers sorted in ascending order
- */
- public double[] getSortedValues() {
- double[] sort = getValues();
- Arrays.sort(sort);
- return sort;
- }
- /**
- * Returns the element at the specified index
- * @param index The Index of the element
- * @return return the element at the specified index
- */
- public double getElement(int index) {
- return eDA.getElement(index);
- }
- /**
- * Generates a text report displaying univariate statistics from values
- * that have been added. Each statistic is displayed on a separate line.
- *
- * @return String with line feeds displaying statistics
- */
- @Override
- public String toString() {
- final StringBuilder outBuffer = new StringBuilder(100);
- final String endl = "\n";
- outBuffer.append("DescriptiveStatistics:").append(endl).
- append("n: ").append(getN()).append(endl).
- append("min: ").append(getMin()).append(endl).
- append("max: ").append(getMax()).append(endl).
- append("mean: ").append(getMean()).append(endl).
- append("std dev: ").append(getStandardDeviation()).append(endl);
- try {
- // No catch for MIAE because actual parameter is valid below
- outBuffer.append("median: ").append(getPercentile(50)).append(endl);
- } catch (MathIllegalStateException ex) {
- outBuffer.append("median: unavailable").append(endl);
- }
- outBuffer.append("skewness: ").append(getSkewness()).append(endl).
- append("kurtosis: ").append(getKurtosis()).append(endl);
- return outBuffer.toString();
- }
- }