DescriptiveStatistics.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation.
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.descriptive;

  22. import java.io.Serializable;
  23. import java.util.Arrays;
  24. import java.util.function.DoubleConsumer;

  25. import org.hipparchus.exception.LocalizedCoreFormats;
  26. import org.hipparchus.exception.MathIllegalArgumentException;
  27. import org.hipparchus.exception.MathIllegalStateException;
  28. import org.hipparchus.stat.descriptive.moment.GeometricMean;
  29. import org.hipparchus.stat.descriptive.moment.Kurtosis;
  30. import org.hipparchus.stat.descriptive.moment.Mean;
  31. import org.hipparchus.stat.descriptive.moment.Skewness;
  32. import org.hipparchus.stat.descriptive.moment.Variance;
  33. import org.hipparchus.stat.descriptive.rank.Max;
  34. import org.hipparchus.stat.descriptive.rank.Min;
  35. import org.hipparchus.stat.descriptive.rank.Percentile;
  36. import org.hipparchus.stat.descriptive.summary.Sum;
  37. import org.hipparchus.stat.descriptive.summary.SumOfSquares;
  38. import org.hipparchus.util.FastMath;
  39. import org.hipparchus.util.MathUtils;
  40. import org.hipparchus.util.ResizableDoubleArray;


  41. /**
  42.  * Maintains a dataset of values of a single variable and computes descriptive
  43.  * statistics based on stored data.
  44.  * <p>
  45.  * The {@link #getWindowSize() windowSize} property sets a limit on the number
  46.  * of values that can be stored in the dataset. The default value, INFINITE_WINDOW,
  47.  * puts no limit on the size of the dataset. This value should be used with
  48.  * caution, as the backing store will grow without bound in this case.
  49.  * <p>
  50.  * For very large datasets, {@link StreamingStatistics}, which does not store
  51.  * the dataset, should be used instead of this class. If <code>windowSize</code>
  52.  * is not INFINITE_WINDOW and more values are added than can be stored in the
  53.  * dataset, new values are added in a "rolling" manner, with new values replacing
  54.  * the "oldest" values in the dataset.
  55.  * <p>
  56.  * Note: this class is not threadsafe.
  57.  */
  58. public class DescriptiveStatistics
  59.     implements StatisticalSummary, DoubleConsumer, Serializable {

  60.     /**
  61.      * Represents an infinite window size.  When the {@link #getWindowSize()}
  62.      * returns this value, there is no limit to the number of data values
  63.      * that can be stored in the dataset.
  64.      */
  65.     protected static final int INFINITE_WINDOW = -1;

  66.     /** Serialization UID */
  67.     private static final long serialVersionUID = 20160411L;

  68.     /** The statistic used to calculate the population variance - fixed. */
  69.     private static final UnivariateStatistic POPULATION_VARIANCE = new Variance(false);

  70.     /** Maximum statistic implementation. */
  71.     private final UnivariateStatistic          maxImpl;
  72.     /** Minimum statistic implementation. */
  73.     private final UnivariateStatistic          minImpl;
  74.     /** Sum statistic implementation. */
  75.     private final UnivariateStatistic          sumImpl;
  76.     /** Sum of squares statistic implementation. */
  77.     private final UnivariateStatistic          sumOfSquaresImpl;
  78.     /** Mean statistic implementation. */
  79.     private final UnivariateStatistic          meanImpl;
  80.     /** Variance statistic implementation. */
  81.     private final UnivariateStatistic          varianceImpl;
  82.     /** Geometric mean statistic implementation. */
  83.     private final UnivariateStatistic          geometricMeanImpl;
  84.     /** Kurtosis statistic implementation. */
  85.     private final UnivariateStatistic          kurtosisImpl;
  86.     /** Skewness statistic implementation. */
  87.     private final UnivariateStatistic          skewnessImpl;
  88.     /** Percentile statistic implementation. */
  89.     private final Percentile                   percentileImpl;

  90.     /** holds the window size. */
  91.     private int windowSize;

  92.     /** Stored data values. */
  93.     private final ResizableDoubleArray eDA;

  94.     /**
  95.      * Construct a DescriptiveStatistics instance with an infinite window.
  96.      */
  97.     public DescriptiveStatistics() {
  98.         this(INFINITE_WINDOW);
  99.     }

  100.     /**
  101.      * Construct a DescriptiveStatistics instance with the specified window.
  102.      *
  103.      * @param size the window size.
  104.      * @throws MathIllegalArgumentException if window size is less than 1 but
  105.      * not equal to {@link #INFINITE_WINDOW}
  106.      */
  107.     public DescriptiveStatistics(int size) throws MathIllegalArgumentException {
  108.         this(size, false, null);
  109.     }

  110.     /**
  111.      * Construct a DescriptiveStatistics instance with an infinite window
  112.      * and the initial data values in double[] initialDoubleArray.
  113.      *
  114.      * @param initialDoubleArray the initial double[].
  115.      * @throws org.hipparchus.exception.NullArgumentException if the input array is null
  116.      */
  117.     public DescriptiveStatistics(double[] initialDoubleArray) {
  118.         this(INFINITE_WINDOW, true, initialDoubleArray);
  119.     }

  120.     /**
  121.      * Copy constructor.
  122.      * <p>
  123.      * Construct a new DescriptiveStatistics instance that
  124.      * is a copy of original.
  125.      *
  126.      * @param original DescriptiveStatistics instance to copy
  127.      * @throws org.hipparchus.exception.NullArgumentException if original is null
  128.      */
  129.     protected DescriptiveStatistics(DescriptiveStatistics original) {
  130.         MathUtils.checkNotNull(original);

  131.         // Copy data and window size
  132.         this.windowSize = original.windowSize;
  133.         this.eDA        = original.eDA.copy();

  134.         // Copy implementations
  135.         this.maxImpl           = original.maxImpl.copy();
  136.         this.minImpl           = original.minImpl.copy();
  137.         this.meanImpl          = original.meanImpl.copy();
  138.         this.sumImpl           = original.sumImpl.copy();
  139.         this.sumOfSquaresImpl  = original.sumOfSquaresImpl.copy();
  140.         this.varianceImpl      = original.varianceImpl.copy();
  141.         this.geometricMeanImpl = original.geometricMeanImpl.copy();
  142.         this.kurtosisImpl      = original.kurtosisImpl.copy();
  143.         this.skewnessImpl      = original.skewnessImpl.copy();
  144.         this.percentileImpl    = original.percentileImpl.copy();
  145.     }

  146.     /**
  147.      * Construct a DescriptiveStatistics instance with the specified window.
  148.      *
  149.      * @param windowSize the window size
  150.      * @param hasInitialValues if initial values have been provided
  151.      * @param initialValues the initial values
  152.      * @throws org.hipparchus.exception.NullArgumentException if initialValues is null
  153.      * @throws MathIllegalArgumentException if window size is less than 1 but
  154.      * not equal to {@link #INFINITE_WINDOW}
  155.      */
  156.     DescriptiveStatistics(int windowSize, boolean hasInitialValues, double[] initialValues) {
  157.         if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
  158.             throw new MathIllegalArgumentException(
  159.                     LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
  160.         }

  161.         if (hasInitialValues) {
  162.             MathUtils.checkNotNull(initialValues, LocalizedCoreFormats.INPUT_ARRAY);
  163.         }

  164.         this.windowSize     = windowSize;
  165.         int initialCapacity = this.windowSize < 0 ? 100 : this.windowSize;
  166.         this.eDA            = hasInitialValues ?
  167.             new ResizableDoubleArray(initialValues) :
  168.             new ResizableDoubleArray(initialCapacity);

  169.         maxImpl           = new Max();
  170.         minImpl           = new Min();
  171.         sumImpl           = new Sum();
  172.         sumOfSquaresImpl  = new SumOfSquares();
  173.         meanImpl          = new Mean();
  174.         varianceImpl      = new Variance();
  175.         geometricMeanImpl = new GeometricMean();
  176.         kurtosisImpl      = new Kurtosis();
  177.         skewnessImpl      = new Skewness();
  178.         percentileImpl    = new Percentile();
  179.     }

  180.     /**
  181.      * Returns a copy of this DescriptiveStatistics instance with the same internal state.
  182.      *
  183.      * @return a copy of this
  184.      */
  185.     public DescriptiveStatistics copy() {
  186.         return new DescriptiveStatistics(this);
  187.     }

  188.     /**
  189.      * Adds the value to the dataset. If the dataset is at the maximum size
  190.      * (i.e., the number of stored elements equals the currently configured
  191.      * windowSize), the first (oldest) element in the dataset is discarded
  192.      * to make room for the new value.
  193.      *
  194.      * @param v the value to be added
  195.      */
  196.     public void addValue(double v) {
  197.         if (windowSize != INFINITE_WINDOW) {
  198.             if (getN() == windowSize) {
  199.                 eDA.addElementRolling(v);
  200.             } else if (getN() < windowSize) {
  201.                 eDA.addElement(v);
  202.             }
  203.         } else {
  204.             eDA.addElement(v);
  205.         }
  206.     }

  207.     /** {@inheritDoc} */
  208.     @Override
  209.     public void accept(double v) {
  210.         addValue(v);
  211.     }

  212.     /**
  213.      * Resets all statistics and storage.
  214.      */
  215.     public void clear() {
  216.         eDA.clear();
  217.     }

  218.     /**
  219.      * Removes the most recent value from the dataset.
  220.      *
  221.      * @throws MathIllegalStateException if there are no elements stored
  222.      */
  223.     public void removeMostRecentValue() throws MathIllegalStateException {
  224.         try {
  225.             eDA.discardMostRecentElements(1);
  226.         } catch (MathIllegalArgumentException ex) {
  227.             throw new MathIllegalStateException(ex, LocalizedCoreFormats.NO_DATA);
  228.         }
  229.     }

  230.     /**
  231.      * Replaces the most recently stored value with the given value.
  232.      * There must be at least one element stored to call this method.
  233.      *
  234.      * @param v the value to replace the most recent stored value
  235.      * @return replaced value
  236.      * @throws MathIllegalStateException if there are no elements stored
  237.      */
  238.     public double replaceMostRecentValue(double v) throws MathIllegalStateException {
  239.         return eDA.substituteMostRecentElement(v);
  240.     }

  241.     /**
  242.      * Apply the given statistic to the data associated with this set of statistics.
  243.      * @param stat the statistic to apply
  244.      * @return the computed value of the statistic.
  245.      */
  246.     public double apply(UnivariateStatistic stat) {
  247.         // No try-catch or advertised exception here because arguments
  248.         // are guaranteed valid.
  249.         return eDA.compute(stat);
  250.     }

  251.     /** {@inheritDoc} */
  252.     @Override
  253.     public double getMean() {
  254.         return apply(meanImpl);
  255.     }

  256.     /**
  257.      * Returns the geometric mean of the available values.
  258.      * <p>
  259.      * See {@link GeometricMean} for details on the computing algorithm.
  260.      *
  261.      * @see <a href="http://www.xycoon.com/geometric_mean.htm">
  262.      * Geometric mean</a>
  263.      *
  264.      * @return The geometricMean, Double.NaN if no values have been added,
  265.      * or if any negative values have been added.
  266.      */
  267.     public double getGeometricMean() {
  268.         return apply(geometricMeanImpl);
  269.     }

  270.     /**
  271.      * Returns the standard deviation of the available values.
  272.      * @return The standard deviation, Double.NaN if no values have been added
  273.      * or 0.0 for a single value set.
  274.      */
  275.     @Override
  276.     public double getStandardDeviation() {
  277.         double stdDev = Double.NaN;
  278.         if (getN() > 0) {
  279.             if (getN() > 1) {
  280.                 stdDev = FastMath.sqrt(getVariance());
  281.             } else {
  282.                 stdDev = 0.0;
  283.             }
  284.         }
  285.         return stdDev;
  286.     }

  287.     /**
  288.      * Returns the quadratic mean of the available values.
  289.      *
  290.      * @see <a href="http://mathworld.wolfram.com/Root-Mean-Square.html">
  291.      * Root Mean Square</a>
  292.      *
  293.      * @return The quadratic mean or {@code Double.NaN} if no values
  294.      * have been added.
  295.      */
  296.     public double getQuadraticMean() {
  297.         final long n = getN();
  298.         return n > 0 ? FastMath.sqrt(getSumOfSquares() / n) : Double.NaN;
  299.     }

  300.     /** {@inheritDoc} */
  301.     @Override
  302.     public double getVariance() {
  303.         return apply(varianceImpl);
  304.     }

  305.     /**
  306.      * Returns the population variance of the available values.
  307.      *
  308.      * @see <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
  309.      * Population variance</a>
  310.      *
  311.      * @return The population variance, Double.NaN if no values have been added,
  312.      * or 0.0 for a single value set.
  313.      */
  314.     public double getPopulationVariance() {
  315.         return apply(POPULATION_VARIANCE);
  316.     }

  317.     /**
  318.      * Returns the skewness of the available values. Skewness is a
  319.      * measure of the asymmetry of a given distribution.
  320.      *
  321.      * @return The skewness, Double.NaN if less than 3 values have been added.
  322.      */
  323.     public double getSkewness() {
  324.         return apply(skewnessImpl);
  325.     }

  326.     /**
  327.      * Returns the Kurtosis of the available values. Kurtosis is a
  328.      * measure of the "peakedness" of a distribution.
  329.      *
  330.      * @return The kurtosis, Double.NaN if less than 4 values have been added.
  331.      */
  332.     public double getKurtosis() {
  333.         return apply(kurtosisImpl);
  334.     }

  335.     /** {@inheritDoc} */
  336.     @Override
  337.     public double getMax() {
  338.         return apply(maxImpl);
  339.     }

  340.     /** {@inheritDoc} */
  341.     @Override
  342.     public double getMin() {
  343.         return apply(minImpl);
  344.     }

  345.     /** {@inheritDoc} */
  346.     @Override
  347.     public double getSum() {
  348.         return apply(sumImpl);
  349.     }

  350.     /**
  351.      * Returns the sum of the squares of the available values.
  352.      * @return The sum of the squares or Double.NaN if no
  353.      * values have been added.
  354.      */
  355.     public double getSumOfSquares() {
  356.         return apply(sumOfSquaresImpl);
  357.     }

  358.     /**
  359.      * Returns an estimate for the pth percentile of the stored values.
  360.      * <p>
  361.      * The implementation provided here follows the first estimation procedure presented
  362.      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
  363.      * </p><p>
  364.      * <strong>Preconditions</strong>:<ul>
  365.      * <li><code>0 &lt; p &le; 100</code> (otherwise an
  366.      * <code>MathIllegalArgumentException</code> is thrown)</li>
  367.      * <li>at least one value must be stored (returns <code>Double.NaN
  368.      *     </code> otherwise)</li>
  369.      * </ul>
  370.      *
  371.      * @param p the requested percentile (scaled from 0 - 100)
  372.      * @return An estimate for the pth percentile of the stored data
  373.      * @throws MathIllegalArgumentException if p is not a valid quantile
  374.      */
  375.     public double getPercentile(final double p)
  376.         throws MathIllegalArgumentException {

  377.         percentileImpl.setQuantile(p);
  378.         return apply(percentileImpl);
  379.     }

  380.     /** {@inheritDoc} */
  381.     @Override
  382.     public long getN() {
  383.         return eDA.getNumElements();
  384.     }

  385.     /**
  386.      * Returns the maximum number of values that can be stored in the
  387.      * dataset, or INFINITE_WINDOW (-1) if there is no limit.
  388.      *
  389.      * @return The current window size or -1 if its Infinite.
  390.      */
  391.     public int getWindowSize() {
  392.         return windowSize;
  393.     }

  394.     /**
  395.      * WindowSize controls the number of values that contribute to the
  396.      * reported statistics.  For example, if windowSize is set to 3 and the
  397.      * values {1,2,3,4,5} have been added <strong> in that order</strong> then
  398.      * the <i>available values</i> are {3,4,5} and all reported statistics will
  399.      * be based on these values. If {@code windowSize} is decreased as a result
  400.      * of this call and there are more than the new value of elements in the
  401.      * current dataset, values from the front of the array are discarded to
  402.      * reduce the dataset to {@code windowSize} elements.
  403.      *
  404.      * @param windowSize sets the size of the window.
  405.      * @throws MathIllegalArgumentException if window size is less than 1 but
  406.      * not equal to {@link #INFINITE_WINDOW}
  407.      */
  408.     public void setWindowSize(int windowSize)
  409.         throws MathIllegalArgumentException {

  410.         if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
  411.             throw new MathIllegalArgumentException(
  412.                     LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
  413.         }

  414.         this.windowSize = windowSize;

  415.         // We need to check to see if we need to discard elements
  416.         // from the front of the array.  If the windowSize is less than
  417.         // the current number of elements.
  418.         if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
  419.             eDA.discardFrontElements(eDA.getNumElements() - windowSize);
  420.         }
  421.     }

  422.     /**
  423.      * Returns the current set of values in an array of double primitives.
  424.      * The order of addition is preserved.  The returned array is a fresh
  425.      * copy of the underlying data -- i.e., it is not a reference to the
  426.      * stored data.
  427.      *
  428.      * @return the current set of numbers in the order in which they
  429.      * were added to this set
  430.      */
  431.     public double[] getValues() {
  432.         return eDA.getElements();
  433.     }

  434.     /**
  435.      * Returns the current set of values in an array of double primitives,
  436.      * sorted in ascending order.  The returned array is a fresh
  437.      * copy of the underlying data -- i.e., it is not a reference to the
  438.      * stored data.
  439.      * @return returns the current set of
  440.      * numbers sorted in ascending order
  441.      */
  442.     public double[] getSortedValues() {
  443.         double[] sort = getValues();
  444.         Arrays.sort(sort);
  445.         return sort;
  446.     }

  447.     /**
  448.      * Returns the element at the specified index
  449.      * @param index The Index of the element
  450.      * @return return the element at the specified index
  451.      */
  452.     public double getElement(int index) {
  453.         return eDA.getElement(index);
  454.     }

  455.     /**
  456.      * Generates a text report displaying univariate statistics from values
  457.      * that have been added.  Each statistic is displayed on a separate line.
  458.      *
  459.      * @return String with line feeds displaying statistics
  460.      */
  461.     @Override
  462.     public String toString() {
  463.         final StringBuilder outBuffer = new StringBuilder(100);
  464.         final String endl = "\n";
  465.         outBuffer.append("DescriptiveStatistics:").append(endl).
  466.                   append("n: ").append(getN()).append(endl).
  467.                   append("min: ").append(getMin()).append(endl).
  468.                   append("max: ").append(getMax()).append(endl).
  469.                   append("mean: ").append(getMean()).append(endl).
  470.                   append("std dev: ").append(getStandardDeviation()).append(endl);
  471.         try {
  472.             // No catch for MIAE because actual parameter is valid below
  473.             outBuffer.append("median: ").append(getPercentile(50)).append(endl);
  474.         } catch (MathIllegalStateException ex) {
  475.             outBuffer.append("median: unavailable").append(endl);
  476.         }
  477.         outBuffer.append("skewness: ").append(getSkewness()).append(endl).
  478.                   append("kurtosis: ").append(getKurtosis()).append(endl);
  479.         return outBuffer.toString();
  480.     }

  481. }