1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.descriptive; 23 24 import java.io.Serializable; 25 import java.util.Arrays; 26 import java.util.function.DoubleConsumer; 27 28 import org.hipparchus.exception.LocalizedCoreFormats; 29 import org.hipparchus.exception.MathIllegalArgumentException; 30 import org.hipparchus.exception.MathIllegalStateException; 31 import org.hipparchus.stat.descriptive.moment.GeometricMean; 32 import org.hipparchus.stat.descriptive.moment.Kurtosis; 33 import org.hipparchus.stat.descriptive.moment.Mean; 34 import org.hipparchus.stat.descriptive.moment.Skewness; 35 import org.hipparchus.stat.descriptive.moment.Variance; 36 import org.hipparchus.stat.descriptive.rank.Max; 37 import org.hipparchus.stat.descriptive.rank.Min; 38 import org.hipparchus.stat.descriptive.rank.Percentile; 39 import org.hipparchus.stat.descriptive.summary.Sum; 40 import org.hipparchus.stat.descriptive.summary.SumOfSquares; 41 import org.hipparchus.util.FastMath; 42 import org.hipparchus.util.MathUtils; 43 import org.hipparchus.util.ResizableDoubleArray; 44 45 46 /** 47 * Maintains a dataset of values of a single variable and computes descriptive 48 * statistics based on stored data. 49 * <p> 50 * The {@link #getWindowSize() windowSize} property sets a limit on the number 51 * of values that can be stored in the dataset. The default value, INFINITE_WINDOW, 52 * puts no limit on the size of the dataset. This value should be used with 53 * caution, as the backing store will grow without bound in this case. 54 * <p> 55 * For very large datasets, {@link StreamingStatistics}, which does not store 56 * the dataset, should be used instead of this class. If <code>windowSize</code> 57 * is not INFINITE_WINDOW and more values are added than can be stored in the 58 * dataset, new values are added in a "rolling" manner, with new values replacing 59 * the "oldest" values in the dataset. 60 * <p> 61 * Note: this class is not threadsafe. 62 */ 63 public class DescriptiveStatistics 64 implements StatisticalSummary, DoubleConsumer, Serializable { 65 66 /** 67 * Represents an infinite window size. When the {@link #getWindowSize()} 68 * returns this value, there is no limit to the number of data values 69 * that can be stored in the dataset. 70 */ 71 protected static final int INFINITE_WINDOW = -1; 72 73 /** Serialization UID */ 74 private static final long serialVersionUID = 20160411L; 75 76 /** The statistic used to calculate the population variance - fixed. */ 77 private static final UnivariateStatistic POPULATION_VARIANCE = new Variance(false); 78 79 /** Maximum statistic implementation. */ 80 private final UnivariateStatistic maxImpl; 81 /** Minimum statistic implementation. */ 82 private final UnivariateStatistic minImpl; 83 /** Sum statistic implementation. */ 84 private final UnivariateStatistic sumImpl; 85 /** Sum of squares statistic implementation. */ 86 private final UnivariateStatistic sumOfSquaresImpl; 87 /** Mean statistic implementation. */ 88 private final UnivariateStatistic meanImpl; 89 /** Variance statistic implementation. */ 90 private final UnivariateStatistic varianceImpl; 91 /** Geometric mean statistic implementation. */ 92 private final UnivariateStatistic geometricMeanImpl; 93 /** Kurtosis statistic implementation. */ 94 private final UnivariateStatistic kurtosisImpl; 95 /** Skewness statistic implementation. */ 96 private final UnivariateStatistic skewnessImpl; 97 /** Percentile statistic implementation. */ 98 private final Percentile percentileImpl; 99 100 /** holds the window size. */ 101 private int windowSize; 102 103 /** Stored data values. */ 104 private final ResizableDoubleArray eDA; 105 106 /** 107 * Construct a DescriptiveStatistics instance with an infinite window. 108 */ 109 public DescriptiveStatistics() { 110 this(INFINITE_WINDOW); 111 } 112 113 /** 114 * Construct a DescriptiveStatistics instance with the specified window. 115 * 116 * @param size the window size. 117 * @throws MathIllegalArgumentException if window size is less than 1 but 118 * not equal to {@link #INFINITE_WINDOW} 119 */ 120 public DescriptiveStatistics(int size) throws MathIllegalArgumentException { 121 this(size, false, null); 122 } 123 124 /** 125 * Construct a DescriptiveStatistics instance with an infinite window 126 * and the initial data values in double[] initialDoubleArray. 127 * 128 * @param initialDoubleArray the initial double[]. 129 * @throws org.hipparchus.exception.NullArgumentException if the input array is null 130 */ 131 public DescriptiveStatistics(double[] initialDoubleArray) { 132 this(INFINITE_WINDOW, true, initialDoubleArray); 133 } 134 135 /** 136 * Copy constructor. 137 * <p> 138 * Construct a new DescriptiveStatistics instance that 139 * is a copy of original. 140 * 141 * @param original DescriptiveStatistics instance to copy 142 * @throws org.hipparchus.exception.NullArgumentException if original is null 143 */ 144 protected DescriptiveStatistics(DescriptiveStatistics original) { 145 MathUtils.checkNotNull(original); 146 147 // Copy data and window size 148 this.windowSize = original.windowSize; 149 this.eDA = original.eDA.copy(); 150 151 // Copy implementations 152 this.maxImpl = original.maxImpl.copy(); 153 this.minImpl = original.minImpl.copy(); 154 this.meanImpl = original.meanImpl.copy(); 155 this.sumImpl = original.sumImpl.copy(); 156 this.sumOfSquaresImpl = original.sumOfSquaresImpl.copy(); 157 this.varianceImpl = original.varianceImpl.copy(); 158 this.geometricMeanImpl = original.geometricMeanImpl.copy(); 159 this.kurtosisImpl = original.kurtosisImpl.copy(); 160 this.skewnessImpl = original.skewnessImpl.copy(); 161 this.percentileImpl = original.percentileImpl.copy(); 162 } 163 164 /** 165 * Construct a DescriptiveStatistics instance with the specified window. 166 * 167 * @param windowSize the window size 168 * @param hasInitialValues if initial values have been provided 169 * @param initialValues the initial values 170 * @throws org.hipparchus.exception.NullArgumentException if initialValues is null 171 * @throws MathIllegalArgumentException if window size is less than 1 but 172 * not equal to {@link #INFINITE_WINDOW} 173 */ 174 DescriptiveStatistics(int windowSize, boolean hasInitialValues, double[] initialValues) { 175 if (windowSize < 1 && windowSize != INFINITE_WINDOW) { 176 throw new MathIllegalArgumentException( 177 LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize); 178 } 179 180 if (hasInitialValues) { 181 MathUtils.checkNotNull(initialValues, LocalizedCoreFormats.INPUT_ARRAY); 182 } 183 184 this.windowSize = windowSize; 185 int initialCapacity = this.windowSize < 0 ? 100 : this.windowSize; 186 this.eDA = hasInitialValues ? 187 new ResizableDoubleArray(initialValues) : 188 new ResizableDoubleArray(initialCapacity); 189 190 maxImpl = new Max(); 191 minImpl = new Min(); 192 sumImpl = new Sum(); 193 sumOfSquaresImpl = new SumOfSquares(); 194 meanImpl = new Mean(); 195 varianceImpl = new Variance(); 196 geometricMeanImpl = new GeometricMean(); 197 kurtosisImpl = new Kurtosis(); 198 skewnessImpl = new Skewness(); 199 percentileImpl = new Percentile(); 200 } 201 202 /** 203 * Returns a copy of this DescriptiveStatistics instance with the same internal state. 204 * 205 * @return a copy of this 206 */ 207 public DescriptiveStatistics copy() { 208 return new DescriptiveStatistics(this); 209 } 210 211 /** 212 * Adds the value to the dataset. If the dataset is at the maximum size 213 * (i.e., the number of stored elements equals the currently configured 214 * windowSize), the first (oldest) element in the dataset is discarded 215 * to make room for the new value. 216 * 217 * @param v the value to be added 218 */ 219 public void addValue(double v) { 220 if (windowSize != INFINITE_WINDOW) { 221 if (getN() == windowSize) { 222 eDA.addElementRolling(v); 223 } else if (getN() < windowSize) { 224 eDA.addElement(v); 225 } 226 } else { 227 eDA.addElement(v); 228 } 229 } 230 231 /** {@inheritDoc} */ 232 @Override 233 public void accept(double v) { 234 addValue(v); 235 } 236 237 /** 238 * Resets all statistics and storage. 239 */ 240 public void clear() { 241 eDA.clear(); 242 } 243 244 /** 245 * Removes the most recent value from the dataset. 246 * 247 * @throws MathIllegalStateException if there are no elements stored 248 */ 249 public void removeMostRecentValue() throws MathIllegalStateException { 250 try { 251 eDA.discardMostRecentElements(1); 252 } catch (MathIllegalArgumentException ex) { 253 throw new MathIllegalStateException(ex, LocalizedCoreFormats.NO_DATA); 254 } 255 } 256 257 /** 258 * Replaces the most recently stored value with the given value. 259 * There must be at least one element stored to call this method. 260 * 261 * @param v the value to replace the most recent stored value 262 * @return replaced value 263 * @throws MathIllegalStateException if there are no elements stored 264 */ 265 public double replaceMostRecentValue(double v) throws MathIllegalStateException { 266 return eDA.substituteMostRecentElement(v); 267 } 268 269 /** 270 * Apply the given statistic to the data associated with this set of statistics. 271 * @param stat the statistic to apply 272 * @return the computed value of the statistic. 273 */ 274 public double apply(UnivariateStatistic stat) { 275 // No try-catch or advertised exception here because arguments 276 // are guaranteed valid. 277 return eDA.compute(stat); 278 } 279 280 /** {@inheritDoc} */ 281 @Override 282 public double getMean() { 283 return apply(meanImpl); 284 } 285 286 /** 287 * Returns the geometric mean of the available values. 288 * <p> 289 * See {@link GeometricMean} for details on the computing algorithm. 290 * 291 * @see <a href="http://www.xycoon.com/geometric_mean.htm"> 292 * Geometric mean</a> 293 * 294 * @return The geometricMean, Double.NaN if no values have been added, 295 * or if any negative values have been added. 296 */ 297 public double getGeometricMean() { 298 return apply(geometricMeanImpl); 299 } 300 301 /** 302 * Returns the standard deviation of the available values. 303 * @return The standard deviation, Double.NaN if no values have been added 304 * or 0.0 for a single value set. 305 */ 306 @Override 307 public double getStandardDeviation() { 308 double stdDev = Double.NaN; 309 if (getN() > 0) { 310 if (getN() > 1) { 311 stdDev = FastMath.sqrt(getVariance()); 312 } else { 313 stdDev = 0.0; 314 } 315 } 316 return stdDev; 317 } 318 319 /** 320 * Returns the quadratic mean of the available values. 321 * 322 * @see <a href="http://mathworld.wolfram.com/Root-Mean-Square.html"> 323 * Root Mean Square</a> 324 * 325 * @return The quadratic mean or {@code Double.NaN} if no values 326 * have been added. 327 */ 328 public double getQuadraticMean() { 329 final long n = getN(); 330 return n > 0 ? FastMath.sqrt(getSumOfSquares() / n) : Double.NaN; 331 } 332 333 /** {@inheritDoc} */ 334 @Override 335 public double getVariance() { 336 return apply(varianceImpl); 337 } 338 339 /** 340 * Returns the population variance of the available values. 341 * 342 * @see <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> 343 * Population variance</a> 344 * 345 * @return The population variance, Double.NaN if no values have been added, 346 * or 0.0 for a single value set. 347 */ 348 public double getPopulationVariance() { 349 return apply(POPULATION_VARIANCE); 350 } 351 352 /** 353 * Returns the skewness of the available values. Skewness is a 354 * measure of the asymmetry of a given distribution. 355 * 356 * @return The skewness, Double.NaN if less than 3 values have been added. 357 */ 358 public double getSkewness() { 359 return apply(skewnessImpl); 360 } 361 362 /** 363 * Returns the Kurtosis of the available values. Kurtosis is a 364 * measure of the "peakedness" of a distribution. 365 * 366 * @return The kurtosis, Double.NaN if less than 4 values have been added. 367 */ 368 public double getKurtosis() { 369 return apply(kurtosisImpl); 370 } 371 372 /** {@inheritDoc} */ 373 @Override 374 public double getMax() { 375 return apply(maxImpl); 376 } 377 378 /** {@inheritDoc} */ 379 @Override 380 public double getMin() { 381 return apply(minImpl); 382 } 383 384 /** {@inheritDoc} */ 385 @Override 386 public double getSum() { 387 return apply(sumImpl); 388 } 389 390 /** 391 * Returns the sum of the squares of the available values. 392 * @return The sum of the squares or Double.NaN if no 393 * values have been added. 394 */ 395 public double getSumOfSquares() { 396 return apply(sumOfSquaresImpl); 397 } 398 399 /** 400 * Returns an estimate for the pth percentile of the stored values. 401 * <p> 402 * The implementation provided here follows the first estimation procedure presented 403 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> 404 * </p><p> 405 * <strong>Preconditions</strong>:<ul> 406 * <li><code>0 < p ≤ 100</code> (otherwise an 407 * <code>MathIllegalArgumentException</code> is thrown)</li> 408 * <li>at least one value must be stored (returns <code>Double.NaN 409 * </code> otherwise)</li> 410 * </ul> 411 * 412 * @param p the requested percentile (scaled from 0 - 100) 413 * @return An estimate for the pth percentile of the stored data 414 * @throws MathIllegalArgumentException if p is not a valid quantile 415 */ 416 public double getPercentile(final double p) 417 throws MathIllegalArgumentException { 418 419 percentileImpl.setQuantile(p); 420 return apply(percentileImpl); 421 } 422 423 /** {@inheritDoc} */ 424 @Override 425 public long getN() { 426 return eDA.getNumElements(); 427 } 428 429 /** 430 * Returns the maximum number of values that can be stored in the 431 * dataset, or INFINITE_WINDOW (-1) if there is no limit. 432 * 433 * @return The current window size or -1 if its Infinite. 434 */ 435 public int getWindowSize() { 436 return windowSize; 437 } 438 439 /** 440 * WindowSize controls the number of values that contribute to the 441 * reported statistics. For example, if windowSize is set to 3 and the 442 * values {1,2,3,4,5} have been added <strong> in that order</strong> then 443 * the <i>available values</i> are {3,4,5} and all reported statistics will 444 * be based on these values. If {@code windowSize} is decreased as a result 445 * of this call and there are more than the new value of elements in the 446 * current dataset, values from the front of the array are discarded to 447 * reduce the dataset to {@code windowSize} elements. 448 * 449 * @param windowSize sets the size of the window. 450 * @throws MathIllegalArgumentException if window size is less than 1 but 451 * not equal to {@link #INFINITE_WINDOW} 452 */ 453 public void setWindowSize(int windowSize) 454 throws MathIllegalArgumentException { 455 456 if (windowSize < 1 && windowSize != INFINITE_WINDOW) { 457 throw new MathIllegalArgumentException( 458 LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize); 459 } 460 461 this.windowSize = windowSize; 462 463 // We need to check to see if we need to discard elements 464 // from the front of the array. If the windowSize is less than 465 // the current number of elements. 466 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) { 467 eDA.discardFrontElements(eDA.getNumElements() - windowSize); 468 } 469 } 470 471 /** 472 * Returns the current set of values in an array of double primitives. 473 * The order of addition is preserved. The returned array is a fresh 474 * copy of the underlying data -- i.e., it is not a reference to the 475 * stored data. 476 * 477 * @return the current set of numbers in the order in which they 478 * were added to this set 479 */ 480 public double[] getValues() { 481 return eDA.getElements(); 482 } 483 484 /** 485 * Returns the current set of values in an array of double primitives, 486 * sorted in ascending order. The returned array is a fresh 487 * copy of the underlying data -- i.e., it is not a reference to the 488 * stored data. 489 * @return returns the current set of 490 * numbers sorted in ascending order 491 */ 492 public double[] getSortedValues() { 493 double[] sort = getValues(); 494 Arrays.sort(sort); 495 return sort; 496 } 497 498 /** 499 * Returns the element at the specified index 500 * @param index The Index of the element 501 * @return return the element at the specified index 502 */ 503 public double getElement(int index) { 504 return eDA.getElement(index); 505 } 506 507 /** 508 * Generates a text report displaying univariate statistics from values 509 * that have been added. Each statistic is displayed on a separate line. 510 * 511 * @return String with line feeds displaying statistics 512 */ 513 @Override 514 public String toString() { 515 final StringBuilder outBuffer = new StringBuilder(100); 516 final String endl = "\n"; 517 outBuffer.append("DescriptiveStatistics:").append(endl). 518 append("n: ").append(getN()).append(endl). 519 append("min: ").append(getMin()).append(endl). 520 append("max: ").append(getMax()).append(endl). 521 append("mean: ").append(getMean()).append(endl). 522 append("std dev: ").append(getStandardDeviation()).append(endl); 523 try { 524 // No catch for MIAE because actual parameter is valid below 525 outBuffer.append("median: ").append(getPercentile(50)).append(endl); 526 } catch (MathIllegalStateException ex) { 527 outBuffer.append("median: unavailable").append(endl); 528 } 529 outBuffer.append("skewness: ").append(getSkewness()).append(endl). 530 append("kurtosis: ").append(getKurtosis()).append(endl); 531 return outBuffer.toString(); 532 } 533 534 }