1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.descriptive;
23
24 import java.io.Serializable;
25 import java.util.function.DoubleConsumer;
26
27 import org.hipparchus.exception.NullArgumentException;
28 import org.hipparchus.random.RandomGenerator;
29 import org.hipparchus.stat.descriptive.moment.GeometricMean;
30 import org.hipparchus.stat.descriptive.moment.Mean;
31 import org.hipparchus.stat.descriptive.moment.SecondMoment;
32 import org.hipparchus.stat.descriptive.moment.Variance;
33 import org.hipparchus.stat.descriptive.rank.Max;
34 import org.hipparchus.stat.descriptive.rank.Min;
35 import org.hipparchus.stat.descriptive.rank.RandomPercentile;
36 import org.hipparchus.stat.descriptive.summary.Sum;
37 import org.hipparchus.stat.descriptive.summary.SumOfLogs;
38 import org.hipparchus.stat.descriptive.summary.SumOfSquares;
39 import org.hipparchus.util.FastMath;
40 import org.hipparchus.util.MathUtils;
41 import org.hipparchus.util.Precision;
42
43 /**
44 * Computes summary statistics for a stream of data values added using the
45 * {@link #addValue(double) addValue} method. The data values are not stored in
46 * memory, so this class can be used to compute statistics for very large data
47 * streams.
48 * <p>
49 * By default, all statistics other than percentiles are maintained. Percentile
50 * calculations use an embedded {@link RandomPercentile} which carries more memory
51 * and compute overhead than the other statistics, so it is disabled by default.
52 * To enable percentiles, either pass {@code true} to the constructor or use a
53 * {@link StreamingStatisticsBuilder} to configure an instance with percentiles turned
54 * on. Other stats can also be selectively disabled using
55 * {@code StreamingStatisticsBulder}.
56 * <p>
57 * Note: This class is not thread-safe.
58 */
59 public class StreamingStatistics
60 implements StatisticalSummary, AggregatableStatistic<StreamingStatistics>,
61 DoubleConsumer, Serializable {
62
63 /** Serialization UID */
64 private static final long serialVersionUID = 20160422L;
65
66 /** count of values that have been added */
67 private long n;
68
69 /** SecondMoment is used to compute the mean and variance */
70 private final SecondMoment secondMoment;
71 /** min of values that have been added */
72 private final Min minImpl;
73 /** max of values that have been added */
74 private final Max maxImpl;
75 /** sum of values that have been added */
76 private final Sum sumImpl;
77 /** sum of the square of each value that has been added */
78 private final SumOfSquares sumOfSquaresImpl;
79 /** sumLog of values that have been added */
80 private final SumOfLogs sumOfLogsImpl;
81 /** mean of values that have been added */
82 private final Mean meanImpl;
83 /** variance of values that have been added */
84 private final Variance varianceImpl;
85 /** geoMean of values that have been added */
86 private final GeometricMean geoMeanImpl;
87 /** population variance of values that have been added */
88 private final Variance populationVariance;
89 /** source of percentiles */
90 private final RandomPercentile randomPercentile;
91
92 /** whether or not moment stats (sum, mean, variance) are maintained */
93 private final boolean computeMoments;
94 /** whether or not sum of squares and quadratic mean are maintained */
95 private final boolean computeSumOfSquares;
96 /** whether or not sum of logs and geometric mean are maintained */
97 private final boolean computeSumOfLogs;
98 /** whether or not min and max are maintained */
99 private final boolean computeExtrema;
100
101 /**
102 * Construct a new StreamingStatistics instance, maintaining all statistics
103 * other than percentiles.
104 */
105 public StreamingStatistics() {
106 this(Double.NaN, null);
107 }
108
109 /**
110 * Construct a new StreamingStatistics instance, maintaining all statistics
111 * other than percentiles and with/without percentiles per the arguments.
112 *
113 * @param epsilon bound on quantile estimation error (see {@link RandomGenerator})
114 * @param randomGenerator PRNG used in sampling and merge operations (null if percentiles should not be computed)
115 * @since 2.3
116 */
117 public StreamingStatistics(final double epsilon, final RandomGenerator randomGenerator) {
118 this(true, true, true, true, epsilon, randomGenerator);
119 }
120
121 /**
122 * Private constructor used by {@link StreamingStatisticsBuilder}.
123 *
124 * @param computeMoments whether or not moment stats (mean, sum, variance) are maintained
125 * @param computeSumOfLogs whether or not sum of logs and geometric mean are maintained
126 * @param computeSumOfSquares whether or not sum of squares and quadratic mean are maintained
127 * @param computeExtrema whether or not min and max are maintained
128 * @param epsilon bound on quantile estimation error (see {@link RandomGenerator})
129 * @param randomGenerator PRNG used in sampling and merge operations (null if percentiles should not be computed)
130 * @since 2.3
131 */
132 private StreamingStatistics(final boolean computeMoments,
133 final boolean computeSumOfLogs, final boolean computeSumOfSquares,
134 final boolean computeExtrema,
135 final double epsilon, final RandomGenerator randomGenerator) {
136 this.computeMoments = computeMoments;
137 this.computeSumOfLogs = computeSumOfLogs;
138 this.computeSumOfSquares = computeSumOfSquares;
139 this.computeExtrema = computeExtrema;
140
141 this.secondMoment = computeMoments ? new SecondMoment() : null;
142 this.maxImpl = computeExtrema ? new Max() : null;
143 this.minImpl = computeExtrema ? new Min() : null;
144 this.sumImpl = computeMoments ? new Sum() : null;
145 this.sumOfSquaresImpl = computeSumOfSquares ? new SumOfSquares() : null;
146 this.sumOfLogsImpl = computeSumOfLogs ? new SumOfLogs() : null;
147 this.meanImpl = computeMoments ? new Mean(this.secondMoment) : null;
148 this.varianceImpl = computeMoments ? new Variance(this.secondMoment) : null;
149 this.geoMeanImpl = computeSumOfLogs ? new GeometricMean(this.sumOfLogsImpl) : null;
150 this.populationVariance = computeMoments ? new Variance(false, this.secondMoment) : null;
151 this.randomPercentile = randomGenerator == null ? null : new RandomPercentile(epsilon, randomGenerator);
152 }
153
154 /**
155 * A copy constructor. Creates a deep-copy of the {@code original}.
156 *
157 * @param original the {@code StreamingStatistics} instance to copy
158 * @throws NullArgumentException if original is null
159 */
160 StreamingStatistics(StreamingStatistics original) throws NullArgumentException {
161 MathUtils.checkNotNull(original);
162
163 this.n = original.n;
164 this.secondMoment = original.computeMoments ? original.secondMoment.copy() : null;
165 this.maxImpl = original.computeExtrema ? original.maxImpl.copy() : null;
166 this.minImpl = original.computeExtrema ? original.minImpl.copy() : null;
167 this.sumImpl = original.computeMoments ? original.sumImpl.copy() : null;
168 this.sumOfLogsImpl = original.computeSumOfLogs ? original.sumOfLogsImpl.copy() : null;
169 this.sumOfSquaresImpl = original.computeSumOfSquares ? original.sumOfSquaresImpl.copy() : null;
170
171 // Keep statistics with embedded moments in synch
172 this.meanImpl = original.computeMoments ? new Mean(this.secondMoment) : null;
173 this.varianceImpl = original.computeMoments ? new Variance(this.secondMoment) : null;
174 this.geoMeanImpl = original.computeSumOfLogs ? new GeometricMean(this.sumOfLogsImpl) : null;
175 this.populationVariance = original.computeMoments ? new Variance(false, this.secondMoment) : null;
176 this.randomPercentile = original.randomPercentile != null ? original.randomPercentile.copy() : null;
177
178 this.computeMoments = original.computeMoments;
179 this.computeSumOfLogs = original.computeSumOfLogs;
180 this.computeSumOfSquares = original.computeSumOfSquares;
181 this.computeExtrema = original.computeExtrema;
182 }
183
184 /**
185 * Returns a copy of this StreamingStatistics instance with the same internal state.
186 *
187 * @return a copy of this
188 */
189 public StreamingStatistics copy() {
190 return new StreamingStatistics(this);
191 }
192
193 /**
194 * Return a {@link StatisticalSummaryValues} instance reporting current
195 * statistics.
196 * @return Current values of statistics
197 */
198 public StatisticalSummary getSummary() {
199 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
200 getMax(), getMin(), getSum());
201 }
202
203 /**
204 * Add a value to the data
205 * @param value the value to add
206 */
207 public void addValue(double value) {
208 if (computeMoments) {
209 secondMoment.increment(value);
210 sumImpl.increment(value);
211 }
212 if (computeExtrema) {
213 minImpl.increment(value);
214 maxImpl.increment(value);
215 }
216 if (computeSumOfSquares) {
217 sumOfSquaresImpl.increment(value);
218 }
219 if (computeSumOfLogs) {
220 sumOfLogsImpl.increment(value);
221 }
222 if (randomPercentile != null) {
223 randomPercentile.increment(value);
224 }
225 n++;
226 }
227
228 /** {@inheritDoc} */
229 @Override
230 public void accept(double value) {
231 addValue(value);
232 }
233
234 /**
235 * Resets all statistics and storage.
236 */
237 public void clear() {
238 this.n = 0;
239 if (computeExtrema) {
240 minImpl.clear();
241 maxImpl.clear();
242 }
243 if (computeMoments) {
244 sumImpl.clear();
245 secondMoment.clear();
246 }
247 if (computeSumOfLogs) {
248 sumOfLogsImpl.clear();
249 }
250 if (computeSumOfSquares) {
251 sumOfSquaresImpl.clear();
252 }
253 if (randomPercentile != null) {
254 randomPercentile.clear();
255 }
256 }
257
258 /** {@inheritDoc} */
259 @Override
260 public long getN() {
261 return n;
262 }
263
264 /** {@inheritDoc} */
265 @Override
266 public double getMax() {
267 return computeExtrema ? maxImpl.getResult() : Double.NaN;
268 }
269
270 /** {@inheritDoc} */
271 @Override
272 public double getMin() {
273 return computeExtrema ? minImpl.getResult() : Double.NaN;
274 }
275
276 /** {@inheritDoc} */
277 @Override
278 public double getSum() {
279 return computeMoments ? sumImpl.getResult() : Double.NaN;
280 }
281
282 /**
283 * Returns the sum of the squares of the values that have been added.
284 * <p>
285 * Double.NaN is returned if no values have been added.
286 *
287 * @return The sum of squares
288 */
289 public double getSumOfSquares() {
290 return computeSumOfSquares ? sumOfSquaresImpl.getResult() : Double.NaN;
291 }
292
293 /** {@inheritDoc} */
294 @Override
295 public double getMean() {
296 return computeMoments ? meanImpl.getResult() : Double.NaN;
297 }
298
299 /** {@inheritDoc} */
300 @Override
301 public double getVariance() {
302 return computeMoments ? varianceImpl.getResult() : Double.NaN;
303 }
304
305 /**
306 * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
307 * population variance</a> of the values that have been added.
308 * <p>
309 * Double.NaN is returned if no values have been added.
310 *
311 * @return the population variance
312 */
313 public double getPopulationVariance() {
314 return computeMoments ? populationVariance.getResult() : Double.NaN;
315 }
316
317 /**
318 * Returns the geometric mean of the values that have been added.
319 * <p>
320 * Double.NaN is returned if no values have been added.
321 *
322 * @return the geometric mean
323 */
324 public double getGeometricMean() {
325 return computeSumOfLogs ? geoMeanImpl.getResult() : Double.NaN;
326 }
327
328 /**
329 * Returns the sum of the logs of the values that have been added.
330 * <p>
331 * Double.NaN is returned if no values have been added.
332 *
333 * @return the sum of logs
334 */
335 public double getSumOfLogs() {
336 return computeSumOfLogs ? sumOfLogsImpl.getResult() : Double.NaN;
337 }
338
339 /**
340 * Returns a statistic related to the Second Central Moment. Specifically,
341 * what is returned is the sum of squared deviations from the sample mean
342 * among the values that have been added.
343 * <p>
344 * Returns <code>Double.NaN</code> if no data values have been added and
345 * returns <code>0</code> if there is just one value in the data set.
346 *
347 * @return second central moment statistic
348 */
349 public double getSecondMoment() {
350 return computeMoments ? secondMoment.getResult() : Double.NaN;
351 }
352
353 /**
354 * Returns the quadratic mean, a.k.a.
355 * <a href="http://mathworld.wolfram.com/Root-Mean-Square.html">
356 * root-mean-square</a> of the available values
357 *
358 * @return The quadratic mean or {@code Double.NaN} if no values
359 * have been added.
360 */
361 public double getQuadraticMean() {
362 if (computeSumOfSquares) {
363 long size = getN();
364 return size > 0 ? FastMath.sqrt(getSumOfSquares() / size) : Double.NaN;
365 } else {
366 return Double.NaN;
367 }
368 }
369
370 /**
371 * Returns the standard deviation of the values that have been added.
372 * <p>
373 * Double.NaN is returned if no values have been added.
374 *
375 * @return the standard deviation
376 */
377 @Override
378 public double getStandardDeviation() {
379 long size = getN();
380 if (computeMoments) {
381 if (size > 0) {
382 return size > 1 ? FastMath.sqrt(getVariance()) : 0.0;
383 } else {
384 return Double.NaN;
385 }
386 } else {
387 return Double.NaN;
388 }
389 }
390
391 /**
392 * Returns an estimate of the median of the values that have been entered.
393 * See {@link RandomPercentile} for a description of the algorithm used for large
394 * data streams.
395 *
396 * @return the median
397 */
398 public double getMedian() {
399 return randomPercentile != null ? randomPercentile.getResult(50d) : Double.NaN;
400 }
401
402 /**
403 * Returns an estimate of the given percentile of the values that have been entered.
404 * See {@link RandomPercentile} for a description of the algorithm used for large
405 * data streams.
406 *
407 * @param percentile the desired percentile (must be between 0 and 100)
408 * @return estimated percentile
409 */
410 public double getPercentile(double percentile) {
411 return randomPercentile == null ? Double.NaN : randomPercentile.getResult(percentile);
412 }
413
414 /**
415 * {@inheritDoc}
416 * Statistics are aggregated only when both this and other are maintaining them. For example,
417 * if this.computeMoments is false, but other.computeMoments is true, the moment data in other
418 * will be lost.
419 */
420 @Override
421 public void aggregate(StreamingStatistics other) {
422 MathUtils.checkNotNull(other);
423
424 if (other.n > 0) {
425 this.n += other.n;
426 if (computeMoments && other.computeMoments) {
427 this.secondMoment.aggregate(other.secondMoment);
428 this.sumImpl.aggregate(other.sumImpl);
429 }
430 if (computeExtrema && other.computeExtrema) {
431 this.minImpl.aggregate(other.minImpl);
432 this.maxImpl.aggregate(other.maxImpl);
433 }
434 if (computeSumOfLogs && other.computeSumOfLogs) {
435 this.sumOfLogsImpl.aggregate(other.sumOfLogsImpl);
436 }
437 if (computeSumOfSquares && other.computeSumOfSquares) {
438 this.sumOfSquaresImpl.aggregate(other.sumOfSquaresImpl);
439 }
440 if (randomPercentile != null && other.randomPercentile != null) {
441 this.randomPercentile.aggregate(other.randomPercentile);
442 }
443 }
444 }
445
446 /**
447 * Generates a text report displaying summary statistics from values that
448 * have been added.
449 *
450 * @return String with line feeds displaying statistics
451 */
452 @Override
453 public String toString() {
454 StringBuilder outBuffer = new StringBuilder(200); // the size is just a wild guess
455 String endl = "\n";
456 outBuffer.append("StreamingStatistics:").append(endl).
457 append("n: ").append(getN()).append(endl).
458 append("min: ").append(getMin()).append(endl).
459 append("max: ").append(getMax()).append(endl).
460 append("sum: ").append(getSum()).append(endl).
461 append("mean: ").append(getMean()).append(endl).
462 append("variance: ").append(getVariance()).append(endl).
463 append("population variance: ").append(getPopulationVariance()).append(endl).
464 append("standard deviation: ").append(getStandardDeviation()).append(endl).
465 append("geometric mean: ").append(getGeometricMean()).append(endl).
466 append("second moment: ").append(getSecondMoment()).append(endl).
467 append("sum of squares: ").append(getSumOfSquares()).append(endl).
468 append("sum of logs: ").append(getSumOfLogs()).append(endl);
469 return outBuffer.toString();
470 }
471
472 /**
473 * Returns true iff <code>object</code> is a <code>StreamingStatistics</code>
474 * instance and all statistics have the same values as this.
475 *
476 * @param object the object to test equality against.
477 * @return true if object equals this
478 */
479 @Override
480 public boolean equals(Object object) {
481 if (object == this) {
482 return true;
483 }
484 if (!(object instanceof StreamingStatistics)) {
485 return false;
486 }
487 StreamingStatistics other = (StreamingStatistics)object;
488 return other.getN() == getN() &&
489 Precision.equalsIncludingNaN(other.getMax(), getMax()) &&
490 Precision.equalsIncludingNaN(other.getMin(), getMin()) &&
491 Precision.equalsIncludingNaN(other.getSum(), getSum()) &&
492 Precision.equalsIncludingNaN(other.getGeometricMean(), getGeometricMean()) &&
493 Precision.equalsIncludingNaN(other.getMean(), getMean()) &&
494 Precision.equalsIncludingNaN(other.getSumOfSquares(), getSumOfSquares()) &&
495 Precision.equalsIncludingNaN(other.getSumOfLogs(), getSumOfLogs()) &&
496 Precision.equalsIncludingNaN(other.getVariance(), getVariance()) &&
497 Precision.equalsIncludingNaN(other.getMedian(), getMedian());
498 }
499
500 /**
501 * Returns hash code based on values of statistics.
502 * @return hash code
503 */
504 @Override
505 public int hashCode() {
506 int result = 31 + MathUtils.hash(getN());
507 result = result * 31 + MathUtils.hash(getMax());
508 result = result * 31 + MathUtils.hash(getMin());
509 result = result * 31 + MathUtils.hash(getSum());
510 result = result * 31 + MathUtils.hash(getGeometricMean());
511 result = result * 31 + MathUtils.hash(getMean());
512 result = result * 31 + MathUtils.hash(getSumOfSquares());
513 result = result * 31 + MathUtils.hash(getSumOfLogs());
514 result = result * 31 + MathUtils.hash(getVariance());
515 result = result * 31 + MathUtils.hash(getMedian());
516 return result;
517 }
518
519 /**
520 * Returns a {@link StreamingStatisticsBuilder} to source configured
521 * {@code StreamingStatistics} instances.
522 *
523 * @return a StreamingStatisticsBuilder instance
524 */
525 public static StreamingStatisticsBuilder builder() {
526 return new StreamingStatisticsBuilder();
527 }
528
529 /**
530 * Builder for StreamingStatistics instances.
531 */
532 public static class StreamingStatisticsBuilder {
533 /** whether or not moment statistics are maintained by instances created by this factory */
534 private boolean computeMoments;
535 /** whether or not sum of squares and quadratic mean are maintained by instances created by this factory */
536 private boolean computeSumOfSquares;
537 /** whether or not sum of logs and geometric mean are maintained by instances created by this factory */
538 private boolean computeSumOfLogs;
539 /** whether or not min and max are maintained by instances created by this factory */
540 private boolean computeExtrema;
541 /** bound on quantile estimation error for percentiles.
542 * @since 2.3
543 */
544 private double epsilon;
545 /** PRNG used in sampling and merge operations.
546 * @since 2.3
547 */
548 private RandomGenerator randomGenerator;
549
550 /** Simple constructor.
551 */
552 public StreamingStatisticsBuilder() {
553 computeMoments = true;
554 computeSumOfSquares = true;
555 computeSumOfLogs = true;
556 computeExtrema = true;
557 percentiles(Double.NaN, null);
558 }
559
560 /**
561 * Sets the computeMoments setting of the factory
562 *
563 * @param arg whether or not instances created using {@link #build()} will
564 * maintain moment statistics
565 * @return a factory with the given computeMoments property set
566 */
567 public StreamingStatisticsBuilder moments(boolean arg) {
568 this.computeMoments = arg;
569 return this;
570 }
571
572 /**
573 * Sets the computeSumOfLogs setting of the factory
574 *
575 * @param arg whether or not instances created using {@link #build()} will
576 * maintain log sums
577 * @return a factory with the given computeSumOfLogs property set
578 */
579 public StreamingStatisticsBuilder sumOfLogs(boolean arg) {
580 this.computeSumOfLogs = arg;
581 return this;
582 }
583
584 /**
585 * Sets the computeSumOfSquares setting of the factory.
586 *
587 * @param arg whether or not instances created using {@link #build()} will
588 * maintain sums of squares
589 * @return a factory with the given computeSumOfSquares property set
590 */
591 public StreamingStatisticsBuilder sumOfSquares(boolean arg) {
592 this.computeSumOfSquares = arg;
593 return this;
594 }
595
596 /**
597 * Sets the computePercentiles setting of the factory.
598 * @param epsilonBound bound on quantile estimation error (see {@link RandomGenerator})
599 * @param generator PRNG used in sampling and merge operations
600 * @return a factory with the given computePercentiles property set
601 * @since 2.3
602 */
603 public StreamingStatisticsBuilder percentiles(final double epsilonBound, final RandomGenerator generator) {
604 this.epsilon = epsilonBound;
605 this.randomGenerator = generator;
606 return this;
607 }
608
609 /**
610 * Sets the computeExtrema setting of the factory.
611 *
612 * @param arg whether or not instances created using {@link #build()} will
613 * compute min and max
614 * @return a factory with the given computeExtrema property set
615 */
616 public StreamingStatisticsBuilder extrema(boolean arg) {
617 this.computeExtrema = arg;
618 return this;
619 }
620
621 /**
622 * Builds a StreamingStatistics instance with currently defined properties.
623 *
624 * @return newly configured StreamingStatistics instance
625 */
626 public StreamingStatistics build() {
627 return new StreamingStatistics(computeMoments,
628 computeSumOfLogs, computeSumOfSquares,
629 computeExtrema,
630 epsilon, randomGenerator);
631 }
632 }
633 }