1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.descriptive.moment;
23
24 import java.io.Serializable;
25
26 import org.hipparchus.exception.MathIllegalArgumentException;
27 import org.hipparchus.exception.NullArgumentException;
28 import org.hipparchus.stat.StatUtils;
29 import org.hipparchus.stat.descriptive.AbstractStorelessUnivariateStatistic;
30 import org.hipparchus.stat.descriptive.AggregatableStatistic;
31 import org.hipparchus.stat.descriptive.WeightedEvaluation;
32 import org.hipparchus.util.MathArrays;
33 import org.hipparchus.util.MathUtils;
34
35 /**
36 * Computes the variance of the available values. By default, the unbiased
37 * "sample variance" definitional formula is used:
38 * <p>
39 * variance = sum((x_i - mean)^2) / (n - 1)
40 * <p>
41 * where mean is the {@link Mean} and <code>n</code> is the number
42 * of sample observations.
43 * <p>
44 * The definitional formula does not have good numerical properties, so
45 * this implementation does not compute the statistic using the definitional
46 * formula.
47 * <ul>
48 * <li> The <code>getResult</code> method computes the variance using
49 * updating formulas based on West's algorithm, as described in
50 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
51 * J. G. Lewis 1979, <i>Communications of the ACM</i>,
52 * vol. 22 no. 9, pp. 526-531.</a></li>
53 * <li> The <code>evaluate</code> methods leverage the fact that they have the
54 * full array of values in memory to execute a two-pass algorithm.
55 * Specifically, these methods use the "corrected two-pass algorithm" from
56 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
57 * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li>
58 * </ul>
59 * <p>
60 * Note that adding values using <code>increment</code> or
61 * <code>incrementAll</code> and then executing <code>getResult</code> will
62 * sometimes give a different, less accurate, result than executing
63 * <code>evaluate</code> with the full array of values. The former approach
64 * should only be used when the full array of values is not available.
65 * <p>
66 * The "population variance" ( sum((x_i - mean)^2) / n ) can also
67 * be computed using this statistic. The <code>isBiasCorrected</code>
68 * property determines whether the "population" or "sample" value is
69 * returned by the <code>evaluate</code> and <code>getResult</code> methods.
70 * To compute population variances, set this property to <code>false.</code>
71 * <p>
72 * <strong>Note that this implementation is not synchronized.</strong> If
73 * multiple threads access an instance of this class concurrently, and at least
74 * one of the threads invokes the <code>increment()</code> or
75 * <code>clear()</code> method, it must be synchronized externally.
76 */
77 public class Variance extends AbstractStorelessUnivariateStatistic
78 implements AggregatableStatistic<Variance>, WeightedEvaluation, Serializable {
79
80 /** Serializable version identifier */
81 private static final long serialVersionUID = 20150412L;
82
83 /** SecondMoment is used in incremental calculation of Variance*/
84 protected final SecondMoment moment;
85
86 /**
87 * Whether or not {@link #increment(double)} should increment
88 * the internal second moment. When a Variance is constructed with an
89 * external SecondMoment as a constructor parameter, this property is
90 * set to false and increments must be applied to the second moment
91 * directly.
92 */
93 protected final boolean incMoment;
94
95 /**
96 * Whether or not bias correction is applied when computing the
97 * value of the statistic. True means that bias is corrected. See
98 * {@link Variance} for details on the formula.
99 */
100 private final boolean isBiasCorrected;
101
102 /**
103 * Constructs a Variance with default (true) <code>isBiasCorrected</code>
104 * property.
105 */
106 public Variance() {
107 this(true);
108 }
109
110 /**
111 * Constructs a Variance based on an external second moment.
112 * <p>
113 * When this constructor is used, the statistic may only be
114 * incremented via the moment, i.e., {@link #increment(double)}
115 * does nothing; whereas {@code m2.increment(value)} increments
116 * both {@code m2} and the Variance instance constructed from it.
117 *
118 * @param m2 the SecondMoment (Third or Fourth moments work here as well.)
119 */
120 public Variance(final SecondMoment m2) {
121 this(true, m2);
122 }
123
124 /**
125 * Constructs a Variance with the specified <code>isBiasCorrected</code>
126 * property.
127 *
128 * @param isBiasCorrected setting for bias correction - true means
129 * bias will be corrected and is equivalent to using the argumentless
130 * constructor
131 */
132 public Variance(boolean isBiasCorrected) {
133 this(new SecondMoment(), true, isBiasCorrected);
134 }
135
136 /**
137 * Constructs a Variance with the specified <code>isBiasCorrected</code>
138 * property and the supplied external second moment.
139 *
140 * @param isBiasCorrected setting for bias correction - true means
141 * bias will be corrected
142 * @param m2 the SecondMoment (Third or Fourth moments work
143 * here as well.)
144 */
145 public Variance(boolean isBiasCorrected, SecondMoment m2) {
146 this(m2, false, isBiasCorrected);
147 }
148
149 /**
150 * Constructs a Variance with the specified parameters.
151 *
152 * @param m2 the SecondMoment (Third or Fourth moments work
153 * here as well.)
154 * @param incMoment if the moment shall be incremented
155 * @param isBiasCorrected setting for bias correction - true means
156 * bias will be corrected
157 */
158 private Variance(SecondMoment m2, boolean incMoment, boolean isBiasCorrected) {
159 this.moment = m2;
160 this.incMoment = incMoment;
161 this.isBiasCorrected = isBiasCorrected;
162 }
163
164 /**
165 * Copy constructor, creates a new {@code Variance} identical
166 * to the {@code original}.
167 *
168 * @param original the {@code Variance} instance to copy
169 * @throws NullArgumentException if original is null
170 */
171 public Variance(Variance original) throws NullArgumentException {
172 MathUtils.checkNotNull(original);
173 this.moment = original.moment.copy();
174 this.incMoment = original.incMoment;
175 this.isBiasCorrected = original.isBiasCorrected;
176 }
177
178 /**
179 * {@inheritDoc}
180 * <p>If all values are available, it is more accurate to use
181 * {@link #evaluate(double[])} rather than adding values one at a time
182 * using this method and then executing {@link #getResult}, since
183 * <code>evaluate</code> leverages the fact that is has the full
184 * list of values together to execute a two-pass algorithm.
185 * See {@link Variance}.
186 * <p>
187 * Note also that when {@link #Variance(SecondMoment)} is used to
188 * create a Variance, this method does nothing. In that case, the
189 * SecondMoment should be incremented directly.
190 */
191 @Override
192 public void increment(final double d) {
193 if (incMoment) {
194 moment.increment(d);
195 }
196 }
197
198 /** {@inheritDoc} */
199 @Override
200 public double getResult() {
201 if (moment.n == 0) {
202 return Double.NaN;
203 } else if (moment.n == 1) {
204 return 0d;
205 } else {
206 if (isBiasCorrected) {
207 return moment.m2 / (moment.n - 1d);
208 } else {
209 return moment.m2 / (moment.n);
210 }
211 }
212 }
213
214 /** {@inheritDoc} */
215 @Override
216 public long getN() {
217 return moment.getN();
218 }
219
220 /** {@inheritDoc} */
221 @Override
222 public void clear() {
223 if (incMoment) {
224 moment.clear();
225 }
226 }
227
228 /** {@inheritDoc} */
229 @Override
230 public void aggregate(Variance other) {
231 MathUtils.checkNotNull(other);
232 if (incMoment) {
233 this.moment.aggregate(other.moment);
234 }
235 }
236
237 /**
238 * Returns the variance of the entries in the specified portion of
239 * the input array, or <code>Double.NaN</code> if the designated subarray
240 * is empty. Note that Double.NaN may also be returned if the input
241 * includes NaN and / or infinite values.
242 * <p>
243 * See {@link Variance} for details on the computing algorithm.</p>
244 * <p>
245 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
246 * <p>
247 * Does not change the internal state of the statistic.</p>
248 * <p>
249 * Throws <code>MathIllegalArgumentException</code> if the array is null.</p>
250 *
251 * @param values the input array
252 * @param begin index of the first array element to include
253 * @param length the number of elements to include
254 * @return the variance of the values or Double.NaN if length = 0
255 * @throws MathIllegalArgumentException if the array is null or the array index
256 * parameters are not valid
257 */
258 @Override
259 public double evaluate(final double[] values, final int begin, final int length)
260 throws MathIllegalArgumentException {
261
262 double var = Double.NaN;
263
264 if (MathArrays.verifyValues(values, begin, length)) {
265 if (length == 1) {
266 var = 0.0;
267 } else if (length > 1) {
268 double m = StatUtils.mean(values, begin, length);
269 var = evaluate(values, m, begin, length);
270 }
271 }
272 return var;
273 }
274
275 /**
276 * Returns the weighted variance of the entries in the specified portion of
277 * the input array, or <code>Double.NaN</code> if the designated subarray
278 * is empty.
279 * <p>
280 * Uses the formula
281 * <pre>
282 * Σ(weights[i]*(values[i] - weightedMean)²)/(Σ(weights[i]) - 1)
283 * </pre>
284 * where weightedMean is the weighted mean.
285 * <p>
286 * This formula will not return the same result as the unweighted variance when all
287 * weights are equal, unless all weights are equal to 1. The formula assumes that
288 * weights are to be treated as "expansion values," as will be the case if for example
289 * the weights represent frequency counts. To normalize weights so that the denominator
290 * in the variance computation equals the length of the input vector minus one, use
291 * <pre>
292 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length));</code>
293 * </pre>
294 * <p>
295 * Returns 0 for a single-value (i.e. length = 1) sample.
296 * <p>
297 * Throws <code>IllegalArgumentException</code> if any of the following are true:
298 * <ul><li>the values array is null</li>
299 * <li>the weights array is null</li>
300 * <li>the weights array does not have the same length as the values array</li>
301 * <li>the weights array contains one or more infinite values</li>
302 * <li>the weights array contains one or more NaN values</li>
303 * <li>the weights array contains negative values</li>
304 * <li>the start and length arguments do not determine a valid array</li>
305 * </ul>
306 * <p>
307 * Does not change the internal state of the statistic.
308 *
309 * @param values the input array
310 * @param weights the weights array
311 * @param begin index of the first array element to include
312 * @param length the number of elements to include
313 * @return the weighted variance of the values or Double.NaN if length = 0
314 * @throws MathIllegalArgumentException if the parameters are not valid
315 */
316 @Override
317 public double evaluate(final double[] values, final double[] weights,
318 final int begin, final int length)
319 throws MathIllegalArgumentException {
320
321 double var = Double.NaN;
322 if (MathArrays.verifyValues(values, weights,begin, length)) {
323 if (length == 1) {
324 var = 0.0;
325 } else if (length > 1) {
326 Mean mean = new Mean();
327 double m = mean.evaluate(values, weights, begin, length);
328 var = evaluate(values, weights, m, begin, length);
329 }
330 }
331 return var;
332 }
333
334 /**
335 * Returns the variance of the entries in the specified portion of
336 * the input array, using the precomputed mean value. Returns
337 * <code>Double.NaN</code> if the designated subarray is empty.
338 * <p>
339 * See {@link Variance} for details on the computing algorithm.
340 * <p>
341 * The formula used assumes that the supplied mean value is the arithmetic
342 * mean of the sample data, not a known population parameter. This method
343 * is supplied only to save computation when the mean has already been
344 * computed.
345 * <p>
346 * Returns 0 for a single-value (i.e. length = 1) sample.
347 * <p>
348 * Does not change the internal state of the statistic.
349 *
350 * @param values the input array
351 * @param mean the precomputed mean value
352 * @param begin index of the first array element to include
353 * @param length the number of elements to include
354 * @return the variance of the values or Double.NaN if length = 0
355 * @throws MathIllegalArgumentException if the array is null or the array index
356 * parameters are not valid
357 */
358 public double evaluate(final double[] values, final double mean,
359 final int begin, final int length)
360 throws MathIllegalArgumentException {
361
362 double var = Double.NaN;
363 if (MathArrays.verifyValues(values, begin, length)) {
364 if (length == 1) {
365 var = 0.0;
366 } else if (length > 1) {
367 double accum = 0.0;
368 double accum2 = 0.0;
369 for (int i = begin; i < begin + length; i++) {
370 final double dev = values[i] - mean;
371 accum += dev * dev;
372 accum2 += dev;
373 }
374 double len = length;
375 if (isBiasCorrected) {
376 var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
377 } else {
378 var = (accum - (accum2 * accum2 / len)) / len;
379 }
380 }
381 }
382 return var;
383 }
384
385 /**
386 * Returns the variance of the entries in the input array, using the
387 * precomputed mean value. Returns <code>Double.NaN</code> if the array
388 * is empty.
389 * <p>
390 * See {@link Variance} for details on the computing algorithm.
391 * <p>
392 * If <code>isBiasCorrected</code> is <code>true</code> the formula used
393 * assumes that the supplied mean value is the arithmetic mean of the
394 * sample data, not a known population parameter. If the mean is a known
395 * population parameter, or if the "population" version of the variance is
396 * desired, set <code>isBiasCorrected</code> to <code>false</code> before
397 * invoking this method.
398 * <p>
399 * Returns 0 for a single-value (i.e. length = 1) sample.
400 * <p>
401 * Does not change the internal state of the statistic.
402 *
403 * @param values the input array
404 * @param mean the precomputed mean value
405 * @return the variance of the values or Double.NaN if the array is empty
406 * @throws MathIllegalArgumentException if the array is null
407 */
408 public double evaluate(final double[] values, final double mean)
409 throws MathIllegalArgumentException {
410 return evaluate(values, mean, 0, values.length);
411 }
412
413 /**
414 * Returns the weighted variance of the entries in the specified portion of
415 * the input array, using the precomputed weighted mean value. Returns
416 * <code>Double.NaN</code> if the designated subarray is empty.
417 * <p>
418 * Uses the formula
419 * <pre>
420 * Σ(weights[i]*(values[i] - mean)²)/(Σ(weights[i]) - 1)
421 * </pre>
422 * <p>
423 * The formula used assumes that the supplied mean value is the weighted arithmetic
424 * mean of the sample data, not a known population parameter. This method
425 * is supplied only to save computation when the mean has already been
426 * computed.
427 * <p>
428 * This formula will not return the same result as the unweighted variance when all
429 * weights are equal, unless all weights are equal to 1. The formula assumes that
430 * weights are to be treated as "expansion values," as will be the case if for example
431 * the weights represent frequency counts. To normalize weights so that the denominator
432 * in the variance computation equals the length of the input vector minus one, use
433 * <pre>
434 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean);</code>
435 * </pre>
436 * <p>
437 * Returns 0 for a single-value (i.e. length = 1) sample.
438 * <p>
439 * Throws <code>MathIllegalArgumentException</code> if any of the following are true:
440 * <ul><li>the values array is null</li>
441 * <li>the weights array is null</li>
442 * <li>the weights array does not have the same length as the values array</li>
443 * <li>the weights array contains one or more infinite values</li>
444 * <li>the weights array contains one or more NaN values</li>
445 * <li>the weights array contains negative values</li>
446 * <li>the start and length arguments do not determine a valid array</li>
447 * </ul>
448 * <p>
449 * Does not change the internal state of the statistic.
450 *
451 * @param values the input array
452 * @param weights the weights array
453 * @param mean the precomputed weighted mean value
454 * @param begin index of the first array element to include
455 * @param length the number of elements to include
456 * @return the variance of the values or Double.NaN if length = 0
457 * @throws MathIllegalArgumentException if the parameters are not valid
458 */
459 public double evaluate(final double[] values, final double[] weights,
460 final double mean, final int begin, final int length)
461 throws MathIllegalArgumentException {
462
463 double var = Double.NaN;
464
465 if (MathArrays.verifyValues(values, weights, begin, length)) {
466 if (length == 1) {
467 var = 0.0;
468 } else if (length > 1) {
469 double accum = 0.0;
470 double accum2 = 0.0;
471 for (int i = begin; i < begin + length; i++) {
472 final double dev = values[i] - mean;
473 accum += weights[i] * (dev * dev);
474 accum2 += weights[i] * dev;
475 }
476
477 double sumWts = 0;
478 for (int i = begin; i < begin + length; i++) {
479 sumWts += weights[i];
480 }
481
482 if (isBiasCorrected) {
483 var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0);
484 } else {
485 var = (accum - (accum2 * accum2 / sumWts)) / sumWts;
486 }
487 }
488 }
489 return var;
490 }
491
492 /**
493 * Returns the weighted variance of the values in the input array, using
494 * the precomputed weighted mean value.
495 * <p>
496 * Uses the formula
497 * <pre>
498 * Σ(weights[i]*(values[i] - mean)²)/(Σ(weights[i]) - 1)
499 * </pre>
500 * <p>
501 * The formula used assumes that the supplied mean value is the weighted arithmetic
502 * mean of the sample data, not a known population parameter. This method
503 * is supplied only to save computation when the mean has already been
504 * computed.
505 * <p>
506 * This formula will not return the same result as the unweighted variance when all
507 * weights are equal, unless all weights are equal to 1. The formula assumes that
508 * weights are to be treated as "expansion values," as will be the case if for example
509 * the weights represent frequency counts. To normalize weights so that the denominator
510 * in the variance computation equals the length of the input vector minus one, use
511 * <pre>
512 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean);</code>
513 * </pre>
514 * <p>
515 * Returns 0 for a single-value (i.e. length = 1) sample.
516 * <p>
517 * Throws <code>MathIllegalArgumentException</code> if any of the following are true:
518 * <ul><li>the values array is null</li>
519 * <li>the weights array is null</li>
520 * <li>the weights array does not have the same length as the values array</li>
521 * <li>the weights array contains one or more infinite values</li>
522 * <li>the weights array contains one or more NaN values</li>
523 * <li>the weights array contains negative values</li>
524 * </ul>
525 * <p>
526 * Does not change the internal state of the statistic.
527 *
528 * @param values the input array
529 * @param weights the weights array
530 * @param mean the precomputed weighted mean value
531 * @return the variance of the values or Double.NaN if length = 0
532 * @throws MathIllegalArgumentException if the parameters are not valid
533 */
534 public double evaluate(final double[] values, final double[] weights, final double mean)
535 throws MathIllegalArgumentException {
536 return evaluate(values, weights, mean, 0, values.length);
537 }
538
539 /** Check if bias is corrected.
540 * @return Returns the isBiasCorrected.
541 */
542 public boolean isBiasCorrected() {
543 return isBiasCorrected;
544 }
545
546 /**
547 * Returns a new copy of this variance with the given bias correction
548 * setting.
549 *
550 * @param biasCorrection The bias correction flag to set.
551 * @return a copy of this instance with the given bias correction setting
552 */
553 public Variance withBiasCorrection(boolean biasCorrection) {
554 return new Variance(this.moment, this.incMoment, biasCorrection);
555 }
556
557 /** {@inheritDoc} */
558 @Override
559 public Variance copy() {
560 return new Variance(this);
561 }
562
563 }