1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.descriptive.moment; 23 24 import java.io.Serializable; 25 26 import org.hipparchus.exception.MathIllegalArgumentException; 27 import org.hipparchus.exception.NullArgumentException; 28 import org.hipparchus.stat.StatUtils; 29 import org.hipparchus.stat.descriptive.AbstractStorelessUnivariateStatistic; 30 import org.hipparchus.stat.descriptive.AggregatableStatistic; 31 import org.hipparchus.stat.descriptive.WeightedEvaluation; 32 import org.hipparchus.util.MathArrays; 33 import org.hipparchus.util.MathUtils; 34 35 /** 36 * Computes the variance of the available values. By default, the unbiased 37 * "sample variance" definitional formula is used: 38 * <p> 39 * variance = sum((x_i - mean)^2) / (n - 1) 40 * <p> 41 * where mean is the {@link Mean} and <code>n</code> is the number 42 * of sample observations. 43 * <p> 44 * The definitional formula does not have good numerical properties, so 45 * this implementation does not compute the statistic using the definitional 46 * formula. 47 * <ul> 48 * <li> The <code>getResult</code> method computes the variance using 49 * updating formulas based on West's algorithm, as described in 50 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and 51 * J. G. Lewis 1979, <i>Communications of the ACM</i>, 52 * vol. 22 no. 9, pp. 526-531.</a></li> 53 * <li> The <code>evaluate</code> methods leverage the fact that they have the 54 * full array of values in memory to execute a two-pass algorithm. 55 * Specifically, these methods use the "corrected two-pass algorithm" from 56 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, 57 * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li> 58 * </ul> 59 * <p> 60 * Note that adding values using <code>increment</code> or 61 * <code>incrementAll</code> and then executing <code>getResult</code> will 62 * sometimes give a different, less accurate, result than executing 63 * <code>evaluate</code> with the full array of values. The former approach 64 * should only be used when the full array of values is not available. 65 * <p> 66 * The "population variance" ( sum((x_i - mean)^2) / n ) can also 67 * be computed using this statistic. The <code>isBiasCorrected</code> 68 * property determines whether the "population" or "sample" value is 69 * returned by the <code>evaluate</code> and <code>getResult</code> methods. 70 * To compute population variances, set this property to <code>false.</code> 71 * <p> 72 * <strong>Note that this implementation is not synchronized.</strong> If 73 * multiple threads access an instance of this class concurrently, and at least 74 * one of the threads invokes the <code>increment()</code> or 75 * <code>clear()</code> method, it must be synchronized externally. 76 */ 77 public class Variance extends AbstractStorelessUnivariateStatistic 78 implements AggregatableStatistic<Variance>, WeightedEvaluation, Serializable { 79 80 /** Serializable version identifier */ 81 private static final long serialVersionUID = 20150412L; 82 83 /** SecondMoment is used in incremental calculation of Variance*/ 84 protected final SecondMoment moment; 85 86 /** 87 * Whether or not {@link #increment(double)} should increment 88 * the internal second moment. When a Variance is constructed with an 89 * external SecondMoment as a constructor parameter, this property is 90 * set to false and increments must be applied to the second moment 91 * directly. 92 */ 93 protected final boolean incMoment; 94 95 /** 96 * Whether or not bias correction is applied when computing the 97 * value of the statistic. True means that bias is corrected. See 98 * {@link Variance} for details on the formula. 99 */ 100 private final boolean isBiasCorrected; 101 102 /** 103 * Constructs a Variance with default (true) <code>isBiasCorrected</code> 104 * property. 105 */ 106 public Variance() { 107 this(true); 108 } 109 110 /** 111 * Constructs a Variance based on an external second moment. 112 * <p> 113 * When this constructor is used, the statistic may only be 114 * incremented via the moment, i.e., {@link #increment(double)} 115 * does nothing; whereas {@code m2.increment(value)} increments 116 * both {@code m2} and the Variance instance constructed from it. 117 * 118 * @param m2 the SecondMoment (Third or Fourth moments work here as well.) 119 */ 120 public Variance(final SecondMoment m2) { 121 this(true, m2); 122 } 123 124 /** 125 * Constructs a Variance with the specified <code>isBiasCorrected</code> 126 * property. 127 * 128 * @param isBiasCorrected setting for bias correction - true means 129 * bias will be corrected and is equivalent to using the argumentless 130 * constructor 131 */ 132 public Variance(boolean isBiasCorrected) { 133 this(new SecondMoment(), true, isBiasCorrected); 134 } 135 136 /** 137 * Constructs a Variance with the specified <code>isBiasCorrected</code> 138 * property and the supplied external second moment. 139 * 140 * @param isBiasCorrected setting for bias correction - true means 141 * bias will be corrected 142 * @param m2 the SecondMoment (Third or Fourth moments work 143 * here as well.) 144 */ 145 public Variance(boolean isBiasCorrected, SecondMoment m2) { 146 this(m2, false, isBiasCorrected); 147 } 148 149 /** 150 * Constructs a Variance with the specified parameters. 151 * 152 * @param m2 the SecondMoment (Third or Fourth moments work 153 * here as well.) 154 * @param incMoment if the moment shall be incremented 155 * @param isBiasCorrected setting for bias correction - true means 156 * bias will be corrected 157 */ 158 private Variance(SecondMoment m2, boolean incMoment, boolean isBiasCorrected) { 159 this.moment = m2; 160 this.incMoment = incMoment; 161 this.isBiasCorrected = isBiasCorrected; 162 } 163 164 /** 165 * Copy constructor, creates a new {@code Variance} identical 166 * to the {@code original}. 167 * 168 * @param original the {@code Variance} instance to copy 169 * @throws NullArgumentException if original is null 170 */ 171 public Variance(Variance original) throws NullArgumentException { 172 MathUtils.checkNotNull(original); 173 this.moment = original.moment.copy(); 174 this.incMoment = original.incMoment; 175 this.isBiasCorrected = original.isBiasCorrected; 176 } 177 178 /** 179 * {@inheritDoc} 180 * <p>If all values are available, it is more accurate to use 181 * {@link #evaluate(double[])} rather than adding values one at a time 182 * using this method and then executing {@link #getResult}, since 183 * <code>evaluate</code> leverages the fact that is has the full 184 * list of values together to execute a two-pass algorithm. 185 * See {@link Variance}. 186 * <p> 187 * Note also that when {@link #Variance(SecondMoment)} is used to 188 * create a Variance, this method does nothing. In that case, the 189 * SecondMoment should be incremented directly. 190 */ 191 @Override 192 public void increment(final double d) { 193 if (incMoment) { 194 moment.increment(d); 195 } 196 } 197 198 /** {@inheritDoc} */ 199 @Override 200 public double getResult() { 201 if (moment.n == 0) { 202 return Double.NaN; 203 } else if (moment.n == 1) { 204 return 0d; 205 } else { 206 if (isBiasCorrected) { 207 return moment.m2 / (moment.n - 1d); 208 } else { 209 return moment.m2 / (moment.n); 210 } 211 } 212 } 213 214 /** {@inheritDoc} */ 215 @Override 216 public long getN() { 217 return moment.getN(); 218 } 219 220 /** {@inheritDoc} */ 221 @Override 222 public void clear() { 223 if (incMoment) { 224 moment.clear(); 225 } 226 } 227 228 /** {@inheritDoc} */ 229 @Override 230 public void aggregate(Variance other) { 231 MathUtils.checkNotNull(other); 232 if (incMoment) { 233 this.moment.aggregate(other.moment); 234 } 235 } 236 237 /** 238 * Returns the variance of the entries in the specified portion of 239 * the input array, or <code>Double.NaN</code> if the designated subarray 240 * is empty. Note that Double.NaN may also be returned if the input 241 * includes NaN and / or infinite values. 242 * <p> 243 * See {@link Variance} for details on the computing algorithm.</p> 244 * <p> 245 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 246 * <p> 247 * Does not change the internal state of the statistic.</p> 248 * <p> 249 * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> 250 * 251 * @param values the input array 252 * @param begin index of the first array element to include 253 * @param length the number of elements to include 254 * @return the variance of the values or Double.NaN if length = 0 255 * @throws MathIllegalArgumentException if the array is null or the array index 256 * parameters are not valid 257 */ 258 @Override 259 public double evaluate(final double[] values, final int begin, final int length) 260 throws MathIllegalArgumentException { 261 262 double var = Double.NaN; 263 264 if (MathArrays.verifyValues(values, begin, length)) { 265 if (length == 1) { 266 var = 0.0; 267 } else if (length > 1) { 268 double m = StatUtils.mean(values, begin, length); 269 var = evaluate(values, m, begin, length); 270 } 271 } 272 return var; 273 } 274 275 /** 276 * Returns the weighted variance of the entries in the specified portion of 277 * the input array, or <code>Double.NaN</code> if the designated subarray 278 * is empty. 279 * <p> 280 * Uses the formula 281 * <pre> 282 * Σ(weights[i]*(values[i] - weightedMean)²)/(Σ(weights[i]) - 1) 283 * </pre> 284 * where weightedMean is the weighted mean. 285 * <p> 286 * This formula will not return the same result as the unweighted variance when all 287 * weights are equal, unless all weights are equal to 1. The formula assumes that 288 * weights are to be treated as "expansion values," as will be the case if for example 289 * the weights represent frequency counts. To normalize weights so that the denominator 290 * in the variance computation equals the length of the input vector minus one, use 291 * <pre> 292 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length));</code> 293 * </pre> 294 * <p> 295 * Returns 0 for a single-value (i.e. length = 1) sample. 296 * <p> 297 * Throws <code>IllegalArgumentException</code> if any of the following are true: 298 * <ul><li>the values array is null</li> 299 * <li>the weights array is null</li> 300 * <li>the weights array does not have the same length as the values array</li> 301 * <li>the weights array contains one or more infinite values</li> 302 * <li>the weights array contains one or more NaN values</li> 303 * <li>the weights array contains negative values</li> 304 * <li>the start and length arguments do not determine a valid array</li> 305 * </ul> 306 * <p> 307 * Does not change the internal state of the statistic. 308 * 309 * @param values the input array 310 * @param weights the weights array 311 * @param begin index of the first array element to include 312 * @param length the number of elements to include 313 * @return the weighted variance of the values or Double.NaN if length = 0 314 * @throws MathIllegalArgumentException if the parameters are not valid 315 */ 316 @Override 317 public double evaluate(final double[] values, final double[] weights, 318 final int begin, final int length) 319 throws MathIllegalArgumentException { 320 321 double var = Double.NaN; 322 if (MathArrays.verifyValues(values, weights,begin, length)) { 323 if (length == 1) { 324 var = 0.0; 325 } else if (length > 1) { 326 Mean mean = new Mean(); 327 double m = mean.evaluate(values, weights, begin, length); 328 var = evaluate(values, weights, m, begin, length); 329 } 330 } 331 return var; 332 } 333 334 /** 335 * Returns the variance of the entries in the specified portion of 336 * the input array, using the precomputed mean value. Returns 337 * <code>Double.NaN</code> if the designated subarray is empty. 338 * <p> 339 * See {@link Variance} for details on the computing algorithm. 340 * <p> 341 * The formula used assumes that the supplied mean value is the arithmetic 342 * mean of the sample data, not a known population parameter. This method 343 * is supplied only to save computation when the mean has already been 344 * computed. 345 * <p> 346 * Returns 0 for a single-value (i.e. length = 1) sample. 347 * <p> 348 * Does not change the internal state of the statistic. 349 * 350 * @param values the input array 351 * @param mean the precomputed mean value 352 * @param begin index of the first array element to include 353 * @param length the number of elements to include 354 * @return the variance of the values or Double.NaN if length = 0 355 * @throws MathIllegalArgumentException if the array is null or the array index 356 * parameters are not valid 357 */ 358 public double evaluate(final double[] values, final double mean, 359 final int begin, final int length) 360 throws MathIllegalArgumentException { 361 362 double var = Double.NaN; 363 if (MathArrays.verifyValues(values, begin, length)) { 364 if (length == 1) { 365 var = 0.0; 366 } else if (length > 1) { 367 double accum = 0.0; 368 double accum2 = 0.0; 369 for (int i = begin; i < begin + length; i++) { 370 final double dev = values[i] - mean; 371 accum += dev * dev; 372 accum2 += dev; 373 } 374 double len = length; 375 if (isBiasCorrected) { 376 var = (accum - (accum2 * accum2 / len)) / (len - 1.0); 377 } else { 378 var = (accum - (accum2 * accum2 / len)) / len; 379 } 380 } 381 } 382 return var; 383 } 384 385 /** 386 * Returns the variance of the entries in the input array, using the 387 * precomputed mean value. Returns <code>Double.NaN</code> if the array 388 * is empty. 389 * <p> 390 * See {@link Variance} for details on the computing algorithm. 391 * <p> 392 * If <code>isBiasCorrected</code> is <code>true</code> the formula used 393 * assumes that the supplied mean value is the arithmetic mean of the 394 * sample data, not a known population parameter. If the mean is a known 395 * population parameter, or if the "population" version of the variance is 396 * desired, set <code>isBiasCorrected</code> to <code>false</code> before 397 * invoking this method. 398 * <p> 399 * Returns 0 for a single-value (i.e. length = 1) sample. 400 * <p> 401 * Does not change the internal state of the statistic. 402 * 403 * @param values the input array 404 * @param mean the precomputed mean value 405 * @return the variance of the values or Double.NaN if the array is empty 406 * @throws MathIllegalArgumentException if the array is null 407 */ 408 public double evaluate(final double[] values, final double mean) 409 throws MathIllegalArgumentException { 410 return evaluate(values, mean, 0, values.length); 411 } 412 413 /** 414 * Returns the weighted variance of the entries in the specified portion of 415 * the input array, using the precomputed weighted mean value. Returns 416 * <code>Double.NaN</code> if the designated subarray is empty. 417 * <p> 418 * Uses the formula 419 * <pre> 420 * Σ(weights[i]*(values[i] - mean)²)/(Σ(weights[i]) - 1) 421 * </pre> 422 * <p> 423 * The formula used assumes that the supplied mean value is the weighted arithmetic 424 * mean of the sample data, not a known population parameter. This method 425 * is supplied only to save computation when the mean has already been 426 * computed. 427 * <p> 428 * This formula will not return the same result as the unweighted variance when all 429 * weights are equal, unless all weights are equal to 1. The formula assumes that 430 * weights are to be treated as "expansion values," as will be the case if for example 431 * the weights represent frequency counts. To normalize weights so that the denominator 432 * in the variance computation equals the length of the input vector minus one, use 433 * <pre> 434 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean);</code> 435 * </pre> 436 * <p> 437 * Returns 0 for a single-value (i.e. length = 1) sample. 438 * <p> 439 * Throws <code>MathIllegalArgumentException</code> if any of the following are true: 440 * <ul><li>the values array is null</li> 441 * <li>the weights array is null</li> 442 * <li>the weights array does not have the same length as the values array</li> 443 * <li>the weights array contains one or more infinite values</li> 444 * <li>the weights array contains one or more NaN values</li> 445 * <li>the weights array contains negative values</li> 446 * <li>the start and length arguments do not determine a valid array</li> 447 * </ul> 448 * <p> 449 * Does not change the internal state of the statistic. 450 * 451 * @param values the input array 452 * @param weights the weights array 453 * @param mean the precomputed weighted mean value 454 * @param begin index of the first array element to include 455 * @param length the number of elements to include 456 * @return the variance of the values or Double.NaN if length = 0 457 * @throws MathIllegalArgumentException if the parameters are not valid 458 */ 459 public double evaluate(final double[] values, final double[] weights, 460 final double mean, final int begin, final int length) 461 throws MathIllegalArgumentException { 462 463 double var = Double.NaN; 464 465 if (MathArrays.verifyValues(values, weights, begin, length)) { 466 if (length == 1) { 467 var = 0.0; 468 } else if (length > 1) { 469 double accum = 0.0; 470 double accum2 = 0.0; 471 for (int i = begin; i < begin + length; i++) { 472 final double dev = values[i] - mean; 473 accum += weights[i] * (dev * dev); 474 accum2 += weights[i] * dev; 475 } 476 477 double sumWts = 0; 478 for (int i = begin; i < begin + length; i++) { 479 sumWts += weights[i]; 480 } 481 482 if (isBiasCorrected) { 483 var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0); 484 } else { 485 var = (accum - (accum2 * accum2 / sumWts)) / sumWts; 486 } 487 } 488 } 489 return var; 490 } 491 492 /** 493 * Returns the weighted variance of the values in the input array, using 494 * the precomputed weighted mean value. 495 * <p> 496 * Uses the formula 497 * <pre> 498 * Σ(weights[i]*(values[i] - mean)²)/(Σ(weights[i]) - 1) 499 * </pre> 500 * <p> 501 * The formula used assumes that the supplied mean value is the weighted arithmetic 502 * mean of the sample data, not a known population parameter. This method 503 * is supplied only to save computation when the mean has already been 504 * computed. 505 * <p> 506 * This formula will not return the same result as the unweighted variance when all 507 * weights are equal, unless all weights are equal to 1. The formula assumes that 508 * weights are to be treated as "expansion values," as will be the case if for example 509 * the weights represent frequency counts. To normalize weights so that the denominator 510 * in the variance computation equals the length of the input vector minus one, use 511 * <pre> 512 * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean);</code> 513 * </pre> 514 * <p> 515 * Returns 0 for a single-value (i.e. length = 1) sample. 516 * <p> 517 * Throws <code>MathIllegalArgumentException</code> if any of the following are true: 518 * <ul><li>the values array is null</li> 519 * <li>the weights array is null</li> 520 * <li>the weights array does not have the same length as the values array</li> 521 * <li>the weights array contains one or more infinite values</li> 522 * <li>the weights array contains one or more NaN values</li> 523 * <li>the weights array contains negative values</li> 524 * </ul> 525 * <p> 526 * Does not change the internal state of the statistic. 527 * 528 * @param values the input array 529 * @param weights the weights array 530 * @param mean the precomputed weighted mean value 531 * @return the variance of the values or Double.NaN if length = 0 532 * @throws MathIllegalArgumentException if the parameters are not valid 533 */ 534 public double evaluate(final double[] values, final double[] weights, final double mean) 535 throws MathIllegalArgumentException { 536 return evaluate(values, weights, mean, 0, values.length); 537 } 538 539 /** Check if bias is corrected. 540 * @return Returns the isBiasCorrected. 541 */ 542 public boolean isBiasCorrected() { 543 return isBiasCorrected; 544 } 545 546 /** 547 * Returns a new copy of this variance with the given bias correction 548 * setting. 549 * 550 * @param biasCorrection The bias correction flag to set. 551 * @return a copy of this instance with the given bias correction setting 552 */ 553 public Variance withBiasCorrection(boolean biasCorrection) { 554 return new Variance(this.moment, this.incMoment, biasCorrection); 555 } 556 557 /** {@inheritDoc} */ 558 @Override 559 public Variance copy() { 560 return new Variance(this); 561 } 562 563 }