1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.correlation; 23 24 import org.hipparchus.exception.LocalizedCoreFormats; 25 import org.hipparchus.exception.MathIllegalArgumentException; 26 27 /** 28 * Bivariate Covariance implementation that does not require input data to be 29 * stored in memory. 30 * <p> 31 * This class is based on a paper written by Philippe Pébay: 32 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf"> 33 * Formulas for Robust, One-Pass Parallel Computation of Covariances and 34 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212, 35 * Sandia National Laboratories. It computes the covariance for a pair of variables. 36 * Use {@link StorelessCovariance} to estimate an entire covariance matrix. 37 * <p> 38 * Note: This class is package private as it is only used internally in 39 * the {@link StorelessCovariance} class. 40 */ 41 class StorelessBivariateCovariance { 42 43 /** the mean of variable x */ 44 private double meanX; 45 46 /** the mean of variable y */ 47 private double meanY; 48 49 /** number of observations */ 50 private double n; 51 52 /** the running covariance estimate */ 53 private double covarianceNumerator; 54 55 /** flag for bias correction */ 56 private boolean biasCorrected; 57 58 /** 59 * Create an empty {@link StorelessBivariateCovariance} instance with 60 * bias correction. 61 */ 62 StorelessBivariateCovariance() { 63 this(true); 64 } 65 66 /** 67 * Create an empty {@link StorelessBivariateCovariance} instance. 68 * 69 * @param biasCorrection if <code>true</code> the covariance estimate is corrected 70 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction, 71 * i.e. n in the denominator. 72 */ 73 StorelessBivariateCovariance(final boolean biasCorrection) { 74 meanX = meanY = 0.0; 75 n = 0; 76 covarianceNumerator = 0.0; 77 biasCorrected = biasCorrection; 78 } 79 80 /** 81 * Update the covariance estimation with a pair of variables (x, y). 82 * 83 * @param x the x value 84 * @param y the y value 85 */ 86 public void increment(final double x, final double y) { 87 n++; 88 final double deltaX = x - meanX; 89 final double deltaY = y - meanY; 90 meanX += deltaX / n; 91 meanY += deltaY / n; 92 covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY; 93 } 94 95 /** 96 * Appends another bivariate covariance calculation to this. 97 * After this operation, statistics returned should be close to what would 98 * have been obtained by by performing all of the {@link #increment(double, double)} 99 * operations in {@code cov} directly on this. 100 * 101 * @param cov StorelessBivariateCovariance instance to append. 102 */ 103 public void append(StorelessBivariateCovariance cov) { 104 double oldN = n; 105 n += cov.n; 106 final double deltaX = cov.meanX - meanX; 107 final double deltaY = cov.meanY - meanY; 108 meanX += deltaX * cov.n / n; 109 meanY += deltaY * cov.n / n; 110 covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY; 111 } 112 113 /** 114 * Returns the number of observations. 115 * 116 * @return number of observations 117 */ 118 public double getN() { 119 return n; 120 } 121 122 /** 123 * Return the current covariance estimate. 124 * 125 * @return the current covariance 126 * @throws MathIllegalArgumentException if the number of observations 127 * is < 2 128 */ 129 public double getResult() throws MathIllegalArgumentException { 130 if (n < 2) { 131 throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_DIMENSION, 132 n, 2, true); 133 } 134 if (biasCorrected) { 135 return covarianceNumerator / (n - 1d); 136 } else { 137 return covarianceNumerator / n; 138 } 139 } 140 } 141