1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.correlation;
23
24 import org.hipparchus.exception.LocalizedCoreFormats;
25 import org.hipparchus.exception.MathIllegalArgumentException;
26
27 /**
28 * Bivariate Covariance implementation that does not require input data to be
29 * stored in memory.
30 * <p>
31 * This class is based on a paper written by Philippe Pébay:
32 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
33 * Formulas for Robust, One-Pass Parallel Computation of Covariances and
34 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
35 * Sandia National Laboratories. It computes the covariance for a pair of variables.
36 * Use {@link StorelessCovariance} to estimate an entire covariance matrix.
37 * <p>
38 * Note: This class is package private as it is only used internally in
39 * the {@link StorelessCovariance} class.
40 */
41 class StorelessBivariateCovariance {
42
43 /** the mean of variable x */
44 private double meanX;
45
46 /** the mean of variable y */
47 private double meanY;
48
49 /** number of observations */
50 private double n;
51
52 /** the running covariance estimate */
53 private double covarianceNumerator;
54
55 /** flag for bias correction */
56 private boolean biasCorrected;
57
58 /**
59 * Create an empty {@link StorelessBivariateCovariance} instance with
60 * bias correction.
61 */
62 StorelessBivariateCovariance() {
63 this(true);
64 }
65
66 /**
67 * Create an empty {@link StorelessBivariateCovariance} instance.
68 *
69 * @param biasCorrection if <code>true</code> the covariance estimate is corrected
70 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
71 * i.e. n in the denominator.
72 */
73 StorelessBivariateCovariance(final boolean biasCorrection) {
74 meanX = meanY = 0.0;
75 n = 0;
76 covarianceNumerator = 0.0;
77 biasCorrected = biasCorrection;
78 }
79
80 /**
81 * Update the covariance estimation with a pair of variables (x, y).
82 *
83 * @param x the x value
84 * @param y the y value
85 */
86 public void increment(final double x, final double y) {
87 n++;
88 final double deltaX = x - meanX;
89 final double deltaY = y - meanY;
90 meanX += deltaX / n;
91 meanY += deltaY / n;
92 covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
93 }
94
95 /**
96 * Appends another bivariate covariance calculation to this.
97 * After this operation, statistics returned should be close to what would
98 * have been obtained by by performing all of the {@link #increment(double, double)}
99 * operations in {@code cov} directly on this.
100 *
101 * @param cov StorelessBivariateCovariance instance to append.
102 */
103 public void append(StorelessBivariateCovariance cov) {
104 double oldN = n;
105 n += cov.n;
106 final double deltaX = cov.meanX - meanX;
107 final double deltaY = cov.meanY - meanY;
108 meanX += deltaX * cov.n / n;
109 meanY += deltaY * cov.n / n;
110 covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
111 }
112
113 /**
114 * Returns the number of observations.
115 *
116 * @return number of observations
117 */
118 public double getN() {
119 return n;
120 }
121
122 /**
123 * Return the current covariance estimate.
124 *
125 * @return the current covariance
126 * @throws MathIllegalArgumentException if the number of observations
127 * is < 2
128 */
129 public double getResult() throws MathIllegalArgumentException {
130 if (n < 2) {
131 throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_DIMENSION,
132 n, 2, true);
133 }
134 if (biasCorrected) {
135 return covarianceNumerator / (n - 1d);
136 } else {
137 return covarianceNumerator / n;
138 }
139 }
140 }
141