1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.correlation;
23
24 import org.hipparchus.exception.LocalizedCoreFormats;
25 import org.hipparchus.exception.MathIllegalArgumentException;
26 import org.hipparchus.linear.BlockRealMatrix;
27 import org.hipparchus.linear.RealMatrix;
28 import org.hipparchus.stat.descriptive.moment.Mean;
29 import org.hipparchus.stat.descriptive.moment.Variance;
30
31 /**
32 * Computes covariances for pairs of arrays or columns of a matrix.
33 * <p>
34 * The constructors that take {@code RealMatrix} or {@code double[][]}
35 * arguments generate covariance matrices. The columns of the input
36 * matrices are assumed to represent variable values.
37 * <p>
38 * The constructor argument {@code biasCorrected} determines whether or
39 * not computed covariances are bias-corrected.
40 * <p>
41 * Unbiased covariances are given by the formula:
42 * <p>
43 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
44 * <p>
45 * where {@code E(X)} is the mean of {@code X} and {@code E(Y)}
46 * is the mean of the <code>Y</code> values.
47 * <p>
48 * Non-bias-corrected estimates use {@code n} in place of {@code n - 1}.
49 */
50 public class Covariance {
51
52 /** The covariance matrix. */
53 private final RealMatrix covarianceMatrix;
54
55 /** Number of observations (length of covariate vectors). */
56 private final int n;
57
58 /**
59 * Create a Covariance with no data.
60 */
61 public Covariance() {
62 super();
63 covarianceMatrix = null;
64 n = 0;
65 }
66
67 /**
68 * Create a Covariance matrix from a rectangular array
69 * whose columns represent covariates.
70 * <p>
71 * The <code>biasCorrected</code> parameter determines whether or not
72 * covariance estimates are bias-corrected.
73 * <p>
74 * The input array must be rectangular with at least one column
75 * and two rows.
76 *
77 * @param data rectangular array with columns representing covariates
78 * @param biasCorrected true means covariances are bias-corrected
79 * @throws MathIllegalArgumentException if the input data array is not
80 * rectangular with at least two rows and one column.
81 * @throws MathIllegalArgumentException if the input data array is not
82 * rectangular with at least one row and one column.
83 */
84 public Covariance(double[][] data, boolean biasCorrected)
85 throws MathIllegalArgumentException {
86 this(new BlockRealMatrix(data), biasCorrected);
87 }
88
89 /**
90 * Create a Covariance matrix from a rectangular array
91 * whose columns represent covariates.
92 * <p>
93 * The input array must be rectangular with at least one column
94 * and two rows.
95 *
96 * @param data rectangular array with columns representing covariates
97 * @throws MathIllegalArgumentException if the input data array is not
98 * rectangular with at least two rows and one column.
99 * @throws MathIllegalArgumentException if the input data array is not
100 * rectangular with at least one row and one column.
101 */
102 public Covariance(double[][] data) throws MathIllegalArgumentException {
103 this(data, true);
104 }
105
106 /**
107 * Create a covariance matrix from a matrix whose columns
108 * represent covariates.
109 * <p>
110 * The <code>biasCorrected</code> parameter determines whether or not
111 * covariance estimates are bias-corrected.
112 * <p>
113 * The matrix must have at least one column and two rows.
114 *
115 * @param matrix matrix with columns representing covariates
116 * @param biasCorrected true means covariances are bias-corrected
117 * @throws MathIllegalArgumentException if the input matrix does not have
118 * at least two rows and one column
119 */
120 public Covariance(RealMatrix matrix, boolean biasCorrected)
121 throws MathIllegalArgumentException {
122 checkSufficientData(matrix);
123 n = matrix.getRowDimension();
124 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
125 }
126
127 /**
128 * Create a covariance matrix from a matrix whose columns
129 * represent covariates.
130 * <p>
131 * The matrix must have at least one column and two rows.
132 *
133 * @param matrix matrix with columns representing covariates
134 * @throws MathIllegalArgumentException if the input matrix does not have
135 * at least two rows and one column
136 */
137 public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
138 this(matrix, true);
139 }
140
141 /**
142 * Returns the covariance matrix
143 *
144 * @return covariance matrix
145 */
146 public RealMatrix getCovarianceMatrix() {
147 return covarianceMatrix;
148 }
149
150 /**
151 * Returns the number of observations (length of covariate vectors)
152 *
153 * @return number of observations
154 */
155 public int getN() {
156 return n;
157 }
158
159 /**
160 * Compute a covariance matrix from a matrix whose columns represent covariates.
161 *
162 * @param matrix input matrix (must have at least one column and two rows)
163 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
164 * @return covariance matrix
165 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
166 */
167 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
168 throws MathIllegalArgumentException {
169
170 int dimension = matrix.getColumnDimension();
171 Variance variance = new Variance(biasCorrected);
172 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
173 for (int i = 0; i < dimension; i++) {
174 for (int j = 0; j < i; j++) {
175 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
176 outMatrix.setEntry(i, j, cov);
177 outMatrix.setEntry(j, i, cov);
178 }
179 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
180 }
181 return outMatrix;
182 }
183
184 /**
185 * Create a covariance matrix from a matrix whose columns represent
186 * covariates. Covariances are computed using the bias-corrected formula.
187 *
188 * @param matrix input matrix (must have at least one column and two rows)
189 * @return covariance matrix
190 * @throws MathIllegalArgumentException if matrix does not contain sufficient data
191 * @see #Covariance
192 */
193 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
194 throws MathIllegalArgumentException {
195 return computeCovarianceMatrix(matrix, true);
196 }
197
198 /**
199 * Compute a covariance matrix from a rectangular array whose columns represent covariates.
200 *
201 * @param data input array (must have at least one column and two rows)
202 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
203 * @return covariance matrix
204 * @throws MathIllegalArgumentException if the data array does not contain sufficient data
205 * @throws MathIllegalArgumentException if the input data array is not
206 * rectangular with at least one row and one column.
207 */
208 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
209 throws MathIllegalArgumentException {
210 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
211 }
212
213 /**
214 * Create a covariance matrix from a rectangular array whose columns represent
215 * covariates. Covariances are computed using the bias-corrected formula.
216 *
217 * @param data input array (must have at least one column and two rows)
218 * @return covariance matrix
219 * @throws MathIllegalArgumentException if the data array does not contain sufficient data
220 * @throws MathIllegalArgumentException if the input data array is not
221 * rectangular with at least one row and one column.
222 * @see #Covariance
223 */
224 protected RealMatrix computeCovarianceMatrix(double[][] data)
225 throws MathIllegalArgumentException {
226 return computeCovarianceMatrix(data, true);
227 }
228
229 /**
230 * Computes the covariance between the two arrays.
231 * <p>
232 * Array lengths must match and the common length must be at least 2.
233 *
234 * @param xArray first data array
235 * @param yArray second data array
236 * @param biasCorrected if true, returned value will be bias-corrected
237 * @return returns the covariance for the two arrays
238 * @throws MathIllegalArgumentException if the arrays lengths do not match or
239 * there is insufficient data
240 */
241 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
242 throws MathIllegalArgumentException {
243 Mean mean = new Mean();
244 double result = 0d;
245 int length = xArray.length;
246 if (length != yArray.length) {
247 throw new MathIllegalArgumentException(
248 LocalizedCoreFormats.DIMENSIONS_MISMATCH, length, yArray.length);
249 } else if (length < 2) {
250 throw new MathIllegalArgumentException(
251 LocalizedCoreFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
252 } else {
253 double xMean = mean.evaluate(xArray);
254 double yMean = mean.evaluate(yArray);
255 for (int i = 0; i < length; i++) {
256 double xDev = xArray[i] - xMean;
257 double yDev = yArray[i] - yMean;
258 result += (xDev * yDev - result) / (i + 1);
259 }
260 }
261 return biasCorrected ? result * (((double) length) / (length - 1)) : result;
262 }
263
264 /**
265 * Computes the covariance between the two arrays, using the bias-corrected
266 * formula.
267 * <p>
268 * Array lengths must match and the common length must be at least 2.
269 *
270 * @param xArray first data array
271 * @param yArray second data array
272 * @return returns the covariance for the two arrays
273 * @throws MathIllegalArgumentException if the arrays lengths do not match or
274 * there is insufficient data
275 */
276 public double covariance(final double[] xArray, final double[] yArray)
277 throws MathIllegalArgumentException {
278 return covariance(xArray, yArray, true);
279 }
280
281 /**
282 * Throws MathIllegalArgumentException if the matrix does not have at least
283 * one column and two rows.
284 *
285 * @param matrix matrix to check
286 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
287 * to compute covariance
288 */
289 private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
290 int nRows = matrix.getRowDimension();
291 int nCols = matrix.getColumnDimension();
292 if (nRows < 2 || nCols < 1) {
293 throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
294 nRows, nCols);
295 }
296 }
297 }