1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.correlation; 23 24 import org.hipparchus.exception.LocalizedCoreFormats; 25 import org.hipparchus.exception.MathIllegalArgumentException; 26 import org.hipparchus.linear.BlockRealMatrix; 27 import org.hipparchus.linear.RealMatrix; 28 import org.hipparchus.stat.descriptive.moment.Mean; 29 import org.hipparchus.stat.descriptive.moment.Variance; 30 31 /** 32 * Computes covariances for pairs of arrays or columns of a matrix. 33 * <p> 34 * The constructors that take {@code RealMatrix} or {@code double[][]} 35 * arguments generate covariance matrices. The columns of the input 36 * matrices are assumed to represent variable values. 37 * <p> 38 * The constructor argument {@code biasCorrected} determines whether or 39 * not computed covariances are bias-corrected. 40 * <p> 41 * Unbiased covariances are given by the formula: 42 * <p> 43 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code> 44 * <p> 45 * where {@code E(X)} is the mean of {@code X} and {@code E(Y)} 46 * is the mean of the <code>Y</code> values. 47 * <p> 48 * Non-bias-corrected estimates use {@code n} in place of {@code n - 1}. 49 */ 50 public class Covariance { 51 52 /** The covariance matrix. */ 53 private final RealMatrix covarianceMatrix; 54 55 /** Number of observations (length of covariate vectors). */ 56 private final int n; 57 58 /** 59 * Create a Covariance with no data. 60 */ 61 public Covariance() { 62 super(); 63 covarianceMatrix = null; 64 n = 0; 65 } 66 67 /** 68 * Create a Covariance matrix from a rectangular array 69 * whose columns represent covariates. 70 * <p> 71 * The <code>biasCorrected</code> parameter determines whether or not 72 * covariance estimates are bias-corrected. 73 * <p> 74 * The input array must be rectangular with at least one column 75 * and two rows. 76 * 77 * @param data rectangular array with columns representing covariates 78 * @param biasCorrected true means covariances are bias-corrected 79 * @throws MathIllegalArgumentException if the input data array is not 80 * rectangular with at least two rows and one column. 81 * @throws MathIllegalArgumentException if the input data array is not 82 * rectangular with at least one row and one column. 83 */ 84 public Covariance(double[][] data, boolean biasCorrected) 85 throws MathIllegalArgumentException { 86 this(new BlockRealMatrix(data), biasCorrected); 87 } 88 89 /** 90 * Create a Covariance matrix from a rectangular array 91 * whose columns represent covariates. 92 * <p> 93 * The input array must be rectangular with at least one column 94 * and two rows. 95 * 96 * @param data rectangular array with columns representing covariates 97 * @throws MathIllegalArgumentException if the input data array is not 98 * rectangular with at least two rows and one column. 99 * @throws MathIllegalArgumentException if the input data array is not 100 * rectangular with at least one row and one column. 101 */ 102 public Covariance(double[][] data) throws MathIllegalArgumentException { 103 this(data, true); 104 } 105 106 /** 107 * Create a covariance matrix from a matrix whose columns 108 * represent covariates. 109 * <p> 110 * The <code>biasCorrected</code> parameter determines whether or not 111 * covariance estimates are bias-corrected. 112 * <p> 113 * The matrix must have at least one column and two rows. 114 * 115 * @param matrix matrix with columns representing covariates 116 * @param biasCorrected true means covariances are bias-corrected 117 * @throws MathIllegalArgumentException if the input matrix does not have 118 * at least two rows and one column 119 */ 120 public Covariance(RealMatrix matrix, boolean biasCorrected) 121 throws MathIllegalArgumentException { 122 checkSufficientData(matrix); 123 n = matrix.getRowDimension(); 124 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected); 125 } 126 127 /** 128 * Create a covariance matrix from a matrix whose columns 129 * represent covariates. 130 * <p> 131 * The matrix must have at least one column and two rows. 132 * 133 * @param matrix matrix with columns representing covariates 134 * @throws MathIllegalArgumentException if the input matrix does not have 135 * at least two rows and one column 136 */ 137 public Covariance(RealMatrix matrix) throws MathIllegalArgumentException { 138 this(matrix, true); 139 } 140 141 /** 142 * Returns the covariance matrix 143 * 144 * @return covariance matrix 145 */ 146 public RealMatrix getCovarianceMatrix() { 147 return covarianceMatrix; 148 } 149 150 /** 151 * Returns the number of observations (length of covariate vectors) 152 * 153 * @return number of observations 154 */ 155 public int getN() { 156 return n; 157 } 158 159 /** 160 * Compute a covariance matrix from a matrix whose columns represent covariates. 161 * 162 * @param matrix input matrix (must have at least one column and two rows) 163 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 164 * @return covariance matrix 165 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data 166 */ 167 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) 168 throws MathIllegalArgumentException { 169 170 int dimension = matrix.getColumnDimension(); 171 Variance variance = new Variance(biasCorrected); 172 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); 173 for (int i = 0; i < dimension; i++) { 174 for (int j = 0; j < i; j++) { 175 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); 176 outMatrix.setEntry(i, j, cov); 177 outMatrix.setEntry(j, i, cov); 178 } 179 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); 180 } 181 return outMatrix; 182 } 183 184 /** 185 * Create a covariance matrix from a matrix whose columns represent 186 * covariates. Covariances are computed using the bias-corrected formula. 187 * 188 * @param matrix input matrix (must have at least one column and two rows) 189 * @return covariance matrix 190 * @throws MathIllegalArgumentException if matrix does not contain sufficient data 191 * @see #Covariance 192 */ 193 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) 194 throws MathIllegalArgumentException { 195 return computeCovarianceMatrix(matrix, true); 196 } 197 198 /** 199 * Compute a covariance matrix from a rectangular array whose columns represent covariates. 200 * 201 * @param data input array (must have at least one column and two rows) 202 * @param biasCorrected determines whether or not covariance estimates are bias-corrected 203 * @return covariance matrix 204 * @throws MathIllegalArgumentException if the data array does not contain sufficient data 205 * @throws MathIllegalArgumentException if the input data array is not 206 * rectangular with at least one row and one column. 207 */ 208 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) 209 throws MathIllegalArgumentException { 210 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected); 211 } 212 213 /** 214 * Create a covariance matrix from a rectangular array whose columns represent 215 * covariates. Covariances are computed using the bias-corrected formula. 216 * 217 * @param data input array (must have at least one column and two rows) 218 * @return covariance matrix 219 * @throws MathIllegalArgumentException if the data array does not contain sufficient data 220 * @throws MathIllegalArgumentException if the input data array is not 221 * rectangular with at least one row and one column. 222 * @see #Covariance 223 */ 224 protected RealMatrix computeCovarianceMatrix(double[][] data) 225 throws MathIllegalArgumentException { 226 return computeCovarianceMatrix(data, true); 227 } 228 229 /** 230 * Computes the covariance between the two arrays. 231 * <p> 232 * Array lengths must match and the common length must be at least 2. 233 * 234 * @param xArray first data array 235 * @param yArray second data array 236 * @param biasCorrected if true, returned value will be bias-corrected 237 * @return returns the covariance for the two arrays 238 * @throws MathIllegalArgumentException if the arrays lengths do not match or 239 * there is insufficient data 240 */ 241 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) 242 throws MathIllegalArgumentException { 243 Mean mean = new Mean(); 244 double result = 0d; 245 int length = xArray.length; 246 if (length != yArray.length) { 247 throw new MathIllegalArgumentException( 248 LocalizedCoreFormats.DIMENSIONS_MISMATCH, length, yArray.length); 249 } else if (length < 2) { 250 throw new MathIllegalArgumentException( 251 LocalizedCoreFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2); 252 } else { 253 double xMean = mean.evaluate(xArray); 254 double yMean = mean.evaluate(yArray); 255 for (int i = 0; i < length; i++) { 256 double xDev = xArray[i] - xMean; 257 double yDev = yArray[i] - yMean; 258 result += (xDev * yDev - result) / (i + 1); 259 } 260 } 261 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result; 262 } 263 264 /** 265 * Computes the covariance between the two arrays, using the bias-corrected 266 * formula. 267 * <p> 268 * Array lengths must match and the common length must be at least 2. 269 * 270 * @param xArray first data array 271 * @param yArray second data array 272 * @return returns the covariance for the two arrays 273 * @throws MathIllegalArgumentException if the arrays lengths do not match or 274 * there is insufficient data 275 */ 276 public double covariance(final double[] xArray, final double[] yArray) 277 throws MathIllegalArgumentException { 278 return covariance(xArray, yArray, true); 279 } 280 281 /** 282 * Throws MathIllegalArgumentException if the matrix does not have at least 283 * one column and two rows. 284 * 285 * @param matrix matrix to check 286 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data 287 * to compute covariance 288 */ 289 private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException { 290 int nRows = matrix.getRowDimension(); 291 int nCols = matrix.getColumnDimension(); 292 if (nRows < 2 || nCols < 1) { 293 throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_ROWS_AND_COLUMNS, 294 nRows, nCols); 295 } 296 } 297 }