1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.regression; 23 24 import java.io.Serializable; 25 import java.util.Arrays; 26 27 import org.hipparchus.exception.MathIllegalArgumentException; 28 import org.hipparchus.util.FastMath; 29 import org.hipparchus.util.MathUtils; 30 31 /** 32 * Results of a Multiple Linear Regression model fit. 33 * 34 */ 35 public class RegressionResults implements Serializable { 36 37 /** INDEX of Sum of Squared Errors */ 38 private static final int SSE_IDX = 0; 39 /** INDEX of Sum of Squares of Model */ 40 private static final int SST_IDX = 1; 41 /** INDEX of R-Squared of regression */ 42 private static final int RSQ_IDX = 2; 43 /** INDEX of Mean Squared Error */ 44 private static final int MSE_IDX = 3; 45 /** INDEX of Adjusted R Squared */ 46 private static final int ADJRSQ_IDX = 4; 47 /** UID */ 48 private static final long serialVersionUID = 1l; 49 /** regression slope parameters */ 50 private final double[] parameters; 51 /** variance covariance matrix of parameters */ 52 private final double[][] varCovData; 53 /** boolean flag for variance covariance matrix in symm compressed storage */ 54 private final boolean isSymmetricVCD; 55 /** rank of the solution */ 56 @SuppressWarnings("unused") 57 private final int rank; 58 /** number of observations on which results are based */ 59 private final long nobs; 60 /** boolean flag indicator of whether a constant was included*/ 61 private final boolean containsConstant; 62 /** array storing global results, SSE, MSE, RSQ, adjRSQ */ 63 private final double[] globalFitInfo; 64 65 /** 66 * Set the default constructor to private access 67 * to prevent inadvertent instantiation 68 */ 69 @SuppressWarnings("unused") 70 private RegressionResults() { 71 this.parameters = null; 72 this.varCovData = null; 73 this.rank = -1; 74 this.nobs = -1; 75 this.containsConstant = false; 76 this.isSymmetricVCD = false; 77 this.globalFitInfo = null; 78 } 79 80 /** 81 * Constructor for Regression Results. 82 * 83 * @param parameters a double array with the regression slope estimates 84 * @param varcov the variance covariance matrix, stored either in a square matrix 85 * or as a compressed 86 * @param isSymmetricCompressed a flag which denotes that the variance covariance 87 * matrix is in symmetric compressed format 88 * @param nobs the number of observations of the regression estimation 89 * @param rank the number of independent variables in the regression 90 * @param sumy the sum of the independent variable 91 * @param sumysq the sum of the squared independent variable 92 * @param sse sum of squared errors 93 * @param containsConstant true model has constant, false model does not have constant 94 * @param copyData if true a deep copy of all input data is made, if false only references 95 * are copied and the RegressionResults become mutable 96 */ 97 public RegressionResults( 98 final double[] parameters, final double[][] varcov, 99 final boolean isSymmetricCompressed, 100 final long nobs, final int rank, 101 final double sumy, final double sumysq, final double sse, 102 final boolean containsConstant, 103 final boolean copyData) { 104 if (copyData) { 105 this.parameters = parameters.clone(); 106 this.varCovData = new double[varcov.length][]; 107 for (int i = 0; i < varcov.length; i++) { 108 this.varCovData[i] = varcov[i].clone(); 109 } 110 } else { 111 this.parameters = parameters; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter 112 this.varCovData = varcov; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter 113 } 114 this.isSymmetricVCD = isSymmetricCompressed; 115 this.nobs = nobs; 116 this.rank = rank; 117 this.containsConstant = containsConstant; 118 this.globalFitInfo = new double[5]; 119 Arrays.fill(this.globalFitInfo, Double.NaN); 120 121 if (rank > 0) { 122 this.globalFitInfo[SST_IDX] = containsConstant ? 123 (sumysq - sumy * sumy / nobs) : sumysq; 124 } 125 126 this.globalFitInfo[SSE_IDX] = sse; 127 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / (nobs - rank); 128 this.globalFitInfo[RSQ_IDX] = 1.0 - this.globalFitInfo[SSE_IDX] / this.globalFitInfo[SST_IDX]; 129 130 if (!containsConstant) { 131 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (1.0 - this.globalFitInfo[RSQ_IDX]) * (((double) nobs) / (nobs - rank)); 132 } else { 133 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / (globalFitInfo[SST_IDX] * (nobs - rank)); 134 } 135 } 136 137 /** 138 * <p>Returns the parameter estimate for the regressor at the given index.</p> 139 * 140 * <p>A redundant regressor will have its redundancy flag set, as well as 141 * a parameters estimated equal to {@code Double.NaN}</p> 142 * 143 * @param index Index. 144 * @return the parameters estimated for regressor at index. 145 * @throws MathIllegalArgumentException if {@code index} is not in the interval 146 * {@code [0, number of parameters)}. 147 */ 148 public double getParameterEstimate(int index) throws MathIllegalArgumentException { 149 if (parameters == null) { 150 return Double.NaN; 151 } 152 MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1); 153 return this.parameters[index]; 154 } 155 156 /** 157 * <p>Returns a copy of the regression parameters estimates.</p> 158 * 159 * <p>The parameter estimates are returned in the natural order of the data.</p> 160 * 161 * <p>A redundant regressor will have its redundancy flag set, as will 162 * a parameter estimate equal to {@code Double.NaN}.</p> 163 * 164 * @return array of parameter estimates, null if no estimation occurred 165 */ 166 public double[] getParameterEstimates() { 167 if (this.parameters == null) { 168 return null; // NOPMD 169 } 170 return parameters.clone(); 171 } 172 173 /** 174 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 175 * error of the parameter estimate at index</a>, 176 * usually denoted s(b<sub>index</sub>). 177 * 178 * @param index Index. 179 * @return the standard errors associated with parameters estimated at index. 180 * @throws MathIllegalArgumentException if {@code index} is not in the interval 181 * {@code [0, number of parameters)}. 182 */ 183 public double getStdErrorOfEstimate(int index) throws MathIllegalArgumentException { 184 if (parameters == null) { 185 return Double.NaN; 186 } 187 MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1); 188 double var = this.getVcvElement(index, index); 189 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 190 return FastMath.sqrt(var); 191 } 192 return Double.NaN; 193 } 194 195 /** 196 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 197 * error of the parameter estimates</a>, 198 * usually denoted s(b<sub>i</sub>).</p> 199 * 200 * <p>If there are problems with an ill conditioned design matrix then the regressor 201 * which is redundant will be assigned <code>Double.NaN</code>. </p> 202 * 203 * @return an array standard errors associated with parameters estimates, 204 * null if no estimation occurred 205 */ 206 public double[] getStdErrorOfEstimates() { 207 if (parameters == null) { 208 return null; // NOPMD 209 } 210 double[] se = new double[this.parameters.length]; 211 for (int i = 0; i < this.parameters.length; i++) { 212 double var = this.getVcvElement(i, i); 213 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 214 se[i] = FastMath.sqrt(var); 215 continue; 216 } 217 se[i] = Double.NaN; 218 } 219 return se; 220 } 221 222 /** 223 * <p>Returns the covariance between regression parameters i and j.</p> 224 * 225 * <p>If there are problems with an ill conditioned design matrix then the covariance 226 * which involves redundant columns will be assigned {@code Double.NaN}. </p> 227 * 228 * @param i {@code i}th regression parameter. 229 * @param j {@code j}th regression parameter. 230 * @return the covariance of the parameter estimates. 231 * @throws MathIllegalArgumentException if {@code i} or {@code j} is not in the 232 * interval {@code [0, number of parameters)}. 233 */ 234 public double getCovarianceOfParameters(int i, int j) throws MathIllegalArgumentException { 235 if (parameters == null) { 236 return Double.NaN; 237 } 238 MathUtils.checkRangeInclusive(i, 0, this.parameters.length - 1); 239 MathUtils.checkRangeInclusive(j, 0, this.parameters.length - 1); 240 return this.getVcvElement(i, j); 241 } 242 243 /** 244 * <p>Returns the number of parameters estimated in the model.</p> 245 * 246 * <p>This is the maximum number of regressors, some techniques may drop 247 * redundant parameters</p> 248 * 249 * @return number of regressors, -1 if not estimated 250 */ 251 public int getNumberOfParameters() { 252 if (this.parameters == null) { 253 return -1; 254 } 255 return this.parameters.length; 256 } 257 258 /** 259 * Returns the number of observations added to the regression model. 260 * 261 * @return Number of observations, -1 if an error condition prevents estimation 262 */ 263 public long getN() { 264 return this.nobs; 265 } 266 267 /** 268 * <p>Returns the sum of squared deviations of the y values about their mean.</p> 269 * 270 * <p>This is defined as SSTO 271 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p> 272 * 273 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p> 274 * 275 * @return sum of squared deviations of y values 276 */ 277 public double getTotalSumSquares() { 278 return this.globalFitInfo[SST_IDX]; 279 } 280 281 /** 282 * <p>Returns the sum of squared deviations of the predicted y values about 283 * their mean (which equals the mean of y).</p> 284 * 285 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM 286 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p> 287 * 288 * <p><strong>Preconditions</strong>:</p><ul> 289 * <li>At least two observations (with at least two different x values) 290 * must have been added before invoking this method. If this method is 291 * invoked before a model can be estimated, <code>Double.NaN</code> is 292 * returned. 293 * </li></ul> 294 * 295 * @return sum of squared deviations of predicted y values 296 */ 297 public double getRegressionSumSquares() { 298 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX]; 299 } 300 301 /** 302 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm"> 303 * sum of squared errors</a> (SSE) associated with the regression 304 * model.</p> 305 * 306 * <p>The return value is constrained to be non-negative - i.e., if due to 307 * rounding errors the computational formula returns a negative result, 308 * 0 is returned.</p> 309 * 310 * <p><strong>Preconditions</strong>:</p> 311 * <ul> 312 * <li>numberOfParameters data pairs 313 * must have been added before invoking this method. If this method is 314 * invoked before a model can be estimated, <code>Double,NaN</code> is 315 * returned. 316 * </li></ul> 317 * 318 * @return sum of squared errors associated with the regression model 319 */ 320 public double getErrorSumSquares() { 321 return this.globalFitInfo[ SSE_IDX]; 322 } 323 324 /** 325 * <p>Returns the sum of squared errors divided by the degrees of freedom, 326 * usually abbreviated MSE.</p> 327 * 328 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model, 329 * or if there is no variation in <code>x</code>, this returns 330 * <code>Double.NaN</code>.</p> 331 * 332 * @return sum of squared deviations of y values 333 */ 334 public double getMeanSquareError() { 335 return this.globalFitInfo[ MSE_IDX]; 336 } 337 338 /** 339 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm"> 340 * coefficient of multiple determination</a>, 341 * usually denoted r-square.</p> 342 * 343 * <p><strong>Preconditions</strong>:</p> 344 * <ul> 345 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values) 346 * must have been added before invoking this method. If this method is 347 * invoked before a model can be estimated, {@code Double,NaN} is 348 * returned. 349 * </li></ul> 350 * 351 * @return r-square, a double in the interval [0, 1] 352 */ 353 public double getRSquared() { 354 return this.globalFitInfo[ RSQ_IDX]; 355 } 356 357 /** 358 * <p>Returns the adjusted R-squared statistic, defined by the formula 359 * \( 360 * R_\mathrm{adj}^2 = 1 - \frac{\mathrm{SSR} (n - 1)}{\mathrm{SSTO} (n - p)} 361 * \) 362 * where SSR is the sum of squared residuals}, 363 * SSTO is the total sum of squares}, n is the number 364 * of observations and p is the number of parameters estimated (including the intercept).</p> 365 * 366 * <p>If the regression is estimated without an intercept term, what is returned is</p><pre> 367 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code> 368 * </pre> 369 * 370 * @return adjusted R-Squared statistic 371 */ 372 public double getAdjustedRSquared() { 373 return this.globalFitInfo[ ADJRSQ_IDX]; 374 } 375 376 /** 377 * Returns true if the regression model has been computed including an intercept. 378 * In this case, the coefficient of the intercept is the first element of the 379 * {@link #getParameterEstimates() parameter estimates}. 380 * @return true if the model has an intercept term 381 */ 382 public boolean hasIntercept() { 383 return this.containsConstant; 384 } 385 386 /** 387 * Gets the i-jth element of the variance-covariance matrix. 388 * 389 * @param i first variable index 390 * @param j second variable index 391 * @return the requested variance-covariance matrix entry 392 */ 393 private double getVcvElement(int i, int j) { 394 if (this.isSymmetricVCD) { 395 if (this.varCovData.length > 1) { 396 //could be stored in upper or lower triangular 397 if (i == j) { 398 return varCovData[i][i]; 399 } else if (i >= varCovData[j].length) { 400 return varCovData[i][j]; 401 } else { 402 return varCovData[j][i]; 403 } 404 } else {//could be in single array 405 if (i > j) { 406 return varCovData[0][(i + 1) * i / 2 + j]; 407 } else { 408 return varCovData[0][(j + 1) * j / 2 + i]; 409 } 410 } 411 } else { 412 return this.varCovData[i][j]; 413 } 414 } 415 }