1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.regression; 23 24 import java.io.Serializable; 25 import java.util.Arrays; 26 27 import org.hipparchus.exception.MathIllegalArgumentException; 28 import org.hipparchus.util.FastMath; 29 import org.hipparchus.util.MathUtils; 30 31 /** 32 * Results of a Multiple Linear Regression model fit. 33 * 34 */ 35 public class RegressionResults implements Serializable { 36 37 /** INDEX of Sum of Squared Errors */ 38 private static final int SSE_IDX = 0; 39 /** INDEX of Sum of Squares of Model */ 40 private static final int SST_IDX = 1; 41 /** INDEX of R-Squared of regression */ 42 private static final int RSQ_IDX = 2; 43 /** INDEX of Mean Squared Error */ 44 private static final int MSE_IDX = 3; 45 /** INDEX of Adjusted R Squared */ 46 private static final int ADJRSQ_IDX = 4; 47 /** UID */ 48 private static final long serialVersionUID = 1l; 49 /** regression slope parameters */ 50 private final double[] parameters; 51 /** variance covariance matrix of parameters */ 52 private final double[][] varCovData; 53 /** boolean flag for variance covariance matrix in symm compressed storage */ 54 private final boolean isSymmetricVCD; 55 /** rank of the solution */ 56 @SuppressWarnings("unused") 57 private final int rank; 58 /** number of observations on which results are based */ 59 private final long nobs; 60 /** boolean flag indicator of whether a constant was included*/ 61 private final boolean containsConstant; 62 /** array storing global results, SSE, MSE, RSQ, adjRSQ */ 63 private final double[] globalFitInfo; 64 65 /** 66 * Set the default constructor to private access 67 * to prevent inadvertent instantiation 68 */ 69 @SuppressWarnings("unused") 70 private RegressionResults() { 71 this.parameters = null; 72 this.varCovData = null; 73 this.rank = -1; 74 this.nobs = -1; 75 this.containsConstant = false; 76 this.isSymmetricVCD = false; 77 this.globalFitInfo = null; 78 } 79 80 /** 81 * Constructor for Regression Results. 82 * 83 * @param parameters a double array with the regression slope estimates 84 * @param varcov the variance covariance matrix, stored either in a square matrix 85 * or as a compressed 86 * @param isSymmetricCompressed a flag which denotes that the variance covariance 87 * matrix is in symmetric compressed format 88 * @param nobs the number of observations of the regression estimation 89 * @param rank the number of independent variables in the regression 90 * @param sumy the sum of the independent variable 91 * @param sumysq the sum of the squared independent variable 92 * @param sse sum of squared errors 93 * @param containsConstant true model has constant, false model does not have constant 94 * @param copyData if true a deep copy of all input data is made, if false only references 95 * are copied and the RegressionResults become mutable 96 */ 97 public RegressionResults( 98 final double[] parameters, final double[][] varcov, 99 final boolean isSymmetricCompressed, 100 final long nobs, final int rank, 101 final double sumy, final double sumysq, final double sse, 102 final boolean containsConstant, 103 final boolean copyData) { 104 if (copyData) { 105 this.parameters = parameters.clone(); 106 this.varCovData = new double[varcov.length][]; 107 for (int i = 0; i < varcov.length; i++) { 108 this.varCovData[i] = varcov[i].clone(); 109 } 110 } else { 111 this.parameters = parameters; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter 112 this.varCovData = varcov; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter 113 } 114 this.isSymmetricVCD = isSymmetricCompressed; 115 this.nobs = nobs; 116 this.rank = rank; 117 this.containsConstant = containsConstant; 118 this.globalFitInfo = new double[5]; 119 Arrays.fill(this.globalFitInfo, Double.NaN); 120 121 if (rank > 0) { 122 this.globalFitInfo[SST_IDX] = containsConstant ? 123 (sumysq - sumy * sumy / nobs) : sumysq; 124 } 125 126 this.globalFitInfo[SSE_IDX] = sse; 127 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / 128 (nobs - rank); 129 this.globalFitInfo[RSQ_IDX] = 1.0 - 130 this.globalFitInfo[SSE_IDX] / 131 this.globalFitInfo[SST_IDX]; 132 133 if (!containsConstant) { 134 this.globalFitInfo[ADJRSQ_IDX] = 1.0- 135 (1.0 - this.globalFitInfo[RSQ_IDX]) * 136 ( (double) nobs / ( (double) (nobs - rank))); 137 } else { 138 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / 139 (globalFitInfo[SST_IDX] * (nobs - rank)); 140 } 141 } 142 143 /** 144 * <p>Returns the parameter estimate for the regressor at the given index.</p> 145 * 146 * <p>A redundant regressor will have its redundancy flag set, as well as 147 * a parameters estimated equal to {@code Double.NaN}</p> 148 * 149 * @param index Index. 150 * @return the parameters estimated for regressor at index. 151 * @throws MathIllegalArgumentException if {@code index} is not in the interval 152 * {@code [0, number of parameters)}. 153 */ 154 public double getParameterEstimate(int index) throws MathIllegalArgumentException { 155 if (parameters == null) { 156 return Double.NaN; 157 } 158 MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1); 159 return this.parameters[index]; 160 } 161 162 /** 163 * <p>Returns a copy of the regression parameters estimates.</p> 164 * 165 * <p>The parameter estimates are returned in the natural order of the data.</p> 166 * 167 * <p>A redundant regressor will have its redundancy flag set, as will 168 * a parameter estimate equal to {@code Double.NaN}.</p> 169 * 170 * @return array of parameter estimates, null if no estimation occurred 171 */ 172 public double[] getParameterEstimates() { 173 if (this.parameters == null) { 174 return null; // NOPMD 175 } 176 return parameters.clone(); 177 } 178 179 /** 180 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 181 * error of the parameter estimate at index</a>, 182 * usually denoted s(b<sub>index</sub>). 183 * 184 * @param index Index. 185 * @return the standard errors associated with parameters estimated at index. 186 * @throws MathIllegalArgumentException if {@code index} is not in the interval 187 * {@code [0, number of parameters)}. 188 */ 189 public double getStdErrorOfEstimate(int index) throws MathIllegalArgumentException { 190 if (parameters == null) { 191 return Double.NaN; 192 } 193 MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1); 194 double var = this.getVcvElement(index, index); 195 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 196 return FastMath.sqrt(var); 197 } 198 return Double.NaN; 199 } 200 201 /** 202 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard 203 * error of the parameter estimates</a>, 204 * usually denoted s(b<sub>i</sub>).</p> 205 * 206 * <p>If there are problems with an ill conditioned design matrix then the regressor 207 * which is redundant will be assigned <code>Double.NaN</code>. </p> 208 * 209 * @return an array standard errors associated with parameters estimates, 210 * null if no estimation occurred 211 */ 212 public double[] getStdErrorOfEstimates() { 213 if (parameters == null) { 214 return null; // NOPMD 215 } 216 double[] se = new double[this.parameters.length]; 217 for (int i = 0; i < this.parameters.length; i++) { 218 double var = this.getVcvElement(i, i); 219 if (!Double.isNaN(var) && var > Double.MIN_VALUE) { 220 se[i] = FastMath.sqrt(var); 221 continue; 222 } 223 se[i] = Double.NaN; 224 } 225 return se; 226 } 227 228 /** 229 * <p>Returns the covariance between regression parameters i and j.</p> 230 * 231 * <p>If there are problems with an ill conditioned design matrix then the covariance 232 * which involves redundant columns will be assigned {@code Double.NaN}. </p> 233 * 234 * @param i {@code i}th regression parameter. 235 * @param j {@code j}th regression parameter. 236 * @return the covariance of the parameter estimates. 237 * @throws MathIllegalArgumentException if {@code i} or {@code j} is not in the 238 * interval {@code [0, number of parameters)}. 239 */ 240 public double getCovarianceOfParameters(int i, int j) throws MathIllegalArgumentException { 241 if (parameters == null) { 242 return Double.NaN; 243 } 244 MathUtils.checkRangeInclusive(i, 0, this.parameters.length - 1); 245 MathUtils.checkRangeInclusive(j, 0, this.parameters.length - 1); 246 return this.getVcvElement(i, j); 247 } 248 249 /** 250 * <p>Returns the number of parameters estimated in the model.</p> 251 * 252 * <p>This is the maximum number of regressors, some techniques may drop 253 * redundant parameters</p> 254 * 255 * @return number of regressors, -1 if not estimated 256 */ 257 public int getNumberOfParameters() { 258 if (this.parameters == null) { 259 return -1; 260 } 261 return this.parameters.length; 262 } 263 264 /** 265 * Returns the number of observations added to the regression model. 266 * 267 * @return Number of observations, -1 if an error condition prevents estimation 268 */ 269 public long getN() { 270 return this.nobs; 271 } 272 273 /** 274 * <p>Returns the sum of squared deviations of the y values about their mean.</p> 275 * 276 * <p>This is defined as SSTO 277 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p> 278 * 279 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p> 280 * 281 * @return sum of squared deviations of y values 282 */ 283 public double getTotalSumSquares() { 284 return this.globalFitInfo[SST_IDX]; 285 } 286 287 /** 288 * <p>Returns the sum of squared deviations of the predicted y values about 289 * their mean (which equals the mean of y).</p> 290 * 291 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM 292 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p> 293 * 294 * <p><strong>Preconditions</strong>:</p><ul> 295 * <li>At least two observations (with at least two different x values) 296 * must have been added before invoking this method. If this method is 297 * invoked before a model can be estimated, <code>Double.NaN</code> is 298 * returned. 299 * </li></ul> 300 * 301 * @return sum of squared deviations of predicted y values 302 */ 303 public double getRegressionSumSquares() { 304 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX]; 305 } 306 307 /** 308 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm"> 309 * sum of squared errors</a> (SSE) associated with the regression 310 * model.</p> 311 * 312 * <p>The return value is constrained to be non-negative - i.e., if due to 313 * rounding errors the computational formula returns a negative result, 314 * 0 is returned.</p> 315 * 316 * <p><strong>Preconditions</strong>:</p> 317 * <ul> 318 * <li>numberOfParameters data pairs 319 * must have been added before invoking this method. If this method is 320 * invoked before a model can be estimated, <code>Double,NaN</code> is 321 * returned. 322 * </li></ul> 323 * 324 * @return sum of squared errors associated with the regression model 325 */ 326 public double getErrorSumSquares() { 327 return this.globalFitInfo[ SSE_IDX]; 328 } 329 330 /** 331 * <p>Returns the sum of squared errors divided by the degrees of freedom, 332 * usually abbreviated MSE.</p> 333 * 334 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model, 335 * or if there is no variation in <code>x</code>, this returns 336 * <code>Double.NaN</code>.</p> 337 * 338 * @return sum of squared deviations of y values 339 */ 340 public double getMeanSquareError() { 341 return this.globalFitInfo[ MSE_IDX]; 342 } 343 344 /** 345 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm"> 346 * coefficient of multiple determination</a>, 347 * usually denoted r-square.</p> 348 * 349 * <p><strong>Preconditions</strong>:</p> 350 * <ul> 351 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values) 352 * must have been added before invoking this method. If this method is 353 * invoked before a model can be estimated, {@code Double,NaN} is 354 * returned. 355 * </li></ul> 356 * 357 * @return r-square, a double in the interval [0, 1] 358 */ 359 public double getRSquared() { 360 return this.globalFitInfo[ RSQ_IDX]; 361 } 362 363 /** 364 * <p>Returns the adjusted R-squared statistic, defined by the formula 365 * \( 366 * R_\mathrm{adj}^2 = 1 - \frac{\mathrm{SSR} (n - 1)}{\mathrm{SSTO} (n - p)} 367 * \) 368 * where SSR is the sum of squared residuals}, 369 * SSTO is the total sum of squares}, n is the number 370 * of observations and p is the number of parameters estimated (including the intercept).</p> 371 * 372 * <p>If the regression is estimated without an intercept term, what is returned is</p><pre> 373 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code> 374 * </pre> 375 * 376 * @return adjusted R-Squared statistic 377 */ 378 public double getAdjustedRSquared() { 379 return this.globalFitInfo[ ADJRSQ_IDX]; 380 } 381 382 /** 383 * Returns true if the regression model has been computed including an intercept. 384 * In this case, the coefficient of the intercept is the first element of the 385 * {@link #getParameterEstimates() parameter estimates}. 386 * @return true if the model has an intercept term 387 */ 388 public boolean hasIntercept() { 389 return this.containsConstant; 390 } 391 392 /** 393 * Gets the i-jth element of the variance-covariance matrix. 394 * 395 * @param i first variable index 396 * @param j second variable index 397 * @return the requested variance-covariance matrix entry 398 */ 399 private double getVcvElement(int i, int j) { 400 if (this.isSymmetricVCD) { 401 if (this.varCovData.length > 1) { 402 //could be stored in upper or lower triangular 403 if (i == j) { 404 return varCovData[i][i]; 405 } else if (i >= varCovData[j].length) { 406 return varCovData[i][j]; 407 } else { 408 return varCovData[j][i]; 409 } 410 } else {//could be in single array 411 if (i > j) { 412 return varCovData[0][(i + 1) * i / 2 + j]; 413 } else { 414 return varCovData[0][(j + 1) * j / 2 + i]; 415 } 416 } 417 } else { 418 return this.varCovData[i][j]; 419 } 420 } 421 }