1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.regression;
23
24 import java.io.Serializable;
25 import java.util.Arrays;
26
27 import org.hipparchus.exception.MathIllegalArgumentException;
28 import org.hipparchus.util.FastMath;
29 import org.hipparchus.util.MathUtils;
30
31 /**
32 * Results of a Multiple Linear Regression model fit.
33 *
34 */
35 public class RegressionResults implements Serializable {
36
37 /** INDEX of Sum of Squared Errors */
38 private static final int SSE_IDX = 0;
39 /** INDEX of Sum of Squares of Model */
40 private static final int SST_IDX = 1;
41 /** INDEX of R-Squared of regression */
42 private static final int RSQ_IDX = 2;
43 /** INDEX of Mean Squared Error */
44 private static final int MSE_IDX = 3;
45 /** INDEX of Adjusted R Squared */
46 private static final int ADJRSQ_IDX = 4;
47 /** UID */
48 private static final long serialVersionUID = 1l;
49 /** regression slope parameters */
50 private final double[] parameters;
51 /** variance covariance matrix of parameters */
52 private final double[][] varCovData;
53 /** boolean flag for variance covariance matrix in symm compressed storage */
54 private final boolean isSymmetricVCD;
55 /** rank of the solution */
56 @SuppressWarnings("unused")
57 private final int rank;
58 /** number of observations on which results are based */
59 private final long nobs;
60 /** boolean flag indicator of whether a constant was included*/
61 private final boolean containsConstant;
62 /** array storing global results, SSE, MSE, RSQ, adjRSQ */
63 private final double[] globalFitInfo;
64
65 /**
66 * Set the default constructor to private access
67 * to prevent inadvertent instantiation
68 */
69 @SuppressWarnings("unused")
70 private RegressionResults() {
71 this.parameters = null;
72 this.varCovData = null;
73 this.rank = -1;
74 this.nobs = -1;
75 this.containsConstant = false;
76 this.isSymmetricVCD = false;
77 this.globalFitInfo = null;
78 }
79
80 /**
81 * Constructor for Regression Results.
82 *
83 * @param parameters a double array with the regression slope estimates
84 * @param varcov the variance covariance matrix, stored either in a square matrix
85 * or as a compressed
86 * @param isSymmetricCompressed a flag which denotes that the variance covariance
87 * matrix is in symmetric compressed format
88 * @param nobs the number of observations of the regression estimation
89 * @param rank the number of independent variables in the regression
90 * @param sumy the sum of the independent variable
91 * @param sumysq the sum of the squared independent variable
92 * @param sse sum of squared errors
93 * @param containsConstant true model has constant, false model does not have constant
94 * @param copyData if true a deep copy of all input data is made, if false only references
95 * are copied and the RegressionResults become mutable
96 */
97 public RegressionResults(
98 final double[] parameters, final double[][] varcov,
99 final boolean isSymmetricCompressed,
100 final long nobs, final int rank,
101 final double sumy, final double sumysq, final double sse,
102 final boolean containsConstant,
103 final boolean copyData) {
104 if (copyData) {
105 this.parameters = parameters.clone();
106 this.varCovData = new double[varcov.length][];
107 for (int i = 0; i < varcov.length; i++) {
108 this.varCovData[i] = varcov[i].clone();
109 }
110 } else {
111 this.parameters = parameters; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter
112 this.varCovData = varcov; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter
113 }
114 this.isSymmetricVCD = isSymmetricCompressed;
115 this.nobs = nobs;
116 this.rank = rank;
117 this.containsConstant = containsConstant;
118 this.globalFitInfo = new double[5];
119 Arrays.fill(this.globalFitInfo, Double.NaN);
120
121 if (rank > 0) {
122 this.globalFitInfo[SST_IDX] = containsConstant ?
123 (sumysq - sumy * sumy / nobs) : sumysq;
124 }
125
126 this.globalFitInfo[SSE_IDX] = sse;
127 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / (nobs - rank);
128 this.globalFitInfo[RSQ_IDX] = 1.0 - this.globalFitInfo[SSE_IDX] / this.globalFitInfo[SST_IDX];
129
130 if (!containsConstant) {
131 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (1.0 - this.globalFitInfo[RSQ_IDX]) * (((double) nobs) / (nobs - rank));
132 } else {
133 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / (globalFitInfo[SST_IDX] * (nobs - rank));
134 }
135 }
136
137 /**
138 * <p>Returns the parameter estimate for the regressor at the given index.</p>
139 *
140 * <p>A redundant regressor will have its redundancy flag set, as well as
141 * a parameters estimated equal to {@code Double.NaN}</p>
142 *
143 * @param index Index.
144 * @return the parameters estimated for regressor at index.
145 * @throws MathIllegalArgumentException if {@code index} is not in the interval
146 * {@code [0, number of parameters)}.
147 */
148 public double getParameterEstimate(int index) throws MathIllegalArgumentException {
149 if (parameters == null) {
150 return Double.NaN;
151 }
152 MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1);
153 return this.parameters[index];
154 }
155
156 /**
157 * <p>Returns a copy of the regression parameters estimates.</p>
158 *
159 * <p>The parameter estimates are returned in the natural order of the data.</p>
160 *
161 * <p>A redundant regressor will have its redundancy flag set, as will
162 * a parameter estimate equal to {@code Double.NaN}.</p>
163 *
164 * @return array of parameter estimates, null if no estimation occurred
165 */
166 public double[] getParameterEstimates() {
167 if (this.parameters == null) {
168 return null; // NOPMD
169 }
170 return parameters.clone();
171 }
172
173 /**
174 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
175 * error of the parameter estimate at index</a>,
176 * usually denoted s(b<sub>index</sub>).
177 *
178 * @param index Index.
179 * @return the standard errors associated with parameters estimated at index.
180 * @throws MathIllegalArgumentException if {@code index} is not in the interval
181 * {@code [0, number of parameters)}.
182 */
183 public double getStdErrorOfEstimate(int index) throws MathIllegalArgumentException {
184 if (parameters == null) {
185 return Double.NaN;
186 }
187 MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1);
188 double var = this.getVcvElement(index, index);
189 if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
190 return FastMath.sqrt(var);
191 }
192 return Double.NaN;
193 }
194
195 /**
196 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
197 * error of the parameter estimates</a>,
198 * usually denoted s(b<sub>i</sub>).</p>
199 *
200 * <p>If there are problems with an ill conditioned design matrix then the regressor
201 * which is redundant will be assigned <code>Double.NaN</code>. </p>
202 *
203 * @return an array standard errors associated with parameters estimates,
204 * null if no estimation occurred
205 */
206 public double[] getStdErrorOfEstimates() {
207 if (parameters == null) {
208 return null; // NOPMD
209 }
210 double[] se = new double[this.parameters.length];
211 for (int i = 0; i < this.parameters.length; i++) {
212 double var = this.getVcvElement(i, i);
213 if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
214 se[i] = FastMath.sqrt(var);
215 continue;
216 }
217 se[i] = Double.NaN;
218 }
219 return se;
220 }
221
222 /**
223 * <p>Returns the covariance between regression parameters i and j.</p>
224 *
225 * <p>If there are problems with an ill conditioned design matrix then the covariance
226 * which involves redundant columns will be assigned {@code Double.NaN}. </p>
227 *
228 * @param i {@code i}th regression parameter.
229 * @param j {@code j}th regression parameter.
230 * @return the covariance of the parameter estimates.
231 * @throws MathIllegalArgumentException if {@code i} or {@code j} is not in the
232 * interval {@code [0, number of parameters)}.
233 */
234 public double getCovarianceOfParameters(int i, int j) throws MathIllegalArgumentException {
235 if (parameters == null) {
236 return Double.NaN;
237 }
238 MathUtils.checkRangeInclusive(i, 0, this.parameters.length - 1);
239 MathUtils.checkRangeInclusive(j, 0, this.parameters.length - 1);
240 return this.getVcvElement(i, j);
241 }
242
243 /**
244 * <p>Returns the number of parameters estimated in the model.</p>
245 *
246 * <p>This is the maximum number of regressors, some techniques may drop
247 * redundant parameters</p>
248 *
249 * @return number of regressors, -1 if not estimated
250 */
251 public int getNumberOfParameters() {
252 if (this.parameters == null) {
253 return -1;
254 }
255 return this.parameters.length;
256 }
257
258 /**
259 * Returns the number of observations added to the regression model.
260 *
261 * @return Number of observations, -1 if an error condition prevents estimation
262 */
263 public long getN() {
264 return this.nobs;
265 }
266
267 /**
268 * <p>Returns the sum of squared deviations of the y values about their mean.</p>
269 *
270 * <p>This is defined as SSTO
271 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
272 *
273 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
274 *
275 * @return sum of squared deviations of y values
276 */
277 public double getTotalSumSquares() {
278 return this.globalFitInfo[SST_IDX];
279 }
280
281 /**
282 * <p>Returns the sum of squared deviations of the predicted y values about
283 * their mean (which equals the mean of y).</p>
284 *
285 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM
286 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
287 *
288 * <p><strong>Preconditions</strong>:</p><ul>
289 * <li>At least two observations (with at least two different x values)
290 * must have been added before invoking this method. If this method is
291 * invoked before a model can be estimated, <code>Double.NaN</code> is
292 * returned.
293 * </li></ul>
294 *
295 * @return sum of squared deviations of predicted y values
296 */
297 public double getRegressionSumSquares() {
298 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
299 }
300
301 /**
302 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
303 * sum of squared errors</a> (SSE) associated with the regression
304 * model.</p>
305 *
306 * <p>The return value is constrained to be non-negative - i.e., if due to
307 * rounding errors the computational formula returns a negative result,
308 * 0 is returned.</p>
309 *
310 * <p><strong>Preconditions</strong>:</p>
311 * <ul>
312 * <li>numberOfParameters data pairs
313 * must have been added before invoking this method. If this method is
314 * invoked before a model can be estimated, <code>Double,NaN</code> is
315 * returned.
316 * </li></ul>
317 *
318 * @return sum of squared errors associated with the regression model
319 */
320 public double getErrorSumSquares() {
321 return this.globalFitInfo[ SSE_IDX];
322 }
323
324 /**
325 * <p>Returns the sum of squared errors divided by the degrees of freedom,
326 * usually abbreviated MSE.</p>
327 *
328 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
329 * or if there is no variation in <code>x</code>, this returns
330 * <code>Double.NaN</code>.</p>
331 *
332 * @return sum of squared deviations of y values
333 */
334 public double getMeanSquareError() {
335 return this.globalFitInfo[ MSE_IDX];
336 }
337
338 /**
339 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
340 * coefficient of multiple determination</a>,
341 * usually denoted r-square.</p>
342 *
343 * <p><strong>Preconditions</strong>:</p>
344 * <ul>
345 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
346 * must have been added before invoking this method. If this method is
347 * invoked before a model can be estimated, {@code Double,NaN} is
348 * returned.
349 * </li></ul>
350 *
351 * @return r-square, a double in the interval [0, 1]
352 */
353 public double getRSquared() {
354 return this.globalFitInfo[ RSQ_IDX];
355 }
356
357 /**
358 * <p>Returns the adjusted R-squared statistic, defined by the formula
359 * \(
360 * R_\mathrm{adj}^2 = 1 - \frac{\mathrm{SSR} (n - 1)}{\mathrm{SSTO} (n - p)}
361 * \)
362 * where SSR is the sum of squared residuals},
363 * SSTO is the total sum of squares}, n is the number
364 * of observations and p is the number of parameters estimated (including the intercept).</p>
365 *
366 * <p>If the regression is estimated without an intercept term, what is returned is</p><pre>
367 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
368 * </pre>
369 *
370 * @return adjusted R-Squared statistic
371 */
372 public double getAdjustedRSquared() {
373 return this.globalFitInfo[ ADJRSQ_IDX];
374 }
375
376 /**
377 * Returns true if the regression model has been computed including an intercept.
378 * In this case, the coefficient of the intercept is the first element of the
379 * {@link #getParameterEstimates() parameter estimates}.
380 * @return true if the model has an intercept term
381 */
382 public boolean hasIntercept() {
383 return this.containsConstant;
384 }
385
386 /**
387 * Gets the i-jth element of the variance-covariance matrix.
388 *
389 * @param i first variable index
390 * @param j second variable index
391 * @return the requested variance-covariance matrix entry
392 */
393 private double getVcvElement(int i, int j) {
394 if (this.isSymmetricVCD) {
395 if (this.varCovData.length > 1) {
396 //could be stored in upper or lower triangular
397 if (i == j) {
398 return varCovData[i][i];
399 } else if (i >= varCovData[j].length) {
400 return varCovData[i][j];
401 } else {
402 return varCovData[j][i];
403 }
404 } else {//could be in single array
405 if (i > j) {
406 return varCovData[0][(i + 1) * i / 2 + j];
407 } else {
408 return varCovData[0][(j + 1) * j / 2 + i];
409 }
410 }
411 } else {
412 return this.varCovData[i][j];
413 }
414 }
415 }