View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import java.io.Serializable;
25  import java.util.Arrays;
26  
27  import org.hipparchus.exception.MathIllegalArgumentException;
28  import org.hipparchus.util.FastMath;
29  import org.hipparchus.util.MathUtils;
30  
31  /**
32   * Results of a Multiple Linear Regression model fit.
33   *
34   */
35  public class RegressionResults implements Serializable {
36  
37      /** INDEX of Sum of Squared Errors */
38      private static final int SSE_IDX = 0;
39      /** INDEX of Sum of Squares of Model */
40      private static final int SST_IDX = 1;
41      /** INDEX of R-Squared of regression */
42      private static final int RSQ_IDX = 2;
43      /** INDEX of Mean Squared Error */
44      private static final int MSE_IDX = 3;
45      /** INDEX of Adjusted R Squared */
46      private static final int ADJRSQ_IDX = 4;
47      /** UID */
48      private static final long serialVersionUID = 1l;
49      /** regression slope parameters */
50      private final double[] parameters;
51      /** variance covariance matrix of parameters */
52      private final double[][] varCovData;
53      /** boolean flag for variance covariance matrix in symm compressed storage */
54      private final boolean isSymmetricVCD;
55      /** rank of the solution */
56      @SuppressWarnings("unused")
57      private final int rank;
58      /** number of observations on which results are based */
59      private final long nobs;
60      /** boolean flag indicator of whether a constant was included*/
61      private final boolean containsConstant;
62      /** array storing global results, SSE, MSE, RSQ, adjRSQ */
63      private final double[] globalFitInfo;
64  
65      /**
66       *  Set the default constructor to private access
67       *  to prevent inadvertent instantiation
68       */
69      @SuppressWarnings("unused")
70      private RegressionResults() {
71          this.parameters = null;
72          this.varCovData = null;
73          this.rank = -1;
74          this.nobs = -1;
75          this.containsConstant = false;
76          this.isSymmetricVCD = false;
77          this.globalFitInfo = null;
78      }
79  
80      /**
81       * Constructor for Regression Results.
82       *
83       * @param parameters a double array with the regression slope estimates
84       * @param varcov the variance covariance matrix, stored either in a square matrix
85       * or as a compressed
86       * @param isSymmetricCompressed a flag which denotes that the variance covariance
87       * matrix is in symmetric compressed format
88       * @param nobs the number of observations of the regression estimation
89       * @param rank the number of independent variables in the regression
90       * @param sumy the sum of the independent variable
91       * @param sumysq the sum of the squared independent variable
92       * @param sse sum of squared errors
93       * @param containsConstant true model has constant,  false model does not have constant
94       * @param copyData if true a deep copy of all input data is made, if false only references
95       * are copied and the RegressionResults become mutable
96       */
97      public RegressionResults(
98              final double[] parameters, final double[][] varcov,
99              final boolean isSymmetricCompressed,
100             final long nobs, final int rank,
101             final double sumy, final double sumysq, final double sse,
102             final boolean containsConstant,
103             final boolean copyData) {
104         if (copyData) {
105             this.parameters = parameters.clone();
106             this.varCovData = new double[varcov.length][];
107             for (int i = 0; i < varcov.length; i++) {
108                 this.varCovData[i] = varcov[i].clone();
109             }
110         } else {
111             this.parameters = parameters; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter
112             this.varCovData = varcov; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter
113         }
114         this.isSymmetricVCD = isSymmetricCompressed;
115         this.nobs = nobs;
116         this.rank = rank;
117         this.containsConstant = containsConstant;
118         this.globalFitInfo = new double[5];
119         Arrays.fill(this.globalFitInfo, Double.NaN);
120 
121         if (rank > 0) {
122             this.globalFitInfo[SST_IDX] = containsConstant ?
123                     (sumysq - sumy * sumy / nobs) : sumysq;
124         }
125 
126         this.globalFitInfo[SSE_IDX] = sse;
127         this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
128                 (nobs - rank);
129         this.globalFitInfo[RSQ_IDX] = 1.0 -
130                 this.globalFitInfo[SSE_IDX] /
131                 this.globalFitInfo[SST_IDX];
132 
133         if (!containsConstant) {
134             this.globalFitInfo[ADJRSQ_IDX] = 1.0-
135                     (1.0 - this.globalFitInfo[RSQ_IDX]) *
136                     ( (double) nobs / ( (double) (nobs - rank)));
137         } else {
138             this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
139                     (globalFitInfo[SST_IDX] * (nobs - rank));
140         }
141     }
142 
143     /**
144      * <p>Returns the parameter estimate for the regressor at the given index.</p>
145      *
146      * <p>A redundant regressor will have its redundancy flag set, as well as
147      *  a parameters estimated equal to {@code Double.NaN}</p>
148      *
149      * @param index Index.
150      * @return the parameters estimated for regressor at index.
151      * @throws MathIllegalArgumentException if {@code index} is not in the interval
152      * {@code [0, number of parameters)}.
153      */
154     public double getParameterEstimate(int index) throws MathIllegalArgumentException {
155         if (parameters == null) {
156             return Double.NaN;
157         }
158         MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1);
159         return this.parameters[index];
160     }
161 
162     /**
163      * <p>Returns a copy of the regression parameters estimates.</p>
164      *
165      * <p>The parameter estimates are returned in the natural order of the data.</p>
166      *
167      * <p>A redundant regressor will have its redundancy flag set, as will
168      *  a parameter estimate equal to {@code Double.NaN}.</p>
169      *
170      * @return array of parameter estimates, null if no estimation occurred
171      */
172     public double[] getParameterEstimates() {
173         if (this.parameters == null) {
174             return null; // NOPMD
175         }
176         return parameters.clone();
177     }
178 
179     /**
180      * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
181      * error of the parameter estimate at index</a>,
182      * usually denoted s(b<sub>index</sub>).
183      *
184      * @param index Index.
185      * @return the standard errors associated with parameters estimated at index.
186      * @throws MathIllegalArgumentException if {@code index} is not in the interval
187      * {@code [0, number of parameters)}.
188      */
189     public double getStdErrorOfEstimate(int index) throws MathIllegalArgumentException {
190         if (parameters == null) {
191             return Double.NaN;
192         }
193         MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1);
194         double var = this.getVcvElement(index, index);
195         if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
196             return FastMath.sqrt(var);
197         }
198         return Double.NaN;
199     }
200 
201     /**
202      * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
203      * error of the parameter estimates</a>,
204      * usually denoted s(b<sub>i</sub>).</p>
205      *
206      * <p>If there are problems with an ill conditioned design matrix then the regressor
207      * which is redundant will be assigned <code>Double.NaN</code>. </p>
208      *
209      * @return an array standard errors associated with parameters estimates,
210      *  null if no estimation occurred
211      */
212     public double[] getStdErrorOfEstimates() {
213         if (parameters == null) {
214             return null; // NOPMD
215         }
216         double[] se = new double[this.parameters.length];
217         for (int i = 0; i < this.parameters.length; i++) {
218             double var = this.getVcvElement(i, i);
219             if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
220                 se[i] = FastMath.sqrt(var);
221                 continue;
222             }
223             se[i] = Double.NaN;
224         }
225         return se;
226     }
227 
228     /**
229      * <p>Returns the covariance between regression parameters i and j.</p>
230      *
231      * <p>If there are problems with an ill conditioned design matrix then the covariance
232      * which involves redundant columns will be assigned {@code Double.NaN}. </p>
233      *
234      * @param i {@code i}th regression parameter.
235      * @param j {@code j}th regression parameter.
236      * @return the covariance of the parameter estimates.
237      * @throws MathIllegalArgumentException if {@code i} or {@code j} is not in the
238      * interval {@code [0, number of parameters)}.
239      */
240     public double getCovarianceOfParameters(int i, int j) throws MathIllegalArgumentException {
241         if (parameters == null) {
242             return Double.NaN;
243         }
244         MathUtils.checkRangeInclusive(i, 0, this.parameters.length - 1);
245         MathUtils.checkRangeInclusive(j, 0, this.parameters.length - 1);
246         return this.getVcvElement(i, j);
247     }
248 
249     /**
250      * <p>Returns the number of parameters estimated in the model.</p>
251      *
252      * <p>This is the maximum number of regressors, some techniques may drop
253      * redundant parameters</p>
254      *
255      * @return number of regressors, -1 if not estimated
256      */
257     public int getNumberOfParameters() {
258         if (this.parameters == null) {
259             return -1;
260         }
261         return this.parameters.length;
262     }
263 
264     /**
265      * Returns the number of observations added to the regression model.
266      *
267      * @return Number of observations, -1 if an error condition prevents estimation
268      */
269     public long getN() {
270         return this.nobs;
271     }
272 
273     /**
274      * <p>Returns the sum of squared deviations of the y values about their mean.</p>
275      *
276      * <p>This is defined as SSTO
277      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
278      *
279      * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
280      *
281      * @return sum of squared deviations of y values
282      */
283     public double getTotalSumSquares() {
284         return this.globalFitInfo[SST_IDX];
285     }
286 
287     /**
288      * <p>Returns the sum of squared deviations of the predicted y values about
289      * their mean (which equals the mean of y).</p>
290      *
291      * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
292      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
293      *
294      * <p><strong>Preconditions</strong>:</p><ul>
295      * <li>At least two observations (with at least two different x values)
296      * must have been added before invoking this method. If this method is
297      * invoked before a model can be estimated, <code>Double.NaN</code> is
298      * returned.
299      * </li></ul>
300      *
301      * @return sum of squared deviations of predicted y values
302      */
303     public double getRegressionSumSquares() {
304         return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
305     }
306 
307     /**
308      * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
309      * sum of squared errors</a> (SSE) associated with the regression
310      * model.</p>
311      *
312      * <p>The return value is constrained to be non-negative - i.e., if due to
313      * rounding errors the computational formula returns a negative result,
314      * 0 is returned.</p>
315      *
316      * <p><strong>Preconditions</strong>:</p>
317      * <ul>
318      * <li>numberOfParameters data pairs
319      * must have been added before invoking this method. If this method is
320      * invoked before a model can be estimated, <code>Double,NaN</code> is
321      * returned.
322      * </li></ul>
323      *
324      * @return sum of squared errors associated with the regression model
325      */
326     public double getErrorSumSquares() {
327         return this.globalFitInfo[ SSE_IDX];
328     }
329 
330     /**
331      * <p>Returns the sum of squared errors divided by the degrees of freedom,
332      * usually abbreviated MSE.</p>
333      *
334      * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
335      * or if there is no variation in <code>x</code>, this returns
336      * <code>Double.NaN</code>.</p>
337      *
338      * @return sum of squared deviations of y values
339      */
340     public double getMeanSquareError() {
341         return this.globalFitInfo[ MSE_IDX];
342     }
343 
344     /**
345      * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
346      * coefficient of multiple determination</a>,
347      * usually denoted r-square.</p>
348      *
349      * <p><strong>Preconditions</strong>:</p>
350      * <ul>
351      * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
352      * must have been added before invoking this method. If this method is
353      * invoked before a model can be estimated, {@code Double,NaN} is
354      * returned.
355      * </li></ul>
356      *
357      * @return r-square, a double in the interval [0, 1]
358      */
359     public double getRSquared() {
360         return this.globalFitInfo[ RSQ_IDX];
361     }
362 
363     /**
364      * <p>Returns the adjusted R-squared statistic, defined by the formula
365      * \(
366      * R_\mathrm{adj}^2 = 1 - \frac{\mathrm{SSR} (n - 1)}{\mathrm{SSTO} (n - p)}
367      * \)
368      * where SSR is the sum of squared residuals},
369      * SSTO is the total sum of squares}, n is the number
370      * of observations and p is the number of parameters estimated (including the intercept).</p>
371      *
372      * <p>If the regression is estimated without an intercept term, what is returned is</p><pre>
373      * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
374      * </pre>
375      *
376      * @return adjusted R-Squared statistic
377      */
378     public double getAdjustedRSquared() {
379         return this.globalFitInfo[ ADJRSQ_IDX];
380     }
381 
382     /**
383      * Returns true if the regression model has been computed including an intercept.
384      * In this case, the coefficient of the intercept is the first element of the
385      * {@link #getParameterEstimates() parameter estimates}.
386      * @return true if the model has an intercept term
387      */
388     public boolean hasIntercept() {
389         return this.containsConstant;
390     }
391 
392     /**
393      * Gets the i-jth element of the variance-covariance matrix.
394      *
395      * @param i first variable index
396      * @param j second variable index
397      * @return the requested variance-covariance matrix entry
398      */
399     private double getVcvElement(int i, int j) {
400         if (this.isSymmetricVCD) {
401             if (this.varCovData.length > 1) {
402                 //could be stored in upper or lower triangular
403                 if (i == j) {
404                     return varCovData[i][i];
405                 } else if (i >= varCovData[j].length) {
406                     return varCovData[i][j];
407                 } else {
408                     return varCovData[j][i];
409                 }
410             } else {//could be in single array
411                 if (i > j) {
412                     return varCovData[0][(i + 1) * i / 2 + j];
413                 } else {
414                     return varCovData[0][(j + 1) * j / 2 + i];
415                 }
416             }
417         } else {
418             return this.varCovData[i][j];
419         }
420     }
421 }