RegressionResults.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.regression;

  22. import java.io.Serializable;
  23. import java.util.Arrays;

  24. import org.hipparchus.exception.MathIllegalArgumentException;
  25. import org.hipparchus.util.FastMath;
  26. import org.hipparchus.util.MathUtils;

  27. /**
  28.  * Results of a Multiple Linear Regression model fit.
  29.  *
  30.  */
  31. public class RegressionResults implements Serializable {

  32.     /** INDEX of Sum of Squared Errors */
  33.     private static final int SSE_IDX = 0;
  34.     /** INDEX of Sum of Squares of Model */
  35.     private static final int SST_IDX = 1;
  36.     /** INDEX of R-Squared of regression */
  37.     private static final int RSQ_IDX = 2;
  38.     /** INDEX of Mean Squared Error */
  39.     private static final int MSE_IDX = 3;
  40.     /** INDEX of Adjusted R Squared */
  41.     private static final int ADJRSQ_IDX = 4;
  42.     /** UID */
  43.     private static final long serialVersionUID = 1l;
  44.     /** regression slope parameters */
  45.     private final double[] parameters;
  46.     /** variance covariance matrix of parameters */
  47.     private final double[][] varCovData;
  48.     /** boolean flag for variance covariance matrix in symm compressed storage */
  49.     private final boolean isSymmetricVCD;
  50.     /** rank of the solution */
  51.     @SuppressWarnings("unused")
  52.     private final int rank;
  53.     /** number of observations on which results are based */
  54.     private final long nobs;
  55.     /** boolean flag indicator of whether a constant was included*/
  56.     private final boolean containsConstant;
  57.     /** array storing global results, SSE, MSE, RSQ, adjRSQ */
  58.     private final double[] globalFitInfo;

  59.     /**
  60.      *  Set the default constructor to private access
  61.      *  to prevent inadvertent instantiation
  62.      */
  63.     @SuppressWarnings("unused")
  64.     private RegressionResults() {
  65.         this.parameters = null;
  66.         this.varCovData = null;
  67.         this.rank = -1;
  68.         this.nobs = -1;
  69.         this.containsConstant = false;
  70.         this.isSymmetricVCD = false;
  71.         this.globalFitInfo = null;
  72.     }

  73.     /**
  74.      * Constructor for Regression Results.
  75.      *
  76.      * @param parameters a double array with the regression slope estimates
  77.      * @param varcov the variance covariance matrix, stored either in a square matrix
  78.      * or as a compressed
  79.      * @param isSymmetricCompressed a flag which denotes that the variance covariance
  80.      * matrix is in symmetric compressed format
  81.      * @param nobs the number of observations of the regression estimation
  82.      * @param rank the number of independent variables in the regression
  83.      * @param sumy the sum of the independent variable
  84.      * @param sumysq the sum of the squared independent variable
  85.      * @param sse sum of squared errors
  86.      * @param containsConstant true model has constant,  false model does not have constant
  87.      * @param copyData if true a deep copy of all input data is made, if false only references
  88.      * are copied and the RegressionResults become mutable
  89.      */
  90.     public RegressionResults(
  91.             final double[] parameters, final double[][] varcov,
  92.             final boolean isSymmetricCompressed,
  93.             final long nobs, final int rank,
  94.             final double sumy, final double sumysq, final double sse,
  95.             final boolean containsConstant,
  96.             final boolean copyData) {
  97.         if (copyData) {
  98.             this.parameters = parameters.clone();
  99.             this.varCovData = new double[varcov.length][];
  100.             for (int i = 0; i < varcov.length; i++) {
  101.                 this.varCovData[i] = varcov[i].clone();
  102.             }
  103.         } else {
  104.             this.parameters = parameters; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter
  105.             this.varCovData = varcov; // NOPMD - storing a reference to the array is controlled by a user-supplied parameter
  106.         }
  107.         this.isSymmetricVCD = isSymmetricCompressed;
  108.         this.nobs = nobs;
  109.         this.rank = rank;
  110.         this.containsConstant = containsConstant;
  111.         this.globalFitInfo = new double[5];
  112.         Arrays.fill(this.globalFitInfo, Double.NaN);

  113.         if (rank > 0) {
  114.             this.globalFitInfo[SST_IDX] = containsConstant ?
  115.                     (sumysq - sumy * sumy / nobs) : sumysq;
  116.         }

  117.         this.globalFitInfo[SSE_IDX] = sse;
  118.         this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] / (nobs - rank);
  119.         this.globalFitInfo[RSQ_IDX] = 1.0 - this.globalFitInfo[SSE_IDX] / this.globalFitInfo[SST_IDX];

  120.         if (!containsConstant) {
  121.             this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (1.0 - this.globalFitInfo[RSQ_IDX]) * (((double) nobs) / (nobs - rank));
  122.         } else {
  123.             this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) / (globalFitInfo[SST_IDX] * (nobs - rank));
  124.         }
  125.     }

  126.     /**
  127.      * <p>Returns the parameter estimate for the regressor at the given index.</p>
  128.      *
  129.      * <p>A redundant regressor will have its redundancy flag set, as well as
  130.      *  a parameters estimated equal to {@code Double.NaN}</p>
  131.      *
  132.      * @param index Index.
  133.      * @return the parameters estimated for regressor at index.
  134.      * @throws MathIllegalArgumentException if {@code index} is not in the interval
  135.      * {@code [0, number of parameters)}.
  136.      */
  137.     public double getParameterEstimate(int index) throws MathIllegalArgumentException {
  138.         if (parameters == null) {
  139.             return Double.NaN;
  140.         }
  141.         MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1);
  142.         return this.parameters[index];
  143.     }

  144.     /**
  145.      * <p>Returns a copy of the regression parameters estimates.</p>
  146.      *
  147.      * <p>The parameter estimates are returned in the natural order of the data.</p>
  148.      *
  149.      * <p>A redundant regressor will have its redundancy flag set, as will
  150.      *  a parameter estimate equal to {@code Double.NaN}.</p>
  151.      *
  152.      * @return array of parameter estimates, null if no estimation occurred
  153.      */
  154.     public double[] getParameterEstimates() {
  155.         if (this.parameters == null) {
  156.             return null; // NOPMD
  157.         }
  158.         return parameters.clone();
  159.     }

  160.     /**
  161.      * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
  162.      * error of the parameter estimate at index</a>,
  163.      * usually denoted s(b<sub>index</sub>).
  164.      *
  165.      * @param index Index.
  166.      * @return the standard errors associated with parameters estimated at index.
  167.      * @throws MathIllegalArgumentException if {@code index} is not in the interval
  168.      * {@code [0, number of parameters)}.
  169.      */
  170.     public double getStdErrorOfEstimate(int index) throws MathIllegalArgumentException {
  171.         if (parameters == null) {
  172.             return Double.NaN;
  173.         }
  174.         MathUtils.checkRangeInclusive(index, 0, this.parameters.length - 1);
  175.         double var = this.getVcvElement(index, index);
  176.         if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
  177.             return FastMath.sqrt(var);
  178.         }
  179.         return Double.NaN;
  180.     }

  181.     /**
  182.      * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
  183.      * error of the parameter estimates</a>,
  184.      * usually denoted s(b<sub>i</sub>).</p>
  185.      *
  186.      * <p>If there are problems with an ill conditioned design matrix then the regressor
  187.      * which is redundant will be assigned <code>Double.NaN</code>. </p>
  188.      *
  189.      * @return an array standard errors associated with parameters estimates,
  190.      *  null if no estimation occurred
  191.      */
  192.     public double[] getStdErrorOfEstimates() {
  193.         if (parameters == null) {
  194.             return null; // NOPMD
  195.         }
  196.         double[] se = new double[this.parameters.length];
  197.         for (int i = 0; i < this.parameters.length; i++) {
  198.             double var = this.getVcvElement(i, i);
  199.             if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
  200.                 se[i] = FastMath.sqrt(var);
  201.                 continue;
  202.             }
  203.             se[i] = Double.NaN;
  204.         }
  205.         return se;
  206.     }

  207.     /**
  208.      * <p>Returns the covariance between regression parameters i and j.</p>
  209.      *
  210.      * <p>If there are problems with an ill conditioned design matrix then the covariance
  211.      * which involves redundant columns will be assigned {@code Double.NaN}. </p>
  212.      *
  213.      * @param i {@code i}th regression parameter.
  214.      * @param j {@code j}th regression parameter.
  215.      * @return the covariance of the parameter estimates.
  216.      * @throws MathIllegalArgumentException if {@code i} or {@code j} is not in the
  217.      * interval {@code [0, number of parameters)}.
  218.      */
  219.     public double getCovarianceOfParameters(int i, int j) throws MathIllegalArgumentException {
  220.         if (parameters == null) {
  221.             return Double.NaN;
  222.         }
  223.         MathUtils.checkRangeInclusive(i, 0, this.parameters.length - 1);
  224.         MathUtils.checkRangeInclusive(j, 0, this.parameters.length - 1);
  225.         return this.getVcvElement(i, j);
  226.     }

  227.     /**
  228.      * <p>Returns the number of parameters estimated in the model.</p>
  229.      *
  230.      * <p>This is the maximum number of regressors, some techniques may drop
  231.      * redundant parameters</p>
  232.      *
  233.      * @return number of regressors, -1 if not estimated
  234.      */
  235.     public int getNumberOfParameters() {
  236.         if (this.parameters == null) {
  237.             return -1;
  238.         }
  239.         return this.parameters.length;
  240.     }

  241.     /**
  242.      * Returns the number of observations added to the regression model.
  243.      *
  244.      * @return Number of observations, -1 if an error condition prevents estimation
  245.      */
  246.     public long getN() {
  247.         return this.nobs;
  248.     }

  249.     /**
  250.      * <p>Returns the sum of squared deviations of the y values about their mean.</p>
  251.      *
  252.      * <p>This is defined as SSTO
  253.      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
  254.      *
  255.      * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
  256.      *
  257.      * @return sum of squared deviations of y values
  258.      */
  259.     public double getTotalSumSquares() {
  260.         return this.globalFitInfo[SST_IDX];
  261.     }

  262.     /**
  263.      * <p>Returns the sum of squared deviations of the predicted y values about
  264.      * their mean (which equals the mean of y).</p>
  265.      *
  266.      * <p>This is usually abbreviated SSR or SSM.  It is defined as SSM
  267.      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
  268.      *
  269.      * <p><strong>Preconditions</strong>:</p><ul>
  270.      * <li>At least two observations (with at least two different x values)
  271.      * must have been added before invoking this method. If this method is
  272.      * invoked before a model can be estimated, <code>Double.NaN</code> is
  273.      * returned.
  274.      * </li></ul>
  275.      *
  276.      * @return sum of squared deviations of predicted y values
  277.      */
  278.     public double getRegressionSumSquares() {
  279.         return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
  280.     }

  281.     /**
  282.      * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
  283.      * sum of squared errors</a> (SSE) associated with the regression
  284.      * model.</p>
  285.      *
  286.      * <p>The return value is constrained to be non-negative - i.e., if due to
  287.      * rounding errors the computational formula returns a negative result,
  288.      * 0 is returned.</p>
  289.      *
  290.      * <p><strong>Preconditions</strong>:</p>
  291.      * <ul>
  292.      * <li>numberOfParameters data pairs
  293.      * must have been added before invoking this method. If this method is
  294.      * invoked before a model can be estimated, <code>Double,NaN</code> is
  295.      * returned.
  296.      * </li></ul>
  297.      *
  298.      * @return sum of squared errors associated with the regression model
  299.      */
  300.     public double getErrorSumSquares() {
  301.         return this.globalFitInfo[ SSE_IDX];
  302.     }

  303.     /**
  304.      * <p>Returns the sum of squared errors divided by the degrees of freedom,
  305.      * usually abbreviated MSE.</p>
  306.      *
  307.      * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
  308.      * or if there is no variation in <code>x</code>, this returns
  309.      * <code>Double.NaN</code>.</p>
  310.      *
  311.      * @return sum of squared deviations of y values
  312.      */
  313.     public double getMeanSquareError() {
  314.         return this.globalFitInfo[ MSE_IDX];
  315.     }

  316.     /**
  317.      * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
  318.      * coefficient of multiple determination</a>,
  319.      * usually denoted r-square.</p>
  320.      *
  321.      * <p><strong>Preconditions</strong>:</p>
  322.      * <ul>
  323.      * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
  324.      * must have been added before invoking this method. If this method is
  325.      * invoked before a model can be estimated, {@code Double,NaN} is
  326.      * returned.
  327.      * </li></ul>
  328.      *
  329.      * @return r-square, a double in the interval [0, 1]
  330.      */
  331.     public double getRSquared() {
  332.         return this.globalFitInfo[ RSQ_IDX];
  333.     }

  334.     /**
  335.      * <p>Returns the adjusted R-squared statistic, defined by the formula
  336.      * \(
  337.      * R_\mathrm{adj}^2 = 1 - \frac{\mathrm{SSR} (n - 1)}{\mathrm{SSTO} (n - p)}
  338.      * \)
  339.      * where SSR is the sum of squared residuals},
  340.      * SSTO is the total sum of squares}, n is the number
  341.      * of observations and p is the number of parameters estimated (including the intercept).</p>
  342.      *
  343.      * <p>If the regression is estimated without an intercept term, what is returned is</p><pre>
  344.      * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
  345.      * </pre>
  346.      *
  347.      * @return adjusted R-Squared statistic
  348.      */
  349.     public double getAdjustedRSquared() {
  350.         return this.globalFitInfo[ ADJRSQ_IDX];
  351.     }

  352.     /**
  353.      * Returns true if the regression model has been computed including an intercept.
  354.      * In this case, the coefficient of the intercept is the first element of the
  355.      * {@link #getParameterEstimates() parameter estimates}.
  356.      * @return true if the model has an intercept term
  357.      */
  358.     public boolean hasIntercept() {
  359.         return this.containsConstant;
  360.     }

  361.     /**
  362.      * Gets the i-jth element of the variance-covariance matrix.
  363.      *
  364.      * @param i first variable index
  365.      * @param j second variable index
  366.      * @return the requested variance-covariance matrix entry
  367.      */
  368.     private double getVcvElement(int i, int j) {
  369.         if (this.isSymmetricVCD) {
  370.             if (this.varCovData.length > 1) {
  371.                 //could be stored in upper or lower triangular
  372.                 if (i == j) {
  373.                     return varCovData[i][i];
  374.                 } else if (i >= varCovData[j].length) {
  375.                     return varCovData[i][j];
  376.                 } else {
  377.                     return varCovData[j][i];
  378.                 }
  379.             } else {//could be in single array
  380.                 if (i > j) {
  381.                     return varCovData[0][(i + 1) * i / 2 + j];
  382.                 } else {
  383.                     return varCovData[0][(j + 1) * j / 2 + i];
  384.                 }
  385.             }
  386.         } else {
  387.             return this.varCovData[i][j];
  388.         }
  389.     }
  390. }