// MillerUpdatingRegression.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.regression;

  22. import java.util.Arrays;

  23. import org.hipparchus.exception.LocalizedCoreFormats;
  24. import org.hipparchus.exception.MathIllegalArgumentException;
  25. import org.hipparchus.stat.LocalizedStatFormats;
  26. import org.hipparchus.util.FastMath;
  27. import org.hipparchus.util.MathUtils;
  28. import org.hipparchus.util.Precision;

  29. /**
  30.  * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
  31.  *
  32.  * <p>The algorithm is described in:</p>
  33.  * <pre>
  34.  * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
  35.  * Author(s): Alan J. Miller
  36.  * Source: Journal of the Royal Statistical Society.
  37.  * Series C (Applied Statistics), Vol. 41, No. 2
  38.  * (1992), pp. 458-478
  39.  * Published by: Blackwell Publishing for the Royal Statistical Society
  40.  * Stable URL: <a href="https://www.jstor.org/stable/2347583">Algorithm AS 274:
  41.  * Least Squares Routines to Supplement Those of Gentleman</a> </pre>
  42.  *
  43.  * <p>This method for multiple regression forms the solution to the OLS problem
  44.  * by updating the QR decomposition as described by Gentleman.</p>
  45.  *
  46.  */
  47. public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression (includes the intercept column when one is added) */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of the R`Y */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix, packed row by row */
    private final double[] r;
    /** the tolerance for each of the variables, used for singularity detection */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs;
    /** sum of squared errors of largest regression */
    private double sserr;
    /** has rss been computed since the last update? */
    private boolean rss_set;
    /** has the tolerance setting method been called */
    private boolean tol_set;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy;
    /** summation of squared Y values */
    private double sumsqy;
    /** boolean flag whether a regression constant is added */
    private final boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;
    /**
     *  Set the default constructor to private access
     *  to prevent inadvertent instantiation
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        // delegating with -1 variables always throws, so no instance can ever be built this way
        this(-1, false, Double.NaN);
    }

  94.     /**
  95.      * This is the augmented constructor for the MillerUpdatingRegression class.
  96.      *
  97.      * @param numberOfVariables number of regressors to expect, not including constant
  98.      * @param includeConstant include a constant automatically
  99.      * @param errorTolerance  zero tolerance, how machine zero is determined
  100.      * @throws MathIllegalArgumentException if {@code numberOfVariables is less than 1}
  101.      */
  102.     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
  103.             throws MathIllegalArgumentException {
  104.         if (numberOfVariables < 1) {
  105.             throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
  106.         }
  107.         if (includeConstant) {
  108.             this.nvars = numberOfVariables + 1;
  109.         } else {
  110.             this.nvars = numberOfVariables;
  111.         }
  112.         this.hasIntercept = includeConstant;
  113.         this.nobs = 0;
  114.         this.d = new double[this.nvars];
  115.         this.rhs = new double[this.nvars];
  116.         this.r = new double[this.nvars * (this.nvars - 1) / 2];
  117.         this.tol = new double[this.nvars];
  118.         this.rss = new double[this.nvars];
  119.         this.vorder = new int[this.nvars];
  120.         this.x_sing = new double[this.nvars];
  121.         this.work_sing = new double[this.nvars];
  122.         this.work_tolset = new double[this.nvars];
  123.         this.lindep = new boolean[this.nvars];
  124.         for (int i = 0; i < this.nvars; i++) {
  125.             vorder[i] = i;
  126.         }
  127.         if (errorTolerance > 0) {
  128.             this.epsilon = errorTolerance;
  129.         } else {
  130.             this.epsilon = -errorTolerance;
  131.         }
  132.     }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws MathIllegalArgumentException if {@code numberOfVariables is less than 1}
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws MathIllegalArgumentException {
        // defaults the zero tolerance to machine epsilon
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false if no intercept
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

  160.     /**
  161.      * Adds an observation to the regression model.
  162.      * @param x the array with regressor values
  163.      * @param y  the value of dependent variable given these regressors
  164.      * @exception MathIllegalArgumentException if the length of {@code x} does not equal
  165.      * the number of independent variables in the model
  166.      */
  167.     @Override
  168.     public void addObservation(final double[] x, final double y)
  169.             throws MathIllegalArgumentException {

  170.         if ((!this.hasIntercept && x.length != nvars) ||
  171.                (this.hasIntercept && x.length + 1 != nvars)) {
  172.             throw new MathIllegalArgumentException(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION,
  173.                     x.length, nvars);
  174.         }
  175.         if (!this.hasIntercept) {
  176.             include(x.clone(), 1.0, y);
  177.         } else {
  178.             final double[] tmp = new double[x.length + 1];
  179.             System.arraycopy(x, 0, tmp, 1, x.length);
  180.             tmp[0] = 1.0;
  181.             include(tmp, 1.0, y);
  182.         }
  183.         ++nobs;

  184.     }

  185.     /**
  186.      * Adds multiple observations to the model.
  187.      * @param x observations on the regressors
  188.      * @param y observations on the regressand
  189.      * @throws MathIllegalArgumentException if {@code x} is not rectangular, does not match
  190.      * the length of {@code y} or does not contain sufficient data to estimate the model
  191.      */
  192.     @Override
  193.     public void addObservations(double[][] x, double[] y) throws MathIllegalArgumentException {
  194.         MathUtils.checkNotNull(x, LocalizedCoreFormats.INPUT_ARRAY);
  195.         MathUtils.checkNotNull(y, LocalizedCoreFormats.INPUT_ARRAY);
  196.         MathUtils.checkDimension(x.length, y.length);
  197.         if (x.length == 0) {  // Must be no y data either
  198.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
  199.         }
  200.         if (x[0].length + 1 > x.length) {
  201.             throw new MathIllegalArgumentException(
  202.                   LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
  203.                   x.length, x[0].length);
  204.         }
  205.         for (int i = 0; i < x.length; i++) {
  206.             addObservation(x[i], y[i]);
  207.         }
  208.     }

    /**
     * The include method is where the QR decomposition occurs. This statement forms all
     * intermediate data which will be used for all derivative measures.
     * According to the miller paper, note that in the original implementation the x vector
     * is overwritten. In this implementation, the include method is passed a copy of the
     * original data vector so that there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method, in that the assumption is
     * of dense design matrices, there is some advantage in using the original gentleman algorithm
     * on sparse matrices.
     *
     * @param x observations on the regressors (modified in place by the sweep)
     * @param wi weight of the this observation (-1,1)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;              // running index into the packed off-diagonal R storage
        double w = wi;              // weight remaining as the row is swept through the rotations
        double y = yi;              // running residual of the regressand
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double _w;
        this.rss_set = false;       // cached residual sums of squares are now stale
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        // sweep the observation through each row of the orthogonal reduction
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                // weight exhausted: nothing left of this observation to fold in
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                // zero regressor: skip over this row's off-diagonal entries
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            _w = w;                 // pre-update weight, needed for the R update below
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                // only rescale the weight when the update is numerically significant
                if (FastMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                // first nonzero contribution to this diagonal
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            // planar-rotation update of row i of R and of the remaining x entries
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            // the same rotation applied to the right-hand side
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        // whatever is left of y is orthogonal to all regressors: add it to the SSE
        sserr = smartAdd(sserr, w * y * y);
    }

  279.     /**
  280.      * Adds to number a and b such that the contamination due to
  281.      * numerical smallness of one addend does not corrupt the sum.
  282.      * @param a - an addend
  283.      * @param b - an addend
  284.      * @return the sum of the a and b
  285.      */
  286.     private double smartAdd(double a, double b) {
  287.         final double _a = FastMath.abs(a);
  288.         final double _b = FastMath.abs(b);
  289.         if (_a > _b) {
  290.             final double eps = _a * Precision.EPSILON;
  291.             if (_b > eps) {
  292.                 return a + b;
  293.             }
  294.             return a;
  295.         } else {
  296.             final double eps = _b * Precision.EPSILON;
  297.             if (_a > eps) {
  298.                 return a + b;
  299.             }
  300.             return b;
  301.         }
  302.     }

  303.     /**
  304.      * As the name suggests,  clear wipes the internals and reorders everything in the
  305.      * canonical order.
  306.      */
  307.     @Override
  308.     public void clear() {
  309.         Arrays.fill(this.d, 0.0);
  310.         Arrays.fill(this.rhs, 0.0);
  311.         Arrays.fill(this.r, 0.0);
  312.         Arrays.fill(this.tol, 0.0);
  313.         Arrays.fill(this.rss, 0.0);
  314.         Arrays.fill(this.work_tolset, 0.0);
  315.         Arrays.fill(this.work_sing, 0.0);
  316.         Arrays.fill(this.x_sing, 0.0);
  317.         Arrays.fill(this.lindep, false);
  318.         for (int i = 0; i < nvars; i++) {
  319.             this.vorder[i] = i;
  320.         }
  321.         this.nobs = 0;
  322.         this.sserr = 0.0;
  323.         this.sumy = 0.0;
  324.         this.sumsqy = 0.0;
  325.         this.rss_set = false;
  326.         this.tol_set = false;
  327.     }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        // work_tolset[i] = sqrt(d[i]), the scale of row i of the decomposition
        for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
        }
        tol[0] = eps * this.work_tolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;          // index of r[0][col] in the packed storage
            total = work_tolset[col];
            // accumulate |r[row][col]| * sqrt(d[row]) down column col
            for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
                pos += nvars - row - 2;   // jump to the same column in the next packed row
            }
            tol[col] = eps * total;
        }
        tol_set = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws MathIllegalArgumentException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws MathIllegalArgumentException {
        int nextr;
        if (nreq < 1) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tol_set) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        // back-substitution: solve R b = rhs from the last requested row upwards
        for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
                // diagonal below tolerance: treat variable i as linearly dependent
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;  // start of row i in packed R
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            // coefficients of linearly dependent variables are not identifiable
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        // work_sing[i] = sqrt(d[i]), the scale of row i
        for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;          // index of r[0][col] in the packed storage
            for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;   // advance to the same column in the next packed row
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (work_sing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    // extract the dependent column's row of R, zero it out, and
                    // fold it back into the decomposition as a pseudo-observation
                    Arrays.fill(x_sing, 0.0);
                    int _pi = col * (nvars + nvars - col - 1) / 2;  // start of row col in packed R
                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
                        x_sing[_xi] = r[_pi];
                        r[_pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(x_sing, weight, y);
                } else {
                    // last column: its contribution goes straight into the error sum
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

  443.     /**
  444.      * Calculates the sum of squared errors for the full regression
  445.      * and all subsets in the following manner: <pre>
  446.      * rss[] ={
  447.      * ResidualSumOfSquares_allNvars,
  448.      * ResidualSumOfSquares_FirstNvars-1,
  449.      * ResidualSumOfSquares_FirstNvars-2,
  450.      * ..., ResidualSumOfSquares_FirstVariable} </pre>
  451.      */
  452.     private void ss() {
  453.         double total = sserr;
  454.         rss[nvars - 1] = sserr;
  455.         for (int i = nvars - 1; i > 0; i--) {
  456.             total += d[i] * rhs[i] * rhs[i];
  457.             rss[i - 1] = total;
  458.         }
  459.         rss_set = true;
  460.     }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov22,
     *  ...
     * } </pre>
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form, or {@code null} when there are not
     * more observations than requested regressors
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            // not enough degrees of freedom to estimate the error variance
            return null; // NOPMD
        }
        // effective rank: number of requested regressors not flagged dependent
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        // estimated error variance with (nobs - rank) degrees of freedom
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);   // dependent entries remain NaN
        int pos2;
        int pos1;
        int start = 0;
        // accumulate var * Rinv * D^-1 * Rinv' over the packed storage of rinv
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        double total;
                        if (row == col) {
                            // diagonal of the implicit unit-triangular Rinv is 1
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        // store into lower-triangular packed position (col, row)
                        covmat[ (col + 1) * col / 2 + row] = total * var;
                    } else {
                        // dependent column: skip its packed entries
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv  the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        // walk the packed storage backwards from its last off-diagonal element
        int pos = nreq * (nreq - 1) / 2 - 1;
        Arrays.fill(rinv, Double.NaN);   // dependent rows remain NaN
        // back-substitution on the unit upper-triangular R, last row first
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;  // start of packed row (row-1)
                for (int col = nreq; col > row; --col) {
                    int pos1 = start;
                    int pos2 = pos;
                    double total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;   // same column, next packed row of rinv
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                // dependent row: skip its packed entries, leaving them NaN
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * is returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is:</p><pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form,
     * or {@code null} for an out-of-range {@code in}
     */
    public double[] getPartialCorrelations(int in) {
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        // offsets mapping absolute column indices into the local work arrays
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        // offset of the X-Y correlations past the X-X triangle in output
        final int offXX = (nvars - in) * (nvars - in) / 2 * 0 + (nvars - in) * (nvars - in - 1) / 2;
        // NOTE(review): this guard admits in == -1, after which d[in] below
        // would index out of bounds — looks like it should be in < 0; confirm
        if (in < -1 || in >= nvars) {
            return null; // NOPMD
        }
        final int nvm = nvars - 1;
        // start of row `in` in the packed off-diagonal R storage
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        // rms[col] = 1 / sqrt(sum of squares of column col), the scaling factor
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;   // same column, next packed row
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                // degenerate column: zero scaling yields zero correlations
                rms[col + rms_off] = 0.0;
            }
        }
        // sumyy becomes 1 / sqrt(total sum of squares of Y past row `in`)
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            // accumulate cross products of column col1 with each later column
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;   // same column, next packed row
            }
            // diagonal contribution of col1 itself, then emit the correlations
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;   // NOTE(review): pos is not read after this point
            }
            sumxy += d[col1] * rhs[col1];
            // correlation of col1 with the regressand, stored past the X-X triangle
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

  659.     /**
  660.      * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
  661.      * Move variable from position FROM to position TO in an
  662.      * orthogonal reduction produced by AS75.1.
  663.      *
  664.      * @param from initial position
  665.      * @param to destination
  666.      */
  667.     private void vmove(int from, int to) {
  668.         double d1;
  669.         double d2;
  670.         double X;
  671.         double d1new;
  672.         double d2new;
  673.         double cbar;
  674.         double sbar;
  675.         double Y;
  676.         int first;
  677.         int inc;
  678.         int m1;
  679.         int m2;
  680.         int mp1;
  681.         int pos;
  682.         boolean bSkipTo40 = false;
  683.         if (from == to) {
  684.             return;
  685.         }
  686.         if (!this.rss_set) {
  687.             ss();
  688.         }
  689.         final int count;
  690.         if (from < to) {
  691.             first = from;
  692.             inc = 1;
  693.             count = to - from;
  694.         } else {
  695.             first = from - 1;
  696.             inc = -1;
  697.             count = from - to;
  698.         }

  699.         int m = first;
  700.         int idx = 0;
  701.         while (idx < count) {
  702.             m1 = m * (nvars + nvars - m - 1) / 2;
  703.             m2 = m1 + nvars - m - 1;
  704.             mp1 = m + 1;

  705.             d1 = d[m];
  706.             d2 = d[mp1];
  707.             // Special cases.
  708.             if (d1 > this.epsilon || d2 > this.epsilon) {
  709.                 X = r[m1];
  710.                 if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
  711.                     X = 0.0;
  712.                 }
  713.                 if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
  714.                     d[m] = d2;
  715.                     d[mp1] = d1;
  716.                     r[m1] = 0.0;
  717.                     for (int col = m + 2; col < nvars; col++) {
  718.                         ++m1;
  719.                         X = r[m1];
  720.                         r[m1] = r[m2];
  721.                         r[m2] = X;
  722.                         ++m2;
  723.                     }
  724.                     X = rhs[m];
  725.                     rhs[m] = rhs[mp1];
  726.                     rhs[mp1] = X;
  727.                     bSkipTo40 = true;
  728.                     //break;
  729.                 } else if (d2 < this.epsilon) {
  730.                     d[m] = d1 * X * X;
  731.                     r[m1] = 1.0 / X;
  732.                     for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
  733.                         r[_i] /= X;
  734.                     }
  735.                     rhs[m] /= X;
  736.                     bSkipTo40 = true;
  737.                     //break;
  738.                 }
  739.                 if (!bSkipTo40) {
  740.                     d1new = d2 + d1 * X * X;
  741.                     cbar = d2 / d1new;
  742.                     sbar = X * d1 / d1new;
  743.                     d2new = d1 * cbar;
  744.                     d[m] = d1new;
  745.                     d[mp1] = d2new;
  746.                     r[m1] = sbar;
  747.                     for (int col = m + 2; col < nvars; col++) {
  748.                         ++m1;
  749.                         Y = r[m1];
  750.                         r[m1] = cbar * r[m2] + sbar * Y;
  751.                         r[m2] = Y - X * r[m2];
  752.                         ++m2;
  753.                     }
  754.                     Y = rhs[m];
  755.                     rhs[m] = cbar * rhs[mp1] + sbar * Y;
  756.                     rhs[mp1] = Y - X * rhs[mp1];
  757.                 }
  758.             }
  759.             if (m > 0) {
  760.                 pos = m;
  761.                 for (int row = 0; row < m; row++) {
  762.                     X = r[pos];
  763.                     r[pos] = r[pos - 1];
  764.                     r[pos - 1] = X;
  765.                     pos += nvars - row - 2;
  766.                 }
  767.             }
  768.             // Adjust variable order (VORDER), the tolerances (TOL) and
  769.             // the vector of residual sums of squares (RSS).
  770.             m1 = vorder[m];
  771.             vorder[m] = vorder[mp1];
  772.             vorder[mp1] = m1;
  773.             X = tol[m];
  774.             tol[m] = tol[mp1];
  775.             tol[mp1] = X;
  776.             rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

  777.             m += inc;
  778.             ++idx;
  779.         }
  780.     }

  781.     /**
  782.      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
  783.      *
  784.      * <p> Re-order the variables in an orthogonal reduction produced by
  785.      * AS75.1 so that the N variables in LIST start at position POS1,
  786.      * though will not necessarily be in the same order as in LIST.
  787.      * Any variables in VORDER before position POS1 are not moved.
  788.      * Auxiliary routine called: VMOVE. </p>
  789.      *
  790.      * <p>This internal method reorders the regressors.</p>
  791.      *
  792.      * @param list the regressors to move
  793.      * @param pos1 where the list will be placed
  794.      * @return -1 error, 0 everything ok
  795.      */
  796.     private int reorderRegressors(int[] list, int pos1) {
  797.         int next;
  798.         int i;
  799.         int l;
  800.         if (list.length < 1 || list.length > nvars + 1 - pos1) {
  801.             return -1;
  802.         }
  803.         next = pos1;
  804.         i = pos1;
  805.         while (i < nvars) {
  806.             l = vorder[i];
  807.             for (int j = 0; j < list.length; j++) {
  808.                 if (l == list[j] && i > next) {
  809.                     this.vmove(i, next);
  810.                     ++next;
  811.                     if (next >= list.length + pos1) {
  812.                         return 0;
  813.                     } else {
  814.                         break;
  815.                     }
  816.                 }
  817.             }
  818.             ++i;
  819.         }
  820.         return 0;
  821.     }

  822.     /**
  823.      * Gets the diagonal of the Hat matrix also known as the leverage matrix.
  824.      *
  825.      * @param  row_data returns the diagonal of the hat matrix for this observation
  826.      * @return the diagonal element of the hatmatrix
  827.      */
  828.     public double getDiagonalOfHatMatrix(double[] row_data) {
  829.         double[] wk = new double[this.nvars];
  830.         int pos;
  831.         double total;

  832.         if (row_data.length > nvars) {
  833.             return Double.NaN;
  834.         }
  835.         double[] xrow;
  836.         if (this.hasIntercept) {
  837.             xrow = new double[row_data.length + 1];
  838.             xrow[0] = 1.0;
  839.             System.arraycopy(row_data, 0, xrow, 1, row_data.length);
  840.         } else {
  841.             xrow = row_data;
  842.         }
  843.         double hii = 0.0;
  844.         for (int col = 0; col < xrow.length; col++) {
  845.             if (FastMath.sqrt(d[col]) < tol[col]) {
  846.                 wk[col] = 0.0;
  847.             } else {
  848.                 pos = col - 1;
  849.                 total = xrow[col];
  850.                 for (int row = 0; row < col; row++) {
  851.                     total = smartAdd(total, -wk[row] * r[pos]);
  852.                     pos += nvars - row - 2;
  853.                 }
  854.                 wk[col] = total;
  855.                 hii = smartAdd(hii, (total * total) / d[col]);
  856.             }
  857.         }
  858.         return hii;
  859.     }

  860.     /**
  861.      * Gets the order of the regressors, useful if some type of reordering
  862.      * has been called. Calling regress with int[]{} args will trigger
  863.      * a reordering.
  864.      *
  865.      * @return int[] with the current order of the regressors
  866.      */
  867.     public int[] getOrderOfRegressors(){
  868.         return vorder.clone();
  869.     }

  870.     /**
  871.      * Conducts a regression on the data in the model, using all regressors.
  872.      *
  873.      * @return RegressionResults the structure holding all regression results
  874.      * @exception  MathIllegalArgumentException - thrown if number of observations is
  875.      * less than the number of variables
  876.      */
  877.     @Override
  878.     public RegressionResults regress() throws MathIllegalArgumentException {
  879.         return regress(this.nvars);
  880.     }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * <p>The first {@code numberOfRegressors} variables in the current
     * internal ordering are used; results are mapped back to canonical
     * variable order when the internal order differs from it.</p>
     *
     * @param numberOfRegressors many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception  MathIllegalArgumentException - thrown if number of observations is
     * less than the number of variables or number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws MathIllegalArgumentException {
        if (this.nobs <= numberOfRegressors) {
           throw new MathIllegalArgumentException(
                   LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                   this.nobs, numberOfRegressors);
        }
        if( numberOfRegressors > this.nvars ){
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        // Refresh tolerances and singularity flags before extracting results.
        tolset();
        singcheck();

        // Coefficients for the leading numberOfRegressors variables.
        double[] beta = this.regcf(numberOfRegressors);

        ss();

        // Packed lower-triangular covariance matrix of the coefficients.
        double[] cov = this.cov(numberOfRegressors);

        // Rank = number of variables not flagged as linearly dependent.
        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        // Results can be returned directly when the internal order already
        // matches the canonical order for the requested prefix.
        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            // Map each included coefficient back to its canonical index.
            // NOTE(review): i runs up to nvars while betaNew only has
            // numberOfRegressors entries; if a variable with canonical index
            // >= numberOfRegressors sits among the leading regressors this
            // writes out of bounds — confirm the vorder invariants that
            // callers rely on.
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[ j];
                        newIndices[i] = j;
                    }
                }
            }

            // Permute the packed triangular covariance accordingly: entry
            // (i, j) of the reordered matrix comes from the packed slot of
            // the (max, min) pair of the original positions.
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

  956.     /**
  957.      * Conducts a regression on the data in the model, using regressors in array
  958.      * Calling this method will change the internal order of the regressors
  959.      * and care is required in interpreting the hatmatrix.
  960.      *
  961.      * @param  variablesToInclude array of variables to include in regression
  962.      * @return RegressionResults the structure holding all regression results
  963.      * @exception  MathIllegalArgumentException - thrown if number of observations is
  964.      * less than the number of variables, the number of regressors requested
  965.      * is greater than the regressors in the model or a regressor index in
  966.      * regressor array does not exist
  967.      */
  968.     @Override
  969.     public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException {
  970.         if (variablesToInclude.length > this.nvars) {
  971.             throw new MathIllegalArgumentException(
  972.                     LocalizedStatFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
  973.         }
  974.         if (this.nobs <= this.nvars) {
  975.             throw new MathIllegalArgumentException(
  976.                     LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
  977.                     this.nobs, this.nvars);
  978.         }
  979.         Arrays.sort(variablesToInclude);
  980.         int iExclude = 0;
  981.         for (int i = 0; i < variablesToInclude.length; i++) {
  982.             if (i >= this.nvars) {
  983.                 throw new MathIllegalArgumentException(
  984.                         LocalizedCoreFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
  985.             }
  986.             if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
  987.                 variablesToInclude[i] = -1;
  988.                 ++iExclude;
  989.             }
  990.         }
  991.         int[] series;
  992.         if (iExclude > 0) {
  993.             int j = 0;
  994.             series = new int[variablesToInclude.length - iExclude];
  995.             for (int i = 0; i < variablesToInclude.length; i++) {
  996.                 if (variablesToInclude[i] > -1) {
  997.                     series[j] = variablesToInclude[i];
  998.                     ++j;
  999.                 }
  1000.             }
  1001.         } else {
  1002.             series = variablesToInclude;
  1003.         }

  1004.         reorderRegressors(series, 0);
  1005.         tolset();
  1006.         singcheck();

  1007.         double[] beta = this.regcf(series.length);

  1008.         ss();

  1009.         double[] cov = this.cov(series.length);

  1010.         int rnk = 0;
  1011.         for (int i = 0; i < this.lindep.length; i++) {
  1012.             if (!this.lindep[i]) {
  1013.                 ++rnk;
  1014.             }
  1015.         }

  1016.         boolean needsReorder = false;
  1017.         for (int i = 0; i < this.nvars; i++) {
  1018.             if (this.vorder[i] != series[i]) {
  1019.                 needsReorder = true;
  1020.                 break;
  1021.             }
  1022.         }
  1023.         if (!needsReorder) {
  1024.             return new RegressionResults(
  1025.                     beta, new double[][]{cov}, true, this.nobs, rnk,
  1026.                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
  1027.         } else {
  1028.             double[] betaNew = new double[beta.length];
  1029.             int[] newIndices = new int[beta.length];
  1030.             for (int i = 0; i < series.length; i++) {
  1031.                 for (int j = 0; j < this.vorder.length; j++) {
  1032.                     if (this.vorder[j] == series[i]) {
  1033.                         betaNew[i] = beta[ j];
  1034.                         newIndices[i] = j;
  1035.                     }
  1036.                 }
  1037.             }
  1038.             double[] covNew = new double[cov.length];
  1039.             int idx1 = 0;
  1040.             int idx2;
  1041.             int _i;
  1042.             int _j;
  1043.             for (int i = 0; i < beta.length; i++) {
  1044.                 _i = newIndices[i];
  1045.                 for (int j = 0; j <= i; j++, idx1++) {
  1046.                     _j = newIndices[j];
  1047.                     if (_i > _j) {
  1048.                         idx2 = _i * (_i + 1) / 2 + _j;
  1049.                     } else {
  1050.                         idx2 = _j * (_j + 1) / 2 + _i;
  1051.                     }
  1052.                     covNew[idx1] = cov[idx2];
  1053.                 }
  1054.             }
  1055.             return new RegressionResults(
  1056.                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
  1057.                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
  1058.         }
  1059.     }
  1060. }