1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import java.util.Arrays;
25  
26  import org.hipparchus.exception.LocalizedCoreFormats;
27  import org.hipparchus.exception.MathIllegalArgumentException;
28  import org.hipparchus.stat.LocalizedStatFormats;
29  import org.hipparchus.util.FastMath;
30  import org.hipparchus.util.MathUtils;
31  import org.hipparchus.util.Precision;
32  
33  /**
34   * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
35   *
36   * <p>The algorithm is described in:</p>
37   * <pre>
38   * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
39   * Author(s): Alan J. Miller
40   * Source: Journal of the Royal Statistical Society.
41   * Series C (Applied Statistics), Vol. 41, No. 2
42   * (1992), pp. 458-478
43   * Published by: Blackwell Publishing for the Royal Statistical Society
44   * Stable URL: <a href="https://www.jstor.org/stable/2347583">Algorithm AS 274:
45   * Least Squares Routines to Supplement Those of Gentleman</a> </pre>
46   *
47   * <p>This method for multiple regression forms the solution to the OLS problem
48   * by updating the QR decomposition as described by Gentleman.</p>
49   *
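 * <p>A minimal usage sketch (the data values are illustrative only):</p>
 * <pre>{@code
 * // two regressors plus an automatically added constant
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[] { 1.0, 2.0 },  4.0);
 * regression.addObservation(new double[] { 2.0, 3.0 },  7.0);
 * regression.addObservation(new double[] { 3.0, 5.0 }, 11.0);
 * regression.addObservation(new double[] { 4.0, 6.0 }, 14.0);
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates(); // intercept first
 * }</pre>
 *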
50   */
51  public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
52  
53      /** number of variables in regression */
54      private final int nvars;
55      /** diagonals of cross products matrix */
56      private final double[] d;
57      /** the elements of R'y */
58      private final double[] rhs;
59      /** the off diagonal portion of the R matrix */
60      private final double[] r;
61      /** the tolerance for each of the variables */
62      private final double[] tol;
63      /** residual sum of squares for all nested regressions */
64      private final double[] rss;
65      /** order of the regressors */
66      private final int[] vorder;
67      /** scratch space for tolerance calc */
68      private final double[] work_tolset;
69      /** number of observations entered */
70      private long nobs;
71      /** sum of squared errors of largest regression */
72      private double sserr;
73      /** has rss been called? */
74      private boolean rss_set;
75      /** has the tolerance setting method been called */
76      private boolean tol_set;
77      /** flags for variables with linear dependency problems */
78      private final boolean[] lindep;
79      /** singular x values */
80      private final double[] x_sing;
81      /** workspace for singularity method */
82      private final double[] work_sing;
83      /** summation of Y variable */
84      private double sumy;
85      /** summation of squared Y values */
86      private double sumsqy;
87      /** boolean flag whether a regression constant is added */
88      private final boolean hasIntercept;
89      /** zero tolerance */
90      private final double epsilon;
91      /**
92       *  Set the default constructor to private access
93       *  to prevent inadvertent instantiation
94       */
95      @SuppressWarnings("unused")
96      private MillerUpdatingRegression() {
97          this(-1, false, Double.NaN);
98      }
99  
100     /**
101      * This is the augmented constructor for the MillerUpdatingRegression class.
102      *
103      * @param numberOfVariables number of regressors to expect, not including constant
104      * @param includeConstant include a constant automatically
105      * @param errorTolerance  zero tolerance, used to determine machine zero; the absolute value is used
106      * @throws MathIllegalArgumentException if {@code numberOfVariables} is less than 1
107      */
108     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
109             throws MathIllegalArgumentException {
110         if (numberOfVariables < 1) {
111             throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
112         }
113         if (includeConstant) {
114             this.nvars = numberOfVariables + 1;
115         } else {
116             this.nvars = numberOfVariables;
117         }
118         this.hasIntercept = includeConstant;
119         this.nobs = 0;
120         this.d = new double[this.nvars];
121         this.rhs = new double[this.nvars];
122         this.r = new double[this.nvars * (this.nvars - 1) / 2];
123         this.tol = new double[this.nvars];
124         this.rss = new double[this.nvars];
125         this.vorder = new int[this.nvars];
126         this.x_sing = new double[this.nvars];
127         this.work_sing = new double[this.nvars];
128         this.work_tolset = new double[this.nvars];
129         this.lindep = new boolean[this.nvars];
130         for (int i = 0; i < this.nvars; i++) {
131             vorder[i] = i;
132         }
133         if (errorTolerance > 0) {
134             this.epsilon = errorTolerance;
135         } else {
136             this.epsilon = -errorTolerance;
137         }
138     }
139 
140     /**
141      * Primary constructor for the MillerUpdatingRegression.
142      *
143      * @param numberOfVariables maximum number of potential regressors
144      * @param includeConstant include a constant automatically
145      * @throws MathIllegalArgumentException if {@code numberOfVariables} is less than 1
146      */
147     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
148             throws MathIllegalArgumentException {
149         this(numberOfVariables, includeConstant, Precision.EPSILON);
150     }
151 
152     /**
153      * Indicates whether a constant (intercept) term is included in the regression.
154      * @return true if the regression has an intercept, false otherwise
155      */
156     @Override
157     public boolean hasIntercept() {
158         return this.hasIntercept;
159     }
160 
161     /**
162      * Gets the number of observations added to the regression model.
163      * @return number of observations
164      */
165     @Override
166     public long getN() {
167         return this.nobs;
168     }
169 
170     /**
171      * Adds an observation to the regression model.
172      * @param x the array with regressor values
173      * @param y  the value of the dependent variable given these regressors
174      * @exception MathIllegalArgumentException if the length of {@code x} does not equal
175      * the number of independent variables in the model
176      */
177     @Override
178     public void addObservation(final double[] x, final double y)
179             throws MathIllegalArgumentException {
180 
181         if ((!this.hasIntercept && x.length != nvars) ||
182                (this.hasIntercept && x.length + 1 != nvars)) {
183             throw new MathIllegalArgumentException(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION,
184                     x.length, nvars);
185         }
186         if (!this.hasIntercept) {
187             include(x.clone(), 1.0, y);
188         } else {
189             final double[] tmp = new double[x.length + 1];
190             System.arraycopy(x, 0, tmp, 1, x.length);
191             tmp[0] = 1.0;
192             include(tmp, 1.0, y);
193         }
194         ++nobs;
195 
196     }
197 
198     /**
199      * Adds multiple observations to the model.
200      * @param x observations on the regressors
201      * @param y observations on the regressand
202      * @throws MathIllegalArgumentException if {@code x} is not rectangular, does not match
203      * the length of {@code y} or does not contain sufficient data to estimate the model
204      */
205     @Override
206     public void addObservations(double[][] x, double[] y) throws MathIllegalArgumentException {
207         MathUtils.checkNotNull(x, LocalizedCoreFormats.INPUT_ARRAY);
208         MathUtils.checkNotNull(y, LocalizedCoreFormats.INPUT_ARRAY);
209         MathUtils.checkDimension(x.length, y.length);
210         if (x.length == 0) {  // Must be no y data either
211             throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
212         }
213         if (x[0].length + 1 > x.length) {
214             throw new MathIllegalArgumentException(
215                   LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
216                   x.length, x[0].length);
217         }
218         for (int i = 0; i < x.length; i++) {
219             addObservation(x[i], y[i]);
220         }
221     }
222 
223     /**
224      * The include method is where the QR decomposition occurs. It forms all
225      * intermediate data which will be used for all derivative measures.
226      * Note that in the original implementation described in the Miller paper
227      * the x vector is overwritten; in this implementation the include method
228      * is passed a copy of the original data vector, so the caller's data are
229      * not contaminated. Additionally, this method differs slightly from
230      * Gentleman's method in that it assumes a dense design matrix; the
231      * original Gentleman algorithm retains some advantage on sparse matrices.
232      *
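     * <p>For a regressor value x[i] with nonzero d[i], the loop below applies
     * Gentleman's scaled planar rotation; restated as formulas (unprimed values
     * are pre-update, w is the current observation weight):</p>
     * <pre>
     * d'[i]   = d[i] + w * x[i]^2
     * w'      = d[i] * w / d'[i]
     * x'[k]   = x[k] - x[i] * r[i,k]
     * r'[i,k] = (d[i] * r[i,k] + w * x[i] * x[k]) / d'[i]
     * </pre>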
233      * @param x observations on the regressors
234      * @param wi weight of this observation (-1,1)
235      * @param yi observation on the regressand
236      */
237     private void include(final double[] x, final double wi, final double yi) {
238         int nextr = 0;
239         double w = wi;
240         double y = yi;
241         double xi;
242         double di;
243         double wxi;
244         double dpi;
245         double xk;
246         double _w;
247         this.rss_set = false;
248         sumy = smartAdd(yi, sumy);
249         sumsqy = smartAdd(sumsqy, yi * yi);
250         for (int i = 0; i < x.length; i++) {
251             if (w == 0.0) {
252                 return;
253             }
254             xi = x[i];
255 
256             if (xi == 0.0) {
257                 nextr += nvars - i - 1;
258                 continue;
259             }
260             di = d[i];
261             wxi = w * xi;
262             _w = w;
263             if (di != 0.0) {
264                 dpi = smartAdd(di, wxi * xi);
265                 final double tmp = wxi * xi / di;
266                 if (FastMath.abs(tmp) > Precision.EPSILON) {
267                     w = (di * w) / dpi;
268                 }
269             } else {
270                 dpi = wxi * xi;
271                 w = 0.0;
272             }
273             d[i] = dpi;
274             for (int k = i + 1; k < nvars; k++) {
275                 xk = x[k];
276                 x[k] = smartAdd(xk, -xi * r[nextr]);
277                 if (di != 0.0) {
278                     r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
279                 } else {
280                     r[nextr] = xk / xi;
281                 }
282                 ++nextr;
283             }
284             xk = y;
285             y = smartAdd(xk, -xi * rhs[i]);
286             if (di != 0.0) {
287                 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
288             } else {
289                 rhs[i] = xk / xi;
290             }
291         }
292         sserr = smartAdd(sserr, w * y * y);
293     }
294 
295     /**
296      * Adds two numbers a and b such that contamination due to the numerical
297      * smallness of one addend does not corrupt the sum: a negligible addend is dropped.
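     * <p>For example (illustrative values): with {@code a = 1.0e16} and
     * {@code b = 1.0}, {@code b} falls below the epsilon-scaled magnitude of
     * {@code a}, so {@code a} is returned unchanged.</p>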
298      * @param a an addend
299      * @param b an addend
300      * @return the sum of a and b
301      */
302     private double smartAdd(double a, double b) {
303         final double _a = FastMath.abs(a);
304         final double _b = FastMath.abs(b);
305         if (_a > _b) {
306             final double eps = _a * Precision.EPSILON;
307             if (_b > eps) {
308                 return a + b;
309             }
310             return a;
311         } else {
312             final double eps = _b * Precision.EPSILON;
313             if (_a > eps) {
314                 return a + b;
315             }
316             return b;
317         }
318     }
319 
320     /**
321      * Clears all internal state (sums, decomposition arrays, and flags) and
322      * restores the regressors to canonical order.
323      */
324     @Override
325     public void clear() {
326         Arrays.fill(this.d, 0.0);
327         Arrays.fill(this.rhs, 0.0);
328         Arrays.fill(this.r, 0.0);
329         Arrays.fill(this.tol, 0.0);
330         Arrays.fill(this.rss, 0.0);
331         Arrays.fill(this.work_tolset, 0.0);
332         Arrays.fill(this.work_sing, 0.0);
333         Arrays.fill(this.x_sing, 0.0);
334         Arrays.fill(this.lindep, false);
335         for (int i = 0; i < nvars; i++) {
336             this.vorder[i] = i;
337         }
338         this.nobs = 0;
339         this.sserr = 0.0;
340         this.sumy = 0.0;
341         this.sumsqy = 0.0;
342         this.rss_set = false;
343         this.tol_set = false;
344     }
345 
346     /**
347      * This sets up tolerances for singularity testing.
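     * <p>Restating the loops below, each tolerance is the zero tolerance
     * scaled by the size of the corresponding column of the decomposition:</p>
     * <pre>
     * tol[col] = eps * ( sqrt(d[col]) + sum over row &lt; col of |r[row,col]| * sqrt(d[row]) )
     * </pre>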
348      */
349     private void tolset() {
350         int pos;
351         double total;
352         final double eps = this.epsilon;
353         for (int i = 0; i < nvars; i++) {
354             this.work_tolset[i] = FastMath.sqrt(d[i]);
355         }
356         tol[0] = eps * this.work_tolset[0];
357         for (int col = 1; col < nvars; col++) {
358             pos = col - 1;
359             total = work_tolset[col];
360             for (int row = 0; row < col; row++) {
361                 total += FastMath.abs(r[pos]) * work_tolset[row];
362                 pos += nvars - row - 2;
363             }
364             tol[col] = eps * total;
365         }
366         tol_set = true;
367     }
368 
369     /**
370      * The regcf method conducts the linear regression and extracts the
371      * parameter vector. Notice that the algorithm can do subset regression
372      * with no alteration.
373      *
374      * @param nreq how many of the regressors to include (either in canonical
375      * order, or in the current reordered state)
376      * @return an array with the estimated slope coefficients
377      * @throws MathIllegalArgumentException if {@code nreq} is less than 1
378      * or greater than the number of independent variables
379      */
380     private double[] regcf(int nreq) throws MathIllegalArgumentException {
381         int nextr;
382         if (nreq < 1) {
383             throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
384         }
385         if (nreq > this.nvars) {
386             throw new MathIllegalArgumentException(
387                     LocalizedStatFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
388         }
389         if (!this.tol_set) {
390             tolset();
391         }
392         final double[] ret = new double[nreq];
393         boolean rankProblem = false;
394         for (int i = nreq - 1; i > -1; i--) {
395             if (FastMath.sqrt(d[i]) < tol[i]) {
396                 ret[i] = 0.0;
397                 d[i] = 0.0;
398                 rankProblem = true;
399             } else {
400                 ret[i] = rhs[i];
401                 nextr = i * (nvars + nvars - i - 1) / 2;
402                 for (int j = i + 1; j < nreq; j++) {
403                     ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
404                     ++nextr;
405                 }
406             }
407         }
408         if (rankProblem) {
409             for (int i = 0; i < nreq; i++) {
410                 if (this.lindep[i]) {
411                     ret[i] = Double.NaN;
412                 }
413             }
414         }
415         return ret;
416     }
417 
418     /**
419      * The method which checks for singularities and then eliminates the offending
420      * columns.
421      */
422     private void singcheck() {
423         int pos;
424         for (int i = 0; i < nvars; i++) {
425             work_sing[i] = FastMath.sqrt(d[i]);
426         }
427         for (int col = 0; col < nvars; col++) {
428             // Set elements within R to zero if they are less than tol(col) in
429             // absolute value after being scaled by the square root of their row
430             // multiplier
431             final double temp = tol[col];
432             pos = col - 1;
433             for (int row = 0; row < col - 1; row++) {
434                 if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
435                     r[pos] = 0.0;
436                 }
437                 pos += nvars - row - 2;
438             }
439             // If diagonal element is near zero, set it to zero, set appropriate
440             // element of LINDEP, and use INCLUD to augment the projections in
441             // the lower rows of the orthogonalization.
442             lindep[col] = false;
443             if (work_sing[col] < temp) {
444                 lindep[col] = true;
445                 if (col < nvars - 1) {
446                     Arrays.fill(x_sing, 0.0);
447                     int _pi = col * (nvars + nvars - col - 1) / 2;
448                     for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
449                         x_sing[_xi] = r[_pi];
450                         r[_pi] = 0.0;
451                     }
452                     final double y = rhs[col];
453                     final double weight = d[col];
454                     d[col] = 0.0;
455                     rhs[col] = 0.0;
456                     this.include(x_sing, weight, y);
457                 } else {
458                     sserr += d[col] * rhs[col] * rhs[col];
459                 }
460             }
461         }
462     }
463 
464     /**
465      * Calculates the sum of squared errors for the full regression
466      * and all subsets in the following manner: <pre>
467      * rss[] ={
468      * ResidualSumOfSquares_allNvars,
469      * ResidualSumOfSquares_FirstNvars-1,
470      * ResidualSumOfSquares_FirstNvars-2,
471      * ..., ResidualSumOfSquares_FirstVariable} </pre>
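     * <p>The values are accumulated by the backward recurrence used in the
     * method body:</p>
     * <pre>
     * rss[nvars - 1] = sserr
     * rss[i - 1]     = rss[i] + d[i] * rhs[i]^2
     * </pre>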
472      */
473     private void ss() {
474         double total = sserr;
475         rss[nvars - 1] = sserr;
476         for (int i = nvars - 1; i > 0; i--) {
477             total += d[i] * rhs[i] * rhs[i];
478             rss[i - 1] = total;
479         }
480         rss_set = true;
481     }
482 
483     /**
484      * Calculates the cov matrix assuming only the first nreq variables are
485      * included in the calculation. The returned array contains a symmetric
486      * matrix stored in lower triangular form. The matrix will have
487      * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
488      * cov =
489      * {
490      *  cov_00,
491      *  cov_10, cov_11,
492      *  cov_20, cov_21, cov_22,
493      *  ...
494      * } </pre>
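     * <p>That is, the covariance of regressors i and j (with i &gt;= j) is
     * stored at index i * (i + 1) / 2 + j.</p>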
495      *
496      * @param nreq how many of the regressors to include (either in canonical
497      * order, or in the current reordered state)
498      * @return an array with the variance covariance of the included
499      * regressors in lower triangular form
500      */
501     private double[] cov(int nreq) {
502         if (this.nobs <= nreq) {
503             return null; // NOPMD
504         }
505         double rnk = 0.0;
506         for (int i = 0; i < nreq; i++) {
507             if (!this.lindep[i]) {
508                 rnk += 1.0;
509             }
510         }
511         final double var = rss[nreq - 1] / (nobs - rnk);
512         final double[] rinv = new double[nreq * (nreq - 1) / 2];
513         inverse(rinv, nreq);
514         final double[] covmat = new double[nreq * (nreq + 1) / 2];
515         Arrays.fill(covmat, Double.NaN);
516         int pos2;
517         int pos1;
518         int start = 0;
519         for (int row = 0; row < nreq; row++) {
520             pos2 = start;
521             if (!this.lindep[row]) {
522                 for (int col = row; col < nreq; col++) {
523                     if (!this.lindep[col]) {
524                         pos1 = start + col - row;
525                         double total;
526                         if (row == col) {
527                             total = 1.0 / d[col];
528                         } else {
529                             total = rinv[pos1 - 1] / d[col];
530                         }
531                         for (int k = col + 1; k < nreq; k++) {
532                             if (!this.lindep[k]) {
533                                 total += rinv[pos1] * rinv[pos2] / d[k];
534                             }
535                             ++pos1;
536                             ++pos2;
537                         }
538                         covmat[ (col + 1) * col / 2 + row] = total * var;
539                     } else {
540                         pos2 += nreq - col - 1;
541                     }
542                 }
543             }
544             start += nreq - row - 1;
545         }
546         return covmat;
547     }
548 
549     /**
550      * This internal method calculates the inverse of the upper-triangular portion
551      * of the R matrix.
552      * @param rinv  the storage for the inverse of r
553      * @param nreq how many of the regressors to include (either in canonical
554      * order, or in the current reordered state)
555      */
556     private void inverse(double[] rinv, int nreq) {
557         int pos = nreq * (nreq - 1) / 2 - 1;
558         Arrays.fill(rinv, Double.NaN);
559         for (int row = nreq - 1; row > 0; --row) {
560             if (!this.lindep[row]) {
561                 final int start = (row - 1) * (nvars + nvars - row) / 2;
562                 for (int col = nreq; col > row; --col) {
563                     int pos1 = start;
564                     int pos2 = pos;
565                     double total = 0.0;
566                     for (int k = row; k < col - 1; k++) {
567                         pos2 += nreq - k - 1;
568                         if (!this.lindep[k]) {
569                             total += -r[pos1] * rinv[pos2];
570                         }
571                         ++pos1;
572                     }
573                     rinv[pos] = total - r[pos1];
574                     --pos;
575                 }
576             } else {
577                 pos -= nreq - row;
578             }
579         }
580     }
581 
582     /**
583      * In the original algorithm only the partial correlations of the regressors
584      * are returned to the user. In this implementation, we have <pre>
585      * corr =
586      * {
587      *   corrxx - lower triangular
588      *   corrxy - bottom row of the matrix
589      * }
590      * Replaces subroutines PCORR and COR of:
591      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
592      *
593      * <p>Calculate partial correlations after the variables in rows
594      * 1, 2, ..., IN have been forced into the regression.
595      * If IN = 1, and the first row of R represents a constant in the
596      * model, then the usual simple correlations are returned.</p>
597      *
598      * <p>If IN = 0, the value returned in array CORMAT for the correlation
599      * of variables Xi &amp; Xj is:</p><pre>
600      * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
601      *
602      * <p>On return, array CORMAT contains the upper triangle of the matrix of
603      * partial correlations stored by rows, excluding the 1's on the diagonal.
604      * e.g. if IN = 2, the consecutive elements returned are:
605      * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
606      * Array YCORR stores the partial correlations with the Y-variable
607      * starting with YCORR(IN+1) = partial correlation with the variable in
608      * position (IN+1). </p>
609      *
610      * @param in how many of the regressors to include (either in canonical
611      * order, or in the current reordered state)
612      * @return an array with the partial correlations of the remainder of
613      * regressors with each other and the regressand, in lower triangular form
614      */
615     public double[] getPartialCorrelations(int in) {
616         final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
617         int pos;
618         int pos1;
619         int pos2;
620         final int rms_off = -in;
621         final int wrk_off = -(in + 1);
622         final double[] rms = new double[nvars - in];
623         final double[] work = new double[nvars - in - 1];
624         double sumxx;
625         double sumxy;
626         double sumyy;
627         final int offXX = (nvars - in) * (nvars - in - 1) / 2;
628         if (in < -1 || in >= nvars) {
629             return null; // NOPMD
630         }
631         final int nvm = nvars - 1;
632         final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
633         if (d[in] > 0.0) {
634             rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
635         }
636         for (int col = in + 1; col < nvars; col++) {
637             pos = base_pos + col - 1 - in;
638             sumxx = d[col];
639             for (int row = in; row < col; row++) {
640                 sumxx += d[row] * r[pos] * r[pos];
641                 pos += nvars - row - 2;
642             }
643             if (sumxx > 0.0) {
644                 rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
645             } else {
646                 rms[col + rms_off] = 0.0;
647             }
648         }
649         sumyy = sserr;
650         for (int row = in; row < nvars; row++) {
651             sumyy += d[row] * rhs[row] * rhs[row];
652         }
653         if (sumyy > 0.0) {
654             sumyy = 1.0 / FastMath.sqrt(sumyy);
655         }
656         pos = 0;
657         for (int col1 = in; col1 < nvars; col1++) {
658             sumxy = 0.0;
659             Arrays.fill(work, 0.0);
660             pos1 = base_pos + col1 - in - 1;
661             for (int row = in; row < col1; row++) {
662                 pos2 = pos1 + 1;
663                 for (int col2 = col1 + 1; col2 < nvars; col2++) {
664                     work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
665                     pos2++;
666                 }
667                 sumxy += d[row] * r[pos1] * rhs[row];
668                 pos1 += nvars - row - 2;
669             }
670             pos2 = pos1 + 1;
671             for (int col2 = col1 + 1; col2 < nvars; col2++) {
672                 work[col2 + wrk_off] += d[col1] * r[pos2];
673                 ++pos2;
674                 output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
675                         work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
676                 ++pos;
677             }
678             sumxy += d[col1] * rhs[col1];
679             output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
680         }
681 
682         return output;
683     }
684 
685     /**
686      * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
687      * Move variable from position FROM to position TO in an
688      * orthogonal reduction produced by AS75.1.
689      *
690      * @param from initial position
691      * @param to destination
692      */
693     private void vmove(int from, int to) {
694         double d1;
695         double d2;
696         double X;
697         double d1new;
698         double d2new;
699         double cbar;
700         double sbar;
701         double Y;
702         int first;
703         int inc;
704         int m1;
705         int m2;
706         int mp1;
707         int pos;
708         boolean bSkipTo40 = false;
709         if (from == to) {
710             return;
711         }
712         if (!this.rss_set) {
713             ss();
714         }
715         final int count;
716         if (from < to) {
717             first = from;
718             inc = 1;
719             count = to - from;
720         } else {
721             first = from - 1;
722             inc = -1;
723             count = from - to;
724         }
725 
726         int m = first;
727         int idx = 0;
728         while (idx < count) {
729             m1 = m * (nvars + nvars - m - 1) / 2;
730             m2 = m1 + nvars - m - 1;
731             mp1 = m + 1;
732 
733             d1 = d[m];
734             d2 = d[mp1];
735             // Special cases.
736             if (d1 > this.epsilon || d2 > this.epsilon) {
737                 X = r[m1];
738                 if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
739                     X = 0.0;
740                 }
741                 if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
742                     d[m] = d2;
743                     d[mp1] = d1;
744                     r[m1] = 0.0;
745                     for (int col = m + 2; col < nvars; col++) {
746                         ++m1;
747                         X = r[m1];
748                         r[m1] = r[m2];
749                         r[m2] = X;
750                         ++m2;
751                     }
752                     X = rhs[m];
753                     rhs[m] = rhs[mp1];
754                     rhs[mp1] = X;
755                     bSkipTo40 = true;
756                     //break;
757                 } else if (d2 < this.epsilon) {
758                     d[m] = d1 * X * X;
759                     r[m1] = 1.0 / X;
760                     for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
761                         r[_i] /= X;
762                     }
763                     rhs[m] /= X;
764                     bSkipTo40 = true;
765                     //break;
766                 }
767                 if (!bSkipTo40) {
768                     d1new = d2 + d1 * X * X;
769                     cbar = d2 / d1new;
770                     sbar = X * d1 / d1new;
771                     d2new = d1 * cbar;
772                     d[m] = d1new;
773                     d[mp1] = d2new;
774                     r[m1] = sbar;
775                     for (int col = m + 2; col < nvars; col++) {
776                         ++m1;
777                         Y = r[m1];
778                         r[m1] = cbar * r[m2] + sbar * Y;
779                         r[m2] = Y - X * r[m2];
780                         ++m2;
781                     }
782                     Y = rhs[m];
783                     rhs[m] = cbar * rhs[mp1] + sbar * Y;
784                     rhs[mp1] = Y - X * rhs[mp1];
785                 }
786             }
787             if (m > 0) {
788                 pos = m;
789                 for (int row = 0; row < m; row++) {
790                     X = r[pos];
791                     r[pos] = r[pos - 1];
792                     r[pos - 1] = X;
793                     pos += nvars - row - 2;
794                 }
795             }
796             // Adjust variable order (VORDER), the tolerances (TOL) and
797             // the vector of residual sums of squares (RSS).
798             m1 = vorder[m];
799             vorder[m] = vorder[mp1];
800             vorder[mp1] = m1;
801             X = tol[m];
802             tol[m] = tol[mp1];
803             tol[mp1] = X;
804             rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
805 
806             m += inc;
807             ++idx;
808         }
809     }
810 
811     /**
812      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
813      *
814      * <p> Re-order the variables in an orthogonal reduction produced by
815      * AS75.1 so that the N variables in LIST start at position POS1,
816      * though will not necessarily be in the same order as in LIST.
817      * Any variables in VORDER before position POS1 are not moved.
818      * Auxiliary routine called: VMOVE. </p>
819      *
820      * <p>This internal method reorders the regressors.</p>
821      *
822      * @param list the regressors to move
823      * @param pos1 where the list will be placed
824      * @return -1 error, 0 everything ok
825      */
826     private int reorderRegressors(int[] list, int pos1) {
827         int next;
828         int i;
829         int l;
830         if (list.length < 1 || list.length > nvars + 1 - pos1) {
831             return -1;
832         }
833         next = pos1;
834         i = pos1;
835         while (i < nvars) {
836             l = vorder[i];
837             for (int j = 0; j < list.length; j++) {
838                 if (l == list[j] && i > next) {
839                     this.vmove(i, next);
840                     ++next;
841                     if (next >= list.length + pos1) {
842                         return 0;
843                     } else {
844                         break;
845                     }
846                 }
847             }
848             ++i;
849         }
850         return 0;
851     }
852 
853     /**
854      * Gets the diagonal of the hat matrix, also known as the leverage matrix.
855      *
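     * <p>Sketch of what the loop below computes: for an observation row x,
     * the leverage is</p>
     * <pre>
     * h = sum over non-singular columns of w[col]^2 / d[col],  where R'w = x
     * </pre>
     * <p>with R the unit upper-triangular factor of the decomposition.</p>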
856      * @param  row_data the regressor values for one observation (the constant, if any, is added internally)
857      * @return the corresponding diagonal element of the hat matrix
858      */
859     public double getDiagonalOfHatMatrix(double[] row_data) {
860         double[] wk = new double[this.nvars];
861         int pos;
862         double total;
863 
864         if (row_data.length > nvars) {
865             return Double.NaN;
866         }
867         double[] xrow;
868         if (this.hasIntercept) {
869             xrow = new double[row_data.length + 1];
870             xrow[0] = 1.0;
871             System.arraycopy(row_data, 0, xrow, 1, row_data.length);
872         } else {
873             xrow = row_data;
874         }
875         double hii = 0.0;
876         for (int col = 0; col < xrow.length; col++) {
877             if (FastMath.sqrt(d[col]) < tol[col]) {
878                 wk[col] = 0.0;
879             } else {
880                 pos = col - 1;
881                 total = xrow[col];
882                 for (int row = 0; row < col; row++) {
883                     total = smartAdd(total, -wk[row] * r[pos]);
884                     pos += nvars - row - 2;
885                 }
886                 wk[col] = total;
887                 hii = smartAdd(hii, (total * total) / d[col]);
888             }
889         }
890         return hii;
891     }
892 
893     /**
894      * Gets the order of the regressors, useful if some type of reordering
895      * has been performed. Calling {@link #regress(int[])} may trigger
896      * a reordering.
897      *
898      * @return int[] with the current order of the regressors
899      */
900     public int[] getOrderOfRegressors(){
901         return vorder.clone();
902     }
903 
904     /**
905      * Conducts a regression on the data in the model, using all regressors.
906      *
907      * @return RegressionResults the structure holding all regression results
908      * @exception  MathIllegalArgumentException thrown if the number of observations is
909      * not greater than the number of variables
910      */
911     @Override
912     public RegressionResults regress() throws MathIllegalArgumentException {
913         return regress(this.nvars);
914     }
915 
916     /**
917      * Conducts a regression on the data in the model, using a subset of regressors.
918      *
919      * @param numberOfRegressors how many of the regressors to include (either in canonical
920      * order, or in the current reordered state)
921      * @return RegressionResults the structure holding all regression results
922      * @exception  MathIllegalArgumentException thrown if the number of observations is
923      * not greater than the number of regressors requested, or if the number of
924      * regressors requested is greater than the number of regressors in the model
925      */
926     public RegressionResults regress(int numberOfRegressors) throws MathIllegalArgumentException {
927         if (this.nobs <= numberOfRegressors) {
928            throw new MathIllegalArgumentException(
929                    LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
930                    this.nobs, numberOfRegressors);
931         }
932         if (numberOfRegressors > this.nvars) {
933             throw new MathIllegalArgumentException(
934                     LocalizedStatFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
935         }
936 
937         tolset();
938         singcheck();
939 
940         double[] beta = this.regcf(numberOfRegressors);
941 
942         ss();
943 
944         double[] cov = this.cov(numberOfRegressors);
945 
946         int rnk = 0;
947         for (int i = 0; i < this.lindep.length; i++) {
948             if (!this.lindep[i]) {
949                 ++rnk;
950             }
951         }
952 
953         boolean needsReorder = false;
954         for (int i = 0; i < numberOfRegressors; i++) {
955             if (this.vorder[i] != i) {
956                 needsReorder = true;
957                 break;
958             }
959         }
960         if (!needsReorder) {
961             return new RegressionResults(
962                     beta, new double[][]{cov}, true, this.nobs, rnk,
963                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
964         } else {
965             double[] betaNew = new double[beta.length];
966             double[] covNew = new double[cov.length];
967 
968             int[] newIndices = new int[beta.length];
969             for (int i = 0; i < nvars; i++) {
970                 for (int j = 0; j < numberOfRegressors; j++) {
971                     if (this.vorder[j] == i) {
972                         betaNew[i] = beta[ j];
973                         newIndices[i] = j;
974                     }
975                 }
976             }
977 
978             int idx1 = 0;
979             int idx2;
980             int _i;
981             int _j;
982             for (int i = 0; i < beta.length; i++) {
983                 _i = newIndices[i];
984                 for (int j = 0; j <= i; j++, idx1++) {
985                     _j = newIndices[j];
986                     if (_i > _j) {
987                         idx2 = _i * (_i + 1) / 2 + _j;
988                     } else {
989                         idx2 = _j * (_j + 1) / 2 + _i;
990                     }
991                     covNew[idx1] = cov[idx2];
992                 }
993             }
994             return new RegressionResults(
995                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
996                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
997         }
998     }
999 
1000     /**
1001      * Conducts a regression on the data in the model, using the regressors in the array.
1002      * Calling this method will change the internal order of the regressors
1003      * and care is required in interpreting the hat matrix.
1004      *
1005      * @param  variablesToInclude array of variables to include in regression
1006      * @return RegressionResults the structure holding all regression results
1007      * @exception  MathIllegalArgumentException thrown if the number of observations is
1008      * not greater than the number of variables, if the number of regressors requested
1009      * is greater than the number of regressors in the model, or if a regressor index in the
1010      * regressor array does not exist
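     * <p>A minimal usage sketch (indices are illustrative; when an intercept is
     * included, the constant occupies index 0):</p>
     * <pre>{@code
     * RegressionResults subset = regression.regress(new int[] {0, 2});
     * }</pre>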
1011      */
1012     @Override
1013     public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException {
1014         if (variablesToInclude.length > this.nvars) {
1015             throw new MathIllegalArgumentException(
1016                     LocalizedStatFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1017         }
1018         if (this.nobs <= this.nvars) {
1019             throw new MathIllegalArgumentException(
1020                     LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1021                     this.nobs, this.nvars);
1022         }
1023         Arrays.sort(variablesToInclude);
1024         int iExclude = 0;
1025         for (int i = 0; i < variablesToInclude.length; i++) {
1026             if (variablesToInclude[i] >= this.nvars) {
1027                 throw new MathIllegalArgumentException(
1028                         LocalizedCoreFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
1029             }
1030             if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1031                 variablesToInclude[i] = -1;
1032                 ++iExclude;
1033             }
1034         }
1035         int[] series;
1036         if (iExclude > 0) {
1037             int j = 0;
1038             series = new int[variablesToInclude.length - iExclude];
1039             for (int i = 0; i < variablesToInclude.length; i++) {
1040                 if (variablesToInclude[i] > -1) {
1041                     series[j] = variablesToInclude[i];
1042                     ++j;
1043                 }
1044             }
1045         } else {
1046             series = variablesToInclude;
1047         }
1048 
1049         reorderRegressors(series, 0);
1050         tolset();
1051         singcheck();
1052 
1053         double[] beta = this.regcf(series.length);
1054 
1055         ss();
1056 
1057         double[] cov = this.cov(series.length);
1058 
1059         int rnk = 0;
1060         for (int i = 0; i < this.lindep.length; i++) {
1061             if (!this.lindep[i]) {
1062                 ++rnk;
1063             }
1064         }
1065 
1066         boolean needsReorder = false;
1067         for (int i = 0; i < series.length; i++) {
1068             if (this.vorder[i] != series[i]) {
1069                 needsReorder = true;
1070                 break;
1071             }
1072         }
1073         if (!needsReorder) {
1074             return new RegressionResults(
1075                     beta, new double[][]{cov}, true, this.nobs, rnk,
1076                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1077         } else {
1078             double[] betaNew = new double[beta.length];
1079             int[] newIndices = new int[beta.length];
1080             for (int i = 0; i < series.length; i++) {
1081                 for (int j = 0; j < this.vorder.length; j++) {
1082                     if (this.vorder[j] == series[i]) {
1083                         betaNew[i] = beta[ j];
1084                         newIndices[i] = j;
1085                     }
1086                 }
1087             }
1088             double[] covNew = new double[cov.length];
1089             int idx1 = 0;
1090             int idx2;
1091             int _i;
1092             int _j;
1093             for (int i = 0; i < beta.length; i++) {
1094                 _i = newIndices[i];
1095                 for (int j = 0; j <= i; j++, idx1++) {
1096                     _j = newIndices[j];
1097                     if (_i > _j) {
1098                         idx2 = _i * (_i + 1) / 2 + _j;
1099                     } else {
1100                         idx2 = _j * (_j + 1) / 2 + _i;
1101                     }
1102                     covNew[idx1] = cov[idx2];
1103                 }
1104             }
1105             return new RegressionResults(
1106                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1107                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1108         }
1109     }
1110 }