/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is not the original file distributed by the Apache Software Foundation
 * It has been modified by the Hipparchus project
 */
package org.hipparchus.stat.regression;

import java.util.Arrays;

import org.hipparchus.exception.LocalizedCoreFormats;
import org.hipparchus.exception.MathIllegalArgumentException;
import org.hipparchus.stat.LocalizedStatFormats;
import org.hipparchus.util.FastMath;
import org.hipparchus.util.MathUtils;
import org.hipparchus.util.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in:</p>
 * <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: <a href="https://www.jstor.org/stable/2347583">Algorithm AS 274:
 * Least Squares Routines to Supplement Those of Gentleman</a> </pre>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
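 * <p>A minimal usage sketch with made-up data: two regressors plus an
 * automatically added constant, fitted once more observations than variables
 * have been added (the data values below are purely illustrative):</p>
 * <pre>{@code
 * MillerUpdatingRegression reg = new MillerUpdatingRegression(2, true);
 * reg.addObservation(new double[] { 1.0, 2.0 },  3.0);
 * reg.addObservation(new double[] { 2.0, 5.0 },  8.0);
 * reg.addObservation(new double[] { 3.0, 7.0 }, 12.0);
 * reg.addObservation(new double[] { 4.0, 9.0 }, 15.0);
 * RegressionResults results = reg.regress();
 * }</pre>
 *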
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of R'y */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix */
    private final double[] r;
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs;
    /** sum of squared errors of largest regression */
    private double sserr;
    /** have the residual sums of squares been computed? */
    private boolean rss_set;
    /** has the tolerance setting method been called? */
    private boolean tol_set;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy;
    /** summation of squared Y values */
    private double sumsqy;
    /** whether a regression constant is added automatically */
    private final boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;

    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws MathIllegalArgumentException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
        throws MathIllegalArgumentException {
        if (numberOfVariables < 1) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws MathIllegalArgumentException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
        throws MathIllegalArgumentException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression has an intercept, false otherwise
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of dependent variable given these regressors
     * @exception MathIllegalArgumentException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    @Override
    public void addObservation(final double[] x, final double y)
        throws MathIllegalArgumentException {

        if ((!this.hasIntercept && x.length != nvars) ||
            (this.hasIntercept && x.length + 1 != nvars)) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION,
                                                   x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(x.clone(), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;

    }

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws MathIllegalArgumentException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    @Override
    public void addObservations(double[][] x, double[] y) throws MathIllegalArgumentException {
        MathUtils.checkNotNull(x, LocalizedCoreFormats.INPUT_ARRAY);
        MathUtils.checkNotNull(y, LocalizedCoreFormats.INPUT_ARRAY);
        MathUtils.checkDimension(x.length, y.length);
        if (x.length == 0) {  // Must be no y data either
            throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new MathIllegalArgumentException(
                  LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                  x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This method forms
     * all intermediate data which will be used for all derivative measures.
     * Note that in the original implementation described in the Miller paper the
     * x vector is overwritten; in this implementation the include method is passed
     * a copy of the original data vector, so the caller's data are never contaminated.
     * Additionally, this method differs slightly from Gentleman's method in that it
     * assumes dense design matrices; the original Gentleman algorithm retains some
     * advantage for sparse matrices.
     *
     * @param x observations on the regressors
     * @param wi weight of this observation (typically +1; a weight of -1 removes
     * a previously added observation)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double _w;
        this.rss_set = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
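        // Each pass of this loop applies one step of Gentleman's square-root-free
        // Givens rotation: d[] accumulates the diagonal scaling, r[] the
        // unit-upper-triangular factor, and w carries the residual weight forward.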
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            _w = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (FastMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers a and b such that contamination due to the numerical
     * smallness of one addend does not corrupt the sum. If one addend is
     * negligible relative to the other (smaller in magnitude than the other
     * times {@link Precision#EPSILON}), the larger addend is returned
     * unchanged; for example, {@code smartAdd(1.0, 1.0e-30)} returns
     * {@code 1.0}.
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double _a = FastMath.abs(a);
        final double _b = FastMath.abs(b);
        if (_a > _b) {
            final double eps = _a * Precision.EPSILON;
            if (_b > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = _b * Precision.EPSILON;
            if (_a > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and reorders everything in the
     * canonical order.
     */
    @Override
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.work_tolset, 0.0);
        Arrays.fill(this.work_sing, 0.0);
        Arrays.fill(this.x_sing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rss_set = false;
        this.tol_set = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
        }
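        // The tolerance for each column is eps times an estimate of the column's
        // scale: the square root of its diagonal element plus the scaled
        // magnitudes of its off-diagonal entries in R.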
        tol[0] = eps * this.work_tolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = work_tolset[col];
            for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tol_set = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws MathIllegalArgumentException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws MathIllegalArgumentException {
        int nextr;
        if (nreq < 1) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tol_set) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
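        // Solve R * beta = rhs by back-substitution through the unit upper
        // triangular factor, zeroing out columns flagged as near-singular.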
        for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (work_sing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(x_sing, 0.0);
                    int _pi = col * (nvars + nvars - col - 1) / 2;
                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
                        x_sing[_xi] = r[_pi];
                        r[_pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(x_sing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] ={
     * ResidualSumOfSquares_allNvars,
     * ResidualSumOfSquares_FirstNvars-1,
     * ResidualSumOfSquares_FirstNvars-2,
     * ..., ResidualSumOfSquares_FirstVariable} </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rss_set = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
     * Element (i, j) with {@code i >= j} is therefore stored at
     * index {@code i * (i + 1) / 2 + j}.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null; // NOPMD
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
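        // With X'X factored as R'DR (R unit upper triangular, D diagonal), the
        // covariance of the estimates is var * R^-1 * D^-1 * (R^-1)', which is
        // accumulated element by element below.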
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        double total;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        Arrays.fill(rinv, Double.NaN);
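        // Work backwards from the last row: since R is unit upper triangular,
        // each off-diagonal element of R^-1 follows from a short back-substitution
        // over the rows already inverted.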
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    int pos1 = start;
                    int pos2 = pos;
                    double total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi and Xj is:</p><pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        if (in < 0 || in >= nvars) {
            return null; // NOPMD
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double X;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double Y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        if (from == to) {
            return;
        }
        if (!this.rss_set) {
            ss();
        }
        final int count;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            // Reset the skip flag for each adjacent swap; it mirrors the
            // per-iteration GOTO 40 of the Fortran original and must not
            // leak from one swap to the next.
            boolean bSkipTo40 = false;
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                X = r[m1];
                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
                    X = 0.0;
                }
                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        X = r[m1];
                        r[m1] = r[m2];
                        r[m2] = X;
                        ++m2;
                    }
                    X = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = X;
                    bSkipTo40 = true;
                    //break;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * X * X;
                    r[m1] = 1.0 / X;
                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
                        r[_i] /= X;
                    }
                    rhs[m] /= X;
                    bSkipTo40 = true;
                    //break;
                }
                if (!bSkipTo40) {
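                    // General case: apply Gentleman's square-root-free planar
                    // rotation to exchange the adjacent variables m and m+1.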
                    d1new = d2 + d1 * X * X;
                    cbar = d2 / d1new;
                    sbar = X * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        Y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * Y;
                        r[m2] = Y - X * r[m2];
                        ++m2;
                    }
                    Y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
                    rhs[mp1] = Y - X * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    X = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = X;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            X = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = X;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though they will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 on error, 0 otherwise
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
     * @param row_data the regressor values of a single observation (excluding
     * the constant, if the model includes an intercept)
     * @return the diagonal element of the hat matrix for this observation
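     *
     * <p>For illustration, assuming {@code reg} is the fitted two-regressor
     * model sketched in the class documentation:</p>
     * <pre>{@code
     * double hii = reg.getDiagonalOfHatMatrix(new double[] { 1.0, 2.0 });
     * }</pre>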
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has occurred. Calling {@code regress(int[])} will trigger
     * a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return vorder.clone();
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception MathIllegalArgumentException thrown if the number of observations is
     * less than the number of variables
     */
    @Override
    public RegressionResults regress() throws MathIllegalArgumentException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception MathIllegalArgumentException thrown if the number of observations is
     * less than the number of variables or the number of regressors requested
     * is greater than the number of regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws MathIllegalArgumentException {
        if (this.nobs <= numberOfRegressors) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][] {cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][] {covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in the
     * given array. Calling this method will change the internal order of the
     * regressors, so care is required in interpreting the hat matrix.
     *
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception MathIllegalArgumentException thrown if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the number of regressors in the model, or a regressor index in
     * the regressor array does not exist
     */
    @Override
    public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException {
        if (variablesToInclude.length > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new MathIllegalArgumentException(
                        LocalizedCoreFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][] {cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][] {covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}