1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.regression;
23
24 import org.hipparchus.exception.LocalizedCoreFormats;
25 import org.hipparchus.exception.MathIllegalArgumentException;
26 import org.hipparchus.exception.NullArgumentException;
27 import org.hipparchus.linear.Array2DRowRealMatrix;
28 import org.hipparchus.linear.ArrayRealVector;
29 import org.hipparchus.linear.RealMatrix;
30 import org.hipparchus.linear.RealVector;
31 import org.hipparchus.stat.LocalizedStatFormats;
32 import org.hipparchus.stat.descriptive.moment.Variance;
33 import org.hipparchus.util.FastMath;
34 import org.hipparchus.util.MathUtils;
35
36 /**
37 * Abstract base class for implementations of MultipleLinearRegression.
38 */
39 public abstract class AbstractMultipleLinearRegression implements
40 MultipleLinearRegression {
41
42 /** X sample data. */
43 private RealMatrix xMatrix;
44
45 /** Y sample data. */
46 private RealVector yVector;
47
48 /** Whether or not the regression model includes an intercept. True means no intercept. */
49 private boolean noIntercept;
50
51 /** Empty constructor.
52 * <p>
53 * This constructor is not strictly necessary, but it prevents spurious
54 * javadoc warnings with JDK 18 and later.
55 * </p>
56 * @since 3.0
57 */
58 protected AbstractMultipleLinearRegression() { // NOPMD - unnecessary constructor added intentionally to make javadoc happy
59 // nothing to do
60 }
61
62 /** Get the X sample data.
63 * @return the X sample data.
64 */
65 protected RealMatrix getX() {
66 return xMatrix;
67 }
68
69 /** Get the Y sample data.
70 * @return the Y sample data.
71 */
72 protected RealVector getY() {
73 return yVector;
74 }
75
76 /** Chekc if the model has no intercept term.
77 * @return true if the model has no intercept term; false otherwise
78 */
79 public boolean isNoIntercept() {
80 return noIntercept;
81 }
82
83 /** Set intercept flag.
84 * @param noIntercept true means the model is to be estimated without an intercept term
85 */
86 public void setNoIntercept(boolean noIntercept) {
87 this.noIntercept = noIntercept;
88 }
89
90 /**
91 * <p>Loads model x and y sample data from a flat input array, overriding any previous sample.
92 * </p>
93 * <p>Assumes that rows are concatenated with y values first in each row. For example, an input
94 * <code>data</code> array containing the sequence of values (1, 2, 3, 4, 5, 6, 7, 8, 9) with
95 * <code>nobs = 3</code> and <code>nvars = 2</code> creates a regression dataset with two
96 * independent variables, as below:
97 * </p>
98 * <pre>
99 * y x[0] x[1]
100 * --------------
101 * 1 2 3
102 * 4 5 6
103 * 7 8 9
104 * </pre>
105 * <p>Note that there is no need to add an initial unitary column (column of 1's) when
106 * specifying a model including an intercept term. If {@link #isNoIntercept()} is <code>true</code>,
107 * the X matrix will be created without an initial column of "1"s; otherwise this column will
108 * be added.
109 * </p>
110 * <p>Throws IllegalArgumentException if any of the following preconditions fail:</p>
111 * <ul><li><code>data</code> cannot be null</li>
112 * <li><code>data.length = nobs * (nvars + 1)</code></li>
113 * <li><code>nobs > nvars</code></li></ul>
114 *
115 * @param data input data array
116 * @param nobs number of observations (rows)
117 * @param nvars number of independent variables (columns, not counting y)
118 * @throws NullArgumentException if the data array is null
119 * @throws MathIllegalArgumentException if the length of the data array is not equal
120 * to <code>nobs * (nvars + 1)</code>
121 * @throws MathIllegalArgumentException if <code>nobs</code> is less than
122 * <code>nvars + 1</code>
123 */
124 public void newSampleData(double[] data, int nobs, int nvars) {
125 MathUtils.checkNotNull(data, LocalizedCoreFormats.INPUT_ARRAY);
126 MathUtils.checkDimension(data.length, nobs * (nvars + 1));
127 if (nobs <= nvars) {
128 throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE,
129 nobs, nvars + 1);
130 }
131 double[] y = new double[nobs];
132 final int cols = noIntercept ? nvars: nvars + 1;
133 double[][] x = new double[nobs][cols];
134 int pointer = 0;
135 for (int i = 0; i < nobs; i++) {
136 y[i] = data[pointer++];
137 if (!noIntercept) {
138 x[i][0] = 1.0d;
139 }
140 for (int j = noIntercept ? 0 : 1; j < cols; j++) {
141 x[i][j] = data[pointer++];
142 }
143 }
144 this.xMatrix = new Array2DRowRealMatrix(x);
145 this.yVector = new ArrayRealVector(y);
146 }
147
148 /**
149 * Loads new y sample data, overriding any previous data.
150 *
151 * @param y the array representing the y sample
152 * @throws NullArgumentException if y is null
153 * @throws MathIllegalArgumentException if y is empty
154 */
155 protected void newYSampleData(double[] y) {
156 if (y == null) {
157 throw new NullArgumentException();
158 }
159 if (y.length == 0) {
160 throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
161 }
162 this.yVector = new ArrayRealVector(y);
163 }
164
165 /**
166 * <p>Loads new x sample data, overriding any previous data.
167 * </p>
168 * <p>
169 * The input <code>x</code> array should have one row for each sample
170 * observation, with columns corresponding to independent variables.
171 * For example, if
172 * </p>
173 * <pre>
174 * <code> x = new double[][] {{1, 2}, {3, 4}, {5, 6}} </code></pre>
175 * <p>
176 * then <code>setXSampleData(x) </code> results in a model with two independent
177 * variables and 3 observations:
178 * </p>
179 * <pre>
180 * x[0] x[1]
181 * ----------
182 * 1 2
183 * 3 4
184 * 5 6
185 * </pre>
186 * <p>Note that there is no need to add an initial unitary column (column of 1's) when
187 * specifying a model including an intercept term.
188 * </p>
189 * @param x the rectangular array representing the x sample
190 * @throws NullArgumentException if x is null
191 * @throws MathIllegalArgumentException if x is empty
192 * @throws MathIllegalArgumentException if x is not rectangular
193 */
194 protected void newXSampleData(double[][] x) {
195 if (x == null) {
196 throw new NullArgumentException();
197 }
198 if (x.length == 0) {
199 throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
200 }
201 if (noIntercept) {
202 this.xMatrix = new Array2DRowRealMatrix(x, true);
203 } else { // Augment design matrix with initial unitary column
204 final int nVars = x[0].length;
205 final double[][] xAug = new double[x.length][nVars + 1];
206 for (int i = 0; i < x.length; i++) {
207 MathUtils.checkDimension(x[i].length, nVars);
208 xAug[i][0] = 1.0d;
209 System.arraycopy(x[i], 0, xAug[i], 1, nVars);
210 }
211 this.xMatrix = new Array2DRowRealMatrix(xAug, false);
212 }
213 }
214
215 /**
216 * Validates sample data.
217 * <p>Checks that</p>
218 * <ul><li>Neither x nor y is null or empty;</li>
219 * <li>The length (i.e. number of rows) of x equals the length of y</li>
220 * <li>x has at least one more row than it has columns (i.e. there is
221 * sufficient data to estimate regression coefficients for each of the
222 * columns in x plus an intercept.</li>
223 * </ul>
224 *
225 * @param x the [n,k] array representing the x data
226 * @param y the [n,1] array representing the y data
227 * @throws NullArgumentException if {@code x} or {@code y} is null
228 * @throws MathIllegalArgumentException if {@code x} and {@code y} do not
229 * have the same length
230 * @throws MathIllegalArgumentException if {@code x} or {@code y} are zero-length
231 * @throws MathIllegalArgumentException if the number of rows of {@code x}
232 * is not larger than the number of columns + 1 if the model has an intercept;
233 * or the number of columns if there is no intercept term
234 */
235 protected void validateSampleData(double[][] x, double[] y) throws MathIllegalArgumentException {
236 if ((x == null) || (y == null)) {
237 throw new NullArgumentException();
238 }
239 MathUtils.checkDimension(x.length, y.length);
240 if (x.length == 0) { // Must be no y data either
241 throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
242 }
243 if (x[0].length + (noIntercept ? 0 : 1) > x.length) {
244 throw new MathIllegalArgumentException(
245 LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
246 x.length, x[0].length);
247 }
248 }
249
250 /**
251 * Validates that the x data and covariance matrix have the same
252 * number of rows and that the covariance matrix is square.
253 *
254 * @param x the [n,k] array representing the x sample
255 * @param covariance the [n,n] array representing the covariance matrix
256 * @throws MathIllegalArgumentException if the number of rows in x is not equal
257 * to the number of rows in covariance
258 * @throws MathIllegalArgumentException if the covariance matrix is not square
259 */
260 protected void validateCovarianceData(double[][] x, double[][] covariance) {
261 MathUtils.checkDimension(x.length, covariance.length);
262 if (covariance.length > 0 && covariance.length != covariance[0].length) {
263 throw new MathIllegalArgumentException(LocalizedCoreFormats.NON_SQUARE_MATRIX,
264 covariance.length, covariance[0].length);
265 }
266 }
267
268 /**
269 * {@inheritDoc}
270 */
271 @Override
272 public double[] estimateRegressionParameters() {
273 RealVector b = calculateBeta();
274 return b.toArray();
275 }
276
277 /**
278 * {@inheritDoc}
279 */
280 @Override
281 public double[] estimateResiduals() {
282 RealVector b = calculateBeta();
283 RealVector e = yVector.subtract(xMatrix.operate(b));
284 return e.toArray();
285 }
286
287 /**
288 * {@inheritDoc}
289 */
290 @Override
291 public double[][] estimateRegressionParametersVariance() {
292 return calculateBetaVariance().getData();
293 }
294
295 /**
296 * {@inheritDoc}
297 */
298 @Override
299 public double[] estimateRegressionParametersStandardErrors() {
300 double[][] betaVariance = estimateRegressionParametersVariance();
301 double sigma = calculateErrorVariance();
302 int length = betaVariance[0].length;
303 double[] result = new double[length];
304 for (int i = 0; i < length; i++) {
305 result[i] = FastMath.sqrt(sigma * betaVariance[i][i]);
306 }
307 return result;
308 }
309
310 /**
311 * {@inheritDoc}
312 */
313 @Override
314 public double estimateRegressandVariance() {
315 return calculateYVariance();
316 }
317
318 /**
319 * Estimates the variance of the error.
320 *
321 * @return estimate of the error variance
322 */
323 public double estimateErrorVariance() {
324 return calculateErrorVariance();
325
326 }
327
328 /**
329 * Estimates the standard error of the regression.
330 *
331 * @return regression standard error
332 */
333 public double estimateRegressionStandardError() {
334 return FastMath.sqrt(estimateErrorVariance());
335 }
336
337 /**
338 * Calculates the beta of multiple linear regression in matrix notation.
339 *
340 * @return beta
341 */
342 protected abstract RealVector calculateBeta();
343
344 /**
345 * Calculates the beta variance of multiple linear regression in matrix
346 * notation.
347 *
348 * @return beta variance
349 */
350 protected abstract RealMatrix calculateBetaVariance();
351
352
353 /**
354 * Calculates the variance of the y values.
355 *
356 * @return Y variance
357 */
358 protected double calculateYVariance() {
359 return new Variance().evaluate(yVector.toArray());
360 }
361
362 /**
363 * <p>Calculates the variance of the error term.</p>
364 * Uses the formula <pre>
365 * var(u) = u · u / (n - k)
366 * </pre>
367 * where n and k are the row and column dimensions of the design
368 * matrix X.
369 *
370 * @return error variance estimate
371 */
372 protected double calculateErrorVariance() {
373 RealVector residuals = calculateResiduals();
374 return residuals.dotProduct(residuals) /
375 (xMatrix.getRowDimension() - xMatrix.getColumnDimension());
376 }
377
378 /**
379 * Calculates the residuals of multiple linear regression in matrix
380 * notation.
381 *
382 * <pre>
383 * u = y - X * b
384 * </pre>
385 *
386 * @return The residuals [n,1] matrix
387 */
388 protected RealVector calculateResiduals() {
389 RealVector b = calculateBeta();
390 return yVector.subtract(xMatrix.operate(b));
391 }
392
393 }