View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import java.util.Random;
25  
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.random.ISAACRandom;
28  import org.hipparchus.stat.LocalizedStatFormats;
29  import org.hipparchus.util.FastMath;
30  import org.junit.Assert;
31  import org.junit.Test;
32  
33  
34  /**
35   * Test cases for the SimpleRegression class.
36   *
37   */
38  
39  public final class SimpleRegressionTest {
40  
41      /*
42       * NIST "Norris" refernce data set from
43       * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
44       * Strangely, order is {y,x}
45       */
46      private double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 },
47              {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 },
48              {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 },
49              {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 },
50              {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 },
51              {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 },
52              {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 },
53              {449.2, 448.9 }, {0.2, 0.5 }
54      };
55  
56      /*
57       * Correlation example from
58       * http://www.xycoon.com/correlation.htm
59       */
60      private double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 },
61              {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 },
62              {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 },
63              {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
64      };
65  
66      /*
67       * From Moore and Mcabe, "Introduction to the Practice of Statistics"
68       * Example 10.3
69       */
70      private double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
71              {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
72      };
73  
74      /*
75       * Points to remove in the remove tests
76       */
77      private double[][] removeSingle = {infData[1]};
78      private double[][] removeMultiple = { infData[1], infData[2] };
79      private double removeX = infData[0][0];
80      private double removeY = infData[0][1];
81  
82  
83      /*
84       * Data with bad linear fit
85       */
86      private double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
87              {5, -1 }, {6, 12 }
88      };
89  
90  
91      /*
92       * Data from NIST NOINT1
93       */
94      private double[][] noint1 = {
95          {130.0,60.0},
96          {131.0,61.0},
97          {132.0,62.0},
98          {133.0,63.0},
99          {134.0,64.0},
100         {135.0,65.0},
101         {136.0,66.0},
102         {137.0,67.0},
103         {138.0,68.0},
104         {139.0,69.0},
105         {140.0,70.0}
106     };
107 
108     /*
109      * Data from NIST NOINT2
110      *
111      */
112     private double[][] noint2 = {
113         {3.0,4},
114         {4,5},
115         {4,6}
116     };
117 
118 
119     /**
120      * Test that the SimpleRegression objects generated from combining two
121      * SimpleRegression objects created from subsets of data are identical to
122      * SimpleRegression objects created from the combined data.
123      */
124     @Test
125     public void testAppend() {
126         check(false);
127         check(true);
128     }
129 
130     /**
131      * Checks that adding data to a single model gives the same result
132      * as adding "parts" of the dataset to smaller models and using append
133      * to aggregate the smaller models.
134      *
135      * @param includeIntercept
136      */
137     private void check(boolean includeIntercept) {
138         final int sets = 2;
139         final ISAACRandom rand = new ISAACRandom(10L);// Seed can be changed
140         final SimpleRegression whole = new SimpleRegression(includeIntercept);// regression of the whole set
141         final SimpleRegression parts = new SimpleRegression(includeIntercept);// regression with parts.
142 
143         for (int s = 0; s < sets; s++) {// loop through each subset of data.
144             final double coef = rand.nextDouble();
145             final SimpleRegression sub = new SimpleRegression(includeIntercept);// sub regression
146             for (int i = 0; i < 5; i++) { // loop through individual samlpes.
147                 final double x = rand.nextDouble();
148                 final double y = x * coef + rand.nextDouble();// some noise
149                 sub.addData(x, y);
150                 whole.addData(x, y);
151             }
152             parts.append(sub);
153             Assert.assertTrue(equals(parts, whole, 1E-6));
154         }
155     }
156 
157     /**
158      * Returns true iff the statistics reported by model1 are all within tol of
159      * those reported by model2.
160      *
161      * @param model1 first model
162      * @param model2 second model
163      * @param tol tolerance
164      * @return true if the two models report the same regression stats
165      */
166     private boolean equals(SimpleRegression model1, SimpleRegression model2, double tol) {
167         if (model1.getN() != model2.getN()) {
168             return false;
169         }
170         if (FastMath.abs(model1.getIntercept() - model2.getIntercept()) > tol) {
171             return false;
172         }
173         if (FastMath.abs(model1.getInterceptStdErr() - model2.getInterceptStdErr()) > tol) {
174             return false;
175         }
176         if (FastMath.abs(model1.getMeanSquareError() - model2.getMeanSquareError()) > tol) {
177             return false;
178         }
179         if (FastMath.abs(model1.getR() - model2.getR()) > tol) {
180             return false;
181         }
182         if (FastMath.abs(model1.getRegressionSumSquares() - model2.getRegressionSumSquares()) > tol) {
183             return false;
184         }
185         if (FastMath.abs(model1.getRSquare() - model2.getRSquare()) > tol) {
186             return false;
187         }
188         if (FastMath.abs(model1.getSignificance() - model2.getSignificance()) > tol) {
189             return false;
190         }
191         if (FastMath.abs(model1.getSlope() - model2.getSlope()) > tol) {
192             return false;
193         }
194         if (FastMath.abs(model1.getSlopeConfidenceInterval() - model2.getSlopeConfidenceInterval()) > tol) {
195             return false;
196         }
197         if (FastMath.abs(model1.getSlopeStdErr() - model2.getSlopeStdErr()) > tol) {
198             return false;
199         }
200         if (FastMath.abs(model1.getSumOfCrossProducts() - model2.getSumOfCrossProducts()) > tol) {
201             return false;
202         }
203         if (FastMath.abs(model1.getSumSquaredErrors() - model2.getSumSquaredErrors()) > tol) {
204             return false;
205         }
206         if (FastMath.abs(model1.getTotalSumSquares() - model2.getTotalSumSquares()) > tol) {
207             return false;
208         }
209         if (FastMath.abs(model1.getXSumSquares() - model2.getXSumSquares()) > tol) {
210             return false;
211         }
212         return true;
213     }
214 
215     @Test
216     public void testRegressIfaceMethod(){
217         final SimpleRegression regression = new SimpleRegression(true);
218         final UpdatingMultipleLinearRegression iface = regression;
219         final SimpleRegression regressionNoint = new SimpleRegression( false );
220         final SimpleRegression regressionIntOnly= new SimpleRegression( false );
221         for (int i = 0; i < data.length; i++) {
222             iface.addObservation( new double[]{data[i][1]}, data[i][0]);
223             regressionNoint.addData(data[i][1], data[i][0]);
224             regressionIntOnly.addData(1.0, data[i][0]);
225         }
226 
227         //should not be null
228         final RegressionResults fullReg = iface.regress( );
229         Assert.assertNotNull(fullReg);
230         Assert.assertEquals("intercept", regression.getIntercept(), fullReg.getParameterEstimate(0), 1.0e-16);
231         Assert.assertEquals("intercept std err",regression.getInterceptStdErr(), fullReg.getStdErrorOfEstimate(0),1.0E-16);
232         Assert.assertEquals("slope", regression.getSlope(), fullReg.getParameterEstimate(1), 1.0e-16);
233         Assert.assertEquals("slope std err",regression.getSlopeStdErr(), fullReg.getStdErrorOfEstimate(1),1.0E-16);
234         Assert.assertEquals("number of observations",regression.getN(), fullReg.getN());
235         Assert.assertEquals("r-square",regression.getRSquare(), fullReg.getRSquared(), 1.0E-16);
236         Assert.assertEquals("SSR", regression.getRegressionSumSquares(), fullReg.getRegressionSumSquares() ,1.0E-16);
237         Assert.assertEquals("MSE", regression.getMeanSquareError(), fullReg.getMeanSquareError() ,1.0E-16);
238         Assert.assertEquals("SSE", regression.getSumSquaredErrors(), fullReg.getErrorSumSquares() ,1.0E-16);
239 
240 
241         final RegressionResults noInt   = iface.regress( new int[]{1} );
242         Assert.assertNotNull(noInt);
243         Assert.assertEquals("slope", regressionNoint.getSlope(), noInt.getParameterEstimate(0), 1.0e-12);
244         Assert.assertEquals("slope std err",regressionNoint.getSlopeStdErr(), noInt.getStdErrorOfEstimate(0),1.0E-16);
245         Assert.assertEquals("number of observations",regressionNoint.getN(), noInt.getN());
246         Assert.assertEquals("r-square",regressionNoint.getRSquare(), noInt.getRSquared(), 1.0E-16);
247         Assert.assertEquals("SSR", regressionNoint.getRegressionSumSquares(), noInt.getRegressionSumSquares() ,1.0E-8);
248         Assert.assertEquals("MSE", regressionNoint.getMeanSquareError(), noInt.getMeanSquareError() ,1.0E-16);
249         Assert.assertEquals("SSE", regressionNoint.getSumSquaredErrors(), noInt.getErrorSumSquares() ,1.0E-16);
250 
251         final RegressionResults onlyInt = iface.regress( new int[]{0} );
252         Assert.assertNotNull(onlyInt);
253         Assert.assertEquals("slope", regressionIntOnly.getSlope(), onlyInt.getParameterEstimate(0), 1.0e-12);
254         Assert.assertEquals("slope std err",regressionIntOnly.getSlopeStdErr(), onlyInt.getStdErrorOfEstimate(0),1.0E-12);
255         Assert.assertEquals("number of observations",regressionIntOnly.getN(), onlyInt.getN());
256         Assert.assertEquals("r-square",regressionIntOnly.getRSquare(), onlyInt.getRSquared(), 1.0E-14);
257         Assert.assertEquals("SSE", regressionIntOnly.getSumSquaredErrors(), onlyInt.getErrorSumSquares() ,1.0E-8);
258         Assert.assertEquals("SSR", regressionIntOnly.getRegressionSumSquares(), onlyInt.getRegressionSumSquares() ,1.0E-8);
259         Assert.assertEquals("MSE", regressionIntOnly.getMeanSquareError(), onlyInt.getMeanSquareError() ,1.0E-8);
260 
261     }
262 
263     /**
264      * Verify that regress generates exceptions as advertised for bad model specifications.
265      */
266     @Test
267     public void testRegressExceptions() {
268         // No intercept
269         final SimpleRegression noIntRegression = new SimpleRegression(false);
270         noIntRegression.addData(noint2[0][1], noint2[0][0]);
271         noIntRegression.addData(noint2[1][1], noint2[1][0]);
272         noIntRegression.addData(noint2[2][1], noint2[2][0]);
273         try { // null array
274             noIntRegression.regress(null);
275             Assert.fail("Expecting MathIllegalArgumentException for null array");
276         } catch (MathIllegalArgumentException ex) {
277             // Expected
278         }
279         try { // empty array
280             noIntRegression.regress(new int[] {});
281             Assert.fail("Expecting MathIllegalArgumentException for empty array");
282         } catch (MathIllegalArgumentException ex) {
283             // Expected
284         }
285         try { // more than 1 regressor
286             noIntRegression.regress(new int[] {0, 1});
287             Assert.fail("Expecting MathIllegalArgumentException - too many regressors");
288         } catch (MathIllegalArgumentException ex) {
289             // Expected
290         }
291         try { // invalid regressor
292             noIntRegression.regress(new int[] {1});
293             Assert.fail("Expecting MathIllegalArgumentException - invalid regression");
294         } catch (MathIllegalArgumentException ex) {
295             // Expected
296         }
297 
298         // With intercept
299         final SimpleRegression regression = new SimpleRegression(true);
300         regression.addData(noint2[0][1], noint2[0][0]);
301         regression.addData(noint2[1][1], noint2[1][0]);
302         regression.addData(noint2[2][1], noint2[2][0]);
303         try { // null array
304             regression.regress(null);
305             Assert.fail("Expecting MathIllegalArgumentException for null array");
306         } catch (MathIllegalArgumentException ex) {
307             // Expected
308         }
309         try { // empty array
310             regression.regress(new int[] {});
311             Assert.fail("Expecting MathIllegalArgumentException for empty array");
312         } catch (MathIllegalArgumentException ex) {
313             // Expected
314         }
315         try { // more than 2 regressors
316             regression.regress(new int[] {0, 1, 2});
317             Assert.fail("Expecting MathIllegalArgumentException - too many regressors");
318         } catch (MathIllegalArgumentException ex) {
319             // Expected
320         }
321         try { // wrong order
322             regression.regress(new int[] {1,0});
323             Assert.fail("Expecting MathIllegalArgumentException - invalid regression");
324         } catch (MathIllegalArgumentException ex) {
325             // Expected
326         }
327         try { // out of range
328             regression.regress(new int[] {3,4});
329             Assert.fail("Expecting MathIllegalArgumentException");
330         } catch (MathIllegalArgumentException ex) {
331             // Expected
332         }
333         try { // out of range
334             regression.regress(new int[] {0,2});
335             Assert.fail("Expecting MathIllegalArgumentException");
336         } catch (MathIllegalArgumentException ex) {
337             // Expected
338         }
339         try { // out of range
340             regression.regress(new int[] {2});
341             Assert.fail("Expecting MathIllegalArgumentException");
342         } catch (MathIllegalArgumentException ex) {
343             // Expected
344         }
345     }
346 
347     @Test
348     public void testNoInterceot_noint2(){
349          SimpleRegression regression = new SimpleRegression(false);
350          regression.addData(noint2[0][1], noint2[0][0]);
351          regression.addData(noint2[1][1], noint2[1][0]);
352          regression.addData(noint2[2][1], noint2[2][0]);
353          Assert.assertEquals("intercept", 0, regression.getIntercept(), 0);
354          Assert.assertEquals("slope", 0.727272727272727,
355                  regression.getSlope(), 10E-12);
356          Assert.assertEquals("slope std err", 0.420827318078432E-01,
357                 regression.getSlopeStdErr(),10E-12);
358         Assert.assertEquals("number of observations", 3, regression.getN());
359         Assert.assertEquals("r-square", 0.993348115299335,
360             regression.getRSquare(), 10E-12);
361         Assert.assertEquals("SSR", 40.7272727272727,
362             regression.getRegressionSumSquares(), 10E-9);
363         Assert.assertEquals("MSE", 0.136363636363636,
364             regression.getMeanSquareError(), 10E-10);
365         Assert.assertEquals("SSE", 0.272727272727273,
366             regression.getSumSquaredErrors(),10E-9);
367     }
368 
369     @Test
370     public void testNoIntercept_noint1(){
371         SimpleRegression regression = new SimpleRegression(false);
372         for (int i = 0; i < noint1.length; i++) {
373             regression.addData(noint1[i][1], noint1[i][0]);
374         }
375         Assert.assertEquals("intercept", 0, regression.getIntercept(), 0);
376         Assert.assertEquals("slope", 2.07438016528926, regression.getSlope(), 10E-12);
377         Assert.assertEquals("slope std err", 0.165289256198347E-01,
378                 regression.getSlopeStdErr(),10E-12);
379         Assert.assertEquals("number of observations", 11, regression.getN());
380         Assert.assertEquals("r-square", 0.999365492298663,
381             regression.getRSquare(), 10E-12);
382         Assert.assertEquals("SSR", 200457.727272727,
383             regression.getRegressionSumSquares(), 10E-9);
384         Assert.assertEquals("MSE", 12.7272727272727,
385             regression.getMeanSquareError(), 10E-10);
386         Assert.assertEquals("SSE", 127.272727272727,
387             regression.getSumSquaredErrors(),10E-9);
388 
389     }
390 
391     @Test
392     public void testNorris() {
393         SimpleRegression regression = new SimpleRegression();
394         for (int i = 0; i < data.length; i++) {
395             regression.addData(data[i][1], data[i][0]);
396         }
397         // Tests against certified values from
398         // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
399         Assert.assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
400         Assert.assertEquals("slope std err", 0.429796848199937E-03,
401                 regression.getSlopeStdErr(),10E-12);
402         Assert.assertEquals("number of observations", 36, regression.getN());
403         Assert.assertEquals( "intercept", -0.262323073774029,
404             regression.getIntercept(),10E-12);
405         Assert.assertEquals("std err intercept", 0.232818234301152,
406             regression.getInterceptStdErr(),10E-12);
407         Assert.assertEquals("r-square", 0.999993745883712,
408             regression.getRSquare(), 10E-12);
409         Assert.assertEquals("SSR", 4255954.13232369,
410             regression.getRegressionSumSquares(), 10E-9);
411         Assert.assertEquals("MSE", 0.782864662630069,
412             regression.getMeanSquareError(), 10E-10);
413         Assert.assertEquals("SSE", 26.6173985294224,
414             regression.getSumSquaredErrors(),10E-9);
415         // ------------  End certified data tests
416 
417         Assert.assertEquals( "predict(0)",  -0.262323073774029,
418             regression.predict(0), 10E-12);
419         Assert.assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
420             regression.predict(1), 10E-12);
421     }
422 
423     @Test
424     public void testCorr() {
425         SimpleRegression regression = new SimpleRegression();
426         regression.addData(corrData);
427         Assert.assertEquals("number of observations", 17, regression.getN());
428         Assert.assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
429         Assert.assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
430     }
431 
432     @Test
433     public void testNaNs() {
434         SimpleRegression regression = new SimpleRegression();
435         Assert.assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
436         Assert.assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
437         Assert.assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
438         Assert.assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
439         Assert.assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
440         Assert.assertTrue("e not NaN", Double.isNaN(regression.getR()));
441         Assert.assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
442         Assert.assertTrue( "RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
443         Assert.assertTrue("SSE not NaN",Double.isNaN(regression.getSumSquaredErrors()));
444         Assert.assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));
445         Assert.assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
446 
447         regression.addData(1, 2);
448         regression.addData(1, 3);
449 
450         // No x variation, so these should still blow...
451         Assert.assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
452         Assert.assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
453         Assert.assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
454         Assert.assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
455         Assert.assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
456         Assert.assertTrue("e not NaN", Double.isNaN(regression.getR()));
457         Assert.assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
458         Assert.assertTrue("RSS not NaN", Double.isNaN(regression.getRegressionSumSquares()));
459         Assert.assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
460         Assert.assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
461 
462         // but SSTO should be OK
463         Assert.assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
464 
465         regression = new SimpleRegression();
466 
467         regression.addData(1, 2);
468         regression.addData(3, 3);
469 
470         // All should be OK except MSE, s(b0), s(b1) which need one more df
471         Assert.assertTrue("interceptNaN", !Double.isNaN(regression.getIntercept()));
472         Assert.assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
473         Assert.assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
474         Assert.assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
475         Assert.assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
476         Assert.assertTrue("r NaN", !Double.isNaN(regression.getR()));
477         Assert.assertTrue("r-square NaN", !Double.isNaN(regression.getRSquare()));
478         Assert.assertTrue("RSS NaN", !Double.isNaN(regression.getRegressionSumSquares()));
479         Assert.assertTrue("SSE NaN", !Double.isNaN(regression.getSumSquaredErrors()));
480         Assert.assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
481         Assert.assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));
482 
483         regression.addData(1, 4);
484 
485         // MSE, MSE, s(b0), s(b1) should all be OK now
486         Assert.assertTrue("MSE NaN", !Double.isNaN(regression.getMeanSquareError()));
487         Assert.assertTrue("slope std err NaN", !Double.isNaN(regression.getSlopeStdErr()));
488         Assert.assertTrue("intercept std err NaN", !Double.isNaN(regression.getInterceptStdErr()));
489     }
490 
491     @Test
492     public void testClear() {
493         SimpleRegression regression = new SimpleRegression();
494         regression.addData(corrData);
495         Assert.assertEquals("number of observations", 17, regression.getN());
496         regression.clear();
497         Assert.assertEquals("number of observations", 0, regression.getN());
498         regression.addData(corrData);
499         Assert.assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
500         regression.addData(data);
501         Assert.assertEquals("number of observations", 53, regression.getN());
502     }
503 
504     @Test
505     public void testInference() {
506         //----------  verified against R, version 1.8.1 -----
507         // infData
508         SimpleRegression regression = new SimpleRegression();
509         regression.addData(infData);
510         Assert.assertEquals("slope std err", 0.011448491,
511                 regression.getSlopeStdErr(), 1E-10);
512         Assert.assertEquals("std err intercept", 0.286036932,
513                 regression.getInterceptStdErr(),1E-8);
514         Assert.assertEquals("significance", 4.596e-07,
515                 regression.getSignificance(),1E-8);
516         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
517                 regression.getSlopeConfidenceInterval(),1E-8);
518         // infData2
519         regression = new SimpleRegression();
520         regression.addData(infData2);
521         Assert.assertEquals("slope std err", 1.07260253,
522                 regression.getSlopeStdErr(), 1E-8);
523         Assert.assertEquals("std err intercept",4.17718672,
524                 regression.getInterceptStdErr(),1E-8);
525         Assert.assertEquals("significance", 0.261829133982,
526                 regression.getSignificance(),1E-11);
527         Assert.assertEquals("slope conf interval half-width", 2.97802204827,
528                 regression.getSlopeConfidenceInterval(),1E-8);
529         //------------- End R-verified tests -------------------------------
530 
531         //FIXME: get a real example to test against with alpha = .01
532         Assert.assertTrue("tighter means wider",
533                 regression.getSlopeConfidenceInterval() < regression.getSlopeConfidenceInterval(0.01));
534 
535         try {
536             regression.getSlopeConfidenceInterval(1);
537             Assert.fail("expecting MathIllegalArgumentException for alpha = 1");
538         } catch (MathIllegalArgumentException ex) {
539             // ignored
540         }
541 
542     }
543 
544     @Test
545     public void testPerfect() {
546         SimpleRegression regression = new SimpleRegression();
547         int n = 100;
548         for (int i = 0; i < n; i++) {
549             regression.addData(((double) i) / (n - 1), i);
550         }
551         Assert.assertEquals(0.0, regression.getSignificance(), 1.0e-5);
552         Assert.assertTrue(regression.getSlope() > 0.0);
553         Assert.assertTrue(regression.getSumSquaredErrors() >= 0.0);
554     }
555 
556     @Test
557     public void testPerfect2() {
558         SimpleRegression regression = new SimpleRegression();
559         regression.addData(0, 0);
560         regression.addData(1, 1);
561         regression.addData(2, 2);
562         Assert.assertEquals(0.0, regression.getSlopeStdErr(), 0.0);
563         Assert.assertEquals(0.0, regression.getSignificance(), Double.MIN_VALUE);
564         Assert.assertEquals(1, regression.getRSquare(), Double.MIN_VALUE);
565     }
566 
567     @Test
568     public void testPerfectNegative() {
569         SimpleRegression regression = new SimpleRegression();
570         int n = 100;
571         for (int i = 0; i < n; i++) {
572             regression.addData(- ((double) i) / (n - 1), i);
573         }
574 
575         Assert.assertEquals(0.0, regression.getSignificance(), 1.0e-5);
576         Assert.assertTrue(regression.getSlope() < 0.0);
577     }
578 
579     @Test
580     public void testRandom() {
581         SimpleRegression regression = new SimpleRegression();
582         Random random = new Random(1);
583         int n = 100;
584         for (int i = 0; i < n; i++) {
585             regression.addData(((double) i) / (n - 1), random.nextDouble());
586         }
587 
588         Assert.assertTrue( 0.0 < regression.getSignificance()
589                     && regression.getSignificance() < 1.0);
590     }
591 
592 
593     // Jira MATH-85 = Bugzilla 39432
594     @Test
595     public void testSSENonNegative() {
596         double[] y = { 8915.102, 8919.302, 8923.502 };
597         double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
598         SimpleRegression reg = new SimpleRegression();
599         for (int i = 0; i < x.length; i++) {
600             reg.addData(x[i], y[i]);
601         }
602         Assert.assertTrue(reg.getSumSquaredErrors() >= 0.0);
603     }
604 
605     // Test remove X,Y (single observation)
606     @Test
607     public void testRemoveXY() {
608         // Create regression with inference data then remove to test
609         SimpleRegression regression = new SimpleRegression();
610         Assert.assertTrue(regression.hasIntercept());
611         regression.addData(infData);
612         regression.removeData(removeX, removeY);
613         regression.addData(removeX, removeY);
614         // Use the inference assertions to make sure that everything worked
615         Assert.assertEquals("slope std err", 0.011448491,
616                 regression.getSlopeStdErr(), 1E-10);
617         Assert.assertEquals("std err intercept", 0.286036932,
618                 regression.getInterceptStdErr(),1E-8);
619         Assert.assertEquals("significance", 4.596e-07,
620                 regression.getSignificance(),1E-8);
621         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
622                 regression.getSlopeConfidenceInterval(),1E-8);
623      }
624 
625     // Test remove single observation in array
626     @Test
627     public void testRemoveSingle() {
628         // Create regression with inference data then remove to test
629         SimpleRegression regression = new SimpleRegression();
630         Assert.assertTrue(regression.hasIntercept());
631         regression.addData(infData);
632         regression.removeData(removeSingle);
633         regression.addData(removeSingle);
634         // Use the inference assertions to make sure that everything worked
635         Assert.assertEquals("slope std err", 0.011448491,
636                 regression.getSlopeStdErr(), 1E-10);
637         Assert.assertEquals("std err intercept", 0.286036932,
638                 regression.getInterceptStdErr(),1E-8);
639         Assert.assertEquals("significance", 4.596e-07,
640                 regression.getSignificance(),1E-8);
641         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
642                 regression.getSlopeConfidenceInterval(),1E-8);
643      }
644 
645     // Test remove multiple observations
646     @Test
647     public void testRemoveMultiple() {
648         // Create regression with inference data then remove to test
649         SimpleRegression regression = new SimpleRegression();
650         Assert.assertTrue(regression.hasIntercept());
651         regression.addData(infData);
652         regression.removeData(removeMultiple);
653         regression.addData(removeMultiple);
654         // Use the inference assertions to make sure that everything worked
655         Assert.assertEquals("slope std err", 0.011448491,
656                 regression.getSlopeStdErr(), 1E-10);
657         Assert.assertEquals("std err intercept", 0.286036932,
658                 regression.getInterceptStdErr(),1E-8);
659         Assert.assertEquals("significance", 4.596e-07,
660                 regression.getSignificance(),1E-8);
661         Assert.assertEquals("slope conf interval half-width", 0.0270713794287,
662                 regression.getSlopeConfidenceInterval(),1E-8);
663      }
664 
665     // Test remove multiple observations
666     @Test
667     public void testRemoveMultipleNoIntercept() {
668         // Create regression with inference data then remove to test
669         SimpleRegression regression = new SimpleRegression(false);
670         Assert.assertFalse(regression.hasIntercept());
671         Assert.assertEquals(0.0, regression.getIntercept(), 1.0e-15);
672         regression.addData(infData);
673         Assert.assertEquals(0.30593, regression.predict(1.25), 1.0e-5);
674         regression.removeData(removeMultiple);
675         regression.addData(removeMultiple);
676         // Use the inference assertions to make sure that everything worked
677         Assert.assertEquals("slope std err", 0.0103629732,
678                 regression.getSlopeStdErr(), 1E-10);
679         Assert.assertTrue("std err intercept", Double.isNaN(regression.getInterceptStdErr()));
680         Assert.assertEquals("significance", 6.199e-08,
681                 regression.getSignificance(),1E-10);
682         Assert.assertEquals("slope conf interval half-width", 0.02450454,
683                 regression.getSlopeConfidenceInterval(),1E-8);
684      }
685 
686     // Remove observation when empty
687     @Test
688     public void testRemoveObsFromEmpty() {
689         SimpleRegression regression = new SimpleRegression();
690         regression.removeData(removeX, removeY);
691         Assert.assertEquals(regression.getN(), 0);
692     }
693 
694     // Remove single observation to empty
695     @Test
696     public void testRemoveObsFromSingle() {
697         SimpleRegression regression = new SimpleRegression();
698         regression.addData(removeX, removeY);
699         regression.removeData(removeX, removeY);
700         Assert.assertEquals(regression.getN(), 0);
701     }
702 
703     // Remove multiple observations to empty
704     @Test
705     public void testRemoveMultipleToEmpty() {
706         SimpleRegression regression = new SimpleRegression();
707         regression.addData(removeMultiple);
708         regression.removeData(removeMultiple);
709         Assert.assertEquals(regression.getN(), 0);
710     }
711 
712     // Remove multiple observations past empty (i.e. size of array > n)
713     @Test
714     public void testRemoveMultiplePastEmpty() {
715         SimpleRegression regression = new SimpleRegression();
716         regression.addData(removeX, removeY);
717         regression.removeData(removeMultiple);
718         Assert.assertEquals(regression.getN(), 0);
719     }
720 
721     @Test
722     public void testWrongDimensions() {
723         try {
724             new SimpleRegression().addData(new double[1][1]);
725             Assert.fail("an exception should have been thrown");
726         } catch (MathIllegalArgumentException miae) {
727             Assert.assertEquals(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION, miae.getSpecifier());
728         }
729         try {
730             new SimpleRegression().addObservation(null, 0.0);
731             Assert.fail("an exception should have been thrown");
732         } catch (MathIllegalArgumentException miae) {
733             Assert.assertEquals(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION, miae.getSpecifier());
734         }
735         try {
736             new SimpleRegression().addObservation(new double[0], 0.0);
737             Assert.fail("an exception should have been thrown");
738         } catch (MathIllegalArgumentException miae) {
739             Assert.assertEquals(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION, miae.getSpecifier());
740         }
741         try {
742             new SimpleRegression().addObservations(new double[][] { null, null }, new double[2]);
743             Assert.fail("an exception should have been thrown");
744         } catch (MathIllegalArgumentException miae) {
745             Assert.assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, miae.getSpecifier());
746         }
747         try {
748             new SimpleRegression().addObservations(new double[][] { new double[0], new double[0] }, new double[2]);
749             Assert.fail("an exception should have been thrown");
750         } catch (MathIllegalArgumentException miae) {
751             Assert.assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, miae.getSpecifier());
752         }
753     }
754 
755     @Test
756     public void testFewPoints() {
757         SimpleRegression sr = new SimpleRegression();
758         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
759         Assert.assertEquals(1, sr.getN());
760         Assert.assertTrue(Double.isNaN(sr.getXSumSquares()));
761         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
762         Assert.assertEquals(2, sr.getN());
763         Assert.assertFalse(Double.isNaN(sr.getXSumSquares()));
764         Assert.assertTrue(Double.isNaN(sr.getSlopeConfidenceInterval()));
765         Assert.assertTrue(Double.isNaN(sr.getSignificance()));
766         try {
767             sr.regress();
768             Assert.fail("an exception should have been thrown");
769         } catch (MathIllegalArgumentException miae) {
770             Assert.assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_REGRESSION, miae.getSpecifier());
771         }
772         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
773         RegressionResults results = sr.regress();
774         Assert.assertTrue(Double.isNaN(results.getParameterEstimate(1)));
775         results = sr.regress(new int[] { 1 });
776         Assert.assertEquals(1.0, results.getParameterEstimate(0), 1.0e-15);
777         sr.addObservations(new double[][] { new double[] { 2.0, 2.5 }}, new double[] { 2.0 });
778         results = sr.regress();
779         Assert.assertFalse(Double.isNaN(results.getParameterEstimate(1)));
780         sr.addObservations(new double[][] { new double[] { Double.NaN, Double.NaN }}, new double[] { Double.NaN });
781         results = sr.regress(new int[] { 1 });
782         Assert.assertTrue(Double.isNaN(results.getParameterEstimate(0)));
783 
784     }
785 
786     @Test
787     public void testFewPointsWithoutIntercept() {
788         SimpleRegression sr = new SimpleRegression(false);
789         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
790         Assert.assertEquals(1, sr.getN());
791         Assert.assertTrue(Double.isNaN(sr.getXSumSquares()));
792         try {
793             sr.regress();
794             Assert.fail("an exception should have been thrown");
795         } catch (MathIllegalArgumentException miae) {
796             Assert.assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_REGRESSION, miae.getSpecifier());
797         }
798         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
799         RegressionResults results = sr.regress();
800         Assert.assertFalse(Double.isNaN(results.getParameterEstimate(0)));
801         sr.addObservations(new double[][] { new double[] { Double.NaN, 1.0 }}, new double[] { 2.0 });
802         results = sr.regress();
803         Assert.assertTrue(Double.isNaN(results.getParameterEstimate(0)));
804     }
805 
806 }