View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.correlation;
23  
24  import org.hipparchus.UnitTestUtils;
25  import org.hipparchus.distribution.continuous.TDistribution;
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.linear.BlockRealMatrix;
28  import org.hipparchus.linear.RealMatrix;
29  import org.hipparchus.util.FastMath;
30  import org.junit.Assert;
31  import org.junit.Test;
32  
33  
34  public class PearsonsCorrelationTest {
35  
36      protected final double[] longleyData = new double[] {
37              60323,83.0,234289,2356,1590,107608,1947,
38              61122,88.5,259426,2325,1456,108632,1948,
39              60171,88.2,258054,3682,1616,109773,1949,
40              61187,89.5,284599,3351,1650,110929,1950,
41              63221,96.2,328975,2099,3099,112075,1951,
42              63639,98.1,346999,1932,3594,113270,1952,
43              64989,99.0,365385,1870,3547,115094,1953,
44              63761,100.0,363112,3578,3350,116219,1954,
45              66019,101.2,397469,2904,3048,117388,1955,
46              67857,104.6,419180,2822,2857,118734,1956,
47              68169,108.4,442769,2936,2798,120445,1957,
48              66513,110.8,444546,4681,2637,121950,1958,
49              68655,112.6,482704,3813,2552,123366,1959,
50              69564,114.2,502601,3931,2514,125368,1960,
51              69331,115.7,518173,4806,2572,127852,1961,
52              70551,116.9,554894,4007,2827,130081,1962
53          };
54  
55      protected final double[] swissData = new double[] {
56              80.2,17.0,15,12,9.96,
57              83.1,45.1,6,9,84.84,
58              92.5,39.7,5,5,93.40,
59              85.8,36.5,12,7,33.77,
60              76.9,43.5,17,15,5.16,
61              76.1,35.3,9,7,90.57,
62              83.8,70.2,16,7,92.85,
63              92.4,67.8,14,8,97.16,
64              82.4,53.3,12,7,97.67,
65              82.9,45.2,16,13,91.38,
66              87.1,64.5,14,6,98.61,
67              64.1,62.0,21,12,8.52,
68              66.9,67.5,14,7,2.27,
69              68.9,60.7,19,12,4.43,
70              61.7,69.3,22,5,2.82,
71              68.3,72.6,18,2,24.20,
72              71.7,34.0,17,8,3.30,
73              55.7,19.4,26,28,12.11,
74              54.3,15.2,31,20,2.15,
75              65.1,73.0,19,9,2.84,
76              65.5,59.8,22,10,5.23,
77              65.0,55.1,14,3,4.52,
78              56.6,50.9,22,12,15.14,
79              57.4,54.1,20,6,4.20,
80              72.5,71.2,12,1,2.40,
81              74.2,58.1,14,8,5.23,
82              72.0,63.5,6,3,2.56,
83              60.5,60.8,16,10,7.72,
84              58.3,26.8,25,19,18.46,
85              65.4,49.5,15,8,6.10,
86              75.5,85.9,3,2,99.71,
87              69.3,84.9,7,6,99.68,
88              77.3,89.7,5,2,100.00,
89              70.5,78.2,12,6,98.96,
90              79.4,64.9,7,3,98.22,
91              65.0,75.9,9,9,99.06,
92              92.2,84.6,3,3,99.46,
93              79.3,63.1,13,13,96.83,
94              70.4,38.4,26,12,5.62,
95              65.7,7.7,29,11,13.79,
96              72.7,16.7,22,13,11.22,
97              64.4,17.6,35,32,16.92,
98              77.6,37.6,15,7,4.97,
99              67.6,18.7,25,7,8.65,
100             35.0,1.2,37,53,42.34,
101             44.7,46.6,16,29,50.43,
102             42.8,27.7,22,29,58.33
103         };
104 
105 
106     /**
107      * Test Longley dataset against R.
108      */
109     @Test
110     public void testLongly() {
111         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
112         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
113         RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
114         double[] rData = new double[] {
115                 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
116                 0.4573073999764817, 0.960390571594376, 0.9713294591921188,
117                 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
118                 0.4647441876006747, 0.979163432977498, 0.9911491900672053,
119                 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
120                 0.4464367918926265, 0.991090069458478, 0.9952734837647849,
121                 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
122                 -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
123                 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
124                 1.0000000000000000, 0.364416267189032, 0.4172451498349454,
125                 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
126                 0.3644162671890320, 1.000000000000000, 0.9939528462329257,
127                 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
128                 0.4172451498349454, 0.993952846232926, 1.0000000000000000
129         };
130         UnitTestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15);
131 
132         double[] rPvalues = new double[] {
133                 4.38904690369668e-10,
134                 8.36353208910623e-12, 7.8159700933611e-14,
135                 0.0472894097790304, 0.01030636128354301, 0.01316878049026582,
136                 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
137                 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684,
138                 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15
139         };
140         RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7);
141         fillUpper(rPMatrix, 0d);
142         UnitTestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
143     }
144 
145     /**
146      * Test R Swiss fertility dataset against R.
147      */
148     @Test
149     public void testSwissFertility() {
150          RealMatrix matrix = createRealMatrix(swissData, 47, 5);
151          PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
152          RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
153          double[] rData = new double[] {
154                1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,  0.4636847006517939,
155                  0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398,
156                 -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666,
157                 -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148,
158                  0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000
159          };
160          UnitTestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15);
161 
162          double[] rPvalues = new double[] {
163                  0.01491720061472623,
164                  9.45043734069043e-07, 9.95151527133974e-08,
165                  3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08,
166                  0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683
167          };
168          RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5);
169          fillUpper(rPMatrix, 0d);
170          UnitTestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
171     }
172 
173     /**
174      * Test p-value near 0. JIRA: MATH-371
175      */
176     @Test
177     public void testPValueNearZero() {
178         /*
179          * Create a dataset that has r -> 1, p -> 0 as dimension increases.
180          * Prior to the fix for MATH-371, p vanished for dimension >= 14.
181          * Post fix, p-values diminish smoothly, vanishing at dimension = 127.
182          * Tested value is ~1E-303.
183          */
184         int dimension = 120;
185         double[][] data = new double[dimension][2];
186         for (int i = 0; i < dimension; i++) {
187             data[i][0] = i;
188             data[i][1] = i + 1/((double)i + 1);
189         }
190         PearsonsCorrelation corrInstance = new PearsonsCorrelation(data);
191         Assert.assertTrue(corrInstance.getCorrelationPValues().getEntry(0, 1) > 0);
192     }
193 
194 
195     /**
196      * Constant column
197      */
198     @Test
199     public void testConstant() {
200         double[] noVariance = new double[] {1, 1, 1, 1};
201         double[] values = new double[] {1, 2, 3, 4};
202         Assert.assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
203         Assert.assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(values, noVariance)));
204     }
205 
206 
207     /**
208      * Insufficient data
209      */
210 
211     @Test
212     public void testInsufficientData() {
213         double[] one = new double[] {1};
214         double[] two = new double[] {2};
215         try {
216             new PearsonsCorrelation().correlation(one, two);
217             Assert.fail("Expecting MathIllegalArgumentException");
218         } catch (MathIllegalArgumentException ex) {
219             // Expected
220         }
221         RealMatrix matrix = new BlockRealMatrix(new double[][] {{0},{1}});
222         try {
223             new PearsonsCorrelation(matrix);
224             Assert.fail("Expecting MathIllegalArgumentException");
225         } catch (MathIllegalArgumentException ex) {
226             // Expected
227         }
228     }
229 
230     /**
231      * Verify that direct t-tests using standard error estimates are consistent
232      * with reported p-values
233      */
234     @Test
235     public void testStdErrorConsistency() {
236         TDistribution tDistribution = new TDistribution(45);
237         RealMatrix matrix = createRealMatrix(swissData, 47, 5);
238         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
239         RealMatrix rValues = corrInstance.getCorrelationMatrix();
240         RealMatrix pValues = corrInstance.getCorrelationPValues();
241         RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors();
242         for (int i = 0; i < 5; i++) {
243             for (int j = 0; j < i; j++) {
244                 double t = FastMath.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j);
245                 double p = 2 * (1 - tDistribution.cumulativeProbability(t));
246                 Assert.assertEquals(p, pValues.getEntry(i, j), 10E-15);
247             }
248         }
249     }
250 
251     /**
252      * Verify that creating correlation from covariance gives same results as
253      * direct computation from the original matrix
254      */
255     @Test
256     public void testCovarianceConsistency() {
257         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
258         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
259         Covariance covInstance = new Covariance(matrix);
260         PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance);
261         UnitTestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
262                 corrFromCovInstance.getCorrelationMatrix(), 10E-15);
263         UnitTestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
264                 corrFromCovInstance.getCorrelationPValues(), 10E-15);
265         UnitTestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
266                 corrFromCovInstance.getCorrelationStandardErrors(), 10E-15);
267 
268         PearsonsCorrelation corrFromCovInstance2 =
269             new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16);
270         UnitTestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
271                 corrFromCovInstance2.getCorrelationMatrix(), 10E-15);
272         UnitTestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
273                 corrFromCovInstance2.getCorrelationPValues(), 10E-15);
274         UnitTestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
275                 corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15);
276     }
277 
278 
279     @Test
280     public void testConsistency() {
281         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
282         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
283         double[][] data = matrix.getData();
284         double[] x = matrix.getColumn(0);
285         double[] y = matrix.getColumn(1);
286         Assert.assertEquals(new PearsonsCorrelation().correlation(x, y),
287                 corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
288         UnitTestUtils.assertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(),
289                 new PearsonsCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
290     }
291 
292     protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
293         double[][] matrixData = new double[nRows][nCols];
294         int ptr = 0;
295         for (int i = 0; i < nRows; i++) {
296             System.arraycopy(data, ptr, matrixData[i], 0, nCols);
297             ptr += nCols;
298         }
299         return new BlockRealMatrix(matrixData);
300     }
301 
302     protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) {
303         int ptr = 0;
304         RealMatrix result = new BlockRealMatrix(dimension, dimension);
305         for (int i = 1; i < dimension; i++) {
306             for (int j = 0; j < i; j++) {
307                 result.setEntry(i, j, data[ptr]);
308                 ptr++;
309             }
310         }
311         return result;
312     }
313 
314     protected void fillUpper(RealMatrix matrix, double diagonalValue) {
315         int dimension = matrix.getColumnDimension();
316         for (int i = 0; i < dimension; i++) {
317             matrix.setEntry(i, i, diagonalValue);
318             for (int j = i+1; j < dimension; j++) {
319                 matrix.setEntry(i, j, matrix.getEntry(j, i));
320             }
321         }
322     }
323 }