1 /*
2 * Licensed to the Hipparchus project under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The Hipparchus project licenses this file to You under the Apache License,
6 * Version 2.0 (the "License"); you may not use this file except in compliance
7 * with the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.hipparchus.stat.projection;
18
19 import org.hipparchus.exception.MathIllegalStateException;
20 import org.hipparchus.stat.LocalizedStatFormats;
21 import org.junit.jupiter.api.Test;
22
23 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
24 import static org.junit.jupiter.api.Assertions.assertEquals;
25 import static org.junit.jupiter.api.Assertions.assertFalse;
26 import static org.junit.jupiter.api.Assertions.assertTrue;
27 import static org.junit.jupiter.api.Assertions.fail;
28
29 public class PCATest {
30
31 // example from:
32 // https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643
33 // https://stattrek.com/matrix-algebra/covariance-matrix
34 private static final double[][] SCORES = {
35 {90, 60, 90},
36 {90, 90, 30},
37 {60, 60, 60},
38 {60, 60, 90},
39 {30, 30, 30},
40 };
41
42 public static final double[] EXPECTED_MEAN = {66, 60, 60};
43 public static final double[] EXPECTED_VARIANCE = {1137.587441, 786.387983, 56.024575};
44
45 private static final double[][] EXPECTED_COMPONENTS = {
46 { 0.6558023, 0.385999 },
47 { 0.4291978, 0.516366 },
48 { 0.6210577, -0.7644414 }
49 };
50
51 /**
52 * This is the expected value (give or take sign) when centering (covariance)
53 * but no scaling is applied. In general, components with the same values but
54 * differing sign are
55 * <a href="https://stats.stackexchange.com/questions/30348/is-it-acceptable-to-reverse-a-sign-of-a-principal-component-score">equivalent</a>.
56 *
57 * The result has been cross-checked with
58 * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a>
59 * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html>weka.attributeSelection.PrincipalComponents</a>
60 * (with the <code>centerData</code> option set to <code>true</code>)
61 * <a href="https://github.com/datumbox/datumbox-framework/blob/develop/datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/featureselection/PCA.java">com.datumbox.framework.core.machinelearning.featureselection.PCA</a>
62 * (but for whatever reason datumbox does the transform on the unnormalized original data rather than normalized data - normalizing manually achieves the result below)
63 * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a>
64 * (using the PCA.fit method)
65 * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a>
66 */
67 private static final double[][] EXPECTED_COV = {
68 { 34.3709848, -13.6692708 },
69 { 9.9834573, 47.6882055 },
70 { -3.9348135, -2.3159927 },
71 { 14.6969171, -25.2492347 },
72 { -55.1165457, -6.4537072 },
73 };
74
75 /**
76 * This is the expected value give or take sign when centering and scaling (correlation).
77 *
78 * The result has been cross-checked with
79 * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html></a>
80 * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a>
81 * (using the 'VariableWeights','variance' option)
82 */
83 private static final double[][] EXPECTED_COR = {
84 { 0.9118256, -0.942809 },
85 { 1.3832302, 1.4142136 },
86 { -0.1690309, 0.0 },
87 { 0.0666714, -0.942809 },
88 { -2.1926964, 0.4714045 },
89 };
90
91 /**
92 * This is the expected value give or take sign when centering and scaling (correlation)
93 * is applied with no bias adjustment. In general, bias adjustment is more accurate but
94 * alters most PCA machine learning models by an insignificant amount so is often not accounted for.
95 *
96 * The result has been cross-checked with
97 * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a>
98 * (but with prior preprocessing using <a href="https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html">sklearn.preprocessing.StandardScaler</a>)
99 * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a>
100 * (using the PCA.cor method)
101 */
102 public static final double[][] EXPECTED_COR_NO_BIAS = {
103 { 1.0194521, -1.0540926 },
104 { 1.5464984, 1.5811388 },
105 { -0.1889822, 0.0 },
106 { 0.0745409, -1.0540926 },
107 { -2.451509, 0.5270463 },
108 };
109 public static final double DELTA = 0.000001;
110
111 @Test
112 void defaultSettings() {
113 PCA pca = new PCA(2);
114 assertEquals(2, pca.getNumComponents());
115 assertFalse(pca.isScale());
116 assertTrue(pca.isBiasCorrection());
117 }
118
119 @Test
120 void covariance() {
121 PCA pca = new PCA(2);
122 double[][] actual = pca.fitAndTransform(SCORES);
123 assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA);
124 assertArrayEquals(EXPECTED_VARIANCE, pca.getVariance(), DELTA);
125 customAssertExpected(EXPECTED_COMPONENTS, pca.getComponents());
126 customAssertExpected(EXPECTED_COV, actual);
127
128 // calling fit and transform individually should be the same as combo method
129 pca = new PCA(2);
130 actual = pca.fit(SCORES).transform(SCORES);
131 assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA);
132 customAssertExpected(EXPECTED_COV, actual);
133 }
134
135 @Test
136 void correlation() {
137 PCA pca = new PCA(2, true, true);
138 double[][] actual = pca.fitAndTransform(SCORES);
139 customAssertExpected(EXPECTED_COR, actual);
140 }
141
142 @Test
143 void correlationNoBias() {
144 PCA pca = new PCA(2, true, false);
145 double[][] actual = pca.fitAndTransform(SCORES);
146 customAssertExpected(EXPECTED_COR_NO_BIAS, actual);
147 }
148
149 @Test
150 void transformWithoutFit() {
151 PCA pca = new PCA(2);
152 try {
153 pca.transform(SCORES);
154 fail("an exception should have been thrown");
155 } catch (MathIllegalStateException mise) {
156 assertEquals(LocalizedStatFormats.ILLEGAL_STATE_PCA, mise.getSpecifier());
157 assertEquals("transform", mise.getParts()[0]);
158 }
159 }
160
161 private static void customAssertExpected(double[][] expected, double[][] actual) {
162 for (int i = 0; i < expected.length; i++) {
163 double[] e = expected[i];
164 double[] t = actual[i];
165 assertArrayEquals(e, t, DELTA);
166 }
167 }
168 }