1 /* 2 * Licensed to the Hipparchus project under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The Hipparchus project licenses this file to You under the Apache License, 6 * Version 2.0 (the "License"); you may not use this file except in compliance 7 * with the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.hipparchus.stat.projection; 18 19 import org.hipparchus.exception.MathIllegalStateException; 20 import org.hipparchus.stat.LocalizedStatFormats; 21 import org.junit.Assert; 22 import org.junit.Test; 23 24 public class PCATest { 25 26 // example from: 27 // https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643 28 // https://stattrek.com/matrix-algebra/covariance-matrix 29 private static final double[][] SCORES = { 30 {90, 60, 90}, 31 {90, 90, 30}, 32 {60, 60, 60}, 33 {60, 60, 90}, 34 {30, 30, 30}, 35 }; 36 37 public static final double[] EXPECTED_MEAN = {66, 60, 60}; 38 public static final double[] EXPECTED_VARIANCE = {1137.587441, 786.387983, 56.024575}; 39 40 private static final double[][] EXPECTED_COMPONENTS = { 41 { 0.6558023, 0.385999 }, 42 { 0.4291978, 0.516366 }, 43 { 0.6210577, -0.7644414 } 44 }; 45 46 /** 47 * This is the expected value (give or take sign) when centering (covariance) 48 * but no scaling is applied. In general, components with the same values but 49 * differing sign are 50 * <a href="https://stats.stackexchange.com/questions/30348/is-it-acceptable-to-reverse-a-sign-of-a-principal-component-score">equivalent</a>. 51 * 52 * The result has been cross-checked with 53 * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a> 54 * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html>weka.attributeSelection.PrincipalComponents</a> 55 * (with the <code>centerData</code> option set to <code>true</code>) 56 * <a href="https://github.com/datumbox/datumbox-framework/blob/develop/datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/featureselection/PCA.java">com.datumbox.framework.core.machinelearning.featureselection.PCA</a> 57 * (but for whatever reason datumbox does the transform on the unnormalized original data rather than normalized data - normalizing manually achieves the result below) 58 * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a> 59 * (using the PCA.fit method) 60 * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a> 61 */ 62 private static final double[][] EXPECTED_COV = { 63 { 34.3709848, -13.6692708 }, 64 { 9.9834573, 47.6882055 }, 65 { -3.9348135, -2.3159927 }, 66 { 14.6969171, -25.2492347 }, 67 { -55.1165457, -6.4537072 }, 68 }; 69 70 /** 71 * This is the expected value give or take sign when centering and scaling (correlation). 72 * 73 * The result has been cross-checked with 74 * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html></a> 75 * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a> 76 * (using the 'VariableWeights','variance' option) 77 */ 78 private static final double[][] EXPECTED_COR = { 79 { 0.9118256, -0.942809 }, 80 { 1.3832302, 1.4142136 }, 81 { -0.1690309, 0.0 }, 82 { 0.0666714, -0.942809 }, 83 { -2.1926964, 0.4714045 }, 84 }; 85 86 /** 87 * This is the expected value give or take sign when centering and scaling (correlation) 88 * is applied with no bias adjustment. In general, bias adjustment is more accurate but 89 * alters most PCA machine learning models by an insignificant amount so is often not accounted for. 90 * 91 * The result has been cross-checked with 92 * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a> 93 * (but with prior preprocessing using <a href="https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html">sklearn.preprocessing.StandardScaler</a>) 94 * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a> 95 * (using the PCA.cor method) 96 */ 97 public static final double[][] EXPECTED_COR_NO_BIAS = { 98 { 1.0194521, -1.0540926 }, 99 { 1.5464984, 1.5811388 }, 100 { -0.1889822, 0.0 }, 101 { 0.0745409, -1.0540926 }, 102 { -2.451509, 0.5270463 }, 103 }; 104 public static final double DELTA = 0.000001; 105 106 @Test 107 public void defaultSettings() { 108 PCA pca = new PCA(2); 109 Assert.assertEquals(2, pca.getNumComponents()); 110 Assert.assertFalse(pca.isScale()); 111 Assert.assertTrue(pca.isBiasCorrection()); 112 } 113 114 @Test 115 public void covariance() { 116 PCA pca = new PCA(2); 117 double[][] actual = pca.fitAndTransform(SCORES); 118 Assert.assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA); 119 Assert.assertArrayEquals(EXPECTED_VARIANCE, pca.getVariance(), DELTA); 120 assertExpected(EXPECTED_COMPONENTS, pca.getComponents()); 121 assertExpected(EXPECTED_COV, actual); 122 123 // calling fit and transform individually should be the same as combo method 124 pca = new PCA(2); 125 actual = pca.fit(SCORES).transform(SCORES); 126 Assert.assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA); 127 assertExpected(EXPECTED_COV, actual); 128 } 129 130 @Test 131 public void correlation() { 132 PCA pca = new PCA(2, true, true); 133 double[][] actual = pca.fitAndTransform(SCORES); 134 assertExpected(EXPECTED_COR, actual); 135 } 136 137 @Test 138 public void correlationNoBias() { 139 PCA pca = new PCA(2, true, false); 140 double[][] actual = pca.fitAndTransform(SCORES); 141 assertExpected(EXPECTED_COR_NO_BIAS, actual); 142 } 143 144 @Test 145 public void transformWithoutFit() { 146 PCA pca = new PCA(2); 147 try { 148 pca.transform(SCORES); 149 Assert.fail("an exception should have been thrown"); 150 } catch (MathIllegalStateException mise) { 151 Assert.assertEquals(LocalizedStatFormats.ILLEGAL_STATE_PCA, mise.getSpecifier()); 152 Assert.assertEquals("transform", mise.getParts()[0]); 153 } 154 } 155 156 private static void assertExpected(double[][] expected, double[][] actual) { 157 for (int i = 0; i < expected.length; i++) { 158 double[] e = expected[i]; 159 double[] t = actual[i]; 160 Assert.assertArrayEquals(e, t, DELTA); 161 } 162 } 163 }