1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.inference; 23 24 import java.util.ArrayList; 25 import java.util.List; 26 27 import org.hipparchus.distribution.continuous.NormalDistribution; 28 import org.hipparchus.exception.LocalizedCoreFormats; 29 import org.hipparchus.exception.MathIllegalArgumentException; 30 import org.hipparchus.exception.MathIllegalStateException; 31 import org.hipparchus.exception.NullArgumentException; 32 import org.hipparchus.stat.ranking.NaNStrategy; 33 import org.hipparchus.stat.ranking.NaturalRanking; 34 import org.hipparchus.stat.ranking.TiesStrategy; 35 import org.hipparchus.util.FastMath; 36 import org.hipparchus.util.MathArrays; 37 38 /** 39 * An implementation of the Wilcoxon signed-rank test. 40 * 41 * This implementation currently handles only paired (equal length) samples 42 * and discards tied pairs from the analysis. The latter behavior differs from 43 * the R implementation of wilcox.test and corresponds to the "wilcox" 44 * zero_method configurable in scipy.stats.wilcoxon. 45 */ 46 public class WilcoxonSignedRankTest { // NOPMD - this is not a Junit test class, PMD false positive here 47 48 /** Ranking algorithm. */ 49 private final NaturalRanking naturalRanking; 50 51 /** 52 * Create a test instance where NaN's are left in place and ties get the 53 * average of applicable ranks. 54 */ 55 public WilcoxonSignedRankTest() { 56 naturalRanking = new NaturalRanking(NaNStrategy.FIXED, 57 TiesStrategy.AVERAGE); 58 } 59 60 /** 61 * Create a test instance using the given strategies for NaN's and ties. 62 * 63 * @param nanStrategy specifies the strategy that should be used for 64 * Double.NaN's 65 * @param tiesStrategy specifies the strategy that should be used for ties 66 */ 67 public WilcoxonSignedRankTest(final NaNStrategy nanStrategy, 68 final TiesStrategy tiesStrategy) { 69 naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy); 70 } 71 72 /** 73 * Ensures that the provided arrays fulfills the assumptions. Also computes 74 * and returns the number of tied pairs (i.e., zero differences). 75 * 76 * @param x first sample 77 * @param y second sample 78 * @return the number of indices where x[i] == y[i] 79 * @throws NullArgumentException if {@code x} or {@code y} are {@code null}. 80 * @throws MathIllegalArgumentException if {@code x} or {@code y} are 81 * zero-length 82 * @throws MathIllegalArgumentException if {@code x} and {@code y} do not 83 * have the same length. 84 * @throws MathIllegalArgumentException if all pairs are tied (i.e., if no 85 * data remains when tied pairs have been removed. 86 */ 87 private int ensureDataConformance(final double[] x, final double[] y) 88 throws MathIllegalArgumentException, NullArgumentException { 89 90 if (x == null || y == null) { 91 throw new NullArgumentException(); 92 } 93 if (x.length == 0 || y.length == 0) { 94 throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA); 95 } 96 MathArrays.checkEqualLength(y, x); 97 int nTies = 0; 98 for (int i = 0; i < x.length; i++) { 99 if (x[i] == y[i]) { 100 nTies++; 101 } 102 } 103 if (x.length - nTies == 0) { 104 throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_DATA); 105 } 106 return nTies; 107 } 108 109 /** 110 * Calculates y[i] - x[i] for all i, discarding ties. 111 * 112 * @param x first sample 113 * @param y second sample 114 * @return z = y - x (minus tied values) 115 */ 116 private double[] calculateDifferences(final double[] x, final double[] y) { 117 final List<Double> differences = new ArrayList<>(); 118 for (int i = 0; i < x.length; ++i) { 119 if (y[i] != x[i]) { 120 differences.add(y[i] - x[i]); 121 } 122 } 123 final int nDiff = differences.size(); 124 final double[] z = new double[nDiff]; 125 for (int i = 0; i < nDiff; i++) { 126 z[i] = differences.get(i); 127 } 128 return z; 129 } 130 131 /** 132 * Calculates |z[i]| for all i 133 * 134 * @param z sample 135 * @return |z| 136 * @throws NullArgumentException if {@code z} is {@code null} 137 * @throws MathIllegalArgumentException if {@code z} is zero-length. 138 */ 139 private double[] calculateAbsoluteDifferences(final double[] z) 140 throws MathIllegalArgumentException, NullArgumentException { 141 142 if (z == null) { 143 throw new NullArgumentException(); 144 } 145 146 if (z.length == 0) { 147 throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA); 148 } 149 150 final double[] zAbs = new double[z.length]; 151 152 for (int i = 0; i < z.length; ++i) { 153 zAbs[i] = FastMath.abs(z[i]); 154 } 155 156 return zAbs; 157 } 158 159 /** 160 * Computes the 161 * <a href="http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test"> 162 * Wilcoxon signed ranked statistic</a> comparing means for two related 163 * samples or repeated measurements on a single sample. 164 * <p> 165 * This statistic can be used to perform a Wilcoxon signed ranked test 166 * evaluating the null hypothesis that the two related samples or repeated 167 * measurements on a single sample have equal mean. 168 * </p> 169 * <p> 170 * Let X<sub>i</sub> denote the i'th individual of the first sample and 171 * Y<sub>i</sub> the related i'th individual in the second sample. Let 172 * Z<sub>i</sub> = Y<sub>i</sub> - X<sub>i</sub>. 173 * </p> 174 * <p>* <strong>Preconditions</strong>:</p> 175 * <ul> 176 * <li>The differences Z<sub>i</sub> must be independent.</li> 177 * <li>Each Z<sub>i</sub> comes from a continuous population (they must be 178 * identical) and is symmetric about a common median.</li> 179 * <li>The values that X<sub>i</sub> and Y<sub>i</sub> represent are 180 * ordered, so the comparisons greater than, less than, and equal to are 181 * meaningful.</li> 182 * </ul> 183 * 184 * @param x the first sample 185 * @param y the second sample 186 * @return wilcoxonSignedRank statistic (the larger of W+ and W-) 187 * @throws NullArgumentException if {@code x} or {@code y} are {@code null}. 188 * @throws MathIllegalArgumentException if {@code x} or {@code y} are 189 * zero-length. 190 * @throws MathIllegalArgumentException if {@code x} and {@code y} do not 191 * have the same length. 192 */ 193 public double wilcoxonSignedRank(final double[] x, final double[] y) 194 throws MathIllegalArgumentException, NullArgumentException { 195 196 ensureDataConformance(x, y); 197 198 final double[] z = calculateDifferences(x, y); 199 final double[] zAbs = calculateAbsoluteDifferences(z); 200 201 final double[] ranks = naturalRanking.rank(zAbs); 202 203 double Wplus = 0; 204 205 for (int i = 0; i < z.length; ++i) { 206 if (z[i] > 0) { 207 Wplus += ranks[i]; 208 } 209 } 210 211 final int n = z.length; 212 final double Wminus = ((n * (n + 1)) / 2.0) - Wplus; 213 214 return FastMath.max(Wplus, Wminus); 215 } 216 217 /** 218 * Calculates the p-value associated with a Wilcoxon signed rank statistic 219 * by enumerating all possible rank sums and counting the number that exceed 220 * the given value. 221 * 222 * @param stat Wilcoxon signed rank statistic value 223 * @param n number of subjects (corresponding to x.length) 224 * @return two-sided exact p-value 225 */ 226 private double calculateExactPValue(final double stat, final int n) { 227 final int m = 1 << n; 228 int largerRankSums = 0; 229 for (int i = 0; i < m; ++i) { 230 int rankSum = 0; 231 232 // Generate all possible rank sums 233 for (int j = 0; j < n; ++j) { 234 235 // (i >> j) & 1 extract i's j-th bit from the right 236 if (((i >> j) & 1) == 1) { 237 rankSum += j + 1; 238 } 239 } 240 241 if (rankSum >= stat) { 242 ++largerRankSums; 243 } 244 } 245 246 /* 247 * largerRankSums / m gives the one-sided p-value, so it's multiplied 248 * with 2 to get the two-sided p-value 249 */ 250 return 2 * ((double) largerRankSums) / m; 251 } 252 253 /** 254 * Computes an estimate of the (2-sided) p-value using the normal 255 * approximation. Includes a continuity correction in computing the 256 * correction factor. 257 * 258 * @param stat Wilcoxon rank sum statistic 259 * @param n number of subjects (corresponding to x.length minus any tied ranks) 260 * @return two-sided asymptotic p-value 261 */ 262 private double calculateAsymptoticPValue(final double stat, final int n) { 263 264 final double ES = n * (n + 1) / 4.0; 265 266 /* 267 * Same as (but saves computations): final double VarW = ((double) (N * 268 * (N + 1) * (2*N + 1))) / 24; 269 */ 270 final double VarS = ES * ((2 * n + 1) / 6.0); 271 272 double z = stat - ES; 273 final double t = FastMath.signum(z); 274 z = (z - t * 0.5) / FastMath.sqrt(VarS); 275 276 // want 2-sided tail probability, so make sure z < 0 277 if (z > 0) { 278 z = -z; 279 } 280 final NormalDistribution standardNormal = new NormalDistribution(0, 1); 281 return 2 * standardNormal.cumulativeProbability(z); 282 } 283 284 /** 285 * Returns the <i>observed significance level</i>, or 286 * <a href= "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 287 * p-value</a>, associated with a 288 * <a href="http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test"> 289 * Wilcoxon signed ranked statistic</a> comparing mean for two related 290 * samples or repeated measurements on a single sample. 291 * <p> 292 * Let X<sub>i</sub> denote the i'th individual of the first sample and 293 * Y<sub>i</sub> the related i'th individual in the second sample. Let 294 * Z<sub>i</sub> = Y<sub>i</sub> - X<sub>i</sub>. 295 * </p> 296 * <p> 297 * <strong>Preconditions</strong>:</p> 298 * <ul> 299 * <li>The differences Z<sub>i</sub> must be independent.</li> 300 * <li>Each Z<sub>i</sub> comes from a continuous population (they must be 301 * identical) and is symmetric about a common median.</li> 302 * <li>The values that X<sub>i</sub> and Y<sub>i</sub> represent are 303 * ordered, so the comparisons greater than, less than, and equal to are 304 * meaningful.</li> 305 * </ul> 306 * <p><strong>Implementation notes</strong>:</p> 307 * <ul> 308 * <li>Tied pairs are discarded from the data.</li> 309 * <li>When {@code exactPValue} is false, the normal approximation is used 310 * to estimate the p-value including a continuity correction factor. 311 * {@code wilcoxonSignedRankTest(x, y, true)} should give the same results 312 * as {@code wilcox.test(x, y, alternative = "two.sided", mu = 0, 313 * paired = TRUE, exact = FALSE, correct = TRUE)} in R (as long as 314 * there are no tied pairs in the data).</li> 315 * </ul> 316 * 317 * @param x the first sample 318 * @param y the second sample 319 * @param exactPValue if the exact p-value is wanted (only works for 320 * x.length <= 30, if true and x.length > 30, MathIllegalArgumentException is thrown) 321 * @return p-value 322 * @throws NullArgumentException if {@code x} or {@code y} are {@code null}. 323 * @throws MathIllegalArgumentException if {@code x} or {@code y} are 324 * zero-length or for all i, x[i] == y[i] 325 * @throws MathIllegalArgumentException if {@code x} and {@code y} do not 326 * have the same length. 327 * @throws MathIllegalArgumentException if {@code exactPValue} is 328 * {@code true} and {@code x.length} > 30 329 * @throws MathIllegalStateException if the p-value can not be computed due 330 * to a convergence error 331 * @throws MathIllegalStateException if the maximum number of iterations is 332 * exceeded 333 */ 334 public double wilcoxonSignedRankTest(final double[] x, final double[] y, 335 final boolean exactPValue) 336 throws MathIllegalArgumentException, NullArgumentException, 337 MathIllegalStateException { 338 339 final int nTies = ensureDataConformance(x, y); 340 341 final int n = x.length - nTies; 342 final double stat = wilcoxonSignedRank(x, y); 343 344 if (exactPValue && n > 30) { 345 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_LARGE, 346 n, 30); 347 } 348 349 if (exactPValue) { 350 return calculateExactPValue(stat, n); 351 } else { 352 return calculateAsymptoticPValue(stat, n); 353 } 354 } 355 }