WilcoxonSignedRankTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.inference;

  22. import java.util.ArrayList;
  23. import java.util.List;

  24. import org.hipparchus.distribution.continuous.NormalDistribution;
  25. import org.hipparchus.exception.LocalizedCoreFormats;
  26. import org.hipparchus.exception.MathIllegalArgumentException;
  27. import org.hipparchus.exception.MathIllegalStateException;
  28. import org.hipparchus.exception.NullArgumentException;
  29. import org.hipparchus.stat.ranking.NaNStrategy;
  30. import org.hipparchus.stat.ranking.NaturalRanking;
  31. import org.hipparchus.stat.ranking.TiesStrategy;
  32. import org.hipparchus.util.FastMath;
  33. import org.hipparchus.util.MathArrays;

  34. /**
  35.  * An implementation of the Wilcoxon signed-rank test.
  36.  *
  37.  * This implementation currently handles only paired (equal length) samples
  38.  * and discards tied pairs from the analysis. The latter behavior differs from
  39.  * the R implementation of wilcox.test and corresponds to the "wilcox"
  40.  * zero_method configurable in scipy.stats.wilcoxon.
  41.  */
  42. public class WilcoxonSignedRankTest { // NOPMD - this is not a Junit test class, PMD false positive here

  43.     /** Ranking algorithm. */
  44.     private final NaturalRanking naturalRanking;

  45.     /**
  46.      * Create a test instance where NaN's are left in place and ties get the
  47.      * average of applicable ranks.
  48.      */
  49.     public WilcoxonSignedRankTest() {
  50.         naturalRanking = new NaturalRanking(NaNStrategy.FIXED,
  51.                                             TiesStrategy.AVERAGE);
  52.     }

  53.     /**
  54.      * Create a test instance using the given strategies for NaN's and ties.
  55.      *
  56.      * @param nanStrategy specifies the strategy that should be used for
  57.      *        Double.NaN's
  58.      * @param tiesStrategy specifies the strategy that should be used for ties
  59.      */
  60.     public WilcoxonSignedRankTest(final NaNStrategy nanStrategy,
  61.                                   final TiesStrategy tiesStrategy) {
  62.         naturalRanking = new NaturalRanking(nanStrategy, tiesStrategy);
  63.     }

  64.     /**
  65.      * Ensures that the provided arrays fulfills the assumptions. Also computes
  66.      * and returns the number of tied pairs (i.e., zero differences).
  67.      *
  68.      * @param x first sample
  69.      * @param y second sample
  70.      * @return the number of indices where x[i] == y[i]
  71.      * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
  72.      * @throws MathIllegalArgumentException if {@code x} or {@code y} are
  73.      *         zero-length
  74.      * @throws MathIllegalArgumentException if {@code x} and {@code y} do not
  75.      *         have the same length.
  76.      * @throws MathIllegalArgumentException if all pairs are tied (i.e., if no
  77.      *         data remains when tied pairs have been removed.
  78.      */
  79.     private int ensureDataConformance(final double[] x, final double[] y)
  80.         throws MathIllegalArgumentException, NullArgumentException {

  81.         if (x == null || y == null) {
  82.             throw new NullArgumentException();
  83.         }
  84.         if (x.length == 0 || y.length == 0) {
  85.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
  86.         }
  87.         MathArrays.checkEqualLength(y, x);
  88.         int nTies = 0;
  89.         for (int i = 0; i < x.length; i++) {
  90.             if (x[i] == y[i]) {
  91.                 nTies++;
  92.             }
  93.         }
  94.         if (x.length - nTies == 0) {
  95.             throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_DATA);
  96.         }
  97.         return nTies;
  98.     }

  99.     /**
  100.      * Calculates y[i] - x[i] for all i, discarding ties.
  101.      *
  102.      * @param x first sample
  103.      * @param y second sample
  104.      * @return z = y - x (minus tied values)
  105.      */
  106.     private double[] calculateDifferences(final double[] x, final double[] y) {
  107.         final List<Double> differences = new ArrayList<>();
  108.         for (int i = 0; i < x.length; ++i) {
  109.             if (y[i] != x[i]) {
  110.                 differences.add(y[i] - x[i]);
  111.             }
  112.         }
  113.         final int nDiff = differences.size();
  114.         final double[] z = new double[nDiff];
  115.         for (int i = 0; i < nDiff; i++) {
  116.             z[i] = differences.get(i);
  117.         }
  118.         return z;
  119.     }

  120.     /**
  121.      * Calculates |z[i]| for all i
  122.      *
  123.      * @param z sample
  124.      * @return |z|
  125.      * @throws NullArgumentException if {@code z} is {@code null}
  126.      * @throws MathIllegalArgumentException if {@code z} is zero-length.
  127.      */
  128.     private double[] calculateAbsoluteDifferences(final double[] z)
  129.         throws MathIllegalArgumentException, NullArgumentException {

  130.         if (z == null) {
  131.             throw new NullArgumentException();
  132.         }

  133.         if (z.length == 0) {
  134.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
  135.         }

  136.         final double[] zAbs = new double[z.length];

  137.         for (int i = 0; i < z.length; ++i) {
  138.             zAbs[i] = FastMath.abs(z[i]);
  139.         }

  140.         return zAbs;
  141.     }

  142.     /**
  143.      * Computes the
  144.      * <a href="http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test">
  145.      * Wilcoxon signed ranked statistic</a> comparing means for two related
  146.      * samples or repeated measurements on a single sample.
  147.      * <p>
  148.      * This statistic can be used to perform a Wilcoxon signed ranked test
  149.      * evaluating the null hypothesis that the two related samples or repeated
  150.      * measurements on a single sample have equal mean.
  151.      * </p>
  152.      * <p>
  153.      * Let X<sub>i</sub> denote the i'th individual of the first sample and
  154.      * Y<sub>i</sub> the related i'th individual in the second sample. Let
  155.      * Z<sub>i</sub> = Y<sub>i</sub> - X<sub>i</sub>.
  156.      * </p>
  157.      * <p>* <strong>Preconditions</strong>:</p>
  158.      * <ul>
  159.      * <li>The differences Z<sub>i</sub> must be independent.</li>
  160.      * <li>Each Z<sub>i</sub> comes from a continuous population (they must be
  161.      * identical) and is symmetric about a common median.</li>
  162.      * <li>The values that X<sub>i</sub> and Y<sub>i</sub> represent are
  163.      * ordered, so the comparisons greater than, less than, and equal to are
  164.      * meaningful.</li>
  165.      * </ul>
  166.      *
  167.      * @param x the first sample
  168.      * @param y the second sample
  169.      * @return wilcoxonSignedRank statistic (the larger of W+ and W-)
  170.      * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
  171.      * @throws MathIllegalArgumentException if {@code x} or {@code y} are
  172.      *         zero-length.
  173.      * @throws MathIllegalArgumentException if {@code x} and {@code y} do not
  174.      *         have the same length.
  175.      */
  176.     public double wilcoxonSignedRank(final double[] x, final double[] y)
  177.         throws MathIllegalArgumentException, NullArgumentException {

  178.         ensureDataConformance(x, y);

  179.         final double[] z = calculateDifferences(x, y);
  180.         final double[] zAbs = calculateAbsoluteDifferences(z);

  181.         final double[] ranks = naturalRanking.rank(zAbs);

  182.         double Wplus = 0;

  183.         for (int i = 0; i < z.length; ++i) {
  184.             if (z[i] > 0) {
  185.                 Wplus += ranks[i];
  186.             }
  187.         }

  188.         final int n = z.length;
  189.         final double Wminus = ((n * (n + 1)) / 2.0) - Wplus;

  190.         return FastMath.max(Wplus, Wminus);
  191.     }

  192.     /**
  193.      * Calculates the p-value associated with a Wilcoxon signed rank statistic
  194.      * by enumerating all possible rank sums and counting the number that exceed
  195.      * the given value.
  196.      *
  197.      * @param stat Wilcoxon signed rank statistic value
  198.      * @param n number of subjects (corresponding to x.length)
  199.      * @return two-sided exact p-value
  200.      */
  201.     private double calculateExactPValue(final double stat, final int n) {
  202.         final int m = 1 << n;
  203.         int largerRankSums = 0;
  204.         for (int i = 0; i < m; ++i) {
  205.             int rankSum = 0;

  206.             // Generate all possible rank sums
  207.             for (int j = 0; j < n; ++j) {

  208.                 // (i >> j) & 1 extract i's j-th bit from the right
  209.                 if (((i >> j) & 1) == 1) {
  210.                     rankSum += j + 1;
  211.                 }
  212.             }

  213.             if (rankSum >= stat) {
  214.                 ++largerRankSums;
  215.             }
  216.         }

  217.         /*
  218.          * largerRankSums / m gives the one-sided p-value, so it's multiplied
  219.          * with 2 to get the two-sided p-value
  220.          */
  221.         return 2 * ((double) largerRankSums) / m;
  222.     }

  223.     /**
  224.      * Computes an estimate of the (2-sided) p-value using the normal
  225.      * approximation. Includes a continuity correction in computing the
  226.      * correction factor.
  227.      *
  228.      * @param stat Wilcoxon rank sum statistic
  229.      * @param n number of subjects (corresponding to x.length minus any tied ranks)
  230.      * @return two-sided asymptotic p-value
  231.      */
  232.     private double calculateAsymptoticPValue(final double stat, final int n) {

  233.         final double ES = n * (n + 1) / 4.0;

  234.         /*
  235.          * Same as (but saves computations): final double VarW = ((double) (N *
  236.          * (N + 1) * (2*N + 1))) / 24;
  237.          */
  238.         final double VarS = ES * ((2 * n + 1) / 6.0);

  239.         double z = stat - ES;
  240.         final double t = FastMath.signum(z);
  241.         z = (z - t * 0.5) / FastMath.sqrt(VarS);

  242.         // want 2-sided tail probability, so make sure z < 0
  243.         if (z > 0) {
  244.             z = -z;
  245.         }
  246.         final NormalDistribution standardNormal = new NormalDistribution(0, 1);
  247.         return 2 * standardNormal.cumulativeProbability(z);
  248.     }

  249.     /**
  250.      * Returns the <i>observed significance level</i>, or
  251.      * <a href= "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
  252.      * p-value</a>, associated with a
  253.      * <a href="http://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test">
  254.      * Wilcoxon signed ranked statistic</a> comparing mean for two related
  255.      * samples or repeated measurements on a single sample.
  256.      * <p>
  257.      * Let X<sub>i</sub> denote the i'th individual of the first sample and
  258.      * Y<sub>i</sub> the related i'th individual in the second sample. Let
  259.      * Z<sub>i</sub> = Y<sub>i</sub> - X<sub>i</sub>.
  260.      * </p>
  261.      * <p>
  262.      * <strong>Preconditions</strong>:</p>
  263.      * <ul>
  264.      * <li>The differences Z<sub>i</sub> must be independent.</li>
  265.      * <li>Each Z<sub>i</sub> comes from a continuous population (they must be
  266.      * identical) and is symmetric about a common median.</li>
  267.      * <li>The values that X<sub>i</sub> and Y<sub>i</sub> represent are
  268.      * ordered, so the comparisons greater than, less than, and equal to are
  269.      * meaningful.</li>
  270.      * </ul>
  271.      * <p><strong>Implementation notes</strong>:</p>
  272.      * <ul>
  273.      * <li>Tied pairs are discarded from the data.</li>
  274.      * <li>When {@code exactPValue} is false, the normal approximation is used
  275.      * to estimate the p-value including a continuity correction factor.
  276.      * {@code wilcoxonSignedRankTest(x, y, true)} should give the same results
  277.      * as {@code  wilcox.test(x, y, alternative = "two.sided", mu = 0,
  278.      *     paired = TRUE, exact = FALSE, correct = TRUE)} in R (as long as
  279.      * there are no tied pairs in the data).</li>
  280.      * </ul>
  281.      *
  282.      * @param x the first sample
  283.      * @param y the second sample
  284.      * @param exactPValue if the exact p-value is wanted (only works for
  285.      *        x.length &lt;= 30, if true and x.length &gt; 30, MathIllegalArgumentException is thrown)
  286.      * @return p-value
  287.      * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
  288.      * @throws MathIllegalArgumentException if {@code x} or {@code y} are
  289.      *         zero-length or for all i, x[i] == y[i]
  290.      * @throws MathIllegalArgumentException if {@code x} and {@code y} do not
  291.      *         have the same length.
  292.      * @throws MathIllegalArgumentException if {@code exactPValue} is
  293.      *         {@code true} and {@code x.length} &gt; 30
  294.      * @throws MathIllegalStateException if the p-value can not be computed due
  295.      *         to a convergence error
  296.      * @throws MathIllegalStateException if the maximum number of iterations is
  297.      *         exceeded
  298.      */
  299.     public double wilcoxonSignedRankTest(final double[] x, final double[] y,
  300.                                          final boolean exactPValue)
  301.         throws MathIllegalArgumentException, NullArgumentException,
  302.         MathIllegalStateException {

  303.         final int nTies = ensureDataConformance(x, y);

  304.         final int n = x.length - nTies;
  305.         final double stat = wilcoxonSignedRank(x, y);

  306.         if (exactPValue && n > 30) {
  307.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_LARGE,
  308.                                                    n, 30);
  309.         }

  310.         if (exactPValue) {
  311.             return calculateExactPValue(stat, n);
  312.         } else {
  313.             return calculateAsymptoticPValue(stat, n);
  314.         }
  315.     }
  316. }