KolmogorovSmirnovTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */

  21. package org.hipparchus.stat.inference;

  22. import java.math.RoundingMode;
  23. import java.util.Arrays;
  24. import java.util.HashSet;

  25. import org.hipparchus.distribution.RealDistribution;
  26. import org.hipparchus.exception.LocalizedCoreFormats;
  27. import org.hipparchus.exception.MathIllegalArgumentException;
  28. import org.hipparchus.exception.MathIllegalStateException;
  29. import org.hipparchus.exception.MathRuntimeException;
  30. import org.hipparchus.fraction.BigFraction;
  31. import org.hipparchus.fraction.BigFractionField;
  32. import org.hipparchus.linear.Array2DRowFieldMatrix;
  33. import org.hipparchus.linear.FieldMatrix;
  34. import org.hipparchus.linear.MatrixUtils;
  35. import org.hipparchus.linear.RealMatrix;
  36. import org.hipparchus.random.RandomDataGenerator;
  37. import org.hipparchus.random.RandomGenerator;
  38. import org.hipparchus.stat.LocalizedStatFormats;
  39. import org.hipparchus.util.FastMath;
  40. import org.hipparchus.util.MathArrays;
  41. import org.hipparchus.util.MathUtils;

  42. /**
  43.  * Implementation of the <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">
  44.  * Kolmogorov-Smirnov (K-S) test</a> for equality of continuous distributions.
  45.  * <p>
  46.  * The K-S test uses a statistic based on the maximum deviation of the empirical distribution of
  47.  * sample data points from the distribution expected under the null hypothesis. For one-sample tests
  48.  * evaluating the null hypothesis that a set of sample data points follow a given distribution, the
  49.  * test statistic is \(D_n=\sup_x |F_n(x)-F(x)|\), where \(F\) is the expected distribution and
  50.  * \(F_n\) is the empirical distribution of the \(n\) sample data points. The distribution of
  51.  * \(D_n\) is estimated using a method based on [1] with certain quick decisions for extreme values
  52.  * given in [2].
  53.  * <p>
  54.  * Two-sample tests are also supported, evaluating the null hypothesis that the two samples
  55.  * {@code x} and {@code y} come from the same underlying distribution. In this case, the test
  56.  * statistic is \(D_{n,m}=\sup_t | F_n(t)-F_m(t)|\) where \(n\) is the length of {@code x}, \(m\) is
  57.  * the length of {@code y}, \(F_n\) is the empirical distribution that puts mass \(1/n\) at each of
  58.  * the values in {@code x} and \(F_m\) is the empirical distribution of the {@code y} values. The
  59.  * default 2-sample test method, {@link #kolmogorovSmirnovTest(double[], double[])} works as
  60.  * follows:
  61.  * <ul>
  62.  * <li>For small samples (where the product of the sample sizes is less than
  63.  * {@link #LARGE_SAMPLE_PRODUCT}), the method presented in [4] is used to compute the
  64.  * exact p-value for the 2-sample test.</li>
  65.  * <li>When the product of the sample sizes exceeds {@link #LARGE_SAMPLE_PRODUCT}, the asymptotic
  66.  * distribution of \(D_{n,m}\) is used. See {@link #approximateP(double, int, int)} for details on
  67.  * the approximation.</li>
  68.  * </ul>
  69.  * <p>
  70.  * If the product of the sample sizes is less than {@link #LARGE_SAMPLE_PRODUCT} and the sample
  71.  * data contains ties, random jitter is added to the sample data to break ties before applying
  72.  * the algorithm above. Alternatively, the {@link #bootstrap(double[], double[], int, boolean)}
  73.  * method, modeled after <a href="http://sekhon.berkeley.edu/matching/ks.boot.html">ks.boot</a>
  74.  * in the R Matching package [3], can be used if ties are known to be present in the data.
  75.  * <p>
  76.  * In the two-sample case, \(D_{n,m}\) has a discrete distribution. This makes the p-value
  77.  * associated with the null hypothesis \(H_0 : D_{n,m} \ge d \) differ from \(H_0 : D_{n,m} &gt; d \)
  78.  * by the mass of the observed value \(d\). To distinguish these, the two-sample tests use a boolean
  79.  * {@code strict} parameter. This parameter is ignored for large samples.
  80.  * <p>
  81.  * The methods used by the 2-sample default implementation are also exposed directly:
  82.  * <ul>
  83.  * <li>{@link #exactP(double, int, int, boolean)} computes exact 2-sample p-values</li>
  84.  * <li>{@link #approximateP(double, int, int)} uses the asymptotic distribution The {@code boolean}
  85.  * arguments in the first two methods allow the probability used to estimate the p-value to be
  86.  * expressed using strict or non-strict inequality. See
  87.  * {@link #kolmogorovSmirnovTest(double[], double[], boolean)}.</li>
  88.  * </ul>
  89.  * <p>
  90.  * References:
  91.  * <ul>
  92.  * <li>[1] <a href="http://www.jstatsoft.org/v08/i18/"> Evaluating Kolmogorov's Distribution</a> by
  93.  * George Marsaglia, Wai Wan Tsang, and Jingbo Wang</li>
  94.  * <li>[2] <a href="http://www.jstatsoft.org/v39/i11/"> Computing the Two-Sided Kolmogorov-Smirnov
  95.  * Distribution</a> by Richard Simard and Pierre L'Ecuyer</li>
  96.  * <li>[3] Jasjeet S. Sekhon. 2011. <a href="http://www.jstatsoft.org/article/view/v042i07">
  97.  * Multivariate and Propensity Score Matching Software with Automated Balance Optimization:
  98.  * The Matching package for R</a> Journal of Statistical Software, 42(7): 1-52.</li>
  99.  * <li>[4] Kim, P. J. and Jennrich, R. I. (1970). Tables of the Exact Sampling Distribution of the
  100.  * Two-Sample Kolmogorov-Smirnov Criterion D_mn ,m≦n in Selected Tables in Mathematical Statistics,
  101.  * Vol. 1, H. L. Harter and D. B. Owen, editors.</li>
  102.  * </ul>
  103.  * <p>
  104.  * Note that [1] contains an error in computing h, refer to <a
  105.  * href="https://issues.apache.org/jira/browse/MATH-437">MATH-437</a> for details.
  106.  */
  107. public class KolmogorovSmirnovTest { // NOPMD - this is not a Junit test class, PMD false positive here

  108.     /**
  109.      * Bound on the number of partial sums in {@link #ksSum(double, double, int)}
  110.      */
  111.     protected static final int MAXIMUM_PARTIAL_SUM_COUNT = 100000;

  112.     /** Convergence criterion for {@link #ksSum(double, double, int)} */
  113.     protected static final double KS_SUM_CAUCHY_CRITERION = 1E-20;

  114.     /** Convergence criterion for the sums in #pelzGood(double, double, int)} */
  115.     protected static final double PG_SUM_RELATIVE_ERROR = 1.0e-10;

  116.     /**
  117.      * When product of sample sizes exceeds this value, 2-sample K-S test uses asymptotic
  118.      * distribution to compute the p-value.
  119.      */
  120.     protected static final int LARGE_SAMPLE_PRODUCT = 10000;

  121.     /**
  122.      * RandomDataGenerator used by {@link #bootstrap(double[], double[], int)}
  123.      * or to generate jitter to break ties in the data.
  124.      */
  125.     private final RandomDataGenerator gen = new RandomDataGenerator();

  126.     /**
  127.      * Construct a KolmogorovSmirnovTest instance.
  128.      */
  129.     public KolmogorovSmirnovTest() {
  130.         super();
  131.     }

  132.     /**
  133.      * Construct a KolmogorovSmirnovTest instance providing a seed for the PRNG
  134.      * used by the {@link #bootstrap(double[], double[], int)} method.
  135.      *
  136.      * @param seed the seed for the PRNG
  137.      */
  138.     public KolmogorovSmirnovTest(long seed) {
  139.         super();
  140.         gen.setSeed(seed);
  141.     }

  142.     /**
  143.      * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
  144.      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
  145.      * evaluating the null hypothesis that {@code data} conforms to {@code distribution}. If
  146.      * {@code exact} is true, the distribution used to compute the p-value is computed using
  147.      * extended precision. See {@link #cdfExact(double, int)}.
  148.      *
  149.      * @param distribution reference distribution
  150.      * @param data sample being being evaluated
  151.      * @param exact whether or not to force exact computation of the p-value
  152.      * @return the p-value associated with the null hypothesis that {@code data} is a sample from
  153.      *         {@code distribution}
  154.      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
  155.      * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
  156.      */
  157.     public double kolmogorovSmirnovTest(RealDistribution distribution, double[] data, boolean exact) {
  158.         return 1d - cdf(kolmogorovSmirnovStatistic(distribution, data), data.length, exact);
  159.     }

  160.     /**
  161.      * Computes the one-sample Kolmogorov-Smirnov test statistic, \(D_n=\sup_x |F_n(x)-F(x)|\) where
  162.      * \(F\) is the distribution (cdf) function associated with {@code distribution}, \(n\) is the
  163.      * length of {@code data} and \(F_n\) is the empirical distribution that puts mass \(1/n\) at
  164.      * each of the values in {@code data}.
  165.      *
  166.      * @param distribution reference distribution
  167.      * @param data sample being evaluated
  168.      * @return Kolmogorov-Smirnov statistic \(D_n\)
  169.      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
  170.      * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
  171.      */
  172.     public double kolmogorovSmirnovStatistic(RealDistribution distribution, double[] data) {
  173.         checkArray(data);
  174.         final int n = data.length;
  175.         final double nd = n;
  176.         final double[] dataCopy = new double[n];
  177.         System.arraycopy(data, 0, dataCopy, 0, n);
  178.         Arrays.sort(dataCopy);
  179.         double d = 0d;
  180.         for (int i = 1; i <= n; i++) {
  181.             final double yi = distribution.cumulativeProbability(dataCopy[i - 1]);
  182.             final double currD = FastMath.max(yi - (i - 1) / nd, i / nd - yi);
  183.             if (currD > d) {
  184.                 d = currD;
  185.             }
  186.         }
  187.         return d;
  188.     }

  189.     /**
  190.      * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a
  191.      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
  192.      * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
  193.      * probability distribution. Specifically, what is returned is an estimate of the probability
  194.      * that the {@link #kolmogorovSmirnovStatistic(double[], double[])} associated with a randomly
  195.      * selected partition of the combined sample into subsamples of sizes {@code x.length} and
  196.      * {@code y.length} will strictly exceed (if {@code strict} is {@code true}) or be at least as
  197.      * large as {@code strict = false}) as {@code kolmogorovSmirnovStatistic(x, y)}.
  198.      * <ul>
  199.      * <li>For small samples (where the product of the sample sizes is less than
  200.      * {@link #LARGE_SAMPLE_PRODUCT}), the exact p-value is computed using the method presented
  201.      * in [4], implemented in {@link #exactP(double, int, int, boolean)}. </li>
  202.      * <li>When the product of the sample sizes exceeds {@link #LARGE_SAMPLE_PRODUCT}, the
  203.      * asymptotic distribution of \(D_{n,m}\) is used. See {@link #approximateP(double, int, int)}
  204.      * for details on the approximation.</li>
  205.      * </ul><p>
  206.      * If {@code x.length * y.length} &lt; {@link #LARGE_SAMPLE_PRODUCT} and the combined set of values in
  207.      * {@code x} and {@code y} contains ties, random jitter is added to {@code x} and {@code y} to
  208.      * break ties before computing \(D_{n,m}\) and the p-value. The jitter is uniformly distributed
  209.      * on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between
  210.      * values in the combined sample.</p>
  211.      * <p>
  212.      * If ties are known to be present in the data, {@link #bootstrap(double[], double[], int, boolean)}
  213.      * may be used as an alternative method for estimating the p-value.</p>
  214.      *
  215.      * @param x first sample dataset
  216.      * @param y second sample dataset
  217.      * @param strict whether or not the probability to compute is expressed as a strict inequality
  218.      *        (ignored for large samples)
  219.      * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent
  220.      *         samples from the same distribution
  221.      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
  222.      *         least 2
  223.      * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
  224.      * @see #bootstrap(double[], double[], int, boolean)
  225.      */
  226.     public double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) {
  227.         final long lengthProduct = (long) x.length * y.length;
  228.         final double[] xa;
  229.         final double[] ya;
  230.         if (lengthProduct < LARGE_SAMPLE_PRODUCT && hasTies(x,y)) {
  231.             xa = x.clone();
  232.             ya = y.clone();
  233.             fixTies(xa, ya);
  234.         } else {
  235.             xa = x;
  236.             ya = y;
  237.         }
  238.         if (lengthProduct < LARGE_SAMPLE_PRODUCT) {
  239.             return exactP(kolmogorovSmirnovStatistic(xa, ya), x.length, y.length, strict);
  240.         }
  241.         return approximateP(kolmogorovSmirnovStatistic(x, y), x.length, y.length);
  242.     }

  243.     /**
  244.      * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a
  245.      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
  246.      * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
  247.      * probability distribution. Assumes the strict form of the inequality used to compute the
  248.      * p-value. See {@link #kolmogorovSmirnovTest(RealDistribution, double[], boolean)}.
  249.      *
  250.      * @param x first sample dataset
  251.      * @param y second sample dataset
  252.      * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent
  253.      *         samples from the same distribution
  254.      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
  255.      *         least 2
  256.      * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
  257.      */
  258.     public double kolmogorovSmirnovTest(double[] x, double[] y) {
  259.         return kolmogorovSmirnovTest(x, y, true);
  260.     }

  261.     /**
  262.      * Computes the two-sample Kolmogorov-Smirnov test statistic, \(D_{n,m}=\sup_x |F_n(x)-F_m(x)|\)
  263.      * where \(n\) is the length of {@code x}, \(m\) is the length of {@code y}, \(F_n\) is the
  264.      * empirical distribution that puts mass \(1/n\) at each of the values in {@code x} and \(F_m\)
  265.      * is the empirical distribution of the {@code y} values.
  266.      *
  267.      * @param x first sample
  268.      * @param y second sample
  269.      * @return test statistic \(D_{n,m}\) used to evaluate the null hypothesis that {@code x} and
  270.      *         {@code y} represent samples from the same underlying distribution
  271.      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
  272.      *         least 2
  273.      * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
  274.      */
  275.     public double kolmogorovSmirnovStatistic(double[] x, double[] y) {
  276.         return integralKolmogorovSmirnovStatistic(x, y)/((double)(x.length * (long)y.length));
  277.     }

  278.     /**
  279.      * Computes the two-sample Kolmogorov-Smirnov test statistic, \(D_{n,m}=\sup_x |F_n(x)-F_m(x)|\)
  280.      * where \(n\) is the length of {@code x}, \(m\) is the length of {@code y}, \(F_n\) is the
  281.      * empirical distribution that puts mass \(1/n\) at each of the values in {@code x} and \(F_m\)
  282.      * is the empirical distribution of the {@code y} values. Finally \(n m D_{n,m}\) is returned
  283.      * as long value.
  284.      *
  285.      * @param x first sample
  286.      * @param y second sample
  287.      * @return test statistic \(n m D_{n,m}\) used to evaluate the null hypothesis that {@code x} and
  288.      *         {@code y} represent samples from the same underlying distribution
  289.      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
  290.      *         least 2
  291.      * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
  292.      */
  293.     private long integralKolmogorovSmirnovStatistic(double[] x, double[] y) {
  294.         checkArray(x);
  295.         checkArray(y);
  296.         // Copy and sort the sample arrays
  297.         final double[] sx = x.clone();
  298.         final double[] sy = y.clone();
  299.         Arrays.sort(sx);
  300.         Arrays.sort(sy);
  301.         final int n = sx.length;
  302.         final int m = sy.length;

  303.         int rankX = 0;
  304.         int rankY = 0;
  305.         long curD = 0l;

  306.         // Find the max difference between cdf_x and cdf_y
  307.         long supD = 0l;
  308.         do {
  309.             double z = Double.compare(sx[rankX], sy[rankY]) <= 0 ? sx[rankX] : sy[rankY];
  310.             while(rankX < n && Double.compare(sx[rankX], z) == 0) {
  311.                 rankX += 1;
  312.                 curD += m;
  313.             }
  314.             while(rankY < m && Double.compare(sy[rankY], z) == 0) {
  315.                 rankY += 1;
  316.                 curD -= n;
  317.             }
  318.             if (curD > supD) {
  319.                 supD = curD;
  320.             }
  321.             else if (-curD > supD) {
  322.                 supD = -curD;
  323.             }
  324.         } while(rankX < n && rankY < m);
  325.         return supD;
  326.     }

  327.     /**
  328.      * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
  329.      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
  330.      * evaluating the null hypothesis that {@code data} conforms to {@code distribution}.
  331.      *
  332.      * @param distribution reference distribution
  333.      * @param data sample being being evaluated
  334.      * @return the p-value associated with the null hypothesis that {@code data} is a sample from
  335.      *         {@code distribution}
  336.      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
  337.      * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
  338.      */
  339.     public double kolmogorovSmirnovTest(RealDistribution distribution, double[] data) {
  340.         return kolmogorovSmirnovTest(distribution, data, false);
  341.     }

  342.     /**
  343.      * Performs a <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov
  344.      * test</a> evaluating the null hypothesis that {@code data} conforms to {@code distribution}.
  345.      *
  346.      * @param distribution reference distribution
  347.      * @param data sample being being evaluated
  348.      * @param alpha significance level of the test
  349.      * @return true iff the null hypothesis that {@code data} is a sample from {@code distribution}
  350.      *         can be rejected with confidence 1 - {@code alpha}
  351.      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
  352.      * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
  353.      */
  354.     public boolean kolmogorovSmirnovTest(RealDistribution distribution, double[] data, double alpha) {
  355.         if ((alpha <= 0) || (alpha > 0.5)) {
  356.             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL, alpha, 0, 0.5);
  357.         }
  358.         return kolmogorovSmirnovTest(distribution, data) < alpha;
  359.     }

  360.     /**
  361.      * Estimates the <i>p-value</i> of a two-sample
  362.      * <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
  363.      * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
  364.      * probability distribution. This method estimates the p-value by repeatedly sampling sets of size
  365.      * {@code x.length} and {@code y.length} from the empirical distribution of the combined sample.
  366.      * When {@code strict} is true, this is equivalent to the algorithm implemented in the R function
  367.      * {@code ks.boot}, described in <pre>
  368.      * Jasjeet S. Sekhon. 2011. 'Multivariate and Propensity Score Matching
  369.      * Software with Automated Balance Optimization: The Matching package for R.'
  370.      * Journal of Statistical Software, 42(7): 1-52.
  371.      * </pre>
  372.      * @param x first sample
  373.      * @param y second sample
  374.      * @param iterations number of bootstrap resampling iterations
  375.      * @param strict whether or not the null hypothesis is expressed as a strict inequality
  376.      * @return estimated p-value
  377.      */
  378.     public double bootstrap(double[] x, double[] y, int iterations, boolean strict) {
  379.         final int xLength = x.length;
  380.         final int yLength = y.length;
  381.         final double[] combined = new double[xLength + yLength];
  382.         System.arraycopy(x, 0, combined, 0, xLength);
  383.         System.arraycopy(y, 0, combined, xLength, yLength);
  384.         final long d = integralKolmogorovSmirnovStatistic(x, y);
  385.         int greaterCount = 0;
  386.         int equalCount = 0;
  387.         double[] curX;
  388.         double[] curY;
  389.         long curD;
  390.         for (int i = 0; i < iterations; i++) {
  391.             curX = resample(combined, xLength);
  392.             curY = resample(combined, yLength);
  393.             curD = integralKolmogorovSmirnovStatistic(curX, curY);
  394.             if (curD > d) {
  395.                 greaterCount++;
  396.             } else if (curD == d) {
  397.                 equalCount++;
  398.             }
  399.         }
  400.         return strict ? greaterCount / (double) iterations :
  401.             (greaterCount + equalCount) / (double) iterations;
  402.     }

  403.     /**
  404.      * Computes {@code bootstrap(x, y, iterations, true)}.
  405.      * This is equivalent to ks.boot(x,y, nboots=iterations) using the R Matching
  406.      * package function. See #bootstrap(double[], double[], int, boolean).
  407.      *
  408.      * @param x first sample
  409.      * @param y second sample
  410.      * @param iterations number of bootstrap resampling iterations
  411.      * @return estimated p-value
  412.      */
  413.     public double bootstrap(double[] x, double[] y, int iterations) {
  414.         return bootstrap(x, y, iterations, true);
  415.     }

  416.     /**
  417.      * Return a bootstrap sample (with replacement) of size k from sample.
  418.      *
  419.      * @param sample array to sample from
  420.      * @param k size of bootstrap sample
  421.      * @return bootstrap sample
  422.      */
  423.     private double[] resample(double[] sample, int k) {
  424.         final int len = sample.length;
  425.         final double[] out = new double[k];
  426.         for (int i = 0; i < k; i++) {
  427.             out[i] = gen.nextInt(len);
  428.         }
  429.         return out;
  430.     }

  431.     /**
  432.      * Calculates {@code P(D_n < d)} using the method described in [1] with quick decisions for extreme
  433.      * values given in [2] (see above). The result is not exact as with
  434.      * {@link #cdfExact(double, int)} because calculations are based on
  435.      * {@code double} rather than {@link org.hipparchus.fraction.BigFraction}.
  436.      *
  437.      * @param d statistic
  438.      * @param n sample size
  439.      * @return \(P(D_n &lt; d)\)
  440.      * @throws MathRuntimeException if algorithm fails to convert {@code h} to a
  441.      *         {@link org.hipparchus.fraction.BigFraction} in expressing {@code d} as \((k
  442.      *         - h) / m\) for integer {@code k, m} and \(0 &lt;= h &lt; 1\)
  443.      */
  444.     public double cdf(double d, int n)
  445.         throws MathRuntimeException {
  446.         return cdf(d, n, false);
  447.     }

  448.     /**
  449.      * Calculates {@code P(D_n < d)}. The result is exact in the sense that BigFraction/BigReal is
  450.      * used everywhere at the expense of very slow execution time. Almost never choose this in real
  451.      * applications unless you are very sure; this is almost solely for verification purposes.
  452.      * Normally, you would choose {@link #cdf(double, int)}. See the class
  453.      * javadoc for definitions and algorithm description.
  454.      *
  455.      * @param d statistic
  456.      * @param n sample size
  457.      * @return \(P(D_n &lt; d)\)
  458.      * @throws MathRuntimeException if the algorithm fails to convert {@code h} to a
  459.      *         {@link org.hipparchus.fraction.BigFraction} in expressing {@code d} as \((k
  460.      *         - h) / m\) for integer {@code k, m} and \(0 &lt;= h &lt; 1\)
  461.      */
  462.     public double cdfExact(double d, int n)
  463.         throws MathRuntimeException {
  464.         return cdf(d, n, true);
  465.     }

  466.     /**
  467.      * Calculates {@code P(D_n < d)} using method described in [1] with quick decisions for extreme
  468.      * values given in [2] (see above).
  469.      *
  470.      * @param d statistic
  471.      * @param n sample size
  472.      * @param exact whether the probability should be calculated exact using
  473.      *        {@link org.hipparchus.fraction.BigFraction} everywhere at the expense of
  474.      *        very slow execution time, or if {@code double} should be used convenient places to
  475.      *        gain speed. Almost never choose {@code true} in real applications unless you are very
  476.      *        sure; {@code true} is almost solely for verification purposes.
  477.      * @return \(P(D_n &lt; d)\)
  478.      * @throws MathRuntimeException if algorithm fails to convert {@code h} to a
  479.      *         {@link org.hipparchus.fraction.BigFraction} in expressing {@code d} as \((k
  480.      *         - h) / m\) for integer {@code k, m} and \(0 \lt;= h &lt; 1\).
  481.      */
  482.     public double cdf(double d, int n, boolean exact)
  483.         throws MathRuntimeException {

  484.         final double ninv = 1 / ((double) n);
  485.         final double ninvhalf = 0.5 * ninv;

  486.         if (d <= ninvhalf) {
  487.             return 0;
  488.         } else if (ninvhalf < d && d <= ninv) {
  489.             double res = 1;
  490.             final double f = 2 * d - ninv;
  491.             // n! f^n = n*f * (n-1)*f * ... * 1*x
  492.             for (int i = 1; i <= n; ++i) {
  493.                 res *= i * f;
  494.             }
  495.             return res;
  496.         } else if (1 - ninv <= d && d < 1) {
  497.             return 1 - 2 * Math.pow(1 - d, n);
  498.         } else if (1 <= d) {
  499.             return 1;
  500.         }
  501.         if (exact) {
  502.             return exactK(d, n);
  503.         }
  504.         if (n <= 140) {
  505.             return roundedK(d, n);
  506.         }
  507.         return pelzGood(d, n);
  508.     }

  509.     /**
  510.      * Calculates the exact value of {@code P(D_n < d)} using the method described in [1] (reference
  511.      * in class javadoc above) and {@link org.hipparchus.fraction.BigFraction} (see
  512.      * above).
  513.      *
  514.      * @param d statistic
  515.      * @param n sample size
  516.      * @return the two-sided probability of \(P(D_n < d)\)
  517.      * @throws MathRuntimeException if algorithm fails to convert {@code h} to a
  518.      *         {@link org.hipparchus.fraction.BigFraction} in expressing {@code d} as \((k
  519.      *         - h) / m\) for integer {@code k, m} and \(0 \le h < 1\).
  520.      */
  521.     private double exactK(double d, int n)
  522.         throws MathRuntimeException {

  523.         final int k = (int) Math.ceil(n * d);

  524.         final FieldMatrix<BigFraction> H = this.createExactH(d, n);
  525.         final FieldMatrix<BigFraction> Hpower = H.power(n);

  526.         BigFraction pFrac = Hpower.getEntry(k - 1, k - 1);

  527.         for (int i = 1; i <= n; ++i) {
  528.             pFrac = pFrac.multiply(i).divide(n);
  529.         }

  530.         /*
  531.          * BigFraction.doubleValue converts numerator to double and the denominator to double and
  532.          * divides afterwards. That gives NaN quite easy. This does not (scale is the number of
  533.          * digits):
  534.          */
  535.         return pFrac.bigDecimalValue(20, RoundingMode.HALF_UP).doubleValue();
  536.     }

  537.     /**
  538.      * Calculates {@code P(D_n < d)} using method described in [1] and doubles (see above).
  539.      *
  540.      * @param d statistic
  541.      * @param n sample size
  542.      * @return \(P(D_n < d)\)
  543.      */
  544.     private double roundedK(double d, int n) {

  545.         final int k = (int) Math.ceil(n * d);
  546.         final RealMatrix H = this.createRoundedH(d, n);
  547.         final RealMatrix Hpower = H.power(n);

  548.         double pFrac = Hpower.getEntry(k - 1, k - 1);
  549.         for (int i = 1; i <= n; ++i) {
  550.             pFrac *= (double) i / (double) n;
  551.         }

  552.         return pFrac;
  553.     }

  554.     /**
  555.      * Computes the Pelz-Good approximation for \(P(D_n &lt; d)\) as described in [2] in the class javadoc.
  556.      *
  557.      * @param d value of d-statistic (x in [2])
  558.      * @param n sample size
  559.      * @return \(P(D_n &lt; d)\)
  560.      */
  561.     public double pelzGood(double d, int n) {
  562.         // Change the variable since approximation is for the distribution evaluated at d / sqrt(n)
  563.         final double sqrtN = FastMath.sqrt(n);
  564.         final double z = d * sqrtN;
  565.         final double z2 = d * d * n;
  566.         final double z4 = z2 * z2;
  567.         final double z6 = z4 * z2;
  568.         final double z8 = z4 * z4;

  569.         // Compute K_0(z)
  570.         double sum = 0;
  571.         double z2Term = MathUtils.PI_SQUARED / (8 * z2);
  572.         int k = 1;
  573.         for (; k < MAXIMUM_PARTIAL_SUM_COUNT; k++) {
  574.             final double kTerm = 2 * k - 1;
  575.             final double increment = FastMath.exp(-z2Term * kTerm * kTerm);
  576.             sum += increment;
  577.             if (increment <= PG_SUM_RELATIVE_ERROR * sum) {
  578.                 break;
  579.             }
  580.         }
  581.         if (k == MAXIMUM_PARTIAL_SUM_COUNT) {
  582.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, MAXIMUM_PARTIAL_SUM_COUNT);
  583.         }
  584.         double ret = sum * FastMath.sqrt(2 * FastMath.PI) / z;

  585.         // K_1(z)
  586.         // Sum is -inf to inf, but k term is always (k + 1/2) ^ 2, so really have
  587.         // twice the sum from k = 0 to inf (k = -1 is same as 0, -2 same as 1, ...)
  588.         final double twoZ2 = 2 * z2;
  589.         sum = 0;
  590.         for (k = 0; k < MAXIMUM_PARTIAL_SUM_COUNT; k++) {
  591.             final double kTerm = k + 0.5;
  592.             final double kTerm2 = kTerm * kTerm;
  593.             final double increment = (MathUtils.PI_SQUARED * kTerm2 - z2) * FastMath.exp(-MathUtils.PI_SQUARED * kTerm2 / twoZ2);
  594.             sum += increment;
  595.             if (FastMath.abs(increment) < PG_SUM_RELATIVE_ERROR * FastMath.abs(sum)) {
  596.                 break;
  597.             }
  598.         }
  599.         if (k == MAXIMUM_PARTIAL_SUM_COUNT) {
  600.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, MAXIMUM_PARTIAL_SUM_COUNT);
  601.         }
  602.         final double sqrtHalfPi = FastMath.sqrt(FastMath.PI / 2);
  603.         // Instead of doubling sum, divide by 3 instead of 6
  604.         ret += sum * sqrtHalfPi / (3 * z4 * sqrtN);

  605.         // K_2(z)
  606.         // Same drill as K_1, but with two doubly infinite sums, all k terms are even powers.
  607.         final double z4Term = 2 * z4;
  608.         final double z6Term = 6 * z6;
  609.         z2Term = 5 * z2;
  610.         final double pi4 = MathUtils.PI_SQUARED * MathUtils.PI_SQUARED;
  611.         sum = 0;
  612.         for (k = 0; k < MAXIMUM_PARTIAL_SUM_COUNT; k++) {
  613.             final double kTerm = k + 0.5;
  614.             final double kTerm2 = kTerm * kTerm;
  615.             final double increment =  (z6Term + z4Term + MathUtils.PI_SQUARED * (z4Term - z2Term) * kTerm2 +
  616.                     pi4 * (1 - twoZ2) * kTerm2 * kTerm2) * FastMath.exp(-MathUtils.PI_SQUARED * kTerm2 / twoZ2);
  617.             sum += increment;
  618.             if (FastMath.abs(increment) < PG_SUM_RELATIVE_ERROR * FastMath.abs(sum)) {
  619.                 break;
  620.             }
  621.         }
  622.         if (k == MAXIMUM_PARTIAL_SUM_COUNT) {
  623.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, MAXIMUM_PARTIAL_SUM_COUNT);
  624.         }
  625.         double sum2 = 0;
  626.         for (k = 1; k < MAXIMUM_PARTIAL_SUM_COUNT; k++) {
  627.             final double kTerm2 = k * k;
  628.             final double increment = MathUtils.PI_SQUARED * kTerm2 * FastMath.exp(-MathUtils.PI_SQUARED * kTerm2 / twoZ2);
  629.             sum2 += increment;
  630.             if (FastMath.abs(increment) < PG_SUM_RELATIVE_ERROR * FastMath.abs(sum2)) {
  631.                 break;
  632.             }
  633.         }
  634.         if (k == MAXIMUM_PARTIAL_SUM_COUNT) {
  635.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, MAXIMUM_PARTIAL_SUM_COUNT);
  636.         }
  637.         // Again, adjust coefficients instead of doubling sum, sum2
  638.         ret += (sqrtHalfPi / n) * (sum / (36 * z2 * z2 * z2 * z) - sum2 / (18 * z2 * z));

  639.         // K_3(z) One more time with feeling - two doubly infinite sums, all k powers even.
  640.         // Multiply coefficient denominators by 2, so omit doubling sums.
  641.         final double pi6 = pi4 * MathUtils.PI_SQUARED;
  642.         sum = 0;
  643.         for (k = 0; k < MAXIMUM_PARTIAL_SUM_COUNT; k++) {
  644.             final double kTerm = k + 0.5;
  645.             final double kTerm2 = kTerm * kTerm;
  646.             final double kTerm4 = kTerm2 * kTerm2;
  647.             final double kTerm6 = kTerm4 * kTerm2;
  648.             final double increment = (pi6 * kTerm6 * (5 - 30 * z2) + pi4 * kTerm4 * (-60 * z2 + 212 * z4) +
  649.                             MathUtils.PI_SQUARED * kTerm2 * (135 * z4 - 96 * z6) - 30 * z6 - 90 * z8) *
  650.                     FastMath.exp(-MathUtils.PI_SQUARED * kTerm2 / twoZ2);
  651.             sum += increment;
  652.             if (FastMath.abs(increment) < PG_SUM_RELATIVE_ERROR * FastMath.abs(sum)) {
  653.                 break;
  654.             }
  655.         }
  656.         if (k == MAXIMUM_PARTIAL_SUM_COUNT) {
  657.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, MAXIMUM_PARTIAL_SUM_COUNT);
  658.         }
  659.         sum2 = 0;
  660.         for (k = 1; k < MAXIMUM_PARTIAL_SUM_COUNT; k++) {
  661.             final double kTerm2 = k * k;
  662.             final double kTerm4 = kTerm2 * kTerm2;
  663.             final double increment = (-pi4 * kTerm4 + 3 * MathUtils.PI_SQUARED * kTerm2 * z2) *
  664.                     FastMath.exp(-MathUtils.PI_SQUARED * kTerm2 / twoZ2);
  665.             sum2 += increment;
  666.             if (FastMath.abs(increment) < PG_SUM_RELATIVE_ERROR * FastMath.abs(sum2)) {
  667.                 break;
  668.             }
  669.         }
  670.         if (k == MAXIMUM_PARTIAL_SUM_COUNT) {
  671.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, MAXIMUM_PARTIAL_SUM_COUNT);
  672.         }
  673.         return ret + (sqrtHalfPi / (sqrtN * n)) * (sum / (3240 * z6 * z4) +
  674.                 + sum2 / (108 * z6));

  675.     }

  676.     /***
  677.      * Creates {@code H} of size {@code m x m} as described in [1] (see above).
  678.      *
  679.      * @param d statistic
  680.      * @param n sample size
  681.      * @return H matrix
  682.      * @throws MathIllegalArgumentException if fractional part is greater than 1
  683.      * @throws MathIllegalStateException if algorithm fails to convert {@code h} to a
  684.      *         {@link org.hipparchus.fraction.BigFraction} in expressing {@code d} as \((k
  685.      *         - h) / m\) for integer {@code k, m} and \(0 <= h < 1\).
  686.      */
  687.     private FieldMatrix<BigFraction> createExactH(double d, int n)
  688.         throws MathIllegalArgumentException, MathIllegalStateException {

  689.         final int k = (int) Math.ceil(n * d);
  690.         final int m = 2 * k - 1;
  691.         final double hDouble = k - n * d;
  692.         if (hDouble >= 1) {
  693.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_LARGE_BOUND_EXCLUDED,
  694.                                                    hDouble, 1.0);
  695.         }
  696.         BigFraction h;
  697.         try {
  698.             h = new BigFraction(hDouble, 1.0e-20, 10000);
  699.         } catch (final MathIllegalStateException e1) {
  700.             try {
  701.                 h = new BigFraction(hDouble, 1.0e-10, 10000);
  702.             } catch (final MathIllegalStateException e2) {
  703.                 h = new BigFraction(hDouble, 1.0e-5, 10000);
  704.             }
  705.         }
  706.         final BigFraction[][] Hdata = new BigFraction[m][m];

  707.         /*
  708.          * Start by filling everything with either 0 or 1.
  709.          */
  710.         for (int i = 0; i < m; ++i) {
  711.             for (int j = 0; j < m; ++j) {
  712.                 if (i - j + 1 < 0) {
  713.                     Hdata[i][j] = BigFraction.ZERO;
  714.                 } else {
  715.                     Hdata[i][j] = BigFraction.ONE;
  716.                 }
  717.             }
  718.         }

  719.         /*
  720.          * Setting up power-array to avoid calculating the same value twice: hPowers[0] = h^1 ...
  721.          * hPowers[m-1] = h^m
  722.          */
  723.         final BigFraction[] hPowers = new BigFraction[m];
  724.         hPowers[0] = h;
  725.         for (int i = 1; i < m; ++i) {
  726.             hPowers[i] = h.multiply(hPowers[i - 1]);
  727.         }

  728.         /*
  729.          * First column and last row has special values (each other reversed).
  730.          */
  731.         for (int i = 0; i < m; ++i) {
  732.             Hdata[i][0] = Hdata[i][0].subtract(hPowers[i]);
  733.             Hdata[m - 1][i] = Hdata[m - 1][i].subtract(hPowers[m - i - 1]);
  734.         }

  735.         /*
  736.          * [1] states: "For 1/2 < h < 1 the bottom left element of the matrix should be (1 - 2*h^m +
  737.          * (2h - 1)^m )/m!" Since 0 <= h < 1, then if h > 1/2 is sufficient to check:
  738.          */
  739.         if (h.compareTo(BigFraction.ONE_HALF) == 1) {
  740.             Hdata[m - 1][0] = Hdata[m - 1][0].add(h.multiply(2).subtract(1).pow(m));
  741.         }

  742.         /*
  743.          * Aside from the first column and last row, the (i, j)-th element is 1/(i - j + 1)! if i -
  744.          * j + 1 >= 0, else 0. 1's and 0's are already put, so only division with (i - j + 1)! is
  745.          * needed in the elements that have 1's. There is no need to calculate (i - j + 1)! and then
  746.          * divide - small steps avoid overflows. Note that i - j + 1 > 0 <=> i + 1 > j instead of
  747.          * j'ing all the way to m. Also note that it is started at g = 2 because dividing by 1 isn't
  748.          * really necessary.
  749.          */
  750.         for (int i = 0; i < m; ++i) {
  751.             for (int j = 0; j < i + 1; ++j) {
  752.                 if (i - j + 1 > 0) {
  753.                     for (int g = 2; g <= i - j + 1; ++g) {
  754.                         Hdata[i][j] = Hdata[i][j].divide(g);
  755.                     }
  756.                 }
  757.             }
  758.         }
  759.         return new Array2DRowFieldMatrix<BigFraction>(BigFractionField.getInstance(), Hdata);
  760.     }

  761.     /***
  762.      * Creates {@code H} of size {@code m x m} as described in [1] (see above)
  763.      * using double-precision.
  764.      *
  765.      * @param d statistic
  766.      * @param n sample size
  767.      * @return H matrix
  768.      * @throws MathIllegalArgumentException if fractional part is greater than 1
  769.      */
  770.     private RealMatrix createRoundedH(double d, int n)
  771.         throws MathIllegalArgumentException {

  772.         final int k = (int) Math.ceil(n * d);
  773.         final int m = 2 * k - 1;
  774.         final double h = k - n * d;
  775.         if (h >= 1) {
  776.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_LARGE_BOUND_EXCLUDED,
  777.                                                    h, 1.0);
  778.         }
  779.         final double[][] Hdata = new double[m][m];

  780.         /*
  781.          * Start by filling everything with either 0 or 1.
  782.          */
  783.         for (int i = 0; i < m; ++i) {
  784.             for (int j = 0; j < m; ++j) {
  785.                 if (i - j + 1 < 0) {
  786.                     Hdata[i][j] = 0;
  787.                 } else {
  788.                     Hdata[i][j] = 1;
  789.                 }
  790.             }
  791.         }

  792.         /*
  793.          * Setting up power-array to avoid calculating the same value twice: hPowers[0] = h^1 ...
  794.          * hPowers[m-1] = h^m
  795.          */
  796.         final double[] hPowers = new double[m];
  797.         hPowers[0] = h;
  798.         for (int i = 1; i < m; ++i) {
  799.             hPowers[i] = h * hPowers[i - 1];
  800.         }

  801.         /*
  802.          * First column and last row has special values (each other reversed).
  803.          */
  804.         for (int i = 0; i < m; ++i) {
  805.             Hdata[i][0] = Hdata[i][0] - hPowers[i];
  806.             Hdata[m - 1][i] -= hPowers[m - i - 1];
  807.         }

  808.         /*
  809.          * [1] states: "For 1/2 < h < 1 the bottom left element of the matrix should be (1 - 2*h^m +
  810.          * (2h - 1)^m )/m!" Since 0 <= h < 1, then if h > 1/2 is sufficient to check:
  811.          */
  812.         if (Double.compare(h, 0.5) > 0) {
  813.             Hdata[m - 1][0] += FastMath.pow(2 * h - 1, m);
  814.         }

  815.         /*
  816.          * Aside from the first column and last row, the (i, j)-th element is 1/(i - j + 1)! if i -
  817.          * j + 1 >= 0, else 0. 1's and 0's are already put, so only division with (i - j + 1)! is
  818.          * needed in the elements that have 1's. There is no need to calculate (i - j + 1)! and then
  819.          * divide - small steps avoid overflows. Note that i - j + 1 > 0 <=> i + 1 > j instead of
  820.          * j'ing all the way to m. Also note that it is started at g = 2 because dividing by 1 isn't
  821.          * really necessary.
  822.          */
  823.         for (int i = 0; i < m; ++i) {
  824.             for (int j = 0; j < i + 1; ++j) {
  825.                 if (i - j + 1 > 0) {
  826.                     for (int g = 2; g <= i - j + 1; ++g) {
  827.                         Hdata[i][j] /= g;
  828.                     }
  829.                 }
  830.             }
  831.         }
  832.         return MatrixUtils.createRealMatrix(Hdata);
  833.     }

  834.     /**
  835.      * Verifies that {@code array} has length at least 2.
  836.      *
  837.      * @param array array to test
  838.      * @throws org.hipparchus.exception.NullArgumentException if array is null
  839.      * @throws MathIllegalArgumentException if array is too short
  840.      */
  841.     private void checkArray(double[] array) {
  842.         MathUtils.checkNotNull(array);
  843.         if (array.length < 2) {
  844.             throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, array.length,
  845.                                                    2);
  846.         }
  847.     }

  848.     /**
  849.      * Computes \( 1 + 2 \sum_{i=1}^\infty (-1)^i e^{-2 i^2 t^2} \) stopping when successive partial
  850.      * sums are within {@code tolerance} of one another, or when {@code maxIterations} partial sums
  851.      * have been computed. If the sum does not converge before {@code maxIterations} iterations a
  852.      * {@link MathIllegalStateException} is thrown.
  853.      *
  854.      * @param t argument
  855.      * @param tolerance Cauchy criterion for partial sums
  856.      * @param maxIterations maximum number of partial sums to compute
  857.      * @return Kolmogorov sum evaluated at t
  858.      * @throws MathIllegalStateException if the series does not converge
  859.      */
  860.     public double ksSum(double t, double tolerance, int maxIterations) {
  861.         if (t == 0.0) {
  862.             return 0.0;
  863.         }

  864.         // TODO: for small t (say less than 1), the alternative expansion in part 3 of [1]
  865.         // from class javadoc should be used.

  866.         final double x = -2 * t * t;
  867.         int sign = -1;
  868.         long i = 1;
  869.         double partialSum = 0.5d;
  870.         double delta = 1;
  871.         while (delta > tolerance && i < maxIterations) {
  872.             delta = FastMath.exp(x * i * i);
  873.             partialSum += sign * delta;
  874.             sign *= -1;
  875.             i++;
  876.         }
  877.         if (i == maxIterations) {
  878.             throw new MathIllegalStateException(LocalizedCoreFormats.MAX_COUNT_EXCEEDED, maxIterations);
  879.         }
  880.         return partialSum * 2;
  881.     }

  882.     /**
  883.      * Computes \(P(D_{n,m} &gt; d)\) if {@code strict} is {@code true}; otherwise \(P(D_{n,m} \ge
  884.      * d)\), where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic. See
  885.      * {@link #kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
  886.      * <p>
  887.      * The returned probability is exact, implemented by unwinding the recursive function
  888.      * definitions presented in [4] from the class javadoc.
  889.      * </p>
  890.      *
  891.      * @param d D-statistic value
  892.      * @param n first sample size
  893.      * @param m second sample size
  894.      * @param strict whether or not the probability to compute is expressed as a strict inequality
  895.      * @return probability that a randomly selected m-n partition of m + n generates \(D_{n,m}\)
  896.      *         greater than (resp. greater than or equal to) {@code d}
  897.      */
  898.     public double exactP(double d, int n, int m, boolean strict) {
  899.         if (d < 1 / (double)( m * n)) {
  900.             return 1.0;
  901.         } else if (d >= 1) {
  902.             return 0;
  903.         }
  904.         double normalizeD = normalizeD(d, n, m);
  905.         if (!strict) {
  906.             normalizeD -= 1 / ((double)n * m);
  907.         }
  908.         return exactPAtMeshpoint(normalizeD, n, m);
  909.     }

  910.     /**
  911.      * Normalizes a value to an integral multiple of 1/mn between 0 and 1.
  912.      * If d < 1/mn, 0 is returned; if d > 1, 1 is returned; if d is very close
  913.      * to an integral multiple of 1/mn, that value is returned; otherwise the
  914.      * returned value is the smallest multiple of 1/mn less than or equal to d.
  915.      *
  916.      * @param d d value
  917.      * @param n first sample size
  918.      * @param m second sample size
  919.      * @return d value suitable for input to exactPAtMeshpoint(d, m, n)
  920.      */
  921.     private double normalizeD(double d, int n, int m) {
  922.         final double resolution = 1 / ((double)n * m);
  923.         final double tol = 1e-12;

  924.         // If d is smaller that the first mesh point, return 0
  925.         // If greater than 1, return 1
  926.         if (d < resolution) {
  927.             return 0;
  928.         } else if (d > 1) {
  929.             return 1;
  930.         }

  931.         // Normalize d to the smallest mesh point less than or equal to d;
  932.         // except if d is less than tol less than the next mesh point, bump it up
  933.         final double resolutions = d / resolution;
  934.         final double ceil = FastMath.ceil(resolutions);
  935.         if (ceil - resolutions < tol) {
  936.            return ceil * resolution;
  937.         } else {
  938.            return FastMath.floor(resolutions) * resolution;
  939.         }

  940.     }

  941.     /**
  942.      * Computes \(P(D_{n,m} &gt; d)\) where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic. See
  943.      * {@link #kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
  944.      * <p>
  945.      * The returned probability is exact, implemented by unwinding the recursive function
  946.      * definitions presented in [4].
  947.      *
  948.      * @param d D-statistic value (must be a "meshpoint" - i.e., a possible actual value of D(m,n)).
  949.      * @param n first sample size
  950.      * @param m second sample size
  951.      * @return probability that a randomly selected m-n partition of m + n generates \(D_{n,m}\)
  952.      *         greater than (resp. greater than or equal to) {@code d}
  953.      */
  954.     private double exactPAtMeshpoint(double d, int n, int m) {
  955.         final int nn = FastMath.max(n, m);
  956.         final int mm = FastMath.min(n, m);
  957.         final double[] u = new double[nn + 2];
  958.         final double k = mm * nn * d + 0.5;
  959.         u[1] = 1d;
  960.         for (int j = 1; j < nn + 1; j++) {
  961.             u[j + 1] = 1;
  962.             if (mm * j > k) {
  963.                 u[j + 1] = 0;
  964.             }
  965.         }
  966.         for (int i = 1; i < mm + 1; i++) {
  967.             final double w = (double) i / (double) (i + nn);
  968.             u[1] = w * u[1];
  969.             if (nn * i > k) {
  970.                 u[1] = 0;
  971.             }
  972.             for (int j = 1; j < nn + 1; j++) {
  973.                 u[j + 1] = u[j] + u[j + 1] * w;
  974.                 if (FastMath.abs(nn * i - mm * j) > k) {
  975.                     u[j + 1] = 0;
  976.                 }
  977.             }
  978.         }
  979.         return 1 - u[nn + 1];
  980.     }

  981.     /**
  982.      * Uses the Kolmogorov-Smirnov distribution to approximate \(P(D_{n,m} &gt; d)\) where \(D_{n,m}\)
  983.      * is the 2-sample Kolmogorov-Smirnov statistic. See
  984.      * {@link #kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
  985.      * <p>
  986.      * Specifically, what is returned is \(1 - k(d \sqrt{mn / (m + n)})\) where \(k(t) = 1 + 2
  987.      * \sum_{i=1}^\infty (-1)^i e^{-2 i^2 t^2}\). See {@link #ksSum(double, double, int)} for
  988.      * details on how convergence of the sum is determined. This implementation passes {@code ksSum}
  989.      * {@link #KS_SUM_CAUCHY_CRITERION} as {@code tolerance} and
  990.      * {@link #MAXIMUM_PARTIAL_SUM_COUNT} as {@code maxIterations}.
  991.      * </p>
  992.      *
  993.      * @param d D-statistic value
  994.      * @param n first sample size
  995.      * @param m second sample size
  996.      * @return approximate probability that a randomly selected m-n partition of m + n generates
  997.      *         \(D_{n,m}\) greater than {@code d}
  998.      */
  999.     public double approximateP(double d, int n, int m) {
  1000.         final double dm = m;
  1001.         final double dn = n;
  1002.         return 1 - ksSum(d * FastMath.sqrt((dm * dn) / (dm + dn)),
  1003.                          KS_SUM_CAUCHY_CRITERION, MAXIMUM_PARTIAL_SUM_COUNT);
  1004.     }

  1005.     /**
  1006.      * Fills a boolean array randomly with a fixed number of {@code true} values.
  1007.      * The method uses a simplified version of the Fisher-Yates shuffle algorithm.
  1008.      * By processing first the {@code true} values followed by the remaining {@code false} values
  1009.      * less random numbers need to be generated. The method is optimized for the case
  1010.      * that the number of {@code true} values is larger than or equal to the number of
  1011.      * {@code false} values.
  1012.      *
  1013.      * @param b boolean array
  1014.      * @param numberOfTrueValues number of {@code true} values the boolean array should finally have
  1015.      * @param rng random data generator
  1016.      */
  1017.     static void fillBooleanArrayRandomlyWithFixedNumberTrueValues(final boolean[] b, final int numberOfTrueValues, final RandomGenerator rng) {
  1018.         Arrays.fill(b, true);
  1019.         for (int k = numberOfTrueValues; k < b.length; k++) {
  1020.             final int r = rng.nextInt(k + 1);
  1021.             b[(b[r]) ? r : k] = false;
  1022.         }
  1023.     }

  1024.     /**
  1025.      * If there are no ties in the combined dataset formed from x and y, this
  1026.      * method is a no-op.  If there are ties, a uniform random deviate in
  1027.      * (-minDelta / 2, minDelta / 2) - {0} is added to each value in x and y, where
  1028.      * minDelta is the minimum difference between unequal values in the combined
  1029.      * sample.  A fixed seed is used to generate the jitter, so repeated activations
  1030.      * with the same input arrays result in the same values.
  1031.      *
  1032.      * NOTE: if there are ties in the data, this method overwrites the data in
  1033.      * x and y with the jittered values.
  1034.      *
  1035.      * @param x first sample
  1036.      * @param y second sample
  1037.      */
  1038.     private void fixTies(double[] x, double[] y) {
  1039.        final double[] values = MathArrays.unique(MathArrays.concatenate(x,y));
  1040.        if (values.length == x.length + y.length) {
  1041.            return;  // There are no ties
  1042.        }

  1043.        // Find the smallest difference between values, or 1 if all values are the same
  1044.        double minDelta = 1;
  1045.        double prev = values[0];
  1046.        for (int i = 1; i < values.length; i++) {
  1047.           final double delta = prev - values[i];
  1048.           if (delta < minDelta) {
  1049.               minDelta = delta;
  1050.           }
  1051.           prev = values[i];
  1052.        }
  1053.        minDelta /= 2;

  1054.        // Add jitter using a fixed seed (so same arguments always give same results),
  1055.        // low-initialization-overhead generator
  1056.        gen.setSeed(100);

  1057.        // It is theoretically possible that jitter does not break ties, so repeat
  1058.        // until all ties are gone.  Bound the loop and throw MIE if bound is exceeded.
  1059.        int ct = 0;
  1060.        boolean ties;
  1061.        do {
  1062.            jitter(x, minDelta);
  1063.            jitter(y, minDelta);
  1064.            ties = hasTies(x, y);
  1065.            ct++;
  1066.        } while (ties && ct < 1000);
  1067.        if (ties) {
  1068.            throw MathRuntimeException.createInternalError(); // Should never happen
  1069.        }
  1070.     }

  1071.     /**
  1072.      * Returns true iff there are ties in the combined sample
  1073.      * formed from x and y.
  1074.      *
  1075.      * @param x first sample
  1076.      * @param y second sample
  1077.      * @return true if x and y together contain ties
  1078.      */
  1079.     private static boolean hasTies(double[] x, double[] y) {
  1080.         final HashSet<Double> values = new HashSet<>();
  1081.             for (int i = 0; i < x.length; i++) {
  1082.                 if (!values.add(x[i])) {
  1083.                     return true;
  1084.                 }
  1085.             }
  1086.             for (int i = 0; i < y.length; i++) {
  1087.                 if (!values.add(y[i])) {
  1088.                     return true;
  1089.                 }
  1090.             }
  1091.         return false;
  1092.     }

  1093.     /**
  1094.      * Adds random jitter to {@code data} using uniform deviates between {@code -delta} and {@code delta}.
  1095.      * <p>
  1096.      * Note that jitter is applied in-place - i.e., the array
  1097.      * values are overwritten with the result of applying jitter.</p>
  1098.      *
  1099.      * @param data input/output data array - entries overwritten by the method
  1100.      * @param delta max magnitude of jitter
  1101.      * @throws NullPointerException if either of the parameters is null
  1102.      */
  1103.     private void jitter(double[] data, double delta) {
  1104.         for (int i = 0; i < data.length; i++) {
  1105.             data[i] += gen.nextUniform(-delta, delta);
  1106.         }
  1107.     }

  1108. }