View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.inference;
23  
24  import java.util.Collection;
25  
26  import org.hipparchus.distribution.RealDistribution;
27  import org.hipparchus.exception.MathIllegalArgumentException;
28  import org.hipparchus.exception.MathIllegalStateException;
29  import org.hipparchus.exception.NullArgumentException;
30  import org.hipparchus.stat.descriptive.StatisticalSummary;
31  
32  /**
33   * A collection of static methods to create inference test instances or to
34   * perform inference tests.
35   */
36  public class InferenceTestUtils  {
37  
38      /** Singleton TTest instance; the static t-test helpers of this class delegate to it. */
39      private static final TTest T_TEST = new TTest();
40  
41      /** Singleton ChiSquareTest instance. */
42      private static final ChiSquareTest CHI_SQUARE_TEST = new ChiSquareTest();
43  
44      /** Singleton OneWayAnova instance. NOTE(review): name looks like a typo of ONE_WAY_ANOVA; kept, usages may exist elsewhere. */
45      private static final OneWayAnova ONE_WAY_ANANOVA = new OneWayAnova();
46  
47      /** Singleton G-Test instance. */
48      private static final GTest G_TEST = new GTest();
49  
50      /** Singleton Kolmogorov-Smirnov (K-S) test instance. */
51      private static final KolmogorovSmirnovTest KS_TEST = new KolmogorovSmirnovTest();
52  
53      /**
54       * Private constructor: this class only exposes static methods and must not be instantiated.
55       */
56      private InferenceTestUtils() {
57          super();
58      }
59  
60      /**
61       * Computes a 2-sample t statistic, under the hypothesis of equal
62       * subpopulation variances.  To compute a t-statistic without the
63       * equal variances hypothesis, use {@link #t(double[], double[])}.
64       * <p>
65       * This statistic can be used to perform a (homoscedastic) two-sample
66       * t-test to compare sample means.</p>
67       * <p>
68       * The t-statistic is</p>
69       * <p>
70       * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
71       * </p><p>
72       * where <strong><code>n1</code></strong> is the size of the first sample;
73       * <strong><code> n2</code></strong> is the size of the second sample;
74       * <strong><code> m1</code></strong> is the mean of the first sample;
75       * <strong><code> m2</code></strong> is the mean of the second sample
76       * and <strong><code>var</code></strong> is the pooled variance estimate:
77       * </p><p>
78       * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
79       * </p><p>
80       * with <strong><code>var1</code></strong> the variance of the first sample and
81       * <strong><code>var2</code></strong> the variance of the second sample.
82       * </p><p>
83       * <strong>Preconditions</strong>:</p>
84       * <ul>
85       * <li>The observed array lengths must both be at least 2.
86       * </li></ul>
87       *
88       * @param sample1 array of sample data values
89       * @param sample2 array of sample data values
90       * @return t statistic
91       * @throws NullArgumentException if the arrays are <code>null</code>
92       * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
93       */
94      public static double homoscedasticT(final double[] sample1, final double[] sample2)
95          throws MathIllegalArgumentException, NullArgumentException {
96          return T_TEST.homoscedasticT(sample1, sample2);
97      }
98  
99      /**
100      * Computes a 2-sample t statistic, comparing the means of the datasets
101      * described by two {@link StatisticalSummary} instances, under the
102      * assumption of equal subpopulation variances.  To compute a t-statistic
103      * without the equal variances assumption, use
104      * {@link #t(StatisticalSummary, StatisticalSummary)}.
105      * <p>
106      * This statistic can be used to perform a (homoscedastic) two-sample
107      * t-test to compare sample means.</p>
108      * <p>
109      * The t-statistic returned is</p>
110      * <p>
111      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
112      * </p><p>
113      * where <strong><code>n1</code></strong> is the size of the first sample;
114      * <strong><code> n2</code></strong> is the size of the second sample;
115      * <strong><code> m1</code></strong> is the mean of the first sample;
116      * <strong><code> m2</code></strong> is the mean of the second sample
117      * and <strong><code>var</code></strong> is the pooled variance estimate:
118      * </p><p>
119      * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
120      * </p><p>
121      * with <strong><code>var1</code></strong> the variance of the first sample and
122      * <strong><code>var2</code></strong> the variance of the second sample.
123      * </p><p>
124      * <strong>Preconditions</strong>:</p><ul>
125      * <li>The datasets described by the two StatisticalSummary instances must
126      * each contain at least 2 observations.
127      * </li></ul>
128      *
129      * @param sampleStats1 StatisticalSummary describing data from the first sample
130      * @param sampleStats2 StatisticalSummary describing data from the second sample
131      * @return t statistic
132      * @throws NullArgumentException if the sample statistics are <code>null</code>
133      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
134      */
135     public static double homoscedasticT(final StatisticalSummary sampleStats1,
136                                         final StatisticalSummary sampleStats2)
137         throws MathIllegalArgumentException, NullArgumentException {
138         return T_TEST.homoscedasticT(sampleStats1, sampleStats2);
139     }
140 
141     /**
142      * Performs a
143      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
144      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
145      * and <code>sample2</code> are drawn from populations with the same mean,
146      * with significance level <code>alpha</code>,  assuming that the
147      * subpopulation variances are equal.  Use
148      * {@link #tTest(double[], double[], double)} to perform the test without
149      * the assumption of equal variances.
150      * <p>
151      * Returns <code>true</code> iff the null hypothesis that the means are
152      * equal can be rejected with confidence <code>1 - alpha</code>.  To
153      * perform a 1-sided test, use <code>alpha * 2</code>.  To perform the test
154      * without the assumption of equal subpopulation variances, use
155      * {@link #tTest(double[], double[], double)}.</p>
156      * <p>
157      * A pooled variance estimate is used to compute the t-statistic. See
158      * {@link #homoscedasticT(double[], double[])} for the formula. The sum of
159      * the sample sizes minus 2 is used as the degrees of freedom.</p>
160      * <p>
161      * <strong>Examples:</strong></p><ol>
162      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
163      * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
164      * </li>
165      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
166      * at the 99% level, first verify that the measured mean of
167      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
168      * and then use
169      * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
170      * </li></ol>
171      * <p>
172      * <strong>Usage Note:</strong><br>
173      * The validity of the test depends on the assumptions of the parametric
174      * t-test procedure, as discussed
175      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
176      * here</a></p>
177      * <p>
178      * <strong>Preconditions</strong>:</p>
179      * <ul>
180      * <li>The observed array lengths must both be at least 2.
181      * </li>
182      * <li> <code> 0 &lt; alpha &le; 0.5 </code>
183      * </li></ul>
184      *
185      * @param sample1 array of sample data values
186      * @param sample2 array of sample data values
187      * @param alpha significance level of the test
188      * @return true if the null hypothesis can be rejected with
189      * confidence 1 - alpha
190      * @throws NullArgumentException if the arrays are <code>null</code>
191      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
192      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
193      * @throws MathIllegalStateException if an error occurs computing the p-value
194      */
195     public static boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
196                                              final double alpha)
197         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
198         return T_TEST.homoscedasticTTest(sample1, sample2, alpha);
199     }
200 
201     /**
202      * Returns the <i>observed significance level</i>, or
203      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
204      * comparing the means of the input arrays, under the assumption that
205      * the two samples are drawn from subpopulations with equal variances.
206      * To perform the test without the equal variances assumption, use
207      * {@link #tTest(double[], double[])}.
208      * <p>
209      * The number returned is the smallest significance level
210      * at which one can reject the null hypothesis that the two means are
211      * equal in favor of the two-sided alternative that they are different.
212      * For a one-sided test, divide the returned value by 2.</p>
213      * <p>
214      * A pooled variance estimate is used to compute the t-statistic.  See
215      * {@link #homoscedasticT(double[], double[])} for the formula. The sum of
216      * the sample sizes minus 2 is used as the degrees of freedom.</p>
217      * <p>
218      * <strong>Usage Note:</strong><br>
219      * The validity of the p-value depends on the assumptions of the parametric
220      * t-test procedure, as discussed
221      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
222      * here</a></p>
223      * <p>
224      * <strong>Preconditions</strong>:</p>
225      * <ul>
226      * <li>The observed array lengths must both be at least 2.
227      * </li></ul>
228      *
229      * @param sample1 array of sample data values
230      * @param sample2 array of sample data values
231      * @return two-tailed p-value for the equal-variances t-test
232      * @throws NullArgumentException if the arrays are <code>null</code>
233      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
234      * @throws MathIllegalStateException if an error occurs computing the p-value
235      */
236     public static double homoscedasticTTest(final double[] sample1, final double[] sample2)
237         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
238         return T_TEST.homoscedasticTTest(sample1, sample2);
239     }
240 
241     /**
242      * Returns the <i>observed significance level</i>, or
243      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
244      * comparing the means of the datasets described by two StatisticalSummary
245      * instances, under the hypothesis of equal subpopulation variances. To
246      * perform a test without the equal variances assumption, use
247      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
248      * <p>
249      * The number returned is the smallest significance level
250      * at which one can reject the null hypothesis that the two means are
251      * equal in favor of the two-sided alternative that they are different.
252      * For a one-sided test, divide the returned value by 2.</p>
253      * <p>
254      * See {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} for the
255      * formula used to compute the t-statistic. The sum of the sample sizes minus 2
256      * is used as the degrees of freedom.</p>
257      * <p>
258      * <strong>Usage Note:</strong><br>
259      * The validity of the p-value depends on the assumptions of the parametric
260      * t-test procedure, as discussed
261      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
262      * </p><p>
263      * <strong>Preconditions</strong>:</p>
264      * <ul>
265      * <li>The datasets described by the two StatisticalSummary instances must
266      * each contain at least 2 observations.
267      * </li></ul>
268      *
269      * @param sampleStats1  StatisticalSummary describing data from the first sample
270      * @param sampleStats2  StatisticalSummary describing data from the second sample
271      * @return two-tailed p-value for the equal-variances t-test
272      * @throws NullArgumentException if the sample statistics are <code>null</code>
273      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
274      * @throws MathIllegalStateException if an error occurs computing the p-value
275      */
276     public static double homoscedasticTTest(final StatisticalSummary sampleStats1,
277                                             final StatisticalSummary sampleStats2)
278         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
279         return T_TEST.homoscedasticTTest(sampleStats1, sampleStats2);
280     }
281 
282     /**
283      * Computes a paired, 2-sample t-statistic based on the data in the input
284      * arrays.  The t-statistic returned is equivalent to what would be returned by
285      * computing the one-sample t-statistic {@link #t(double, double[])}, with
286      * <code>mu = 0</code> and the sample array consisting of the (signed)
287      * differences between corresponding entries in <code>sample1</code> and
288      * <code>sample2</code>.
289      * <p>
290      * <strong>Preconditions</strong>:</p>
291      * <ul>
292      * <li>The input arrays must have the same length and their common length
293      * must be at least 2.
294      * </li></ul>
295      *
296      * @param sample1 array of sample data values
297      * @param sample2 array of sample data values
298      * @return paired t statistic
299      * @throws NullArgumentException if the arrays are <code>null</code>
300      * @throws MathIllegalArgumentException if the arrays are empty
301      * @throws MathIllegalArgumentException if the length of the arrays is not equal
302      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
303      */
304     public static double pairedT(final double[] sample1, final double[] sample2)
305         throws MathIllegalArgumentException, NullArgumentException {
306         return T_TEST.pairedT(sample1, sample2);
307     }
308 
309     /**
310      * Performs a paired t-test evaluating the null hypothesis that the
311      * mean of the paired differences between <code>sample1</code> and
312      * <code>sample2</code> is 0 in favor of the two-sided alternative that the
313      * mean paired difference is not equal to 0, with significance level
314      * <code>alpha</code>.
315      * <p>
316      * Returns <code>true</code> iff the null hypothesis can be rejected with
317      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
318      * <code>alpha * 2</code>.</p>
319      * <p>
320      * <strong>Usage Note:</strong><br>
321      * The validity of the test depends on the assumptions of the parametric
322      * t-test procedure, as discussed
323      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
324      * here</a></p>
325      * <p>
326      * <strong>Preconditions</strong>:</p>
327      * <ul>
328      * <li>The input array lengths must be the same and their common length
329      * must be at least 2.
330      * </li>
331      * <li> <code> 0 &lt; alpha &le; 0.5 </code>
332      * </li></ul>
333      *
334      * @param sample1 array of sample data values
335      * @param sample2 array of sample data values
336      * @param alpha significance level of the test
337      * @return true if the null hypothesis can be rejected with
338      * confidence 1 - alpha
339      * @throws NullArgumentException if the arrays are <code>null</code>
340      * @throws MathIllegalArgumentException if the arrays are empty
341      * @throws MathIllegalArgumentException if the length of the arrays is not equal
342      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
343      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
344      * @throws MathIllegalStateException if an error occurs computing the p-value
345      */
346     public static boolean pairedTTest(final double[] sample1, final double[] sample2,
347                                       final double alpha)
348         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
349         return T_TEST.pairedTTest(sample1, sample2, alpha);
350     }
351 
352     /**
353      * Returns the <i>observed significance level</i>, or
354      * <i>p-value</i>, associated with a paired, two-sample, two-tailed t-test
355      * based on the data in the input arrays.
356      * <p>
357      * The number returned is the smallest significance level
358      * at which one can reject the null hypothesis that the mean of the paired
359      * differences is 0 in favor of the two-sided alternative that the mean paired
360      * difference is not equal to 0. For a one-sided test, divide the returned
361      * value by 2.</p>
362      * <p>
363      * This test is equivalent to a one-sample t-test computed using
364      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
365      * array consisting of the signed differences between corresponding elements of
366      * <code>sample1</code> and <code>sample2</code>.</p>
367      * <p>
368      * <strong>Usage Note:</strong><br>
369      * The validity of the p-value depends on the assumptions of the parametric
370      * t-test procedure, as discussed
371      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
372      * here</a></p>
373      * <p>
374      * <strong>Preconditions</strong>:</p>
375      * <ul>
376      * <li>The input array lengths must be the same and their common length must
377      * be at least 2.
378      * </li></ul>
379      *
380      * @param sample1 array of sample data values
381      * @param sample2 array of sample data values
382      * @return two-tailed p-value for the paired t-test
383      * @throws NullArgumentException if the arrays are <code>null</code>
384      * @throws MathIllegalArgumentException if the arrays are empty
385      * @throws MathIllegalArgumentException if the length of the arrays is not equal
386      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
387      * @throws MathIllegalStateException if an error occurs computing the p-value
388      */
389     public static double pairedTTest(final double[] sample1, final double[] sample2)
390         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
391         return T_TEST.pairedTTest(sample1, sample2);
392     }
393 
394     /**
395      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
396      * t statistic </a> given observed values and a comparison constant.
397      * <p>
398      * This statistic can be used to perform a one-sample t-test for the mean.
399      * </p><p>
400      * <strong>Preconditions</strong>:</p>
401      * <ul>
402      * <li>The observed array length must be at least 2.
403      * </li></ul>
404      *
405      * @param mu comparison constant
406      * @param observed array of observed values
407      * @return t statistic
408      * @throws NullArgumentException if <code>observed</code> is <code>null</code>
409      * @throws MathIllegalArgumentException if the length of <code>observed</code> is &lt; 2
410      */
411     public static double t(final double mu, final double[] observed)
412         throws MathIllegalArgumentException, NullArgumentException {
413         return T_TEST.t(mu, observed);
414     }
415 
416     /**
417      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
418      * t statistic </a> to use in comparing the mean of the dataset described by
419      * <code>sampleStats</code> to <code>mu</code>.
420      * <p>
421      * This statistic can be used to perform a one-sample t-test for the mean.
422      * </p><p>
423      * <strong>Preconditions</strong>:</p>
424      * <ul>
425      * <li><code>sampleStats.getN() &ge; 2</code>.
426      * </li></ul>
427      *
428      * @param mu comparison constant
429      * @param sampleStats StatisticalSummary holding sample summary statistics
430      * @return t statistic
431      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
432      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
433      */
434     public static double t(final double mu, final StatisticalSummary sampleStats)
435         throws MathIllegalArgumentException, NullArgumentException {
436         return T_TEST.t(mu, sampleStats);
437     }
438 
439     /**
440      * Computes a 2-sample t statistic, without the hypothesis of equal
441      * subpopulation variances.  To compute a t-statistic assuming equal
442      * variances, use {@link #homoscedasticT(double[], double[])}.
443      * <p>
444      * This statistic can be used to perform a two-sample t-test to compare
445      * sample means.</p>
446      * <p>
447      * The t-statistic is</p>
448      * <p>
449      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
450      * </p><p>
451      * where <strong><code>n1</code></strong> is the size of the first sample;
452      * <strong><code> n2</code></strong> is the size of the second sample;
453      * <strong><code> m1</code></strong> is the mean of the first sample;
454      * <strong><code> m2</code></strong> is the mean of the second sample;
455      * <strong><code> var1</code></strong> is the variance of the first sample;
456      * <strong><code> var2</code></strong> is the variance of the second sample.
457      * </p><p>
458      * <strong>Preconditions</strong>:</p>
459      * <ul>
460      * <li>The observed array lengths must both be at least 2.
461      * </li></ul>
462      *
463      * @param sample1 array of sample data values
464      * @param sample2 array of sample data values
465      * @return t statistic
466      * @throws NullArgumentException if the arrays are <code>null</code>
467      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
468      */
469     public static double t(final double[] sample1, final double[] sample2)
470         throws MathIllegalArgumentException, NullArgumentException {
471         return T_TEST.t(sample1, sample2);
472     }
473 
474     /**
475      * Computes a 2-sample t statistic, comparing the means of the datasets
476      * described by two {@link StatisticalSummary} instances, without the
477      * assumption of equal subpopulation variances.  Use
478      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
479      * compute a t-statistic under the equal variances assumption.
480      * <p>
481      * This statistic can be used to perform a two-sample t-test to compare
482      * sample means.</p>
483      * <p>
484      * The returned t-statistic is</p>
485      * <p>
486      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
487      * </p><p>
488      * where <strong><code>n1</code></strong> is the size of the first sample;
489      * <strong><code> n2</code></strong> is the size of the second sample;
490      * <strong><code> m1</code></strong> is the mean of the first sample;
491      * <strong><code> m2</code></strong> is the mean of the second sample;
492      * <strong><code> var1</code></strong> is the variance of the first sample;
493      * <strong><code> var2</code></strong> is the variance of the second sample.
494      * </p><p>
495      * <strong>Preconditions</strong>:</p>
496      * <ul>
497      * <li>The datasets described by the two StatisticalSummary instances must
498      * each contain at least 2 observations.
499      * </li></ul>
500      *
501      * @param sampleStats1 StatisticalSummary describing data from the first sample
502      * @param sampleStats2 StatisticalSummary describing data from the second sample
503      * @return t statistic
504      * @throws NullArgumentException if the sample statistics are <code>null</code>
505      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
506      */
507     public static double t(final StatisticalSummary sampleStats1,
508                            final StatisticalSummary sampleStats2)
509         throws MathIllegalArgumentException, NullArgumentException {
510         return T_TEST.t(sampleStats1, sampleStats2);
511     }
512 
513     /**
514      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
515      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
516      * which <code>sample</code> is drawn equals <code>mu</code>, with significance level <code>alpha</code>.
517      * <p>
518      * Returns <code>true</code> iff the null hypothesis can be
519      * rejected with confidence <code>1 - alpha</code>.  To
520      * perform a 1-sided test, use <code>alpha * 2</code>.</p>
521      * <p>
522      * <strong>Examples:</strong></p><ol>
523      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
524      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
525      * </li>
526      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
527      * at the 99% level, first verify that the measured sample mean is less
528      * than <code>mu</code> and then use
529      * <br><code>tTest(mu, sample, 0.02) </code>
530      * </li></ol>
531      * <p>
532      * <strong>Usage Note:</strong><br>
533      * The validity of the test depends on the assumptions of the one-sample
534      * parametric t-test procedure, as discussed
535      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
536      * </p><p>
537      * <strong>Preconditions</strong>:</p>
538      * <ul>
539      * <li>The observed array length must be at least 2.</li>
540      * <li> <code> 0 &lt; alpha &le; 0.5 </code></li></ul>
541      *
542      * @param mu constant value to compare sample mean against
543      * @param sample array of sample data values
544      * @param alpha significance level of the test
545      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
546      * @throws NullArgumentException if the sample array is <code>null</code>
547      * @throws MathIllegalArgumentException if the length of the array is &lt; 2
548      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
549      * @throws MathIllegalStateException if an error occurs computing the p-value
550      */
551     public static boolean tTest(final double mu, final double[] sample, final double alpha)
552         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
553         return T_TEST.tTest(mu, sample, alpha);
554     }
555 
556     /**
557      * Returns the <i>observed significance level</i>, or
558      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
559      * comparing the mean of the input array with the constant <code>mu</code>.
560      * <p>
561      * The number returned is the smallest significance level
562      * at which one can reject the null hypothesis that the mean equals
563      * <code>mu</code> in favor of the two-sided alternative that the mean
564      * is different from <code>mu</code>. For a one-sided test, divide the
565      * returned value by 2.</p>
566      * <p>
567      * <strong>Usage Note:</strong><br>
568      * The validity of the test depends on the assumptions of the parametric
569      * t-test procedure, as discussed
570      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
571      * </p><p>
572      * <strong>Preconditions</strong>:</p>
573      * <ul>
574      * <li>The observed array length must be at least 2.
575      * </li></ul>
576      *
577      * @param mu constant value to compare sample mean against
578      * @param sample array of sample data values
579      * @return two-tailed p-value for the one-sample t-test
580      * @throws NullArgumentException if the sample array is <code>null</code>
581      * @throws MathIllegalArgumentException if the length of the array is &lt; 2
582      * @throws MathIllegalStateException if an error occurs computing the p-value
583      */
584     public static double tTest(final double mu, final double[] sample)
585         throws MathIllegalArgumentException, NullArgumentException,
586         MathIllegalStateException {
587         return T_TEST.tTest(mu, sample);
588     }
589 
590     /**
591      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
592      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
593      * population from which the dataset described by <code>sampleStats</code> is
594      * drawn equals <code>mu</code>, with significance level <code>alpha</code>.
595      * <p>
596      * Returns <code>true</code> iff the null hypothesis can be rejected with
597      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
598      * <code>alpha * 2</code>.</p>
599      * <p>
600      * <strong>Examples:</strong></p><ol>
601      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
602      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
603      * </li>
604      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
605      * at the 99% level, first verify that the measured sample mean is less
606      * than <code>mu</code> and then use
607      * <br><code>tTest(mu, sampleStats, 0.02) </code>
608      * </li></ol>
609      * <p>
610      * <strong>Usage Note:</strong><br>
611      * The validity of the test depends on the assumptions of the one-sample
612      * parametric t-test procedure, as discussed
613      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
614      * </p><p>
615      * <strong>Preconditions</strong>:</p>
616      * <ul>
617      * <li>The sample must include at least 2 observations.</li>
618      * <li> <code> 0 &lt; alpha &le; 0.5 </code></li></ul>
619      *
620      * @param mu constant value to compare sample mean against
621      * @param sampleStats StatisticalSummary describing sample data values
622      * @param alpha significance level of the test
623      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
624      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
625      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
626      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
627      * @throws MathIllegalStateException if an error occurs computing the p-value
628      */
629     public static boolean tTest(final double mu, final StatisticalSummary sampleStats,
630                                 final double alpha)
631         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
632         return T_TEST.tTest(mu, sampleStats, alpha);
633     }
634 
635     /**
636      * Returns the <i>observed significance level</i>, or
637      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
638      * comparing the mean of the dataset described by <code>sampleStats</code>
639      * with the constant <code>mu</code>.
640      * <p>
641      * The number returned is the smallest significance level
642      * at which one can reject the null hypothesis that the mean equals
643      * <code>mu</code> in favor of the two-sided alternative that the mean
644      * is different from <code>mu</code>. For a one-sided test, divide the
645      * returned value by 2.</p>
646      * <p>
647      * <strong>Usage Note:</strong><br>
648      * The validity of the test depends on the assumptions of the parametric
649      * t-test procedure, as discussed
650      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
651      * here</a></p>
652      * <p>
653      * <strong>Preconditions</strong>:</p>
654      * <ul>
655      * <li>The sample must contain at least 2 observations.
656      * </li></ul>
657      *
658      * @param mu constant value to compare sample mean against
659      * @param sampleStats StatisticalSummary describing sample data
660      * @return p-value for the one-sample, two-tailed t-test
661      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
662      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
663      * @throws MathIllegalStateException if an error occurs computing the p-value
664      */
665     public static double tTest(final double mu, final StatisticalSummary sampleStats)
666         throws MathIllegalArgumentException, NullArgumentException,
667         MathIllegalStateException {
668         return T_TEST.tTest(mu, sampleStats);
669     }
670 
671     /**
672      * Performs a
673      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
674      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
675      * and <code>sample2</code> are drawn from populations with the same mean,
676      * with significance level <code>alpha</code>.  This test does not assume
677      * that the subpopulation variances are equal.  To perform the test assuming
678      * equal variances, use
679      * {@link #homoscedasticTTest(double[], double[], double)}.
680      * <p>
681      * Returns <code>true</code> iff the null hypothesis that the means are
682      * equal can be rejected with confidence <code>1 - alpha</code>.  To
683      * perform a 1-sided test, use <code>alpha * 2</code>.</p>
684      * <p>
685      * See {@link #t(double[], double[])} for the formula used to compute the
686      * t-statistic.  Degrees of freedom are approximated using the
687      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
688      * Welch-Satterthwaite approximation.</a></p>
689      * <p>
690      * <strong>Examples:</strong></p><ol>
691      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
692      * the 95% level,  use
693      * <br><code>tTest(sample1, sample2, 0.05). </code>
694      * </li>
695      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
696      * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
697      * is less than the mean of <code>sample 2</code> and then use
698      * <br><code>tTest(sample1, sample2, 0.02) </code>
699      * </li></ol>
700      * <p>
701      * <strong>Usage Note:</strong><br>
702      * The validity of the test depends on the assumptions of the parametric
703      * t-test procedure, as discussed
704      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
705      * here</a></p>
706      * <p>
707      * <strong>Preconditions</strong>:</p>
708      * <ul>
709      * <li>The observed array lengths must both be at least 2.
710      * </li>
711      * <li> <code> 0 &lt; alpha &le; 0.5 </code>
712      * </li></ul>
713      *
714      * @param sample1 array of sample data values
715      * @param sample2 array of sample data values
716      * @param alpha significance level of the test
717      * @return true if the null hypothesis can be rejected with
718      * confidence 1 - alpha
719      * @throws NullArgumentException if the arrays are <code>null</code>
720      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
721      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
722      * @throws MathIllegalStateException if an error occurs computing the p-value
723      */
724     public static boolean tTest(final double[] sample1, final double[] sample2,
725                                 final double alpha)
726         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
727         return T_TEST.tTest(sample1, sample2, alpha);
728     }
729 
730     /**
731      * Returns the <i>observed significance level</i>, or
732      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
733      * comparing the means of the input arrays.
734      * <p>
735      * The number returned is the smallest significance level
736      * at which one can reject the null hypothesis that the two means are
737      * equal in favor of the two-sided alternative that they are different.
738      * For a one-sided test, divide the returned value by 2.</p>
739      * <p>
740      * The test does not assume that the underlying population variances are
741      * equal  and it uses approximated degrees of freedom computed from the
742      * sample data to compute the p-value.  The t-statistic used is as defined in
743      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
744      * to the degrees of freedom is used,
745      * as described
746      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
747      * here.</a>  To perform the test under the assumption of equal subpopulation
748      * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
749      * <p>
750      * <strong>Usage Note:</strong><br>
751      * The validity of the p-value depends on the assumptions of the parametric
752      * t-test procedure, as discussed
753      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
754      * here</a></p>
755      * <p>
756      * <strong>Preconditions</strong>:</p>
757      * <ul>
758      * <li>The observed array lengths must both be at least 2.
759      * </li></ul>
760      *
761      * @param sample1 array of sample data values
762      * @param sample2 array of sample data values
763      * @return p-value for t-test
764      * @throws NullArgumentException if the arrays are <code>null</code>
765      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
766      * @throws MathIllegalStateException if an error occurs computing the p-value
767      */
768     public static double tTest(final double[] sample1, final double[] sample2)
769         throws MathIllegalArgumentException, NullArgumentException,
770         MathIllegalStateException {
771         return T_TEST.tTest(sample1, sample2);
772     }
773 
774     /**
775      * Performs a
776      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
777      * two-sided t-test</a> evaluating the null hypothesis that
778      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
779      * datasets drawn from populations with the same mean, with significance
780      * level <code>alpha</code>.   This test does not assume that the
781      * subpopulation variances are equal.  To perform the test under the equal
782      * variances assumption, use
783      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
784      * <p>
785      * Returns <code>true</code> iff the null hypothesis that the means are
786      * equal can be rejected with confidence <code>1 - alpha</code>.  To
787      * perform a 1-sided test, use <code>alpha * 2</code>.</p>
788      * <p>
789      * See {@link #t(double[], double[])} for the formula used to compute the
790      * t-statistic.  Degrees of freedom are approximated using the
791      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
792      * Welch-Satterthwaite approximation.</a></p>
793      * <p>
794      * <strong>Examples:</strong></p><ol>
795      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
796      * the 95% level, use
797      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
798      * </li>
799      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
800      * at the 99% level,  first verify that the measured mean of
801      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
802      * and then use
803      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
804      * </li></ol>
805      * <p>
806      * <strong>Usage Note:</strong><br>
807      * The validity of the test depends on the assumptions of the parametric
808      * t-test procedure, as discussed
809      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
810      * here</a></p>
811      * <p>
812      * <strong>Preconditions</strong>:</p>
813      * <ul>
814      * <li>The datasets described by the two StatisticalSummary instances must each contain
815      * at least 2 observations.
816      * </li>
817      * <li> <code> 0 &lt; alpha &le; 0.5 </code>
818      * </li></ul>
819      *
820      * @param sampleStats1 StatisticalSummary describing sample data values
821      * @param sampleStats2 StatisticalSummary describing sample data values
822      * @param alpha significance level of the test
823      * @return true if the null hypothesis can be rejected with
824      * confidence 1 - alpha
825      * @throws NullArgumentException if the sample statistics are <code>null</code>
826      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
827      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
828      * @throws MathIllegalStateException if an error occurs computing the p-value
829      */
830     public static boolean tTest(final StatisticalSummary sampleStats1,
831                                 final StatisticalSummary sampleStats2,
832                                 final double alpha)
833         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
834         return T_TEST.tTest(sampleStats1, sampleStats2, alpha);
835     }
836 
837     /**
838      * Returns the <i>observed significance level</i>, or
839      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
840      * comparing the means of the datasets described by two StatisticalSummary
841      * instances.
842      * <p>
843      * The number returned is the smallest significance level
844      * at which one can reject the null hypothesis that the two means are
845      * equal in favor of the two-sided alternative that they are different.
846      * For a one-sided test, divide the returned value by 2.</p>
847      * <p>
848      * The test does not assume that the underlying population variances are
849      * equal  and it uses approximated degrees of freedom computed from the
850      * sample data to compute the p-value.   To perform the test assuming
851      * equal variances, use
852      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
853      * <p>
854      * <strong>Usage Note:</strong><br>
855      * The validity of the p-value depends on the assumptions of the parametric
856      * t-test procedure, as discussed
857      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
858      * here</a></p>
859      * <p>
860      * <strong>Preconditions</strong>:</p>
861      * <ul>
862      * <li>The datasets described by the two StatisticalSummary instances must each contain
863      * at least 2 observations.
864      * </li></ul>
865      *
866      * @param sampleStats1  StatisticalSummary describing data from the first sample
867      * @param sampleStats2  StatisticalSummary describing data from the second sample
868      * @return p-value for t-test
869      * @throws NullArgumentException if the sample statistics are <code>null</code>
870      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
871      * @throws MathIllegalStateException if an error occurs computing the p-value
872      */
873     public static double tTest(final StatisticalSummary sampleStats1,
874                                final StatisticalSummary sampleStats2)
875         throws MathIllegalArgumentException, NullArgumentException,
876         MathIllegalStateException {
877         return T_TEST.tTest(sampleStats1, sampleStats2);
878     }
879 
880     /**
881      * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
882      * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
883      * frequency counts.
884      * <p>
885      * This statistic can be used to perform a Chi-Square test evaluating the null
886      * hypothesis that the observed counts follow the expected distribution.
887      * <p>
888      * <strong>Preconditions</strong>:
889      * <ul>
890      * <li>Expected counts must all be positive.</li>
891      * <li>Observed counts must all be &ge; 0.</li>
892      * <li>The observed and expected arrays must have the same length and
893      * their common length must be at least 2.</li>
894      * </ul>
895      * <p>
896      * If any of the preconditions are not met, a
897      * <code>MathIllegalArgumentException</code> is thrown.
898      * <p>
899      * <strong>Note: </strong>This implementation rescales the
900      * <code>expected</code> array if necessary to ensure that the sum of the
901      * expected and observed counts are equal.
902      *
903      * @param expected array of expected frequency counts
904      * @param observed array of observed frequency counts
905      * @return chiSquare test statistic
906      * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
907      * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
908      * not strictly positive
909      * @throws MathIllegalArgumentException if the arrays length is less than 2
910      */
911     public static double chiSquare(final double[] expected, final long[] observed)
912         throws MathIllegalArgumentException {
913         return CHI_SQUARE_TEST.chiSquare(expected, observed);
914     }
915 
916     /**
917      * Computes the Chi-Square statistic associated with a
918      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
919      * chi-square test of independence</a> based on the input <code>counts</code>
920      * array, viewed as a two-way table.
921      * <p>
922      * The rows of the 2-way table are
923      * <code>counts[0], ... , counts[counts.length - 1] </code>
924      * <p>
925      * <strong>Preconditions</strong>:
926      * <ul>
927      * <li>All counts must be &ge; 0.</li>
928      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays
929      * must have the same length).</li>
930      * <li>The 2-way table represented by <code>counts</code> must have at
931      * least 2 columns and at least 2 rows.</li>
932      * </ul>
933      * <p>
934      * If any of the preconditions are not met, a
935      * <code>MathIllegalArgumentException</code> is thrown.
936      *
937      * @param counts array representation of 2-way table
938      * @return chiSquare test statistic
939      * @throws NullArgumentException if the array is null
940      * @throws MathIllegalArgumentException if the array is not rectangular
941      * @throws MathIllegalArgumentException if {@code counts} has negative entries
942      */
943     public static double chiSquare(final long[][] counts)
944         throws MathIllegalArgumentException, NullArgumentException {
945         return CHI_SQUARE_TEST.chiSquare(counts);
946     }
947 
948     /**
949      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
950      * Chi-square goodness of fit test</a> evaluating the null hypothesis that the
951      * observed counts conform to the frequency distribution described by the expected
952      * counts, with significance level <code>alpha</code>.  Returns true iff the null
953      * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
954      * <p>
955      * <strong>Example:</strong><br>
956      * To test the hypothesis that <code>observed</code> follows
957      * <code>expected</code> at the 99% level, use
958      * <code>chiSquareTest(expected, observed, 0.01)</code>
959      * <p>
960      * <strong>Preconditions</strong>:
961      * <ul>
962      * <li>Expected counts must all be positive.</li>
963      * <li>Observed counts must all be &ge; 0.</li>
964      * <li>The observed and expected arrays must have the same length and
965      * their common length must be at least 2.</li>
966      * <li><code> 0 &lt; alpha &le; 0.5</code></li>
967      * </ul>
968      * <p>
969      * If any of the preconditions are not met, a
970      * <code>MathIllegalArgumentException</code> is thrown.
971      * <p>
972      * <strong>Note: </strong>This implementation rescales the
973      * <code>expected</code> array if necessary to ensure that the sum of the
974      * expected and observed counts are equal.
975      *
976      * @param expected array of expected frequency counts
977      * @param observed array of observed frequency counts
978      * @param alpha significance level of the test
979      * @return true iff null hypothesis can be rejected with confidence
980      * 1 - alpha
981      * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
982      * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
983      * not strictly positive
984      * @throws MathIllegalArgumentException if the arrays length is less than 2
985      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
986      * @throws MathIllegalStateException if an error occurs computing the p-value
987      */
988     public static boolean chiSquareTest(final double[] expected, final long[] observed,
989                                         final double alpha)
990         throws MathIllegalArgumentException, MathIllegalStateException {
991         return CHI_SQUARE_TEST.chiSquareTest(expected, observed, alpha);
992     }
993 
994     /**
995      * Returns the <i>observed significance level</i>, or <a href=
996      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
997      * p-value</a>, associated with a
998      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
999      * Chi-square goodness of fit test</a> comparing the <code>observed</code>
1000      * frequency counts to those in the <code>expected</code> array.
1001      * <p>
1002      * The number returned is the smallest significance level at which one can reject
1003      * the null hypothesis that the observed counts conform to the frequency distribution
1004      * described by the expected counts.
1005      * <p>
1006      * <strong>Preconditions</strong>:
1007      * <ul>
1008      * <li>Expected counts must all be positive.</li>
1009      * <li>Observed counts must all be &ge; 0.</li>
1010      * <li>The observed and expected arrays must have the same length and
1011      * their common length must be at least 2.</li>
1012      * </ul>
1013      * <p>
1014      * If any of the preconditions are not met, a
1015      * <code>MathIllegalArgumentException</code> is thrown.
1016      * <p>
1017      * <strong>Note: </strong>This implementation rescales the
1018      * <code>expected</code> array if necessary to ensure that the sum of the
1019      * expected and observed counts are equal.
1020      *
1021      * @param expected array of expected frequency counts
1022      * @param observed array of observed frequency counts
1023      * @return p-value
1024      * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
1025      * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
1026      * not strictly positive
1027      * @throws MathIllegalArgumentException if the arrays length is less than 2
1028      * @throws MathIllegalStateException if an error occurs computing the p-value
1029      */
1030     public static double chiSquareTest(final double[] expected, final long[] observed)
1031         throws MathIllegalArgumentException, MathIllegalStateException {
1032         return CHI_SQUARE_TEST.chiSquareTest(expected, observed);
1033     }
1034 
1035     /**
1036      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
1037      * chi-square test of independence</a> evaluating the null hypothesis that the
1038      * classifications represented by the counts in the columns of the input 2-way table
1039      * are independent of the rows, with significance level <code>alpha</code>.
1040      * Returns true iff the null hypothesis can be rejected with 100 * (1 - alpha) percent
1041      * confidence.
1042      * <p>
1043      * The rows of the 2-way table are
1044      * <code>counts[0], ... , counts[counts.length - 1] </code>
1045      * <p>
1046      * <strong>Example:</strong><br>
1047      * To test the null hypothesis that the counts in
1048      * <code>counts[0], ... , counts[counts.length - 1] </code>
1049      * all correspond to the same underlying probability distribution at the 99% level,
1050      * use <code>chiSquareTest(counts, 0.01)</code>.
1051      * <p>
1052      * <strong>Preconditions</strong>:
1053      * <ul>
1054      * <li>All counts must be &ge; 0.</li>
1055      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays must have the
1056      * same length).</li>
1057      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
1058      * at least 2 rows.</li>
1059      * </ul>
1060      * <p>
1061      * If any of the preconditions are not met, a
1062      * <code>MathIllegalArgumentException</code> is thrown.
1063      *
1064      * @param counts array representation of 2-way table
1065      * @param alpha significance level of the test
1066      * @return true iff null hypothesis can be rejected with confidence
1067      * 1 - alpha
1068      * @throws NullArgumentException if the array is null
1069      * @throws MathIllegalArgumentException if the array is not rectangular
1070      * @throws MathIllegalArgumentException if {@code counts} has any negative entries
1071      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
1072      * @throws MathIllegalStateException if an error occurs computing the p-value
1073      */
1074     public static boolean chiSquareTest(final long[][] counts, final double alpha)
1075         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1076         return CHI_SQUARE_TEST.chiSquareTest(counts, alpha);
1077     }
1078 
1079     /**
1080      * Returns the <i>observed significance level</i>, or <a href=
1081      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
1082      * p-value</a>, associated with a
1083      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
1084      * chi-square test of independence</a> based on the input <code>counts</code>
1085      * array, viewed as a two-way table.
1086      * <p>
1087      * The rows of the 2-way table are
1088      * <code>counts[0], ... , counts[counts.length - 1] </code>
1089      * <p>
1090      * <strong>Preconditions</strong>:
1091      * <ul>
1092      * <li>All counts must be &ge; 0.</li>
1093      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays must have
1094      * the same length).</li>
1095      * <li>The 2-way table represented by <code>counts</code> must have at least 2
1096      * columns and at least 2 rows.</li>
1097      * </ul>
1098      * <p>
1099      * If any of the preconditions are not met, a
1100      * <code>MathIllegalArgumentException</code> is thrown.
1101      *
1102      * @param counts array representation of 2-way table
1103      * @return p-value
1104      * @throws NullArgumentException if the array is null
1105      * @throws MathIllegalArgumentException if the array is not rectangular
1106      * @throws MathIllegalArgumentException if {@code counts} has negative entries
1107      * @throws MathIllegalStateException if an error occurs computing the p-value
1108      */
1109     public static double chiSquareTest(final long[][] counts)
1110         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1111         return CHI_SQUARE_TEST.chiSquareTest(counts);
1112     }
1113 
1114     /**
1115      * Computes a
1116      * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
1117      * Chi-Square two sample test statistic</a> comparing bin frequency counts
1118      * in <code>observed1</code> and <code>observed2</code>.
1119      * <p>
1120      * The sums of frequency counts in the two samples are not required to be the
1121      * same. The formula used to compute the test statistic is
1122      * </p>
1123      * <code>
1124      * &sum;[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
1125      * </code>
1126      * <p>
1127      * where
1128      * </p>
1129      * <code>K = √[&sum;(observed2) / &sum;(observed1)]</code>
1130      * <p>
1131      * This statistic can be used to perform a Chi-Square test evaluating the
1132      * null hypothesis that both observed counts follow the same distribution.
1133      * </p>
1134      * <p><strong>Preconditions</strong>:</p>
1135      * <ul>
1136      * <li>Observed counts must be non-negative.</li>
1137      * <li>Observed counts for a specific bin must not both be zero.</li>
1138      * <li>Observed counts for a specific sample must not all be 0.</li>
1139      * <li>The arrays <code>observed1</code> and <code>observed2</code> must have
1140      * the same length and their common length must be at least 2.</li>
1141      * </ul>
1142      * <p>
1143      * If any of the preconditions are not met, a
1144      * <code>MathIllegalArgumentException</code> is thrown.
1145      * </p>
1146      *
1147      * @param observed1 array of observed frequency counts of the first data set
1148      * @param observed2 array of observed frequency counts of the second data set
1149      * @return chiSquare test statistic
1150      * @throws MathIllegalArgumentException if the length of the arrays does not match
1151      * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
1152      * <code>observed2</code> are negative
1153      * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
1154      * <code>observed2</code> are zero, or if the count at some index is zero
1155      * for both arrays
1156      */
1157     public static double chiSquareDataSetsComparison(final long[] observed1,
1158                                                      final long[] observed2)
1159         throws MathIllegalArgumentException {
1160         return CHI_SQUARE_TEST.chiSquareDataSetsComparison(observed1, observed2);
1161     }
1162 
1163     /**
1164      * Returns the <i>observed significance level</i>, or <a href=
1165      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
1166      * p-value</a>, associated with a Chi-Square two sample test comparing
1167      * bin frequency counts in <code>observed1</code> and
1168      * <code>observed2</code>.
1169      * <p>
1170      * The number returned is the smallest significance level at which one
1171      * can reject the null hypothesis that the observed counts conform to the
1172      * same distribution.
1173      * <p>
1174      * See {@link #chiSquareDataSetsComparison(long[], long[])} for details
1175      * on the formula used to compute the test statistic. The degrees of
1176      * freedom used to perform the test is one less than the common length
1177      * of the input observed count arrays.
1178      * <p>
1179      * <strong>Preconditions</strong>:
1180      * <ul>
1181      * <li>Observed counts must be non-negative.</li>
1182      * <li>Observed counts for a specific bin must not both be zero.</li>
1183      * <li>Observed counts for a specific sample must not all be 0.</li>
1184      * <li>The arrays <code>observed1</code> and <code>observed2</code> must
1185      * have the same length and their common length must be at least 2.</li>
1186      * </ul>
1187      * <p>
1188      * If any of the preconditions are not met, a
1189      * <code>MathIllegalArgumentException</code> is thrown.
1190      *
1191      * @param observed1 array of observed frequency counts of the first data set
1192      * @param observed2 array of observed frequency counts of the second data set
1193      * @return p-value
1194      * @throws MathIllegalArgumentException if the length of the arrays does not match
1195      * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
1196      * <code>observed2</code> are negative
1197      * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
1198      * <code>observed2</code> are zero, or if the count at the same index is zero
1199      * for both arrays
1200      * @throws MathIllegalStateException if an error occurs computing the p-value
1201      */
1202     public static double chiSquareTestDataSetsComparison(final long[] observed1,
1203                                                          final long[] observed2)
1204         throws MathIllegalArgumentException,
1205         MathIllegalStateException {
1206         return CHI_SQUARE_TEST.chiSquareTestDataSetsComparison(observed1, observed2);
1207     }
1208 
1209     /**
1210      * Performs a Chi-Square two sample test comparing two binned data
1211      * sets. The test evaluates the null hypothesis that the two lists of
1212      * observed counts conform to the same frequency distribution, with
1213      * significance level <code>alpha</code>.  Returns true iff the null
1214      * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
1215      * <p>
1216      * See {@link #chiSquareDataSetsComparison(long[], long[])} for
1217      * details on the formula used to compute the Chi-Square statistic used
1218      * in the test. The degrees of freedom used to perform the test is
1219      * one less than the common length of the input observed count arrays.
1220      * <p>
1221      * <strong>Preconditions</strong>:
1222      * <ul>
1223      * <li>Observed counts must be non-negative.</li>
1224      * <li>Observed counts for a specific bin must not both be zero.</li>
1225      * <li>Observed counts for a specific sample must not all be 0.</li>
1226      * <li>The arrays <code>observed1</code> and <code>observed2</code> must
1227      * have the same length and their common length must be at least 2.</li>
1228      * <li><code> 0 &lt; alpha &le; 0.5</code></li>
1229      * </ul>
1230      * <p>
1231      * If any of the preconditions are not met, a
1232      * <code>MathIllegalArgumentException</code> is thrown.
1233      *
1234      * @param observed1 array of observed frequency counts of the first data set
1235      * @param observed2 array of observed frequency counts of the second data set
1236      * @param alpha significance level of the test
1237      * @return true iff null hypothesis can be rejected with confidence
1238      * 1 - alpha
1239      * @throws MathIllegalArgumentException if the length of the arrays does not match
1240      * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
1241      * <code>observed2</code> are negative
1242      * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
1243      * <code>observed2</code> are zero, or if the count at the same index is zero
1244      * for both arrays
1245      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
1246      * @throws MathIllegalStateException if an error occurs performing the test
1247      */
1248     public static boolean chiSquareTestDataSetsComparison(final long[] observed1,
1249                                                           final long[] observed2,
1250                                                           final double alpha)
1251         throws MathIllegalArgumentException, MathIllegalStateException {
1252         return CHI_SQUARE_TEST.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
1253     }
1254 
1255     /**
1256      * Returns the ANOVA F-value computed from a collection of
1257      * <code>double[]</code> arrays.
1258      *
1259      * <p><strong>Preconditions</strong>:</p>
1260      * <ul>
1261      * <li>The categoryData <code>Collection</code> must contain
1262      * <code>double[]</code> arrays.</li>
1263      * <li> There must be at least two <code>double[]</code> arrays in the
1264      * <code>categoryData</code> collection and each of these arrays must
1265      * contain at least two values.</li></ul>
1266      * <p>
1267      * The F statistic is obtained from the definitional formula</p>
1268      * <pre>
1269      *   F = msbg/mswg</pre>
1270      * <p>where</p>
1271      * <pre>
1272      *  msbg = between group mean square
1273      *  mswg = within group mean square</pre>
1274      * <p>
1275      * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
1276      * here</a></p>
1277      *
1278      * @param categoryData <code>Collection</code> of <code>double[]</code>
1279      * arrays each containing data for one category
1280      * @return Fvalue
1281      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
1282      * @throws MathIllegalArgumentException if the size of the <code>categoryData</code>
1283      * collection is less than 2 or a contained <code>double[]</code> array does not have
1284      * at least two values
1285      */
1286     public static double oneWayAnovaFValue(final Collection<double[]> categoryData)
1287         throws MathIllegalArgumentException, NullArgumentException {
1288         final double fValue = ONE_WAY_ANANOVA.anovaFValue(categoryData);
1289         return fValue;
1290     }
1291 
1292     /**
1293      * Returns the ANOVA P-value computed from a collection of
1294      * <code>double[]</code> arrays.
1295      *
1296      * <p><strong>Preconditions</strong>:</p>
1297      * <ul>
1298      * <li>The categoryData <code>Collection</code> must contain
1299      * <code>double[]</code> arrays.</li>
1300      * <li> There must be at least two <code>double[]</code> arrays in the
1301      * <code>categoryData</code> collection and each of these arrays must
1302      * contain at least two values.</li></ul>
1303      * <p>
1304      * The exact p-value is estimated with the
1305      * {@link org.hipparchus.distribution.continuous.FDistribution
1306      * Hipparchus F Distribution implementation}, applying
1307      * the formula</p>
1308      * <pre>
1309      *   p = 1 - cumulativeProbability(F)</pre>
1310      * <p>
1311      * where <code>F</code> is the F value and <code>cumulativeProbability</code>
1312      * is the Hipparchus implementation of the F distribution.</p>
1313      *
1314      * @param categoryData <code>Collection</code> of <code>double[]</code>
1315      * arrays each containing data for one category
1316      * @return Pvalue
1317      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
1318      * @throws MathIllegalArgumentException if the size of the <code>categoryData</code>
1319      * collection is less than 2 or a contained <code>double[]</code> array does not have
1320      * at least two values
1321      * @throws MathIllegalStateException if the p-value can not be computed due to a convergence error
1322      * @throws MathIllegalStateException if the maximum number of iterations is exceeded
1323      */
1324     public static double oneWayAnovaPValue(final Collection<double[]> categoryData)
1325         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1326         final double pValue = ONE_WAY_ANANOVA.anovaPValue(categoryData);
1327         return pValue;
1328     }
1329 
1330     /**
1331      * Runs an ANOVA test of the null hypothesis that there is no
1332      * difference among the means of the data categories.
1333      *
1334      * <p><strong>Preconditions</strong>:</p>
1335      * <ul>
1336      * <li>The categoryData <code>Collection</code> must contain
1337      * <code>double[]</code> arrays.</li>
1338      * <li> There must be at least two <code>double[]</code> arrays in the
1339      * <code>categoryData</code> collection and each of these arrays must
1340      * contain at least two values.</li>
1341      * <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
1342      * </li></ul>
1343      * <p>
1344      * The exact p-value is estimated with the
1345      * {@link org.hipparchus.distribution.continuous.FDistribution
1346      * Hipparchus F Distribution implementation}, applying
1347      * the formula</p><pre>
1348      *   p = 1 - cumulativeProbability(F)</pre>
1349      * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
1350      * is the Hipparchus implementation of the F distribution.</p>
1351      * <p>True is returned iff the estimated p-value is less than alpha.</p>
1352      *
1353      * @param categoryData <code>Collection</code> of <code>double[]</code>
1354      * arrays each containing data for one category
1355      * @param alpha significance level of the test
1356      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
1357      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
1358      * @throws MathIllegalArgumentException if the size of the <code>categoryData</code>
1359      * collection is less than 2 or a contained <code>double[]</code> array does not have
1360      * at least two values
1361      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
1362      * @throws MathIllegalStateException if the p-value can not be computed due to a convergence error
1363      * @throws MathIllegalStateException if the maximum number of iterations is exceeded
1364      */
1365     public static boolean oneWayAnovaTest(final Collection<double[]> categoryData,
1366                                           final double alpha)
1367         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1368         final boolean rejected = ONE_WAY_ANANOVA.anovaTest(categoryData, alpha);
1369         return rejected;
1370     }
1371 
1372     /**
1373      * Returns the <a href="http://en.wikipedia.org/wiki/G-test">G statistic
1374      * for Goodness of Fit</a> comparing {@code observed} and {@code expected}
1375      * frequency counts.
1376      * <p>
1377      * The statistic can serve as input to a G test (Log-Likelihood Ratio
1378      * Test) of the null hypothesis that the observed counts follow the
1379      * expected distribution.
1380      * <p>
1381      * <strong>Preconditions</strong>:
1382      * <ul>
1383      * <li>Expected counts must all be positive.</li>
1384      * <li>Observed counts must all be &ge; 0.</li>
1385      * <li>The observed and expected arrays must have the same length and their
1386      * common length must be at least 2. </li>
1387      * </ul>
1388      * <p>
1389      * If any of the preconditions are not met, a
1390      * {@code MathIllegalArgumentException} is thrown.
1391      * <p>
1392      * <strong>Note:</strong> This implementation rescales the
1393      * {@code expected} array if necessary to ensure that the sum of the
1394      * expected and observed counts are equal.
1395      *
1396      * @param expected array of expected frequency counts
1397      * @param observed array of observed frequency counts
1398      * @return G-Test statistic
1399      * @throws MathIllegalArgumentException if {@code observed} has negative entries
1400      * @throws MathIllegalArgumentException if {@code expected} has entries that
1401      * are not strictly positive
1402      * @throws MathIllegalArgumentException if the array lengths do not match or are less than 2.
1403      */
1404     public static double g(final double[] expected, final long[] observed)
1405         throws MathIllegalArgumentException {
1406         final double statistic = G_TEST.g(expected, observed);
1407         return statistic;
1408     }
1409 
1410     /**
1411      * Computes the <i>observed significance level</i>, or <a href=
1412      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> p-value</a>,
1413      * of a G-Test for goodness of fit comparing the
1414      * {@code observed} frequency counts to those in the {@code expected} array.
1415      *
1416      * <p>The returned number is the smallest significance level at which one
1417      * can reject the null hypothesis that the observed counts conform to the
1418      * frequency distribution described by the expected counts.</p>
1419      *
1420      * <p>The returned probability is the tail probability beyond
1421      * {@link #g(double[], long[]) g(expected, observed)}
1422      * in the ChiSquare distribution with degrees of freedom one less than the
1423      * common length of {@code expected} and {@code observed}.</p>
1424      *
1425      * <p> <strong>Preconditions</strong>:</p>
1426      * <ul>
1427      * <li>Expected counts must all be positive. </li>
1428      * <li>Observed counts must all be &ge; 0. </li>
1429      * <li>The observed and expected arrays must have the
1430      * same length and their common length must be at least 2.</li>
1431      * </ul>
1432      *
1433      * <p>If any of the preconditions are not met, a
1434      * {@code MathIllegalArgumentException} is thrown.</p>
1435      *
1436      * <p><strong>Note:</strong> This implementation rescales the
1437      * {@code expected} array if necessary to ensure that the sum of the
1438      *  expected and observed counts are equal.</p>
1439      *
1440      * @param expected array of expected frequency counts
1441      * @param observed array of observed frequency counts
1442      * @return p-value
1443      * @throws MathIllegalArgumentException if {@code observed} has negative entries
1444      * @throws MathIllegalArgumentException if {@code expected} has entries that
1445      * are not strictly positive
1446      * @throws MathIllegalArgumentException if the array lengths do not match or
1447      * are less than 2.
1448      * @throws MathIllegalStateException if an error occurs computing the p-value.
1449      */
1450     public static double gTest(final double[] expected, final long[] observed)
1451         throws MathIllegalArgumentException, MathIllegalStateException {
1452         final double pValue = G_TEST.gTest(expected, observed);
1453         return pValue;
1454     }
1455 
1456     /**
1457      * Computes the intrinsic (Hardy-Weinberg proportions) p-Value, as described
1458      * in p64-69 of McDonald, J.H. 2009. Handbook of Biological Statistics
1459      * (2nd ed.). Sparky House Publishing, Baltimore, Maryland.
1460      *
1461      * <p> The returned probability is the tail probability beyond
1462      * {@link #g(double[], long[]) g(expected, observed)}
1463      * in the ChiSquare distribution with degrees of freedom two less than the
1464      * common length of {@code expected} and {@code observed}.</p>
1465      *
1466      * @param expected array of expected frequency counts
1467      * @param observed array of observed frequency counts
1468      * @return p-value
1469      * @throws MathIllegalArgumentException if {@code observed} has negative entries
1470      * @throws MathIllegalArgumentException if {@code expected} has entries that are
1471      * not strictly positive
1472      * @throws MathIllegalArgumentException if the array lengths do not match or
1473      * are less than 2.
1474      * @throws MathIllegalStateException if an error occurs computing the p-value.
1475      */
1476     public static double gTestIntrinsic(final double[] expected, final long[] observed)
1477         throws MathIllegalArgumentException, MathIllegalStateException {
1478         final double pValue = G_TEST.gTestIntrinsic(expected, observed);
1479         return pValue;
1480     }
1481 
1482     /**
1483      * Runs a G-Test (Log-Likelihood Ratio Test) for goodness of fit
1484      * of the null hypothesis that the observed counts conform to the
1485      * frequency distribution described by the expected counts, at
1486      * significance level {@code alpha}. Returns true iff the null
1487      * hypothesis can be rejected with {@code 100 * (1 - alpha)} percent confidence.
1488      *
1489      * <p><strong>Example:</strong><br> To test the hypothesis that
1490      * {@code observed} follows {@code expected} at the 99% level,
1491      * use </p><p>
1492      * {@code gTest(expected, observed, 0.01)}</p>
1493      *
1494      * <p>Returns true iff {@link #gTest(double[], long[])
1495      *  gTest(expected, observed)} &lt; alpha</p>
1496      *
1497      * <p><strong>Preconditions</strong>:</p>
1498      * <ul>
1499      * <li>Expected counts must all be positive. </li>
1500      * <li>Observed counts must all be &ge; 0. </li>
1501      * <li>The observed and expected arrays must have the same length and their
1502      * common length must be at least 2. </li>
1503      * <li> {@code 0 < alpha <= 0.5} </li></ul>
1504      *
1505      * <p>If any of the preconditions are not met, a
1506      * {@code MathIllegalArgumentException} is thrown.</p>
1507      *
1508      * <p><strong>Note:</strong> This implementation rescales the
1509      * {@code expected} array if necessary to ensure that the sum of the
1510      * expected and observed counts are equal.</p>
1511      *
1512      * @param expected array of expected frequency counts
1513      * @param observed array of observed frequency counts
1514      * @param alpha significance level of the test
1515      * @return true iff null hypothesis can be rejected with confidence 1 - alpha
1516      * @throws MathIllegalArgumentException if {@code observed} has negative entries
1517      * @throws MathIllegalArgumentException if {@code expected} has entries that
1518      * are not strictly positive
1519      * @throws MathIllegalArgumentException if the array lengths do not match or
1520      * are less than 2.
1521      * @throws MathIllegalStateException if an error occurs computing the
1522      * p-value.
1523      * @throws MathIllegalArgumentException if alpha is not strictly greater than zero
1524      * and less than or equal to 0.5
1525      */
1526     public static boolean gTest(final double[] expected, final long[] observed,
1527                                 final double alpha)
1528         throws MathIllegalArgumentException, MathIllegalStateException {
1529         final boolean rejected = G_TEST.gTest(expected, observed, alpha);
1530         return rejected;
1531     }
1532 
1533     /**
1534      * <p>Returns a G (Log-Likelihood Ratio) two sample test statistic for
1535      * independence comparing frequency counts in
1536      * {@code observed1} and {@code observed2}. The sums of frequency
1537      * counts in the two samples are not required to be the same. The
1538      * test statistic is computed with the formula </p>
1539      *
1540      * <p>{@code 2 * totalSum * [H(rowSums) + H(colSums) - H(k)]}</p>
1541      *
1542      * <p> where {@code H} is the
1543      * <a href="http://en.wikipedia.org/wiki/Entropy_%28information_theory%29">
1544      * Shannon Entropy</a> of the random variable formed by viewing the elements
1545      * of the argument array as incidence counts; <br>
1546      * {@code k} is a matrix with rows {@code [observed1, observed2]}; <br>
1547      * {@code rowSums, colSums} are the row/col sums of {@code k}; <br>
1548      * and {@code totalSum} is the overall sum of all entries in {@code k}.</p>
1549      *
1550      * <p>The statistic can serve as input to a G test of the null
1551      * hypothesis that both observed counts are independent </p>
1552      *
1553      * <p> <strong>Preconditions</strong>:</p>
1554      * <ul>
1555      * <li>Observed counts must be non-negative. </li>
1556      * <li>Observed counts for a specific bin must not both be zero. </li>
1557      * <li>Observed counts for a specific sample must not all be  0. </li>
1558      * <li>The arrays {@code observed1} and {@code observed2} must have
1559      * the same length and their common length must be at least 2. </li></ul>
1560      *
1561      * <p>If any of the preconditions are not met, a
1562      * {@code MathIllegalArgumentException} is thrown.</p>
1563      *
1564      * @param observed1 array of observed frequency counts of the first data set
1565      * @param observed2 array of observed frequency counts of the second data set
1566      * @return G-Test statistic
1567      * @throws MathIllegalArgumentException if the lengths of the arrays do not
1568      * match or their common length is less than 2
1569      * @throws MathIllegalArgumentException if any entry in {@code observed1} or
1570      * {@code observed2} is negative
1571      * @throws MathIllegalArgumentException if either all counts of
1572      * {@code observed1} or {@code observed2} are zero, or if the count
1573      * at the same index is zero for both arrays.
1574      */
1575     public static double gDataSetsComparison(final long[] observed1,
1576                                              final long[] observed2)
1577         throws MathIllegalArgumentException {
1578         final double statistic = G_TEST.gDataSetsComparison(observed1, observed2);
1579         return statistic;
1580     }
1581 
1582     /**
1583      * Computes the root log-likelihood ratio for 2 state Datasets. See
1584      * {@link #gDataSetsComparison(long[], long[])}.
1585      *
1586      * <p>Given two events A and B, let k11 be the number of times both events
1587      * occur, k12 the incidence of B without A, k21 the count of A without B,
1588      * and k22 the number of times neither A nor B occurs.  The value returned
1589      * by this method is </p>
1590      *
1591      * <p>{@code (sgn) sqrt(gDataSetsComparison({k11, k12}, {k21, k22}))}</p>
1592      *
1593      * <p>where {@code sgn} is -1 if {@code k11 / (k11 + k12) < k21 / (k21 + k22)};<br>
1594      * 1 otherwise.</p>
1595      *
1596      * <p>Signed root LLR has two advantages over the basic LLR: a) it is positive
1597      * where k11 is bigger than expected, negative where it is lower b) if there is
1598      * no difference it is asymptotically normally distributed. This allows one
1599      * to talk about "number of standard deviations" which is a more common frame
1600      * of reference than the chi^2 distribution.</p>
1601      *
1602      * @param k11 number of times the two events occurred together (AB)
1603      * @param k12 number of times the second event occurred WITHOUT the
1604      * first event (notA,B)
1605      * @param k21 number of times the first event occurred WITHOUT the
1606      * second event (A, notB)
1607      * @param k22 number of times neither of these events occurred (notA, notB)
1608      * @return root log-likelihood ratio
1609      * @throws MathIllegalArgumentException propagated from the underlying G-test implementation
1610      */
1611     public static double rootLogLikelihoodRatio(final long k11, final long k12, final long k21, final long k22)
1612         throws MathIllegalArgumentException {
1613         final double rootLlr = G_TEST.rootLogLikelihoodRatio(k11, k12, k21, k22);
1614         return rootLlr;
1615     }
1616 
1617 
1618     /**
1619      * <p>Computes the <i>observed significance level</i>, or <a href=
1620      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
1621      * p-value</a>, of a G-Value (Log-Likelihood Ratio) two
1622      * sample test comparing bin frequency counts in {@code observed1} and
1623      * {@code observed2}.</p>
1624      *
1625      * <p>The returned number is the smallest significance level at which one
1626      * can reject the null hypothesis that the observed counts conform to the
1627      * same distribution. </p>
1628      *
1629      * <p>See {@link #gTest(double[], long[])} for details
1630      * on how the p-value is computed.  The number of degrees of freedom used
1631      * by the test is one less than the common length of the input observed
1632      * count arrays.</p>
1633      *
1634      * <p><strong>Preconditions</strong>:</p>
1635      * <ul> <li>Observed counts must be non-negative. </li>
1636      * <li>Observed counts for a specific bin must not both be zero. </li>
1637      * <li>Observed counts for a specific sample must not all be 0. </li>
1638      * <li>The arrays {@code observed1} and {@code observed2} must
1639      * have the same length and their common length must be at least 2. </li>
1640      * </ul>
1641      * <p> If any of the preconditions are not met, a
1642      * {@code MathIllegalArgumentException} is thrown.</p>
1643      *
1644      * @param observed1 array of observed frequency counts of the first data set
1645      * @param observed2 array of observed frequency counts of the second data
1646      * set
1647      * @return p-value
1648      * @throws MathIllegalArgumentException if the length of the arrays does not
1649      * match or their common length is less than 2
1650      * @throws MathIllegalArgumentException if any of the entries in {@code observed1} or
1651      * {@code observed2} are negative
1652      * @throws MathIllegalArgumentException if either all counts of {@code observed1} or
1653      * {@code observed2} are zero, or if the count at some index is
1654      * zero for both arrays
1655      * @throws MathIllegalStateException if an error occurs computing the
1656      * p-value.
1657      */
1658     public static double gTestDataSetsComparison(final long[] observed1,
1659                                                  final long[] observed2)
1660         throws MathIllegalArgumentException, MathIllegalStateException {
1661         final double pValue = G_TEST.gTestDataSetsComparison(observed1, observed2);
1662         return pValue;
1663     }
1664 
1665     /**
1666      * <p>Runs a G-Test (Log-Likelihood Ratio Test) comparing two binned
1667      * data sets. The null hypothesis is that the two lists
1668      * of observed counts conform to the same frequency distribution, at
1669      * significance level {@code alpha}. Returns true iff the null
1670      * hypothesis can be rejected  with 100 * (1 - alpha) percent confidence.
1671      * </p>
1672      * <p>See {@link #gDataSetsComparison(long[], long[])} for details
1673      * on the formula used to compute the G (LLR) statistic used in the test and
1674      * {@link #gTest(double[], long[])} for information on how
1675      * the observed significance level is computed. The number of degrees of freedom
1676      * used by the test is one less than the common length of the input observed
1677      * count arrays. </p>
1678      *
1679      * <p><strong>Preconditions</strong>:</p>
1680      * <ul>
1681      * <li>Observed counts must be non-negative. </li>
1682      * <li>Observed counts for a specific bin must not both be zero. </li>
1683      * <li>Observed counts for a specific sample must not all be 0. </li>
1684      * <li>The arrays {@code observed1} and {@code observed2} must
1685      * have the same length and their common length must be at least 2. </li>
1686      * <li>{@code 0 < alpha <= 0.5} </li></ul>
1687      *
1688      * <p>If any of the preconditions are not met, a
1689      * {@code MathIllegalArgumentException} is thrown.</p>
1690      *
1691      * @param observed1 array of observed frequency counts of the first data set
1692      * @param observed2 array of observed frequency counts of the second data
1693      * set
1694      * @param alpha significance level of the test
1695      * @return true iff null hypothesis can be rejected with confidence 1 - alpha
1696      * @throws MathIllegalArgumentException if the length of the arrays does not
1697      * match
1698      * @throws MathIllegalArgumentException if any of the entries in {@code observed1} or
1699      * {@code observed2} are negative
1700      * @throws MathIllegalArgumentException if either all counts of {@code observed1} or
1701      * {@code observed2} are zero, or if the count at some index is
1702      * zero for both arrays
1703      * @throws MathIllegalArgumentException if {@code alpha} is not in the range
1704      * (0, 0.5]
1705      * @throws MathIllegalStateException if an error occurs performing the test
1706      */
1707     public static boolean gTestDataSetsComparison(final long[] observed1,
1708                                                   final long[] observed2,
1709                                                   final double alpha)
1710         throws MathIllegalArgumentException, MathIllegalStateException {
1711         final boolean rejected = G_TEST.gTestDataSetsComparison(observed1, observed2, alpha);
1712         return rejected;
1713     }
1714 
1715     /**
1716      * Returns the one-sample Kolmogorov-Smirnov test statistic, \(D_n=\sup_x |F_n(x)-F(x)|\) where
1717      * \(F\) is the distribution (cdf) function associated with {@code dist}, \(n\) is the length of
1718      * {@code data} and \(F_n\) is the empirical distribution putting mass \(1/n\) at each value in {@code data}.
1719      *
1720      * @param dist reference distribution
1721      * @param data sample being evaluated
1722      * @return Kolmogorov-Smirnov statistic \(D_n\)
1723      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1724      * @throws NullArgumentException if {@code data} is null
1725      */
1726     public static double kolmogorovSmirnovStatistic(RealDistribution dist, double[] data)
1727             throws MathIllegalArgumentException, NullArgumentException {
1728         final double statistic = KS_TEST.kolmogorovSmirnovStatistic(dist, data);
1729         return statistic;
1730     }
1731 
1732     /**
1733      * Returns the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
1734      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1735      * of the null hypothesis that {@code data} conforms to {@code dist}.
1736      *
1737      * @param dist reference distribution
1738      * @param data sample being evaluated
1739      * @return the p-value associated with the null hypothesis that {@code data} is a sample from {@code dist}
1740      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1741      * @throws NullArgumentException if {@code data} is null
1742      */
1743     public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data)
1744             throws MathIllegalArgumentException, NullArgumentException {
1745         final double pValue = KS_TEST.kolmogorovSmirnovTest(dist, data);
1746         return pValue;
1747     }
1748 
1749     /**
1750      * Returns the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
1751      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1752      * of the null hypothesis that {@code data} conforms to {@code dist}. If
1753      * {@code strict} is true, the distribution used to compute the p-value is computed using
1754      * extended precision. See {@link KolmogorovSmirnovTest#cdfExact(double, int)}.
1755      *
1756      * @param dist reference distribution
1757      * @param data sample being evaluated
1758      * @param strict whether or not to force exact computation of the p-value
1759      * @return the p-value associated with the null hypothesis that {@code data} is a sample from {@code dist}
1760      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1761      * @throws NullArgumentException if {@code data} is null
1762      */
1763     public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data, boolean strict)
1764             throws MathIllegalArgumentException, NullArgumentException {
1765         final double pValue = KS_TEST.kolmogorovSmirnovTest(dist, data, strict);
1766         return pValue;
1767     }
1768 
1769     /**
1770      * Runs a <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov
1771      * test</a> of the null hypothesis that {@code data} conforms to {@code dist}.
1772      *
1773      * @param dist reference distribution
1774      * @param data sample being evaluated
1775      * @param alpha significance level of the test
1776      * @return true iff the null hypothesis can be rejected with confidence 1 - {@code alpha}
1777      * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1778      * @throws NullArgumentException if {@code data} is null
1779      */
1780     public static boolean kolmogorovSmirnovTest(RealDistribution dist, double[] data, double alpha)
1781             throws MathIllegalArgumentException, NullArgumentException {
1782         final boolean rejected = KS_TEST.kolmogorovSmirnovTest(dist, data, alpha);
1783         return rejected;
1784     }
1785 
1786     /**
1787      * Returns the two-sample Kolmogorov-Smirnov test statistic, \(D_{n,m}=\sup_x |F_n(x)-F_m(x)|\)
1788      * where \(n\) is the length of {@code x}, \(m\) is the length of {@code y}, \(F_n\) is the
1789      * empirical distribution that puts mass \(1/n\) at each of the values in {@code x} and \(F_m\)
1790      * is the empirical distribution of the {@code y} values.
1791      *
1792      * @param x first sample
1793      * @param y second sample
1794      * @return test statistic \(D_{n,m}\) used to evaluate the null hypothesis that {@code x} and
1795      *         {@code y} represent samples from the same underlying distribution
1796      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at least 2
1797      * @throws NullArgumentException if either {@code x} or {@code y} is null
1798      */
1799     public static double kolmogorovSmirnovStatistic(double[] x, double[] y)
1800             throws MathIllegalArgumentException, NullArgumentException {
1801         final double statistic = KS_TEST.kolmogorovSmirnovStatistic(x, y);
1802         return statistic;
1803     }
1804 
1805     /**
1806      * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a
1807      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1808      * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
1809      * probability distribution. Assumes the strict form of the inequality used to compute the
1810      * p-value. See {@link KolmogorovSmirnovTest#kolmogorovSmirnovTest(double[], double[], boolean)}.
1811      *
1812      * @param x first sample dataset
1813      * @param y second sample dataset
1814      * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent
1815      *         samples from the same distribution
1816      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
1817      *         least 2
1818      * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
1819      */
1820     public static double kolmogorovSmirnovTest(final double[] x, final double[] y)
1821         throws MathIllegalArgumentException, NullArgumentException {
1822         return KS_TEST.kolmogorovSmirnovTest(x, y); // two-sample p-value, delegated to KS_TEST
1823     }
1824 
1825     /**
1826      * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a
1827      * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1828      * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
1829      * probability distribution. Specifically, what is returned is an estimate of the probability
1830      * that the {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} associated with a randomly
1831      * selected partition of the combined sample into subsamples of sizes {@code x.length} and
1832      * {@code y.length} will strictly exceed (if {@code strict} is {@code true}) or be at least as
1833      * large as (if {@code strict} is {@code false}) {@code kolmogorovSmirnovStatistic(x, y)}.
1834      * <ul>
1835      * <li>For small samples (where the product of the sample sizes is less than
1836      * {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT}), the exact p-value is computed using the method presented
1837      * in [4], implemented in {@link #exactP(double, int, int, boolean)}. </li>
1838      * <li>When the product of the sample sizes exceeds {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT}, the
1839      * asymptotic distribution of \(D_{n,m}\) is used. See {@link #approximateP(double, int, int)}
1840      * for details on the approximation.</li>
1841      * </ul><p>
1842      * If {@code x.length * y.length} &lt; {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT} and the combined set of values in
1843      * {@code x} and {@code y} contains ties, random jitter is added to {@code x} and {@code y} to
1844      * break ties before computing \(D_{n,m}\) and the p-value. The jitter is uniformly distributed
1845      * on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between
1846      * values in the combined sample.</p>
1847      * <p>
1848      * If ties are known to be present in the data, {@link KolmogorovSmirnovTest#bootstrap(double[], double[], int, boolean)}
1849      * may be used as an alternative method for estimating the p-value.</p>
1850      *
1851      * @param x first sample dataset
1852      * @param y second sample dataset
1853      * @param strict whether or not the probability to compute is expressed as a strict inequality
1854      *        (ignored for large samples)
1855      * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent
1856      *         samples from the same distribution
1857      * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
1858      *         least 2
1859      * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
1860      * @see KolmogorovSmirnovTest#bootstrap(double[], double[], int, boolean)
1861      */
1862     public static double kolmogorovSmirnovTest(final double[] x, final double[] y, final boolean strict)
1863         throws MathIllegalArgumentException, NullArgumentException {
1864         return KS_TEST.kolmogorovSmirnovTest(x, y, strict); // two-sample p-value with explicit strictness, delegated to KS_TEST
1865     }
1866 
1867     /**
1868      * Computes \(P(D_{n,m} &gt; d)\) if {@code strict} is {@code true}; otherwise \(P(D_{n,m} \ge
1869      * d)\), where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic. See
1870      * {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
1871      * <p>
1872      * The returned probability is exact, implemented by unwinding the recursive function
1873      * definitions presented in [4] from the {@link KolmogorovSmirnovTest} class javadoc.
1874      * </p>
1875      *
1876      * @param d D-statistic value
1877      * @param n first sample size
1878      * @param m second sample size
1879      * @param strict whether or not the probability to compute is expressed as a strict inequality
1880      * @return probability that a randomly selected m-n partition of m + n generates \(D_{n,m}\)
1881      *         greater than (resp. greater than or equal to) {@code d}
1882      */
1883     public static double exactP(double d, int n, int m, boolean strict) {
1884         return KS_TEST.exactP(d, n, m, strict); // sizes forwarded in declared order: n (first sample), then m (second sample)
1885     }
1886 
1887     /**
1888      * Uses the Kolmogorov-Smirnov distribution to approximate \(P(D_{n,m} &gt; d)\) where \(D_{n,m}\)
1889      * is the 2-sample Kolmogorov-Smirnov statistic. See
1890      * {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
1891      * <p>
1892      * Specifically, what is returned is \(1 - k(d \sqrt{mn / (m + n)})\) where \(k(t) = 1 + 2
1893      * \sum_{i=1}^\infty (-1)^i e^{-2 i^2 t^2}\). See {@link KolmogorovSmirnovTest#ksSum(double, double, int)} for
1894      * details on how convergence of the sum is determined. This implementation passes {@code ksSum}
1895      * {@link KolmogorovSmirnovTest#KS_SUM_CAUCHY_CRITERION} as {@code tolerance} and
1896      * {@link KolmogorovSmirnovTest#MAXIMUM_PARTIAL_SUM_COUNT} as {@code maxIterations}.
1897      * </p>
1898      *
1899      * @param d D-statistic value
1900      * @param n first sample size
1901      * @param m second sample size
1902      * @return approximate probability that a randomly selected m-n partition of m + n generates
1903      *         \(D_{n,m}\) greater than {@code d}
1904      */
1905     public static double approximateP(final double d, final int n, final int m) {
1906         return KS_TEST.approximateP(d, n, m); // asymptotic approximation, delegated to KS_TEST
1907     }
1908 
1909 }