1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.inference;
23
24 import java.util.Collection;
25
26 import org.hipparchus.distribution.RealDistribution;
27 import org.hipparchus.exception.MathIllegalArgumentException;
28 import org.hipparchus.exception.MathIllegalStateException;
29 import org.hipparchus.exception.NullArgumentException;
30 import org.hipparchus.stat.descriptive.StatisticalSummary;
31
32 /**
33 * A collection of static methods to create inference test instances or to
34 * perform inference tests.
35 */
36 public class InferenceTestUtils {
37
38 /** Shared TTest instance; the t-statistic and t-test methods of this class delegate to it. */
39 private static final TTest T_TEST = new TTest();
40
41 /** Shared ChiSquareTest instance. */
42 private static final ChiSquareTest CHI_SQUARE_TEST = new ChiSquareTest();
43
44 /** Shared OneWayAnova instance. NOTE(review): the name is spelled "ANANOVA"; kept as-is since it may be referenced elsewhere. */
45 private static final OneWayAnova ONE_WAY_ANANOVA = new OneWayAnova();
46
47 /** Shared G-Test instance. */
48 private static final GTest G_TEST = new GTest();
49
50 /** Shared Kolmogorov-Smirnov (K-S) test instance. */
51 private static final KolmogorovSmirnovTest KS_TEST = new KolmogorovSmirnovTest();
52
53 /**
54 * Private constructor preventing instantiation; this class is used through its static methods only.
55 */
56 private InferenceTestUtils() {
57 super();
58 }
59
60 /**
61 * Computes a 2-sample t statistic, under the hypothesis of equal
62 * subpopulation variances. To compute a t-statistic without the
63 * equal variances hypothesis, use {@link #t(double[], double[])}.
64 * <p>
65 * This statistic can be used to perform a (homoscedastic) two-sample
66 * t-test to compare sample means.</p>
67 * <p>
68 * The t-statistic is</p>
69 * <p>
70 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
71 * </p><p>
72 * where <strong><code>n1</code></strong> is the size of first sample;
73 * <strong><code> n2</code></strong> is the size of second sample;
74 * <strong><code> m1</code></strong> is the mean of first sample;
75 * <strong><code> m2</code></strong> is the mean of second sample
76 * and <strong><code>var</code></strong> is the pooled variance estimate:
77 * </p><p>
78 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
79 * </p><p>
80 * with <strong><code>var1</code></strong> the variance of the first sample and
81 * <strong><code>var2</code></strong> the variance of the second sample.
82 * </p><p>
83 * <strong>Preconditions</strong>:</p>
84 * <ul>
85 * <li>The observed array lengths must both be at least 2.
86 * </li></ul>
87 *
88 * @param sample1 array of sample data values
89 * @param sample2 array of sample data values
90 * @return t statistic
91 * @throws NullArgumentException if the arrays are <code>null</code>
92 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
93 */
94 public static double homoscedasticT(final double[] sample1, final double[] sample2)
95 throws MathIllegalArgumentException, NullArgumentException {
96 return T_TEST.homoscedasticT(sample1, sample2);
97 }
98
99 /**
100 * Computes a 2-sample t statistic, comparing the means of the datasets
101 * described by two {@link StatisticalSummary} instances, under the
102 * assumption of equal subpopulation variances. To compute a t-statistic
103 * without the equal variances assumption, use
104 * {@link #t(StatisticalSummary, StatisticalSummary)}.
105 * <p>
106 * This statistic can be used to perform a (homoscedastic) two-sample
107 * t-test to compare sample means.</p>
108 * <p>
109 * The t-statistic returned is</p>
110 * <p>
111 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
112 * </p><p>
113 * where <strong><code>n1</code></strong> is the size of first sample;
114 * <strong><code> n2</code></strong> is the size of second sample;
115 * <strong><code> m1</code></strong> is the mean of first sample;
116 * <strong><code> m2</code></strong> is the mean of second sample
117 * and <strong><code>var</code></strong> is the pooled variance estimate:
118 * </p><p>
119 * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
120 * </p><p>
121 * with <strong><code>var1</code></strong> the variance of the first sample and
122 * <strong><code>var2</code></strong> the variance of the second sample.
123 * </p><p>
124 * <strong>Preconditions</strong>:</p><ul>
125 * <li>The datasets described by the two StatisticalSummary instances must each contain
126 * at least 2 observations.
127 * </li></ul>
128 *
129 * @param sampleStats1 StatisticalSummary describing data from the first sample
130 * @param sampleStats2 StatisticalSummary describing data from the second sample
131 * @return t statistic
132 * @throws NullArgumentException if the sample statistics are <code>null</code>
133 * @throws MathIllegalArgumentException if the number of samples is &lt; 2
134 */
135 public static double homoscedasticT(final StatisticalSummary sampleStats1,
136 final StatisticalSummary sampleStats2)
137 throws MathIllegalArgumentException, NullArgumentException {
138 return T_TEST.homoscedasticT(sampleStats1, sampleStats2);
139 }
140
141 /**
142 * Performs a
143 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
144 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
145 * and <code>sample2</code> are drawn from populations with the same mean,
146 * with significance level <code>alpha</code>, assuming that the
147 * subpopulation variances are equal. Use
148 * {@link #tTest(double[], double[], double)} to perform the test without
149 * the assumption of equal variances.
150 * <p>
151 * Returns <code>true</code> iff the null hypothesis that the means are
152 * equal can be rejected with confidence <code>1 - alpha</code>. To
153 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
154 * without the assumption of equal subpopulation variances, use
155 * {@link #tTest(double[], double[], double)}.</p>
156 * <p>
157 * A pooled variance estimate is used to compute the t-statistic. See
158 * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
159 * sizes minus 2 is used as the degrees of freedom.</p>
160 * <p>
161 * <strong>Examples:</strong></p><ol>
162 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
163 * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
164 * </li>
165 * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
166 * at the 99% level, first verify that the measured mean of
167 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
168 * and then use
169 * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
170 * </li></ol>
171 * <p>
172 * <strong>Usage Note:</strong><br>
173 * The validity of the test depends on the assumptions of the parametric
174 * t-test procedure, as discussed
175 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
176 * here</a></p>
177 * <p>
178 * <strong>Preconditions</strong>:</p>
179 * <ul>
180 * <li>The observed array lengths must both be at least 2.
181 * </li>
182 * <li> <code> 0 &lt; alpha &le; 0.5 </code>
183 * </li></ul>
184 *
185 * @param sample1 array of sample data values
186 * @param sample2 array of sample data values
187 * @param alpha significance level of the test
188 * @return true if the null hypothesis can be rejected with
189 * confidence 1 - alpha
190 * @throws NullArgumentException if the arrays are <code>null</code>
191 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
192 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
193 * @throws MathIllegalStateException if an error occurs computing the p-value
194 */
195 public static boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
196 final double alpha)
197 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
198 return T_TEST.homoscedasticTTest(sample1, sample2, alpha);
199 }
200
201 /**
202 * Returns the <i>observed significance level</i>, or
203 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
204 * comparing the means of the input arrays, under the assumption that
205 * the two samples are drawn from subpopulations with equal variances.
206 * To perform the test without the equal variances assumption, use
207 * {@link #tTest(double[], double[])}.
208 * <p>
209 * The number returned is the smallest significance level
210 * at which one can reject the null hypothesis that the two means are
211 * equal in favor of the two-sided alternative that they are different.
212 * For a one-sided test, divide the returned value by 2.</p>
213 * <p>
214 * A pooled variance estimate is used to compute the t-statistic. See
215 * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
216 * minus 2 is used as the degrees of freedom.</p>
217 * <p>
218 * <strong>Usage Note:</strong><br>
219 * The validity of the p-value depends on the assumptions of the parametric
220 * t-test procedure, as discussed
221 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
222 * here</a></p>
223 * <p>
224 * <strong>Preconditions</strong>:</p>
225 * <ul>
226 * <li>The observed array lengths must both be at least 2.
227 * </li></ul>
228 *
229 * @param sample1 array of sample data values
230 * @param sample2 array of sample data values
231 * @return p-value for t-test
232 * @throws NullArgumentException if the arrays are <code>null</code>
233 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
234 * @throws MathIllegalStateException if an error occurs computing the p-value
235 */
236 public static double homoscedasticTTest(final double[] sample1, final double[] sample2)
237 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
238 return T_TEST.homoscedasticTTest(sample1, sample2);
239 }
240
241 /**
242 * Returns the <i>observed significance level</i>, or
243 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
244 * comparing the means of the datasets described by two StatisticalSummary
245 * instances, under the hypothesis of equal subpopulation variances. To
246 * perform a test without the equal variances assumption, use
247 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
248 * <p>
249 * The number returned is the smallest significance level
250 * at which one can reject the null hypothesis that the two means are
251 * equal in favor of the two-sided alternative that they are different.
252 * For a one-sided test, divide the returned value by 2.</p>
253 * <p>
254 * See {@link #homoscedasticT(double[], double[])} for the formula used to
255 * compute the t-statistic. The sum of the sample sizes minus 2 is used as
256 * the degrees of freedom.</p>
257 * <p>
258 * <strong>Usage Note:</strong><br>
259 * The validity of the p-value depends on the assumptions of the parametric
260 * t-test procedure, as discussed
261 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
262 * </p><p>
263 * <strong>Preconditions</strong>:</p>
264 * <ul>
265 * <li>The datasets described by the two StatisticalSummary instances must each contain
266 * at least 2 observations.
267 * </li></ul>
268 *
269 * @param sampleStats1 StatisticalSummary describing data from the first sample
270 * @param sampleStats2 StatisticalSummary describing data from the second sample
271 * @return p-value for t-test
272 * @throws NullArgumentException if the sample statistics are <code>null</code>
273 * @throws MathIllegalArgumentException if the number of samples is &lt; 2
274 * @throws MathIllegalStateException if an error occurs computing the p-value
275 */
276 public static double homoscedasticTTest(final StatisticalSummary sampleStats1,
277 final StatisticalSummary sampleStats2)
278 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
279 return T_TEST.homoscedasticTTest(sampleStats1, sampleStats2);
280 }
281
282 /**
283 * Computes a paired, 2-sample t-statistic based on the data in the input
284 * arrays. The t-statistic returned is equivalent to what would be returned by
285 * computing the one-sample t-statistic {@link #t(double, double[])}, with
286 * <code>mu = 0</code> and the sample array consisting of the (signed)
287 * differences between corresponding entries in <code>sample1</code> and
288 * <code>sample2.</code>
289 * <p>
290 * <strong>Preconditions</strong>:</p>
291 * <ul>
292 * <li>The input arrays must have the same length and their common length
293 * must be at least 2.
294 * </li></ul>
295 *
296 * @param sample1 array of sample data values
297 * @param sample2 array of sample data values
298 * @return t statistic
299 * @throws NullArgumentException if the arrays are <code>null</code>
300 * @throws MathIllegalArgumentException if the arrays are empty
301 * @throws MathIllegalArgumentException if the length of the arrays is not equal
302 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
303 */
304 public static double pairedT(final double[] sample1, final double[] sample2)
305 throws MathIllegalArgumentException, NullArgumentException {
306 return T_TEST.pairedT(sample1, sample2);
307 }
308
309 /**
310 * Performs a paired t-test evaluating the null hypothesis that the
311 * mean of the paired differences between <code>sample1</code> and
312 * <code>sample2</code> is 0 in favor of the two-sided alternative that the
313 * mean paired difference is not equal to 0, with significance level
314 * <code>alpha</code>.
315 * <p>
316 * Returns <code>true</code> iff the null hypothesis can be rejected with
317 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
318 * <code>alpha * 2</code></p>
319 * <p>
320 * <strong>Usage Note:</strong><br>
321 * The validity of the test depends on the assumptions of the parametric
322 * t-test procedure, as discussed
323 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
324 * here</a></p>
325 * <p>
326 * <strong>Preconditions</strong>:</p>
327 * <ul>
328 * <li>The input array lengths must be the same and their common length
329 * must be at least 2.
330 * </li>
331 * <li> <code> 0 &lt; alpha &le; 0.5 </code>
332 * </li></ul>
333 *
334 * @param sample1 array of sample data values
335 * @param sample2 array of sample data values
336 * @param alpha significance level of the test
337 * @return true if the null hypothesis can be rejected with
338 * confidence 1 - alpha
339 * @throws NullArgumentException if the arrays are <code>null</code>
340 * @throws MathIllegalArgumentException if the arrays are empty
341 * @throws MathIllegalArgumentException if the length of the arrays is not equal
342 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
343 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
344 * @throws MathIllegalStateException if an error occurs computing the p-value
345 */
346 public static boolean pairedTTest(final double[] sample1, final double[] sample2,
347 final double alpha)
348 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
349 return T_TEST.pairedTTest(sample1, sample2, alpha);
350 }
351
352 /**
353 * Returns the <i>observed significance level</i>, or
354 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
355 * based on the data in the input arrays.
356 * <p>
357 * The number returned is the smallest significance level
358 * at which one can reject the null hypothesis that the mean of the paired
359 * differences is 0 in favor of the two-sided alternative that the mean paired
360 * difference is not equal to 0. For a one-sided test, divide the returned
361 * value by 2.</p>
362 * <p>
363 * This test is equivalent to a one-sample t-test computed using
364 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
365 * array consisting of the signed differences between corresponding elements of
366 * <code>sample1</code> and <code>sample2.</code></p>
367 * <p>
368 * <strong>Usage Note:</strong><br>
369 * The validity of the p-value depends on the assumptions of the parametric
370 * t-test procedure, as discussed
371 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
372 * here</a></p>
373 * <p>
374 * <strong>Preconditions</strong>:</p>
375 * <ul>
376 * <li>The input array lengths must be the same and their common length must
377 * be at least 2.
378 * </li></ul>
379 *
380 * @param sample1 array of sample data values
381 * @param sample2 array of sample data values
382 * @return p-value for t-test
383 * @throws NullArgumentException if the arrays are <code>null</code>
384 * @throws MathIllegalArgumentException if the arrays are empty
385 * @throws MathIllegalArgumentException if the length of the arrays is not equal
386 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
387 * @throws MathIllegalStateException if an error occurs computing the p-value
388 */
389 public static double pairedTTest(final double[] sample1, final double[] sample2)
390 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
391 return T_TEST.pairedTTest(sample1, sample2);
392 }
393
394 /**
395 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
396 * t statistic </a> given observed values and a comparison constant.
397 * <p>
398 * This statistic can be used to perform a one sample t-test for the mean.
399 * </p><p>
400 * <strong>Preconditions</strong>:</p>
401 * <ul>
402 * <li>The observed array length must be at least 2.
403 * </li></ul>
404 *
405 * @param mu comparison constant
406 * @param observed array of values
407 * @return t statistic
408 * @throws NullArgumentException if <code>observed</code> is <code>null</code>
409 * @throws MathIllegalArgumentException if the length of <code>observed</code> is &lt; 2
410 */
411 public static double t(final double mu, final double[] observed)
412 throws MathIllegalArgumentException, NullArgumentException {
413 return T_TEST.t(mu, observed);
414 }
415
416 /**
417 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
418 * t statistic </a> to use in comparing the mean of the dataset described by
419 * <code>sampleStats</code> to <code>mu</code>.
420 * <p>
421 * This statistic can be used to perform a one sample t-test for the mean.
422 * </p><p>
423 * <strong>Preconditions</strong>:</p>
424 * <ul>
425 * <li><code>sampleStats.getN() ≥ 2</code>.
426 * </li></ul>
427 *
428 * @param mu comparison constant
429 * @param sampleStats StatisticalSummary holding sample summary statistics
430 * @return t statistic
431 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
432 * @throws MathIllegalArgumentException if the number of samples is &lt; 2
433 */
434 public static double t(final double mu, final StatisticalSummary sampleStats)
435 throws MathIllegalArgumentException, NullArgumentException {
436 return T_TEST.t(mu, sampleStats);
437 }
438
439 /**
440 * Computes a 2-sample t statistic, without the hypothesis of equal
441 * subpopulation variances. To compute a t-statistic assuming equal
442 * variances, use {@link #homoscedasticT(double[], double[])}.
443 * <p>
444 * This statistic can be used to perform a two-sample t-test to compare
445 * sample means.</p>
446 * <p>
447 * The t-statistic is</p>
448 * <p>
449 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
450 * </p><p>
451 * where <strong><code>n1</code></strong> is the size of the first sample;
452 * <strong><code> n2</code></strong> is the size of the second sample;
453 * <strong><code> m1</code></strong> is the mean of the first sample;
454 * <strong><code> m2</code></strong> is the mean of the second sample;
455 * <strong><code> var1</code></strong> is the variance of the first sample;
456 * <strong><code> var2</code></strong> is the variance of the second sample.
457 * </p><p>
458 * <strong>Preconditions</strong>:</p>
459 * <ul>
460 * <li>The observed array lengths must both be at least 2.
461 * </li></ul>
462 *
463 * @param sample1 array of sample data values
464 * @param sample2 array of sample data values
465 * @return t statistic
466 * @throws NullArgumentException if the arrays are <code>null</code>
467 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
468 */
469 public static double t(final double[] sample1, final double[] sample2)
470 throws MathIllegalArgumentException, NullArgumentException {
471 return T_TEST.t(sample1, sample2);
472 }
473
474 /**
475 * Computes a 2-sample t statistic, comparing the means of the datasets
476 * described by two {@link StatisticalSummary} instances, without the
477 * assumption of equal subpopulation variances. Use
478 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
479 * compute a t-statistic under the equal variances assumption.
480 * <p>
481 * This statistic can be used to perform a two-sample t-test to compare
482 * sample means.</p>
483 * <p>
484 * The returned t-statistic is</p>
485 * <p>
486 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
487 * </p><p>
488 * where <strong><code>n1</code></strong> is the size of the first sample;
489 * <strong><code> n2</code></strong> is the size of the second sample;
490 * <strong><code> m1</code></strong> is the mean of the first sample;
491 * <strong><code> m2</code></strong> is the mean of the second sample;
492 * <strong><code> var1</code></strong> is the variance of the first sample;
493 * <strong><code> var2</code></strong> is the variance of the second sample.
494 * </p><p>
495 * <strong>Preconditions</strong>:</p>
496 * <ul>
497 * <li>The datasets described by the two StatisticalSummary instances must each contain
498 * at least 2 observations.
499 * </li></ul>
500 *
501 * @param sampleStats1 StatisticalSummary describing data from the first sample
502 * @param sampleStats2 StatisticalSummary describing data from the second sample
503 * @return t statistic
504 * @throws NullArgumentException if the sample statistics are <code>null</code>
505 * @throws MathIllegalArgumentException if the number of samples is &lt; 2
506 */
507 public static double t(final StatisticalSummary sampleStats1,
508 final StatisticalSummary sampleStats2)
509 throws MathIllegalArgumentException, NullArgumentException {
510 return T_TEST.t(sampleStats1, sampleStats2);
511 }
512
513 /**
514 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
515 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
516 * which <code>sample</code> is drawn equals <code>mu</code>.
517 * <p>
518 * Returns <code>true</code> iff the null hypothesis can be
519 * rejected with confidence <code>1 - alpha</code>. To
520 * perform a 1-sided test, use <code>alpha * 2</code></p>
521 * <p>
522 * <strong>Examples:</strong></p><ol>
523 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
524 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
525 * </li>
526 * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
527 * at the 99% level, first verify that the measured sample mean is less
528 * than <code>mu</code> and then use
529 * <br><code>tTest(mu, sample, 0.02) </code>
530 * </li></ol>
531 * <p>
532 * <strong>Usage Note:</strong><br>
533 * The validity of the test depends on the assumptions of the one-sample
534 * parametric t-test procedure, as discussed
535 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
536 * </p><p>
537 * <strong>Preconditions</strong>:</p>
538 * <ul>
539 * <li>The observed array length must be at least 2.
540 * </li></ul>
541 *
542 * @param mu constant value to compare sample mean against
543 * @param sample array of sample data values
544 * @param alpha significance level of the test
545 * @return true if the null hypothesis can be rejected with confidence 1 - alpha
546 * @throws NullArgumentException if the sample array is <code>null</code>
547 * @throws MathIllegalArgumentException if the length of the array is &lt; 2
548 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
549 * @throws MathIllegalStateException if an error occurs computing the p-value
550 */
551 public static boolean tTest(final double mu, final double[] sample, final double alpha)
552 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
553 return T_TEST.tTest(mu, sample, alpha);
554 }
555
556 /**
557 * Returns the <i>observed significance level</i>, or
558 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
559 * comparing the mean of the input array with the constant <code>mu</code>.
560 * <p>
561 * The number returned is the smallest significance level
562 * at which one can reject the null hypothesis that the mean equals
563 * <code>mu</code> in favor of the two-sided alternative that the mean
564 * is different from <code>mu</code>. For a one-sided test, divide the
565 * returned value by 2.</p>
566 * <p>
567 * <strong>Usage Note:</strong><br>
568 * The validity of the test depends on the assumptions of the parametric
569 * t-test procedure, as discussed
570 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
571 * </p><p>
572 * <strong>Preconditions</strong>:</p>
573 * <ul>
574 * <li>The observed array length must be at least 2.
575 * </li></ul>
576 *
577 * @param mu constant value to compare sample mean against
578 * @param sample array of sample data values
579 * @return p-value
580 * @throws NullArgumentException if the sample array is <code>null</code>
581 * @throws MathIllegalArgumentException if the length of the array is &lt; 2
582 * @throws MathIllegalStateException if an error occurs computing the p-value
583 */
584 public static double tTest(final double mu, final double[] sample)
585 throws MathIllegalArgumentException, NullArgumentException,
586 MathIllegalStateException {
587 return T_TEST.tTest(mu, sample);
588 }
589
590 /**
591 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
592 * two-sided t-test</a> evaluating the null hypothesis that the mean of the
593 * population from which the dataset described by <code>stats</code> is
594 * drawn equals <code>mu</code>.
595 * <p>
596 * Returns <code>true</code> iff the null hypothesis can be rejected with
597 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
598 * <code>alpha * 2.</code></p>
599 * <p>
600 * <strong>Examples:</strong></p><ol>
601 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
602 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
603 * </li>
604 * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
605 * at the 99% level, first verify that the measured sample mean is less
606 * than <code>mu</code> and then use
607 * <br><code>tTest(mu, sampleStats, 0.02) </code>
608 * </li></ol>
609 * <p>
610 * <strong>Usage Note:</strong><br>
611 * The validity of the test depends on the assumptions of the one-sample
612 * parametric t-test procedure, as discussed
613 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
614 * </p><p>
615 * <strong>Preconditions</strong>:</p>
616 * <ul>
617 * <li>The sample must include at least 2 observations.
618 * </li></ul>
619 *
620 * @param mu constant value to compare sample mean against
621 * @param sampleStats StatisticalSummary describing sample data values
622 * @param alpha significance level of the test
623 * @return true if the null hypothesis can be rejected with confidence 1 - alpha
624 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
625 * @throws MathIllegalArgumentException if the number of samples is &lt; 2
626 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
627 * @throws MathIllegalStateException if an error occurs computing the p-value
628 */
629 public static boolean tTest(final double mu, final StatisticalSummary sampleStats,
630 final double alpha)
631 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
632 return T_TEST.tTest(mu, sampleStats, alpha);
633 }
634
635 /**
636 * Returns the <i>observed significance level</i>, or
637 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
638 * comparing the mean of the dataset described by <code>sampleStats</code>
639 * with the constant <code>mu</code>.
640 * <p>
641 * The number returned is the smallest significance level
642 * at which one can reject the null hypothesis that the mean equals
643 * <code>mu</code> in favor of the two-sided alternative that the mean
644 * is different from <code>mu</code>. For a one-sided test, divide the
645 * returned value by 2.</p>
646 * <p>
647 * <strong>Usage Note:</strong><br>
648 * The validity of the test depends on the assumptions of the parametric
649 * t-test procedure, as discussed
650 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
651 * here</a></p>
652 * <p>
653 * <strong>Preconditions</strong>:</p>
654 * <ul>
655 * <li>The sample must contain at least 2 observations.
656 * </li></ul>
657 *
658 * @param mu constant value to compare sample mean against
659 * @param sampleStats StatisticalSummary describing sample data
660 * @return p-value
661 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
662 * @throws MathIllegalArgumentException if the number of samples is &lt; 2
663 * @throws MathIllegalStateException if an error occurs computing the p-value
664 */
665 public static double tTest(final double mu, final StatisticalSummary sampleStats)
666 throws MathIllegalArgumentException, NullArgumentException,
667 MathIllegalStateException {
668 return T_TEST.tTest(mu, sampleStats);
669 }
670
671 /**
672 * Performs a
673 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
674 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
675 * and <code>sample2</code> are drawn from populations with the same mean,
676 * with significance level <code>alpha</code>. This test does not assume
677 * that the subpopulation variances are equal. To perform the test assuming
678 * equal variances, use
679 * {@link #homoscedasticTTest(double[], double[], double)}.
680 * <p>
681 * Returns <code>true</code> iff the null hypothesis that the means are
682 * equal can be rejected with confidence <code>1 - alpha</code>. To
683 * perform a 1-sided test, use <code>alpha * 2</code></p>
684 * <p>
685 * See {@link #t(double[], double[])} for the formula used to compute the
686 * t-statistic. Degrees of freedom are approximated using the
687 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
688 * Welch-Satterthwaite approximation.</a></p>
689 * <p>
690 * <strong>Examples:</strong></p><ol>
691 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
692 * the 95% level, use
693 * <br><code>tTest(sample1, sample2, 0.05). </code>
694 * </li>
695 * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
696 * at the 99% level, first verify that the measured mean of <code>sample 1</code>
697 * is less than the mean of <code>sample 2</code> and then use
698 * <br><code>tTest(sample1, sample2, 0.02) </code>
699 * </li></ol>
700 * <p>
701 * <strong>Usage Note:</strong><br>
702 * The validity of the test depends on the assumptions of the parametric
703 * t-test procedure, as discussed
704 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
705 * here</a></p>
706 * <p>
707 * <strong>Preconditions</strong>:</p>
708 * <ul>
709 * <li>The observed array lengths must both be at least 2.
710 * </li>
711 * <li> <code> 0 &lt; alpha &le; 0.5 </code>
712 * </li></ul>
713 *
714 * @param sample1 array of sample data values
715 * @param sample2 array of sample data values
716 * @param alpha significance level of the test
717 * @return true if the null hypothesis can be rejected with
718 * confidence 1 - alpha
719 * @throws NullArgumentException if the arrays are <code>null</code>
720 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
721 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
722 * @throws MathIllegalStateException if an error occurs computing the p-value
723 */
724 public static boolean tTest(final double[] sample1, final double[] sample2,
725 final double alpha)
726 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
727 return T_TEST.tTest(sample1, sample2, alpha);
728 }
729
730 /**
731 * Returns the <i>observed significance level</i>, or
732 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
733 * comparing the means of the input arrays.
734 * <p>
735 * The number returned is the smallest significance level
736 * at which one can reject the null hypothesis that the two means are
737 * equal in favor of the two-sided alternative that they are different.
738 * For a one-sided test, divide the returned value by 2.</p>
739 * <p>
740 * The test does not assume that the underlying popuation variances are
741 * equal and it uses approximated degrees of freedom computed from the
742 * sample data to compute the p-value. The t-statistic used is as defined in
743 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
744 * to the degrees of freedom is used,
745 * as described
746 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
747 * here.</a> To perform the test under the assumption of equal subpopulation
748 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
749 * <p>
750 * <strong>Usage Note:</strong><br>
751 * The validity of the p-value depends on the assumptions of the parametric
752 * t-test procedure, as discussed
753 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
754 * here</a></p>
755 * <p>
756 * <strong>Preconditions</strong>:</p>
757 * <ul>
758 * <li>The observed array lengths must both be at least 2.
759 * </li></ul>
760 *
761 * @param sample1 array of sample data values
762 * @param sample2 array of sample data values
763 * @return p-value for t-test
764 * @throws NullArgumentException if the arrays are <code>null</code>
765 * @throws MathIllegalArgumentException if the length of the arrays is < 2
766 * @throws MathIllegalStateException if an error occurs computing the p-value
767 */
768 public static double tTest(final double[] sample1, final double[] sample2)
769 throws MathIllegalArgumentException, NullArgumentException,
770 MathIllegalStateException {
771 return T_TEST.tTest(sample1, sample2);
772 }
773
774 /**
775 * Performs a
776 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
777 * two-sided t-test</a> evaluating the null hypothesis that
778 * <code>sampleStats1</code> and <code>sampleStats2</code> describe
779 * datasets drawn from populations with the same mean, with significance
780 * level <code>alpha</code>. This test does not assume that the
781 * subpopulation variances are equal. To perform the test under the equal
782 * variances assumption, use
783 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
784 * <p>
785 * Returns <code>true</code> iff the null hypothesis that the means are
786 * equal can be rejected with confidence <code>1 - alpha</code>. To
787 * perform a 1-sided test, use <code>alpha * 2</code></p>
788 * <p>
789 * See {@link #t(double[], double[])} for the formula used to compute the
790 * t-statistic. Degrees of freedom are approximated using the
791 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
792 * Welch-Satterthwaite approximation.</a></p>
793 * <p>
794 * <strong>Examples:</strong></p><ol>
795 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
796 * the 95%, use
797 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
798 * </li>
799 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
800 * at the 99% level, first verify that the measured mean of
801 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
802 * and then use
803 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
804 * </li></ol>
805 * <p>
806 * <strong>Usage Note:</strong><br>
807 * The validity of the test depends on the assumptions of the parametric
808 * t-test procedure, as discussed
809 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
810 * here</a></p>
811 * <p>
812 * <strong>Preconditions</strong>:</p>
813 * <ul>
814 * <li>The datasets described by the two Univariates must each contain
815 * at least 2 observations.
816 * </li>
817 * <li> <code> 0 < alpha < 0.5 </code>
818 * </li></ul>
819 *
820 * @param sampleStats1 StatisticalSummary describing sample data values
821 * @param sampleStats2 StatisticalSummary describing sample data values
822 * @param alpha significance level of the test
823 * @return true if the null hypothesis can be rejected with
824 * confidence 1 - alpha
825 * @throws NullArgumentException if the sample statistics are <code>null</code>
826 * @throws MathIllegalArgumentException if the number of samples is < 2
827 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
828 * @throws MathIllegalStateException if an error occurs computing the p-value
829 */
830 public static boolean tTest(final StatisticalSummary sampleStats1,
831 final StatisticalSummary sampleStats2,
832 final double alpha)
833 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
834 return T_TEST.tTest(sampleStats1, sampleStats2, alpha);
835 }
836
837 /**
838 * Returns the <i>observed significance level</i>, or
839 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
840 * comparing the means of the datasets described by two StatisticalSummary
841 * instances.
842 * <p>
843 * The number returned is the smallest significance level
844 * at which one can reject the null hypothesis that the two means are
845 * equal in favor of the two-sided alternative that they are different.
846 * For a one-sided test, divide the returned value by 2.</p>
847 * <p>
848 * The test does not assume that the underlying population variances are
849 * equal and it uses approximated degrees of freedom computed from the
850 * sample data to compute the p-value. To perform the test assuming
851 * equal variances, use
852 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
853 * <p>
854 * <strong>Usage Note:</strong><br>
855 * The validity of the p-value depends on the assumptions of the parametric
856 * t-test procedure, as discussed
857 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
858 * here</a></p>
859 * <p>
860 * <strong>Preconditions</strong>:</p>
861 * <ul>
862 * <li>The datasets described by the two Univariates must each contain
863 * at least 2 observations.
864 * </li></ul>
865 *
866 * @param sampleStats1 StatisticalSummary describing data from the first sample
867 * @param sampleStats2 StatisticalSummary describing data from the second sample
868 * @return p-value for t-test
869 * @throws NullArgumentException if the sample statistics are <code>null</code>
870 * @throws MathIllegalArgumentException if the number of samples is < 2
871 * @throws MathIllegalStateException if an error occurs computing the p-value
872 */
873 public static double tTest(final StatisticalSummary sampleStats1,
874 final StatisticalSummary sampleStats2)
875 throws MathIllegalArgumentException, NullArgumentException,
876 MathIllegalStateException {
877 return T_TEST.tTest(sampleStats1, sampleStats2);
878 }
879
880 /**
881 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
882 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
883 * frequency counts.
884 * <p>
885 * This statistic can be used to perform a Chi-Square test evaluating the null
886 * hypothesis that the observed counts follow the expected distribution.
887 * <p>
888 * <strong>Preconditions</strong>:
889 * <ul>
890 * <li>Expected counts must all be positive.</li>
891 * <li>Observed counts must all be ≥ 0.</li>
892 * <li>The observed and expected arrays must have the same length and
893 * their common length must be at least 2.</li>
894 * </ul>
895 * <p>
896 * If any of the preconditions are not met, an
897 * <code>IllegalArgumentException</code> is thrown.
898 * <p>
899 * <strong>Note: </strong>This implementation rescales the
900 * <code>expected</code> array if necessary to ensure that the sum of the
901 * expected and observed counts are equal.
902 *
903 * @param observed array of observed frequency counts
904 * @param expected array of expected frequency counts
905 * @return chiSquare test statistic
906 * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
907 * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
908 * not strictly positive
909 * @throws MathIllegalArgumentException if the arrays length is less than 2
910 */
911 public static double chiSquare(final double[] expected, final long[] observed)
912 throws MathIllegalArgumentException {
913 return CHI_SQUARE_TEST.chiSquare(expected, observed);
914 }
915
916 /**
917 * Computes the Chi-Square statistic associated with a
918 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
919 * chi-square test of independence</a> based on the input <code>counts</code>
920 * array, viewed as a two-way table.
921 * <p>
922 * The rows of the 2-way table are
923 * <code>count[0], ... , count[count.length - 1] </code>
924 * <p>
925 * <strong>Preconditions</strong>:
926 * <ul>
927 * <li>All counts must be ≥ 0.</li>
928 * <li>The count array must be rectangular (i.e. all count[i] subarrays
929 * must have the same length).</li>
930 * <li>The 2-way table represented by <code>counts</code> must have at
931 * least 2 columns and at least 2 rows.</li>
932 * </ul>
933 * <p>
934 * If any of the preconditions are not met, an
935 * <code>IllegalArgumentException</code> is thrown.
936 *
937 * @param counts array representation of 2-way table
938 * @return chiSquare test statistic
939 * @throws NullArgumentException if the array is null
940 * @throws MathIllegalArgumentException if the array is not rectangular
941 * @throws MathIllegalArgumentException if {@code counts} has negative entries
942 */
943 public static double chiSquare(final long[][] counts)
944 throws MathIllegalArgumentException, NullArgumentException {
945 return CHI_SQUARE_TEST.chiSquare(counts);
946 }
947
948 /**
949 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
950 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the
951 * observed counts conform to the frequency distribution described by the expected
952 * counts, with significance level <code>alpha</code>. Returns true iff the null
953 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
954 * <p>
955 * <strong>Example:</strong><br>
956 * To test the hypothesis that <code>observed</code> follows
957 * <code>expected</code> at the 99% level, use
958 * <code>chiSquareTest(expected, observed, 0.01)</code>
959 * <p>
960 * <strong>Preconditions</strong>:
961 * <ul>
962 * <li>Expected counts must all be positive.</li>
963 * <li>Observed counts must all be ≥ 0.</li>
964 * <li>The observed and expected arrays must have the same length and
965 * their common length must be at least 2.</li>
966 * <li><code> 0 < alpha < 0.5</code></li>
967 * </ul>
968 * <p>
969 * If any of the preconditions are not met, an
970 * <code>IllegalArgumentException</code> is thrown.
971 * <p>
972 * <strong>Note: </strong>This implementation rescales the
973 * <code>expected</code> array if necessary to ensure that the sum of the
974 * expected and observed counts are equal.
975 *
976 * @param observed array of observed frequency counts
977 * @param expected array of expected frequency counts
978 * @param alpha significance level of the test
979 * @return true iff null hypothesis can be rejected with confidence
980 * 1 - alpha
981 * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
982 * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
983 * not strictly positive
984 * @throws MathIllegalArgumentException if the arrays length is less than 2
985 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
986 * @throws MathIllegalStateException if an error occurs computing the p-value
987 */
988 public static boolean chiSquareTest(final double[] expected, final long[] observed,
989 final double alpha)
990 throws MathIllegalArgumentException, MathIllegalStateException {
991 return CHI_SQUARE_TEST.chiSquareTest(expected, observed, alpha);
992 }
993
994 /**
995 * Returns the <i>observed significance level</i>, or <a href=
996 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
997 * p-value</a>, associated with a
998 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
999 * Chi-square goodness of fit test</a> comparing the <code>observed</code>
1000 * frequency counts to those in the <code>expected</code> array.
1001 * <p>
1002 * The number returned is the smallest significance level at which one can reject
1003 * the null hypothesis that the observed counts conform to the frequency distribution
1004 * described by the expected counts.
1005 * <p>
1006 * <strong>Preconditions</strong>:
1007 * <ul>
1008 * <li>Expected counts must all be positive.</li>
1009 * <li>Observed counts must all be ≥ 0.</li>
1010 * <li>The observed and expected arrays must have the same length and
1011 * their common length must be at least 2.</li>
1012 * </ul>
1013 * <p>
1014 * If any of the preconditions are not met, an
1015 * <code>IllegalArgumentException</code> is thrown.
1016 * <p>
1017 * <strong>Note: </strong>This implementation rescales the
1018 * <code>expected</code> array if necessary to ensure that the sum of the
1019 * expected and observed counts are equal.
1020 *
1021 * @param observed array of observed frequency counts
1022 * @param expected array of expected frequency counts
1023 * @return p-value
1024 * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
1025 * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
1026 * not strictly positive
1027 * @throws MathIllegalArgumentException if the arrays length is less than 2
1028 * @throws MathIllegalStateException if an error occurs computing the p-value
1029 */
1030 public static double chiSquareTest(final double[] expected, final long[] observed)
1031 throws MathIllegalArgumentException, MathIllegalStateException {
1032 return CHI_SQUARE_TEST.chiSquareTest(expected, observed);
1033 }
1034
1035 /**
1036 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
1037 * chi-square test of independence</a> evaluating the null hypothesis that the
1038 * classifications represented by the counts in the columns of the input 2-way table
1039 * are independent of the rows, with significance level <code>alpha</code>.
1040 * Returns true iff the null hypothesis can be rejected with 100 * (1 - alpha) percent
1041 * confidence.
1042 * <p>
1043 * The rows of the 2-way table are
1044 * <code>count[0], ... , count[count.length - 1] </code>
1045 * <p>
1046 * <strong>Example:</strong><br>
1047 * To test the null hypothesis that the counts in
1048 * <code>count[0], ... , count[count.length - 1] </code>
1049 * all correspond to the same underlying probability distribution at the 99% level,
1050 * use <code>chiSquareTest(counts, 0.01)</code>.
1051 * <p>
1052 * <strong>Preconditions</strong>:
1053 * <ul>
1054 * <li>All counts must be ≥ 0.</li>
1055 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the
1056 * same length).</li>
1057 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
1058 * at least 2 rows.</li>
1059 * </ul>
1060 * <p>
1061 * If any of the preconditions are not met, an
1062 * <code>IllegalArgumentException</code> is thrown.
1063 *
1064 * @param counts array representation of 2-way table
1065 * @param alpha significance level of the test
1066 * @return true iff null hypothesis can be rejected with confidence
1067 * 1 - alpha
1068 * @throws NullArgumentException if the array is null
1069 * @throws MathIllegalArgumentException if the array is not rectangular
1070 * @throws MathIllegalArgumentException if {@code counts} has any negative entries
1071 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
1072 * @throws MathIllegalStateException if an error occurs computing the p-value
1073 */
1074 public static boolean chiSquareTest(final long[][] counts, final double alpha)
1075 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1076 return CHI_SQUARE_TEST.chiSquareTest(counts, alpha);
1077 }
1078
1079 /**
1080 * Returns the <i>observed significance level</i>, or <a href=
1081 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
1082 * p-value</a>, associated with a
1083 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
1084 * chi-square test of independence</a> based on the input <code>counts</code>
1085 * array, viewed as a two-way table.
1086 * <p>
1087 * The rows of the 2-way table are
1088 * <code>count[0], ... , count[count.length - 1] </code>
1089 * <p>
1090 * <strong>Preconditions</strong>:
1091 * <ul>
1092 * <li>All counts must be ≥ 0.</li>
1093 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have
1094 * the same length).</li>
1095 * <li>The 2-way table represented by <code>counts</code> must have at least 2
1096 * columns and at least 2 rows.</li>
1097 * </ul>
1098 * <p>
1099 * If any of the preconditions are not met, an
1100 * <code>IllegalArgumentException</code> is thrown.
1101 *
1102 * @param counts array representation of 2-way table
1103 * @return p-value
1104 * @throws NullArgumentException if the array is null
1105 * @throws MathIllegalArgumentException if the array is not rectangular
1106 * @throws MathIllegalArgumentException if {@code counts} has negative entries
1107 * @throws MathIllegalStateException if an error occurs computing the p-value
1108 */
1109 public static double chiSquareTest(final long[][] counts)
1110 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1111 return CHI_SQUARE_TEST.chiSquareTest(counts);
1112 }
1113
1114 /**
1115 * Computes a
1116 * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
1117 * Chi-Square two sample test statistic</a> comparing bin frequency counts
1118 * in <code>observed1</code> and <code>observed2</code>.
1119 * <p>
1120 * The sums of frequency counts in the two samples are not required to be the
1121 * same. The formula used to compute the test statistic is
1122 * </p>
1123 * <code>
1124 * ∑[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
1125 * </code>
1126 * <p>
1127 * where
1128 * </p>
1129 * <code>K = √[∑(observed2 / ∑(observed1)]</code>
1130 * <p>
1131 * This statistic can be used to perform a Chi-Square test evaluating the
1132 * null hypothesis that both observed counts follow the same distribution.
1133 * </p>
1134 * <p><strong>Preconditions</strong>:</p>
1135 * <ul>
1136 * <li>Observed counts must be non-negative.</li>
1137 * <li>Observed counts for a specific bin must not both be zero.</li>
1138 * <li>Observed counts for a specific sample must not all be 0.</li>
1139 * <li>The arrays <code>observed1</code> and <code>observed2</code> must have
1140 * the same length and their common length must be at least 2.</li>
1141 * </ul>
1142 * <p>
1143 * If any of the preconditions are not met, an
1144 * <code>IllegalArgumentException</code> is thrown.
1145 * </p>
1146 *
1147 * @param observed1 array of observed frequency counts of the first data set
1148 * @param observed2 array of observed frequency counts of the second data set
1149 * @return chiSquare test statistic
1150 * @throws MathIllegalArgumentException the the length of the arrays does not match
1151 * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
1152 * <code>observed2</code> are negative
1153 * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
1154 * <code>observed2</code> are zero, or if the count at some index is zero
1155 * for both arrays
1156 */
1157 public static double chiSquareDataSetsComparison(final long[] observed1,
1158 final long[] observed2)
1159 throws MathIllegalArgumentException {
1160 return CHI_SQUARE_TEST.chiSquareDataSetsComparison(observed1, observed2);
1161 }
1162
1163 /**
1164 * Returns the <i>observed significance level</i>, or <a href=
1165 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
1166 * p-value</a>, associated with a Chi-Square two sample test comparing
1167 * bin frequency counts in <code>observed1</code> and
1168 * <code>observed2</code>.
1169 * <p>
1170 * The number returned is the smallest significance level at which one
1171 * can reject the null hypothesis that the observed counts conform to the
1172 * same distribution.
1173 * <p>
1174 * See {@link #chiSquareDataSetsComparison(long[], long[])} for details
1175 * on the formula used to compute the test statistic. The degrees of
1176 * of freedom used to perform the test is one less than the common length
1177 * of the input observed count arrays.
1178 * <p>
1179 * <strong>Preconditions</strong>:
1180 * <ul>
1181 * <li>Observed counts must be non-negative.</li>
1182 * <li>Observed counts for a specific bin must not both be zero.</li>
1183 * <li>Observed counts for a specific sample must not all be 0.</li>
1184 * <li>The arrays <code>observed1</code> and <code>observed2</code> must
1185 * have the same length and their common length must be at least 2.</li>
1186 * </ul>
1187 * <p>
1188 * If any of the preconditions are not met, an
1189 * <code>IllegalArgumentException</code> is thrown.
1190 *
1191 * @param observed1 array of observed frequency counts of the first data set
1192 * @param observed2 array of observed frequency counts of the second data set
1193 * @return p-value
1194 * @throws MathIllegalArgumentException the the length of the arrays does not match
1195 * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
1196 * <code>observed2</code> are negative
1197 * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
1198 * <code>observed2</code> are zero, or if the count at the same index is zero
1199 * for both arrays
1200 * @throws MathIllegalStateException if an error occurs computing the p-value
1201 */
1202 public static double chiSquareTestDataSetsComparison(final long[] observed1,
1203 final long[] observed2)
1204 throws MathIllegalArgumentException,
1205 MathIllegalStateException {
1206 return CHI_SQUARE_TEST.chiSquareTestDataSetsComparison(observed1, observed2);
1207 }
1208
1209 /**
1210 * Performs a Chi-Square two sample test comparing two binned data
1211 * sets. The test evaluates the null hypothesis that the two lists of
1212 * observed counts conform to the same frequency distribution, with
1213 * significance level <code>alpha</code>. Returns true iff the null
1214 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
1215 * <p>
1216 * See {@link #chiSquareDataSetsComparison(long[], long[])} for
1217 * details on the formula used to compute the Chisquare statistic used
1218 * in the test. The degrees of of freedom used to perform the test is
1219 * one less than the common length of the input observed count arrays.
1220 * <p>
1221 * <strong>Preconditions</strong>:
1222 * <ul>
1223 * <li>Observed counts must be non-negative.</li>
1224 * <li>Observed counts for a specific bin must not both be zero.</li>
1225 * <li>Observed counts for a specific sample must not all be 0.</li>
1226 * <li>The arrays <code>observed1</code> and <code>observed2</code> must
1227 * have the same length and their common length must be at least 2.</li>
1228 * <li><code> 0 < alpha < 0.5</code></li>
1229 * </ul>
1230 * <p>
1231 * If any of the preconditions are not met, an
1232 * <code>IllegalArgumentException</code> is thrown.
1233 *
1234 * @param observed1 array of observed frequency counts of the first data set
1235 * @param observed2 array of observed frequency counts of the second data set
1236 * @param alpha significance level of the test
1237 * @return true iff null hypothesis can be rejected with confidence
1238 * 1 - alpha
1239 * @throws MathIllegalArgumentException the the length of the arrays does not match
1240 * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
1241 * <code>observed2</code> are negative
1242 * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
1243 * <code>observed2</code> are zero, or if the count at the same index is zero
1244 * for both arrays
1245 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
1246 * @throws MathIllegalStateException if an error occurs performing the test
1247 */
1248 public static boolean chiSquareTestDataSetsComparison(final long[] observed1,
1249 final long[] observed2,
1250 final double alpha)
1251 throws MathIllegalArgumentException, MathIllegalStateException {
1252 return CHI_SQUARE_TEST.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
1253 }
1254
1255 /**
1256 * Computes the ANOVA F-value for a collection of <code>double[]</code>
1257 * arrays.
1258 *
1259 * <p><strong>Preconditions</strong>:</p>
1260 * <ul>
1261 * <li>The categoryData <code>Collection</code> must contain
1262 * <code>double[]</code> arrays.</li>
1263 * <li> There must be at least two <code>double[]</code> arrays in the
1264 * <code>categoryData</code> collection and each of these arrays must
1265 * contain at least two values.</li></ul>
1266 * <p>
1267 * This implementation computes the F statistic using the definitional
1268 * formula</p>
1269 * <pre>
1270 * F = msbg/mswg</pre>
1271 * <p>where</p>
1272 * <pre>
1273 * msbg = between group mean square
1274 * mswg = within group mean square</pre>
1275 * <p>
1276 * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
1277 * here</a></p>
1278 *
1279 * @param categoryData <code>Collection</code> of <code>double[]</code>
1280 * arrays each containing data for one category
1281 * @return Fvalue
1282 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
1283 * @throws MathIllegalArgumentException if the length of the <code>categoryData</code>
1284 * array is less than 2 or a contained <code>double[]</code> array does not have
1285 * at least two values
1286 */
1287 public static double oneWayAnovaFValue(final Collection<double[]> categoryData)
1288 throws MathIllegalArgumentException, NullArgumentException {
1289 return ONE_WAY_ANANOVA.anovaFValue(categoryData);
1290 }
1291
1292 /**
1293 * Computes the ANOVA P-value for a collection of <code>double[]</code>
1294 * arrays.
1295 *
1296 * <p><strong>Preconditions</strong>:</p>
1297 * <ul>
1298 * <li>The categoryData <code>Collection</code> must contain
1299 * <code>double[]</code> arrays.</li>
1300 * <li> There must be at least two <code>double[]</code> arrays in the
1301 * <code>categoryData</code> collection and each of these arrays must
1302 * contain at least two values.</li></ul>
1303 * <p>
1304 * This implementation uses the
1305 * {@link org.hipparchus.distribution.continuous.FDistribution
1306 * Hipparchus F Distribution implementation} to estimate the exact
1307 * p-value, using the formula</p>
1308 * <pre>
1309 * p = 1 - cumulativeProbability(F)</pre>
1310 * <p>
1311 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
1312 * is the Hipparchus implementation of the F distribution.</p>
1313 *
1314 * @param categoryData <code>Collection</code> of <code>double[]</code>
1315 * arrays each containing data for one category
1316 * @return Pvalue
1317 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
1318 * @throws MathIllegalArgumentException if the length of the <code>categoryData</code>
1319 * array is less than 2 or a contained <code>double[]</code> array does not have
1320 * at least two values
1321 * @throws MathIllegalStateException if the p-value can not be computed due to a convergence error
1322 * @throws MathIllegalStateException if the maximum number of iterations is exceeded
1323 */
1324 public static double oneWayAnovaPValue(final Collection<double[]> categoryData)
1325 throws MathIllegalArgumentException, NullArgumentException,
1326 MathIllegalStateException {
1327 return ONE_WAY_ANANOVA.anovaPValue(categoryData);
1328 }
1329
1330 /**
1331 * Performs an ANOVA test, evaluating the null hypothesis that there
1332 * is no difference among the means of the data categories.
1333 *
1334 * <p><strong>Preconditions</strong>:</p>
1335 * <ul>
1336 * <li>The categoryData <code>Collection</code> must contain
1337 * <code>double[]</code> arrays.</li>
1338 * <li> There must be at least two <code>double[]</code> arrays in the
1339 * <code>categoryData</code> collection and each of these arrays must
1340 * contain at least two values.</li>
1341 * <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
1342 * </li></ul>
1343 * <p>
1344 * This implementation uses the
1345 * {@link org.hipparchus.distribution.continuous.FDistribution
1346 * Hipparchus F Distribution implementation} to estimate the exact
1347 * p-value, using the formula</p><pre>
1348 * p = 1 - cumulativeProbability(F)</pre>
1349 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
1350 * is the Hipparchus implementation of the F distribution.</p>
1351 * <p>True is returned iff the estimated p-value is less than alpha.</p>
1352 *
1353 * @param categoryData <code>Collection</code> of <code>double[]</code>
1354 * arrays each containing data for one category
1355 * @param alpha significance level of the test
1356 * @return true if the null hypothesis can be rejected with
1357 * confidence 1 - alpha
1358 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
1359 * @throws MathIllegalArgumentException if the length of the <code>categoryData</code>
1360 * array is less than 2 or a contained <code>double[]</code> array does not have
1361 * at least two values
1362 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
1363 * @throws MathIllegalStateException if the p-value can not be computed due to a convergence error
1364 * @throws MathIllegalStateException if the maximum number of iterations is exceeded
1365 */
1366 public static boolean oneWayAnovaTest(final Collection<double[]> categoryData,
1367 final double alpha)
1368 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
1369 return ONE_WAY_ANANOVA.anovaTest(categoryData, alpha);
1370 }
1371
1372 /**
1373 * Computes the <a href="http://en.wikipedia.org/wiki/G-test">G statistic
1374 * for Goodness of Fit</a> comparing {@code observed} and {@code expected}
1375 * frequency counts.
1376 * <p>
1377 * This statistic can be used to perform a G test (Log-Likelihood Ratio
1378 * Test) evaluating the null hypothesis that the observed counts follow the
1379 * expected distribution.
1380 * <p>
1381 * <strong>Preconditions</strong>:
1382 * <ul>
1383 * <li>Expected counts must all be positive.</li>
1384 * <li>Observed counts must all be ≥ 0.</li>
1385 * <li>The observed and expected arrays must have the same length and their
1386 * common length must be at least 2. </li>
1387 * </ul>
1388 * <p>
1389 * If any of the preconditions are not met, a
1390 * {@code MathIllegalArgumentException} is thrown.
1391 * <p>
1392 * <strong>Note:</strong>This implementation rescales the
1393 * {@code expected} array if necessary to ensure that the sum of the
1394 * expected and observed counts are equal.
1395 *
1396 * @param observed array of observed frequency counts
1397 * @param expected array of expected frequency counts
1398 * @return G-Test statistic
1399 * @throws MathIllegalArgumentException if {@code observed} has negative entries
1400 * @throws MathIllegalArgumentException if {@code expected} has entries that
1401 * are not strictly positive
1402 * @throws MathIllegalArgumentException if the array lengths do not match or
1403 * are less than 2.
1404 */
1405 public static double g(final double[] expected, final long[] observed)
1406 throws MathIllegalArgumentException {
1407 return G_TEST.g(expected, observed);
1408 }
1409
1410 /**
1411 * Returns the <i>observed significance level</i>, or <a href=
1412 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> p-value</a>,
1413 * associated with a G-Test for goodness of fit comparing the
1414 * {@code observed} frequency counts to those in the {@code expected} array.
1415 *
1416 * <p>The number returned is the smallest significance level at which one
1417 * can reject the null hypothesis that the observed counts conform to the
1418 * frequency distribution described by the expected counts.</p>
1419 *
1420 * <p>The probability returned is the tail probability beyond
1421 * {@link #g(double[], long[]) g(expected, observed)}
1422 * in the ChiSquare distribution with degrees of freedom one less than the
1423 * common length of {@code expected} and {@code observed}.</p>
1424 *
1425 * <p> <strong>Preconditions</strong>:</p>
1426 * <ul>
1427 * <li>Expected counts must all be positive. </li>
1428 * <li>Observed counts must all be ≥ 0. </li>
1429 * <li>The observed and expected arrays must have the
1430 * same length and their common length must be at least 2.</li>
1431 * </ul>
1432 *
1433 * <p>If any of the preconditions are not met, a
1434 * {@code MathIllegalArgumentException} is thrown.</p>
1435 *
1436 * <p><strong>Note:</strong>This implementation rescales the
1437 * {@code expected} array if necessary to ensure that the sum of the
1438 * expected and observed counts are equal.</p>
1439 *
1440 * @param observed array of observed frequency counts
1441 * @param expected array of expected frequency counts
1442 * @return p-value
1443 * @throws MathIllegalArgumentException if {@code observed} has negative entries
1444 * @throws MathIllegalArgumentException if {@code expected} has entries that
1445 * are not strictly positive
1446 * @throws MathIllegalArgumentException if the array lengths do not match or
1447 * are less than 2.
1448 * @throws MathIllegalStateException if an error occurs computing the
1449 * p-value.
1450 */
1451 public static double gTest(final double[] expected, final long[] observed)
1452 throws MathIllegalArgumentException, MathIllegalStateException {
1453 return G_TEST.gTest(expected, observed);
1454 }
1455
1456 /**
1457 * Returns the intrinsic (Hardy-Weinberg proportions) p-Value, as described
1458 * in p64-69 of McDonald, J.H. 2009. Handbook of Biological Statistics
1459 * (2nd ed.). Sparky House Publishing, Baltimore, Maryland.
1460 *
1461 * <p> The probability returned is the tail probability beyond
1462 * {@link #g(double[], long[]) g(expected, observed)}
1463 * in the ChiSquare distribution with degrees of freedom two less than the
1464 * common length of {@code expected} and {@code observed}.</p>
1465 *
1466 * @param observed array of observed frequency counts
1467 * @param expected array of expected frequency counts
1468 * @return p-value
1469 * @throws MathIllegalArgumentException if {@code observed} has negative entries
1470 * @throws MathIllegalArgumentException {@code expected} has entries that are
1471 * not strictly positive
1472 * @throws MathIllegalArgumentException if the array lengths do not match or
1473 * are less than 2.
1474 * @throws MathIllegalStateException if an error occurs computing the
1475 * p-value.
1476 */
1477 public static double gTestIntrinsic(final double[] expected, final long[] observed)
1478 throws MathIllegalArgumentException, MathIllegalStateException {
1479 return G_TEST.gTestIntrinsic(expected, observed);
1480 }
1481
1482 /**
1483 * Performs a G-Test (Log-Likelihood Ratio Test) for goodness of fit
1484 * evaluating the null hypothesis that the observed counts conform to the
1485 * frequency distribution described by the expected counts, with
1486 * significance level {@code alpha}. Returns true iff the null
1487 * hypothesis can be rejected with {@code 100 * (1 - alpha)} percent confidence.
1488 *
1489 * <p><strong>Example:</strong><br> To test the hypothesis that
1490 * {@code observed} follows {@code expected} at the 99% level,
1491 * use </p><p>
1492 * {@code gTest(expected, observed, 0.01)}</p>
1493 *
1494 * <p>Returns true iff {@link #gTest(double[], long[])
1495 * gTestGoodnessOfFitPValue(expected, observed)} > alpha</p>
1496 *
1497 * <p><strong>Preconditions</strong>:</p>
1498 * <ul>
1499 * <li>Expected counts must all be positive. </li>
1500 * <li>Observed counts must all be ≥ 0. </li>
1501 * <li>The observed and expected arrays must have the same length and their
1502 * common length must be at least 2.
1503 * <li> {@code 0 < alpha < 0.5} </li></ul>
1504 *
1505 * <p>If any of the preconditions are not met, a
1506 * {@code MathIllegalArgumentException} is thrown.</p>
1507 *
1508 * <p><strong>Note:</strong>This implementation rescales the
1509 * {@code expected} array if necessary to ensure that the sum of the
1510 * expected and observed counts are equal.</p>
1511 *
1512 * @param observed array of observed frequency counts
1513 * @param expected array of expected frequency counts
1514 * @param alpha significance level of the test
1515 * @return true iff null hypothesis can be rejected with confidence 1 -
1516 * alpha
1517 * @throws MathIllegalArgumentException if {@code observed} has negative entries
1518 * @throws MathIllegalArgumentException if {@code expected} has entries that
1519 * are not strictly positive
1520 * @throws MathIllegalArgumentException if the array lengths do not match or
1521 * are less than 2.
1522 * @throws MathIllegalStateException if an error occurs computing the
1523 * p-value.
1524 * @throws MathIllegalArgumentException if alpha is not strictly greater than zero
1525 * and less than or equal to 0.5
1526 */
1527 public static boolean gTest(final double[] expected, final long[] observed,
1528 final double alpha)
1529 throws MathIllegalArgumentException, MathIllegalStateException {
1530 return G_TEST.gTest(expected, observed, alpha);
1531 }
1532
1533 /**
1534 * <p>Computes a G (Log-Likelihood Ratio) two sample test statistic for
1535 * independence comparing frequency counts in
1536 * {@code observed1} and {@code observed2}. The sums of frequency
1537 * counts in the two samples are not required to be the same. The formula
1538 * used to compute the test statistic is </p>
1539 *
1540 * <p>{@code 2 * totalSum * [H(rowSums) + H(colSums) - H(k)]}</p>
1541 *
1542 * <p> where {@code H} is the
1543 * <a href="http://en.wikipedia.org/wiki/Entropy_%28information_theory%29">
1544 * Shannon Entropy</a> of the random variable formed by viewing the elements
1545 * of the argument array as incidence counts; <br>
1546 * {@code k} is a matrix with rows {@code [observed1, observed2]}; <br>
1547 * {@code rowSums, colSums} are the row/col sums of {@code k}; <br>
1548 * and {@code totalSum} is the overall sum of all entries in {@code k}.</p>
1549 *
1550 * <p>This statistic can be used to perform a G test evaluating the null
1551 * hypothesis that both observed counts are independent </p>
1552 *
1553 * <p> <strong>Preconditions</strong>:</p>
1554 * <ul>
1555 * <li>Observed counts must be non-negative. </li>
1556 * <li>Observed counts for a specific bin must not both be zero. </li>
1557 * <li>Observed counts for a specific sample must not all be 0. </li>
1558 * <li>The arrays {@code observed1} and {@code observed2} must have
1559 * the same length and their common length must be at least 2. </li></ul>
1560 *
1561 * <p>If any of the preconditions are not met, a
1562 * {@code MathIllegalArgumentException} is thrown.</p>
1563 *
1564 * @param observed1 array of observed frequency counts of the first data set
1565 * @param observed2 array of observed frequency counts of the second data
1566 * set
1567 * @return G-Test statistic
1568 * @throws MathIllegalArgumentException the the lengths of the arrays do not
1569 * match or their common length is less than 2
1570 * @throws MathIllegalArgumentException if any entry in {@code observed1} or
1571 * {@code observed2} is negative
1572 * @throws MathIllegalArgumentException if either all counts of
1573 * {@code observed1} or {@code observed2} are zero, or if the count
1574 * at the same index is zero for both arrays.
1575 */
1576 public static double gDataSetsComparison(final long[] observed1,
1577 final long[] observed2)
1578 throws MathIllegalArgumentException {
1579 return G_TEST.gDataSetsComparison(observed1, observed2);
1580 }
1581
1582 /**
1583 * Calculates the root log-likelihood ratio for 2 state Datasets. See
1584 * {@link #gDataSetsComparison(long[], long[] )}.
1585 *
1586 * <p>Given two events A and B, let k11 be the number of times both events
1587 * occur, k12 the incidence of B without A, k21 the count of A without B,
1588 * and k22 the number of times neither A nor B occurs. What is returned
1589 * by this method is </p>
1590 *
1591 * <p>{@code (sgn) sqrt(gValueDataSetsComparison({k11, k12}, {k21, k22})}</p>
1592 *
1593 * <p>where {@code sgn} is -1 if {@code k11 / (k11 + k12) < k21 / (k21 + k22))};<br>
1594 * 1 otherwise.</p>
1595 *
1596 * <p>Signed root LLR has two advantages over the basic LLR: a) it is positive
1597 * where k11 is bigger than expected, negative where it is lower b) if there is
1598 * no difference it is asymptotically normally distributed. This allows one
1599 * to talk about "number of standard deviations" which is a more common frame
1600 * of reference than the chi^2 distribution.</p>
1601 *
1602 * @param k11 number of times the two events occurred together (AB)
1603 * @param k12 number of times the second event occurred WITHOUT the
1604 * first event (notA,B)
1605 * @param k21 number of times the first event occurred WITHOUT the
1606 * second event (A, notB)
1607 * @param k22 number of times something else occurred (i.e. was neither
1608 * of these events (notA, notB)
1609 * @return root log-likelihood ratio
1610 *
1611 */
1612 public static double rootLogLikelihoodRatio(final long k11, final long k12, final long k21, final long k22)
1613 throws MathIllegalArgumentException {
1614 return G_TEST.rootLogLikelihoodRatio(k11, k12, k21, k22);
1615 }
1616
1617
1618 /**
1619 * <p>Returns the <i>observed significance level</i>, or <a href=
1620 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
1621 * p-value</a>, associated with a G-Value (Log-Likelihood Ratio) for two
1622 * sample test comparing bin frequency counts in {@code observed1} and
1623 * {@code observed2}.</p>
1624 *
1625 * <p>The number returned is the smallest significance level at which one
1626 * can reject the null hypothesis that the observed counts conform to the
1627 * same distribution. </p>
1628 *
1629 * <p>See {@link #gTest(double[], long[])} for details
1630 * on how the p-value is computed. The degrees of of freedom used to
1631 * perform the test is one less than the common length of the input observed
1632 * count arrays.</p>
1633 *
1634 * <p><strong>Preconditions</strong>:</p>
1635 * <ul> <li>Observed counts must be non-negative. </li>
1636 * <li>Observed counts for a specific bin must not both be zero. </li>
1637 * <li>Observed counts for a specific sample must not all be 0. </li>
1638 * <li>The arrays {@code observed1} and {@code observed2} must
1639 * have the same length and their common length must be at least 2. </li>
1640 * </ul>
1641 * <p> If any of the preconditions are not met, a
1642 * {@code MathIllegalArgumentException} is thrown.</p>
1643 *
1644 * @param observed1 array of observed frequency counts of the first data set
1645 * @param observed2 array of observed frequency counts of the second data
1646 * set
1647 * @return p-value
1648 * @throws MathIllegalArgumentException the the length of the arrays does not
1649 * match or their common length is less than 2
1650 * @throws MathIllegalArgumentException if any of the entries in {@code observed1} or
1651 * {@code observed2} are negative
1652 * @throws MathIllegalArgumentException if either all counts of {@code observed1} or
1653 * {@code observed2} are zero, or if the count at some index is
1654 * zero for both arrays
1655 * @throws MathIllegalStateException if an error occurs computing the
1656 * p-value.
1657 */
1658 public static double gTestDataSetsComparison(final long[] observed1,
1659 final long[] observed2)
1660 throws MathIllegalArgumentException,
1661 MathIllegalStateException {
1662 return G_TEST.gTestDataSetsComparison(observed1, observed2);
1663 }
1664
1665 /**
1666 * <p>Performs a G-Test (Log-Likelihood Ratio Test) comparing two binned
1667 * data sets. The test evaluates the null hypothesis that the two lists
1668 * of observed counts conform to the same frequency distribution, with
1669 * significance level {@code alpha}. Returns true iff the null
1670 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
1671 * </p>
1672 * <p>See {@link #gDataSetsComparison(long[], long[])} for details
1673 * on the formula used to compute the G (LLR) statistic used in the test and
1674 * {@link #gTest(double[], long[])} for information on how
1675 * the observed significance level is computed. The degrees of of freedom used
1676 * to perform the test is one less than the common length of the input observed
1677 * count arrays. </p>
1678 *
1679 * <p><strong>Preconditions</strong>:</p>
1680 * <ul>
1681 * <li>Observed counts must be non-negative. </li>
1682 * <li>Observed counts for a specific bin must not both be zero. </li>
1683 * <li>Observed counts for a specific sample must not all be 0. </li>
1684 * <li>The arrays {@code observed1} and {@code observed2} must
1685 * have the same length and their common length must be at least 2. </li>
1686 * <li>{@code 0 < alpha < 0.5} </li></ul>
1687 *
1688 * <p>If any of the preconditions are not met, a
1689 * {@code MathIllegalArgumentException} is thrown.</p>
1690 *
1691 * @param observed1 array of observed frequency counts of the first data set
1692 * @param observed2 array of observed frequency counts of the second data
1693 * set
1694 * @param alpha significance level of the test
1695 * @return true iff null hypothesis can be rejected with confidence 1 -
1696 * alpha
1697 * @throws MathIllegalArgumentException the the length of the arrays does not
1698 * match
1699 * @throws MathIllegalArgumentException if any of the entries in {@code observed1} or
1700 * {@code observed2} are negative
1701 * @throws MathIllegalArgumentException if either all counts of {@code observed1} or
1702 * {@code observed2} are zero, or if the count at some index is
1703 * zero for both arrays
1704 * @throws MathIllegalArgumentException if {@code alpha} is not in the range
1705 * (0, 0.5]
1706 * @throws MathIllegalStateException if an error occurs performing the test
1707 */
1708 public static boolean gTestDataSetsComparison(final long[] observed1,
1709 final long[] observed2,
1710 final double alpha)
1711 throws MathIllegalArgumentException, MathIllegalStateException {
1712 return G_TEST.gTestDataSetsComparison(observed1, observed2, alpha);
1713 }
1714
1715 /**
1716 * Computes the one-sample Kolmogorov-Smirnov test statistic, \(D_n=\sup_x |F_n(x)-F(x)|\) where
1717 * \(F\) is the distribution (cdf) function associated with {@code distribution}, \(n\) is the
1718 * length of {@code data} and \(F_n\) is the empirical distribution that puts mass \(1/n\) at
1719 * each of the values in {@code data}.
1720 *
1721 * @param dist reference distribution
1722 * @param data sample being evaluated
1723 * @return Kolmogorov-Smirnov statistic \(D_n\)
1724 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1725 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
1726 */
1727 public static double kolmogorovSmirnovStatistic(RealDistribution dist, double[] data)
1728 throws MathIllegalArgumentException, NullArgumentException {
1729 return KS_TEST.kolmogorovSmirnovStatistic(dist, data);
1730 }
1731
1732 /**
1733 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
1734 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1735 * evaluating the null hypothesis that {@code data} conforms to {@code distribution}.
1736 *
1737 * @param dist reference distribution
1738 * @param data sample being being evaluated
1739 * @return the p-value associated with the null hypothesis that {@code data} is a sample from
1740 * {@code distribution}
1741 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1742 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
1743 */
1744 public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data)
1745 throws MathIllegalArgumentException, NullArgumentException {
1746 return KS_TEST.kolmogorovSmirnovTest(dist, data);
1747 }
1748
1749 /**
1750 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
1751 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1752 * evaluating the null hypothesis that {@code data} conforms to {@code distribution}. If
1753 * {@code exact} is true, the distribution used to compute the p-value is computed using
1754 * extended precision. See {@link KolmogorovSmirnovTest#cdfExact(double, int)}.
1755 *
1756 * @param dist reference distribution
1757 * @param data sample being being evaluated
1758 * @param strict whether or not to force exact computation of the p-value
1759 * @return the p-value associated with the null hypothesis that {@code data} is a sample from
1760 * {@code distribution}
1761 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1762 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
1763 */
1764 public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data, boolean strict)
1765 throws MathIllegalArgumentException, NullArgumentException {
1766 return KS_TEST.kolmogorovSmirnovTest(dist, data, strict);
1767 }
1768
1769 /**
1770 * Performs a <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov
1771 * test</a> evaluating the null hypothesis that {@code data} conforms to {@code distribution}.
1772 *
1773 * @param dist reference distribution
1774 * @param data sample being being evaluated
1775 * @param alpha significance level of the test
1776 * @return true iff the null hypothesis that {@code data} is a sample from {@code distribution}
1777 * can be rejected with confidence 1 - {@code alpha}
1778 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2
1779 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null
1780 */
1781 public static boolean kolmogorovSmirnovTest(RealDistribution dist, double[] data, double alpha)
1782 throws MathIllegalArgumentException, NullArgumentException {
1783 return KS_TEST.kolmogorovSmirnovTest(dist, data, alpha);
1784 }
1785
1786 /**
1787 * Computes the two-sample Kolmogorov-Smirnov test statistic, \(D_{n,m}=\sup_x |F_n(x)-F_m(x)|\)
1788 * where \(n\) is the length of {@code x}, \(m\) is the length of {@code y}, \(F_n\) is the
1789 * empirical distribution that puts mass \(1/n\) at each of the values in {@code x} and \(F_m\)
1790 * is the empirical distribution of the {@code y} values.
1791 *
1792 * @param x first sample
1793 * @param y second sample
1794 * @return test statistic \(D_{n,m}\) used to evaluate the null hypothesis that {@code x} and
1795 * {@code y} represent samples from the same underlying distribution
1796 * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
1797 * least 2
1798 * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
1799 */
1800 public static double kolmogorovSmirnovStatistic(double[] x, double[] y)
1801 throws MathIllegalArgumentException, NullArgumentException {
1802 return KS_TEST.kolmogorovSmirnovStatistic(x, y);
1803 }
1804
1805 /**
1806 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a
1807 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1808 * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
1809 * probability distribution. Assumes the strict form of the inequality used to compute the
1810 * p-value. See {@link KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[], boolean)}.
1811 *
1812 * @param x first sample dataset
1813 * @param y second sample dataset
1814 * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent
1815 * samples from the same distribution
1816 * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
1817 * least 2
1818 * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
1819 */
1820 public static double kolmogorovSmirnovTest(double[] x, double[] y)
1821 throws MathIllegalArgumentException, NullArgumentException {
1822 return KS_TEST.kolmogorovSmirnovTest(x, y);
1823 }
1824
1825 /**
1826 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a
1827 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
1828 * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
1829 * probability distribution. Specifically, what is returned is an estimate of the probability
1830 * that the {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} associated with a randomly
1831 * selected partition of the combined sample into subsamples of sizes {@code x.length} and
1832 * {@code y.length} will strictly exceed (if {@code strict} is {@code true}) or be at least as
1833 * large as {@code strict = false}) as {@code kolmogorovSmirnovStatistic(x, y)}.
1834 * <ul>
1835 * <li>For small samples (where the product of the sample sizes is less than
1836 * {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT}), the exact p-value is computed using the method presented
1837 * in [4], implemented in {@link #exactP(double, int, int, boolean)}. </li>
1838 * <li>When the product of the sample sizes exceeds {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT}, the
1839 * asymptotic distribution of \(D_{n,m}\) is used. See {@link #approximateP(double, int, int)}
1840 * for details on the approximation.</li>
1841 * </ul><p>
1842 * If {@code x.length * y.length} < {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT} and the combined set of values in
1843 * {@code x} and {@code y} contains ties, random jitter is added to {@code x} and {@code y} to
1844 * break ties before computing \(D_{n,m}\) and the p-value. The jitter is uniformly distributed
1845 * on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between
1846 * values in the combined sample.</p>
1847 * <p>
1848 * If ties are known to be present in the data, {@link KolmogorovSmirnovTest#bootstrap(double[], double[], int, boolean)}
1849 * may be used as an alternative method for estimating the p-value.</p>
1850 *
1851 * @param x first sample dataset
1852 * @param y second sample dataset
1853 * @param strict whether or not the probability to compute is expressed as a strict inequality
1854 * (ignored for large samples)
1855 * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent
1856 * samples from the same distribution
1857 * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at
1858 * least 2
1859 * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null
1860 * @see KolmogorovSmirnovTest#bootstrap(double[], double[], int, boolean)
1861 */
1862 public static double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict)
1863 throws MathIllegalArgumentException, NullArgumentException {
1864 return KS_TEST.kolmogorovSmirnovTest(x, y, strict);
1865 }
1866
1867 /**
1868 * Computes \(P(D_{n,m} > d)\) if {@code strict} is {@code true}; otherwise \(P(D_{n,m} \ge
1869 * d)\), where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic. See
1870 * {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
1871 * <p>
1872 * The returned probability is exact, implemented by unwinding the recursive function
1873 * definitions presented in [4] from the class javadoc.
1874 * </p>
1875 *
1876 * @param d D-statistic value
1877 * @param n first sample size
1878 * @param m second sample size
1879 * @param strict whether or not the probability to compute is expressed as a strict inequality
1880 * @return probability that a randomly selected m-n partition of m + n generates \(D_{n,m}\)
1881 * greater than (resp. greater than or equal to) {@code d}
1882 */
1883 public static double exactP(double d, int m, int n, boolean strict) {
1884 return KS_TEST.exactP(d, n, m, strict);
1885 }
1886
1887 /**
1888 * Uses the Kolmogorov-Smirnov distribution to approximate \(P(D_{n,m} > d)\) where \(D_{n,m}\)
1889 * is the 2-sample Kolmogorov-Smirnov statistic. See
1890 * {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\).
1891 * <p>
1892 * Specifically, what is returned is \(1 - k(d \sqrt{mn / (m + n)})\) where \(k(t) = 1 + 2
1893 * \sum_{i=1}^\infty (-1)^i e^{-2 i^2 t^2}\). See {@link KolmogorovSmirnovTest#ksSum(double, double, int)} for
1894 * details on how convergence of the sum is determined. This implementation passes {@code ksSum}
1895 * {@link KolmogorovSmirnovTest#KS_SUM_CAUCHY_CRITERION} as {@code tolerance} and
1896 * {@link KolmogorovSmirnovTest#MAXIMUM_PARTIAL_SUM_COUNT} as {@code maxIterations}.
1897 * </p>
1898 *
1899 * @param d D-statistic value
1900 * @param n first sample size
1901 * @param m second sample size
1902 * @return approximate probability that a randomly selected m-n partition of m + n generates
1903 * \(D_{n,m}\) greater than {@code d}
1904 */
1905 public static double approximateP(double d, int n, int m) {
1906 return KS_TEST.approximateP(d, n, m);
1907 }
1908
1909 }