1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.inference; 23 24 import java.util.Collection; 25 26 import org.hipparchus.distribution.RealDistribution; 27 import org.hipparchus.exception.MathIllegalArgumentException; 28 import org.hipparchus.exception.MathIllegalStateException; 29 import org.hipparchus.exception.NullArgumentException; 30 import org.hipparchus.stat.descriptive.StatisticalSummary; 31 32 /** 33 * A collection of static methods to create inference test instances or to 34 * perform inference tests. 35 */ 36 public class InferenceTestUtils { 37 38 /** Singleton TTest instance. */ 39 private static final TTest T_TEST = new TTest(); 40 41 /** Singleton ChiSquareTest instance. */ 42 private static final ChiSquareTest CHI_SQUARE_TEST = new ChiSquareTest(); 43 44 /** Singleton OneWayAnova instance. */ 45 private static final OneWayAnova ONE_WAY_ANANOVA = new OneWayAnova(); 46 47 /** Singleton G-Test instance. 
*/ 48 private static final GTest G_TEST = new GTest(); 49 50 /** Singleton K-S test instance */ 51 private static final KolmogorovSmirnovTest KS_TEST = new KolmogorovSmirnovTest(); 52 53 /** 54 * Prevent instantiation. 55 */ 56 private InferenceTestUtils() { 57 super(); 58 } 59 60 /** 61 * Computes a 2-sample t statistic, under the hypothesis of equal 62 * subpopulation variances. To compute a t-statistic without the 63 * equal variances hypothesis, use {@link #t(double[], double[])}. 64 * <p> 65 * This statistic can be used to perform a (homoscedastic) two-sample 66 * t-test to compare sample means.</p> 67 * <p> 68 * The t-statistic is</p> 69 * <p> 70 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 71 * </p><p> 72 * where <strong><code>n1</code></strong> is the size of first sample; 73 * <strong><code> n2</code></strong> is the size of second sample; 74 * <strong><code> m1</code></strong> is the mean of first sample; 75 * <strong><code> m2</code></strong> is the mean of second sample 76 * and <strong><code>var</code></strong> is the pooled variance estimate: 77 * </p><p> 78 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 79 * </p><p> 80 * with <strong><code>var1</code></strong> the variance of the first sample and 81 * <strong><code>var2</code></strong> the variance of the second sample. 82 * </p><p> 83 * <strong>Preconditions</strong>:</p> 84 * <ul> 85 * <li>The observed array lengths must both be at least 2. 
86 * </li></ul> 87 * 88 * @param sample1 array of sample data values 89 * @param sample2 array of sample data values 90 * @return t statistic 91 * @throws NullArgumentException if the arrays are <code>null</code> 92 * @throws MathIllegalArgumentException if the length of the arrays is < 2 93 */ 94 public static double homoscedasticT(final double[] sample1, final double[] sample2) 95 throws MathIllegalArgumentException, NullArgumentException { 96 return T_TEST.homoscedasticT(sample1, sample2); 97 } 98 99 /** 100 * Computes a 2-sample t statistic, comparing the means of the datasets 101 * described by two {@link StatisticalSummary} instances, under the 102 * assumption of equal subpopulation variances. To compute a t-statistic 103 * without the equal variances assumption, use 104 * {@link #t(StatisticalSummary, StatisticalSummary)}. 105 * <p> 106 * This statistic can be used to perform a (homoscedastic) two-sample 107 * t-test to compare sample means.</p> 108 * <p> 109 * The t-statistic returned is</p> 110 * <p> 111 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> 112 * </p><p> 113 * where <strong><code>n1</code></strong> is the size of first sample; 114 * <strong><code> n2</code></strong> is the size of second sample; 115 * <strong><code> m1</code></strong> is the mean of first sample; 116 * <strong><code> m2</code></strong> is the mean of second sample 117 * and <strong><code>var</code></strong> is the pooled variance estimate: 118 * </p><p> 119 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code> 120 * </p><p> 121 * with <strong><code>var1</code></strong> the variance of the first sample and 122 * <strong><code>var2</code></strong> the variance of the second sample. 123 * </p><p> 124 * <strong>Preconditions</strong>:</p><ul> 125 * <li>The datasets described by the two Univariates must each contain 126 * at least 2 observations. 
127 * </li></ul> 128 * 129 * @param sampleStats1 StatisticalSummary describing data from the first sample 130 * @param sampleStats2 StatisticalSummary describing data from the second sample 131 * @return t statistic 132 * @throws NullArgumentException if the sample statistics are <code>null</code> 133 * @throws MathIllegalArgumentException if the number of samples is < 2 134 */ 135 public static double homoscedasticT(final StatisticalSummary sampleStats1, 136 final StatisticalSummary sampleStats2) 137 throws MathIllegalArgumentException, NullArgumentException { 138 return T_TEST.homoscedasticT(sampleStats1, sampleStats2); 139 } 140 141 /** 142 * Performs a 143 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 144 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 145 * and <code>sample2</code> are drawn from populations with the same mean, 146 * with significance level <code>alpha</code>, assuming that the 147 * subpopulation variances are equal. Use 148 * {@link #tTest(double[], double[], double)} to perform the test without 149 * the assumption of equal variances. 150 * <p> 151 * Returns <code>true</code> iff the null hypothesis that the means are 152 * equal can be rejected with confidence <code>1 - alpha</code>. To 153 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test 154 * without the assumption of equal subpopulation variances, use 155 * {@link #tTest(double[], double[], double)}.</p> 156 * <p> 157 * A pooled variance estimate is used to compute the t-statistic. See 158 * {@link #t(double[], double[])} for the formula. The sum of the sample 159 * sizes minus 2 is used as the degrees of freedom.</p> 160 * <p> 161 * <strong>Examples:</strong></p><ol> 162 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 163 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). 
</code> 164 * </li> 165 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code> 166 * at the 99% level, first verify that the measured mean of 167 * <code>sample 1</code> is less than the mean of <code>sample 2</code> 168 * and then use 169 * <br><code>tTest(sample1, sample2, 0.02) </code> 170 * </li></ol> 171 * <p> 172 * <strong>Usage Note:</strong><br> 173 * The validity of the test depends on the assumptions of the parametric 174 * t-test procedure, as discussed 175 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 176 * here</a></p> 177 * <p> 178 * <strong>Preconditions</strong>:</p> 179 * <ul> 180 * <li>The observed array lengths must both be at least 2. 181 * </li> 182 * <li> <code> 0 < alpha < 0.5 </code> 183 * </li></ul> 184 * 185 * @param sample1 array of sample data values 186 * @param sample2 array of sample data values 187 * @param alpha significance level of the test 188 * @return true if the null hypothesis can be rejected with 189 * confidence 1 - alpha 190 * @throws NullArgumentException if the arrays are <code>null</code> 191 * @throws MathIllegalArgumentException if the length of the arrays is < 2 192 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 193 * @throws MathIllegalStateException if an error occurs computing the p-value 194 */ 195 public static boolean homoscedasticTTest(final double[] sample1, final double[] sample2, 196 final double alpha) 197 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 198 return T_TEST.homoscedasticTTest(sample1, sample2, alpha); 199 } 200 201 /** 202 * Returns the <i>observed significance level</i>, or 203 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 204 * comparing the means of the input arrays, under the assumption that 205 * the two samples are drawn from subpopulations with equal variances. 
206 * To perform the test without the equal variances assumption, use 207 * {@link #tTest(double[], double[])}. 208 * <p> 209 * The number returned is the smallest significance level 210 * at which one can reject the null hypothesis that the two means are 211 * equal in favor of the two-sided alternative that they are different. 212 * For a one-sided test, divide the returned value by 2.</p> 213 * <p> 214 * A pooled variance estimate is used to compute the t-statistic. See 215 * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes 216 * minus 2 is used as the degrees of freedom.</p> 217 * <p> 218 * <strong>Usage Note:</strong><br> 219 * The validity of the p-value depends on the assumptions of the parametric 220 * t-test procedure, as discussed 221 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 222 * here</a></p> 223 * <p> 224 * <strong>Preconditions</strong>:</p> 225 * <ul> 226 * <li>The observed array lengths must both be at least 2. 227 * </li></ul> 228 * 229 * @param sample1 array of sample data values 230 * @param sample2 array of sample data values 231 * @return p-value for t-test 232 * @throws NullArgumentException if the arrays are <code>null</code> 233 * @throws MathIllegalArgumentException if the length of the arrays is < 2 234 * @throws MathIllegalStateException if an error occurs computing the p-value 235 */ 236 public static double homoscedasticTTest(final double[] sample1, final double[] sample2) 237 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 238 return T_TEST.homoscedasticTTest(sample1, sample2); 239 } 240 241 /** 242 * Returns the <i>observed significance level</i>, or 243 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 244 * comparing the means of the datasets described by two StatisticalSummary 245 * instances, under the hypothesis of equal subpopulation variances. 
To 246 * perform a test without the equal variances assumption, use 247 * {@link #tTest(StatisticalSummary, StatisticalSummary)}. 248 * <p> 249 * The number returned is the smallest significance level 250 * at which one can reject the null hypothesis that the two means are 251 * equal in favor of the two-sided alternative that they are different. 252 * For a one-sided test, divide the returned value by 2.</p> 253 * <p> 254 * See {@link #homoscedasticT(double[], double[])} for the formula used to 255 * compute the t-statistic. The sum of the sample sizes minus 2 is used as 256 * the degrees of freedom.</p> 257 * <p> 258 * <strong>Usage Note:</strong><br> 259 * The validity of the p-value depends on the assumptions of the parametric 260 * t-test procedure, as discussed 261 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 262 * </p><p> 263 * <strong>Preconditions</strong>:</p> 264 * <ul> 265 * <li>The datasets described by the two Univariates must each contain 266 * at least 2 observations. 267 * </li></ul> 268 * 269 * @param sampleStats1 StatisticalSummary describing data from the first sample 270 * @param sampleStats2 StatisticalSummary describing data from the second sample 271 * @return p-value for t-test 272 * @throws NullArgumentException if the sample statistics are <code>null</code> 273 * @throws MathIllegalArgumentException if the number of samples is < 2 274 * @throws MathIllegalStateException if an error occurs computing the p-value 275 */ 276 public static double homoscedasticTTest(final StatisticalSummary sampleStats1, 277 final StatisticalSummary sampleStats2) 278 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 279 return T_TEST.homoscedasticTTest(sampleStats1, sampleStats2); 280 } 281 282 /** 283 * Computes a paired, 2-sample t-statistic based on the data in the input 284 * arrays. 
The t-statistic returned is equivalent to what would be returned by 285 * computing the one-sample t-statistic {@link #t(double, double[])}, with 286 * <code>mu = 0</code> and the sample array consisting of the (signed) 287 * differences between corresponding entries in <code>sample1</code> and 288 * <code>sample2.</code> 289 * <p> 290 * <strong>Preconditions</strong>:</p> 291 * <ul> 292 * <li>The input arrays must have the same length and their common length 293 * must be at least 2. 294 * </li></ul> 295 * 296 * @param sample1 array of sample data values 297 * @param sample2 array of sample data values 298 * @return t statistic 299 * @throws NullArgumentException if the arrays are <code>null</code> 300 * @throws MathIllegalArgumentException if the arrays are empty 301 * @throws MathIllegalArgumentException if the length of the arrays is not equal 302 * @throws MathIllegalArgumentException if the length of the arrays is < 2 303 */ 304 public static double pairedT(final double[] sample1, final double[] sample2) 305 throws MathIllegalArgumentException, NullArgumentException { 306 return T_TEST.pairedT(sample1, sample2); 307 } 308 309 /** 310 * Performs a paired t-test evaluating the null hypothesis that the 311 * mean of the paired differences between <code>sample1</code> and 312 * <code>sample2</code> is 0 in favor of the two-sided alternative that the 313 * mean paired difference is not equal to 0, with significance level 314 * <code>alpha</code>. 315 * <p> 316 * Returns <code>true</code> iff the null hypothesis can be rejected with 317 * confidence <code>1 - alpha</code>. 
To perform a 1-sided test, use 318 * <code>alpha * 2</code></p> 319 * <p> 320 * <strong>Usage Note:</strong><br> 321 * The validity of the test depends on the assumptions of the parametric 322 * t-test procedure, as discussed 323 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 324 * here</a></p> 325 * <p> 326 * <strong>Preconditions</strong>:</p> 327 * <ul> 328 * <li>The input array lengths must be the same and their common length 329 * must be at least 2. 330 * </li> 331 * <li> <code> 0 < alpha < 0.5 </code> 332 * </li></ul> 333 * 334 * @param sample1 array of sample data values 335 * @param sample2 array of sample data values 336 * @param alpha significance level of the test 337 * @return true if the null hypothesis can be rejected with 338 * confidence 1 - alpha 339 * @throws NullArgumentException if the arrays are <code>null</code> 340 * @throws MathIllegalArgumentException if the arrays are empty 341 * @throws MathIllegalArgumentException if the length of the arrays is not equal 342 * @throws MathIllegalArgumentException if the length of the arrays is < 2 343 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 344 * @throws MathIllegalStateException if an error occurs computing the p-value 345 */ 346 public static boolean pairedTTest(final double[] sample1, final double[] sample2, 347 final double alpha) 348 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 349 return T_TEST.pairedTTest(sample1, sample2, alpha); 350 } 351 352 /** 353 * Returns the <i>observed significance level</i>, or 354 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test 355 * based on the data in the input arrays. 
356 * <p> 357 * The number returned is the smallest significance level 358 * at which one can reject the null hypothesis that the mean of the paired 359 * differences is 0 in favor of the two-sided alternative that the mean paired 360 * difference is not equal to 0. For a one-sided test, divide the returned 361 * value by 2.</p> 362 * <p> 363 * This test is equivalent to a one-sample t-test computed using 364 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample 365 * array consisting of the signed differences between corresponding elements of 366 * <code>sample1</code> and <code>sample2.</code></p> 367 * <p> 368 * <strong>Usage Note:</strong><br> 369 * The validity of the p-value depends on the assumptions of the parametric 370 * t-test procedure, as discussed 371 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 372 * here</a></p> 373 * <p> 374 * <strong>Preconditions</strong>:</p> 375 * <ul> 376 * <li>The input array lengths must be the same and their common length must 377 * be at least 2. 
378 * </li></ul> 379 * 380 * @param sample1 array of sample data values 381 * @param sample2 array of sample data values 382 * @return p-value for t-test 383 * @throws NullArgumentException if the arrays are <code>null</code> 384 * @throws MathIllegalArgumentException if the arrays are empty 385 * @throws MathIllegalArgumentException if the length of the arrays is not equal 386 * @throws MathIllegalArgumentException if the length of the arrays is < 2 387 * @throws MathIllegalStateException if an error occurs computing the p-value 388 */ 389 public static double pairedTTest(final double[] sample1, final double[] sample2) 390 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 391 return T_TEST.pairedTTest(sample1, sample2); 392 } 393 394 /** 395 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 396 * t statistic </a> given observed values and a comparison constant. 397 * <p> 398 * This statistic can be used to perform a one sample t-test for the mean. 399 * </p><p> 400 * <strong>Preconditions</strong>:</p> 401 * <ul> 402 * <li>The observed array length must be at least 2. 403 * </li></ul> 404 * 405 * @param mu comparison constant 406 * @param observed array of values 407 * @return t statistic 408 * @throws NullArgumentException if <code>observed</code> is <code>null</code> 409 * @throws MathIllegalArgumentException if the length of <code>observed</code> is < 2 410 */ 411 public static double t(final double mu, final double[] observed) 412 throws MathIllegalArgumentException, NullArgumentException { 413 return T_TEST.t(mu, observed); 414 } 415 416 /** 417 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 418 * t statistic </a> to use in comparing the mean of the dataset described by 419 * <code>sampleStats</code> to <code>mu</code>. 420 * <p> 421 * This statistic can be used to perform a one sample t-test for the mean. 
422 * </p><p> 423 * <strong>Preconditions</strong>:</p> 424 * <ul> 425 * <li><code>observed.getN() ≥ 2</code>. 426 * </li></ul> 427 * 428 * @param mu comparison constant 429 * @param sampleStats DescriptiveStatistics holding sample summary statitstics 430 * @return t statistic 431 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 432 * @throws MathIllegalArgumentException if the number of samples is < 2 433 */ 434 public static double t(final double mu, final StatisticalSummary sampleStats) 435 throws MathIllegalArgumentException, NullArgumentException { 436 return T_TEST.t(mu, sampleStats); 437 } 438 439 /** 440 * Computes a 2-sample t statistic, without the hypothesis of equal 441 * subpopulation variances. To compute a t-statistic assuming equal 442 * variances, use {@link #homoscedasticT(double[], double[])}. 443 * <p> 444 * This statistic can be used to perform a two-sample t-test to compare 445 * sample means.</p> 446 * <p> 447 * The t-statistic is</p> 448 * <p> 449 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 450 * </p><p> 451 * where <strong><code>n1</code></strong> is the size of the first sample 452 * <strong><code> n2</code></strong> is the size of the second sample; 453 * <strong><code> m1</code></strong> is the mean of the first sample; 454 * <strong><code> m2</code></strong> is the mean of the second sample; 455 * <strong><code> var1</code></strong> is the variance of the first sample; 456 * <strong><code> var2</code></strong> is the variance of the second sample; 457 * </p><p> 458 * <strong>Preconditions</strong>:</p> 459 * <ul> 460 * <li>The observed array lengths must both be at least 2. 
461 * </li></ul> 462 * 463 * @param sample1 array of sample data values 464 * @param sample2 array of sample data values 465 * @return t statistic 466 * @throws NullArgumentException if the arrays are <code>null</code> 467 * @throws MathIllegalArgumentException if the length of the arrays is < 2 468 */ 469 public static double t(final double[] sample1, final double[] sample2) 470 throws MathIllegalArgumentException, NullArgumentException { 471 return T_TEST.t(sample1, sample2); 472 } 473 474 /** 475 * Computes a 2-sample t statistic, comparing the means of the datasets 476 * described by two {@link StatisticalSummary} instances, without the 477 * assumption of equal subpopulation variances. Use 478 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to 479 * compute a t-statistic under the equal variances assumption. 480 * <p> 481 * This statistic can be used to perform a two-sample t-test to compare 482 * sample means.</p> 483 * <p> 484 * The returned t-statistic is</p> 485 * <p> 486 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> 487 * </p><p> 488 * where <strong><code>n1</code></strong> is the size of the first sample; 489 * <strong><code> n2</code></strong> is the size of the second sample; 490 * <strong><code> m1</code></strong> is the mean of the first sample; 491 * <strong><code> m2</code></strong> is the mean of the second sample 492 * <strong><code> var1</code></strong> is the variance of the first sample; 493 * <strong><code> var2</code></strong> is the variance of the second sample 494 * </p><p> 495 * <strong>Preconditions</strong>:</p> 496 * <ul> 497 * <li>The datasets described by the two Univariates must each contain 498 * at least 2 observations. 
499 * </li></ul> 500 * 501 * @param sampleStats1 StatisticalSummary describing data from the first sample 502 * @param sampleStats2 StatisticalSummary describing data from the second sample 503 * @return t statistic 504 * @throws NullArgumentException if the sample statistics are <code>null</code> 505 * @throws MathIllegalArgumentException if the number of samples is < 2 506 */ 507 public static double t(final StatisticalSummary sampleStats1, 508 final StatisticalSummary sampleStats2) 509 throws MathIllegalArgumentException, NullArgumentException { 510 return T_TEST.t(sampleStats1, sampleStats2); 511 } 512 513 /** 514 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 515 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from 516 * which <code>sample</code> is drawn equals <code>mu</code>. 517 * <p> 518 * Returns <code>true</code> iff the null hypothesis can be 519 * rejected with confidence <code>1 - alpha</code>. To 520 * perform a 1-sided test, use <code>alpha * 2</code></p> 521 * <p> 522 * <strong>Examples:</strong></p><ol> 523 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 524 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code> 525 * </li> 526 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 527 * at the 99% level, first verify that the measured sample mean is less 528 * than <code>mu</code> and then use 529 * <br><code>tTest(mu, sample, 0.02) </code> 530 * </li></ol> 531 * <p> 532 * <strong>Usage Note:</strong><br> 533 * The validity of the test depends on the assumptions of the one-sample 534 * parametric t-test procedure, as discussed 535 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 536 * </p><p> 537 * <strong>Preconditions</strong>:</p> 538 * <ul> 539 * <li>The observed array length must be at least 2. 
540 * </li></ul> 541 * 542 * @param mu constant value to compare sample mean against 543 * @param sample array of sample data values 544 * @param alpha significance level of the test 545 * @return p-value 546 * @throws NullArgumentException if the sample array is <code>null</code> 547 * @throws MathIllegalArgumentException if the length of the array is < 2 548 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 549 * @throws MathIllegalStateException if an error computing the p-value 550 */ 551 public static boolean tTest(final double mu, final double[] sample, final double alpha) 552 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 553 return T_TEST.tTest(mu, sample, alpha); 554 } 555 556 /** 557 * Returns the <i>observed significance level</i>, or 558 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 559 * comparing the mean of the input array with the constant <code>mu</code>. 560 * <p> 561 * The number returned is the smallest significance level 562 * at which one can reject the null hypothesis that the mean equals 563 * <code>mu</code> in favor of the two-sided alternative that the mean 564 * is different from <code>mu</code>. For a one-sided test, divide the 565 * returned value by 2.</p> 566 * <p> 567 * <strong>Usage Note:</strong><br> 568 * The validity of the test depends on the assumptions of the parametric 569 * t-test procedure, as discussed 570 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> 571 * </p><p> 572 * <strong>Preconditions</strong>:</p> 573 * <ul> 574 * <li>The observed array length must be at least 2. 
575 * </li></ul> 576 * 577 * @param mu constant value to compare sample mean against 578 * @param sample array of sample data values 579 * @return p-value 580 * @throws NullArgumentException if the sample array is <code>null</code> 581 * @throws MathIllegalArgumentException if the length of the array is < 2 582 * @throws MathIllegalStateException if an error occurs computing the p-value 583 */ 584 public static double tTest(final double mu, final double[] sample) 585 throws MathIllegalArgumentException, NullArgumentException, 586 MathIllegalStateException { 587 return T_TEST.tTest(mu, sample); 588 } 589 590 /** 591 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 592 * two-sided t-test</a> evaluating the null hypothesis that the mean of the 593 * population from which the dataset described by <code>stats</code> is 594 * drawn equals <code>mu</code>. 595 * <p> 596 * Returns <code>true</code> iff the null hypothesis can be rejected with 597 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use 598 * <code>alpha * 2.</code></p> 599 * <p> 600 * <strong>Examples:</strong></p><ol> 601 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at 602 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code> 603 * </li> 604 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> 605 * at the 99% level, first verify that the measured sample mean is less 606 * than <code>mu</code> and then use 607 * <br><code>tTest(mu, sampleStats, 0.02) </code> 608 * </li></ol> 609 * <p> 610 * <strong>Usage Note:</strong><br> 611 * The validity of the test depends on the assumptions of the one-sample 612 * parametric t-test procedure, as discussed 613 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> 614 * </p><p> 615 * <strong>Preconditions</strong>:</p> 616 * <ul> 617 * <li>The sample must include at least 2 observations. 
618 * </li></ul> 619 * 620 * @param mu constant value to compare sample mean against 621 * @param sampleStats StatisticalSummary describing sample data values 622 * @param alpha significance level of the test 623 * @return p-value 624 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 625 * @throws MathIllegalArgumentException if the number of samples is < 2 626 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 627 * @throws MathIllegalStateException if an error occurs computing the p-value 628 */ 629 public static boolean tTest(final double mu, final StatisticalSummary sampleStats, 630 final double alpha) 631 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 632 return T_TEST.tTest(mu, sampleStats, alpha); 633 } 634 635 /** 636 * Returns the <i>observed significance level</i>, or 637 * <i>p-value</i>, associated with a one-sample, two-tailed t-test 638 * comparing the mean of the dataset described by <code>sampleStats</code> 639 * with the constant <code>mu</code>. 640 * <p> 641 * The number returned is the smallest significance level 642 * at which one can reject the null hypothesis that the mean equals 643 * <code>mu</code> in favor of the two-sided alternative that the mean 644 * is different from <code>mu</code>. For a one-sided test, divide the 645 * returned value by 2.</p> 646 * <p> 647 * <strong>Usage Note:</strong><br> 648 * The validity of the test depends on the assumptions of the parametric 649 * t-test procedure, as discussed 650 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 651 * here</a></p> 652 * <p> 653 * <strong>Preconditions</strong>:</p> 654 * <ul> 655 * <li>The sample must contain at least 2 observations. 
656 * </li></ul> 657 * 658 * @param mu constant value to compare sample mean against 659 * @param sampleStats StatisticalSummary describing sample data 660 * @return p-value 661 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code> 662 * @throws MathIllegalArgumentException if the number of samples is < 2 663 * @throws MathIllegalStateException if an error occurs computing the p-value 664 */ 665 public static double tTest(final double mu, final StatisticalSummary sampleStats) 666 throws MathIllegalArgumentException, NullArgumentException, 667 MathIllegalStateException { 668 return T_TEST.tTest(mu, sampleStats); 669 } 670 671 /** 672 * Performs a 673 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 674 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 675 * and <code>sample2</code> are drawn from populations with the same mean, 676 * with significance level <code>alpha</code>. This test does not assume 677 * that the subpopulation variances are equal. To perform the test assuming 678 * equal variances, use 679 * {@link #homoscedasticTTest(double[], double[], double)}. 680 * <p> 681 * Returns <code>true</code> iff the null hypothesis that the means are 682 * equal can be rejected with confidence <code>1 - alpha</code>. To 683 * perform a 1-sided test, use <code>alpha * 2</code></p> 684 * <p> 685 * See {@link #t(double[], double[])} for the formula used to compute the 686 * t-statistic. Degrees of freedom are approximated using the 687 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 688 * Welch-Satterthwaite approximation.</a></p> 689 * <p> 690 * <strong>Examples:</strong></p><ol> 691 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 692 * the 95% level, use 693 * <br><code>tTest(sample1, sample2, 0.05). 
</code> 694 * </li> 695 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>, 696 * at the 99% level, first verify that the measured mean of <code>sample 1</code> 697 * is less than the mean of <code>sample 2</code> and then use 698 * <br><code>tTest(sample1, sample2, 0.02) </code> 699 * </li></ol> 700 * <p> 701 * <strong>Usage Note:</strong><br> 702 * The validity of the test depends on the assumptions of the parametric 703 * t-test procedure, as discussed 704 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 705 * here</a></p> 706 * <p> 707 * <strong>Preconditions</strong>:</p> 708 * <ul> 709 * <li>The observed array lengths must both be at least 2. 710 * </li> 711 * <li> <code> 0 < alpha < 0.5 </code> 712 * </li></ul> 713 * 714 * @param sample1 array of sample data values 715 * @param sample2 array of sample data values 716 * @param alpha significance level of the test 717 * @return true if the null hypothesis can be rejected with 718 * confidence 1 - alpha 719 * @throws NullArgumentException if the arrays are <code>null</code> 720 * @throws MathIllegalArgumentException if the length of the arrays is < 2 721 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 722 * @throws MathIllegalStateException if an error occurs computing the p-value 723 */ 724 public static boolean tTest(final double[] sample1, final double[] sample2, 725 final double alpha) 726 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 727 return T_TEST.tTest(sample1, sample2, alpha); 728 } 729 730 /** 731 * Returns the <i>observed significance level</i>, or 732 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 733 * comparing the means of the input arrays. 
734 * <p> 735 * The number returned is the smallest significance level 736 * at which one can reject the null hypothesis that the two means are 737 * equal in favor of the two-sided alternative that they are different. 738 * For a one-sided test, divide the returned value by 2.</p> 739 * <p> 740 * The test does not assume that the underlying population variances are 741 * equal and it uses approximated degrees of freedom computed from the 742 * sample data to compute the p-value. The t-statistic used is as defined in 743 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation 744 * to the degrees of freedom is used, 745 * as described 746 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 747 * here.</a> To perform the test under the assumption of equal subpopulation 748 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p> 749 * <p> 750 * <strong>Usage Note:</strong><br> 751 * The validity of the p-value depends on the assumptions of the parametric 752 * t-test procedure, as discussed 753 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 754 * here</a></p> 755 * <p> 756 * <strong>Preconditions</strong>:</p> 757 * <ul> 758 * <li>The observed array lengths must both be at least 2. 
759 * </li></ul> 760 * 761 * @param sample1 array of sample data values 762 * @param sample2 array of sample data values 763 * @return p-value for t-test 764 * @throws NullArgumentException if the arrays are <code>null</code> 765 * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2 766 * @throws MathIllegalStateException if an error occurs computing the p-value 767 */ 768 public static double tTest(final double[] sample1, final double[] sample2) 769 throws MathIllegalArgumentException, NullArgumentException, 770 MathIllegalStateException { 771 return T_TEST.tTest(sample1, sample2); 772 } 773 774 /** 775 * Performs a 776 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> 777 * two-sided t-test</a> evaluating the null hypothesis that 778 * <code>sampleStats1</code> and <code>sampleStats2</code> describe 779 * datasets drawn from populations with the same mean, with significance 780 * level <code>alpha</code>. This test does not assume that the 781 * subpopulation variances are equal. To perform the test under the equal 782 * variances assumption, use 783 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}. 784 * <p> 785 * Returns <code>true</code> iff the null hypothesis that the means are 786 * equal can be rejected with confidence <code>1 - alpha</code>. To 787 * perform a 1-sided test, use <code>alpha * 2</code></p> 788 * <p> 789 * See {@link #t(double[], double[])} for the formula used to compute the 790 * t-statistic. 
Degrees of freedom are approximated using the 791 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> 792 * Welch-Satterthwaite approximation.</a></p> 793 * <p> 794 * <strong>Examples:</strong></p><ol> 795 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at 796 * the 95% level, use 797 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code> 798 * </li> 799 * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code> 800 * at the 99% level, first verify that the measured mean of 801 * <code>sample 1</code> is less than the mean of <code>sample 2</code> 802 * and then use 803 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code> 804 * </li></ol> 805 * <p> 806 * <strong>Usage Note:</strong><br> 807 * The validity of the test depends on the assumptions of the parametric 808 * t-test procedure, as discussed 809 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 810 * here</a></p> 811 * <p> 812 * <strong>Preconditions</strong>:</p> 813 * <ul> 814 * <li>The datasets described by the two Univariates must each contain 815 * at least 2 observations. 
816 * </li> 817 * <li> <code> 0 &lt; alpha &lt;= 0.5 </code> 818 * </li></ul> 819 * 820 * @param sampleStats1 StatisticalSummary describing sample data values 821 * @param sampleStats2 StatisticalSummary describing sample data values 822 * @param alpha significance level of the test 823 * @return true if the null hypothesis can be rejected with 824 * confidence 1 - alpha 825 * @throws NullArgumentException if the sample statistics are <code>null</code> 826 * @throws MathIllegalArgumentException if the number of samples is &lt; 2 827 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 828 * @throws MathIllegalStateException if an error occurs computing the p-value 829 */ 830 public static boolean tTest(final StatisticalSummary sampleStats1, 831 final StatisticalSummary sampleStats2, 832 final double alpha) 833 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 834 return T_TEST.tTest(sampleStats1, sampleStats2, alpha); 835 } 836 837 /** 838 * Returns the <i>observed significance level</i>, or 839 * <i>p-value</i>, associated with a two-sample, two-tailed t-test 840 * comparing the means of the datasets described by two StatisticalSummary 841 * instances. 842 * <p> 843 * The number returned is the smallest significance level 844 * at which one can reject the null hypothesis that the two means are 845 * equal in favor of the two-sided alternative that they are different. 846 * For a one-sided test, divide the returned value by 2.</p> 847 * <p> 848 * The test does not assume that the underlying population variances are 849 * equal and it uses approximated degrees of freedom computed from the 850 * sample data to compute the p-value. 
To perform the test assuming 851 * equal variances, use 852 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p> 853 * <p> 854 * <strong>Usage Note:</strong><br> 855 * The validity of the p-value depends on the assumptions of the parametric 856 * t-test procedure, as discussed 857 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> 858 * here</a></p> 859 * <p> 860 * <strong>Preconditions</strong>:</p> 861 * <ul> 862 * <li>The datasets described by the two Univariates must each contain 863 * at least 2 observations. 864 * </li></ul> 865 * 866 * @param sampleStats1 StatisticalSummary describing data from the first sample 867 * @param sampleStats2 StatisticalSummary describing data from the second sample 868 * @return p-value for t-test 869 * @throws NullArgumentException if the sample statistics are <code>null</code> 870 * @throws MathIllegalArgumentException if the number of samples is &lt; 2 871 * @throws MathIllegalStateException if an error occurs computing the p-value 872 */ 873 public static double tTest(final StatisticalSummary sampleStats1, 874 final StatisticalSummary sampleStats2) 875 throws MathIllegalArgumentException, NullArgumentException, 876 MathIllegalStateException { 877 return T_TEST.tTest(sampleStats1, sampleStats2); 878 } 879 880 /** 881 * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 882 * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code> 883 * frequency counts. 884 * <p> 885 * This statistic can be used to perform a Chi-Square test evaluating the null 886 * hypothesis that the observed counts follow the expected distribution. 
887 * <p> 888 * <strong>Preconditions</strong>: 889 * <ul> 890 * <li>Expected counts must all be positive.</li> 891 * <li>Observed counts must all be ≥ 0.</li> 892 * <li>The observed and expected arrays must have the same length and 893 * their common length must be at least 2.</li> 894 * </ul> 895 * <p> 896 * If any of the preconditions are not met, a 897 * <code>MathIllegalArgumentException</code> is thrown. 898 * <p> 899 * <strong>Note: </strong>This implementation rescales the 900 * <code>expected</code> array if necessary to ensure that the sums of the 901 * expected and observed counts are equal. 902 * 903 * @param observed array of observed frequency counts 904 * @param expected array of expected frequency counts 905 * @return chiSquare test statistic 906 * @throws MathIllegalArgumentException if <code>observed</code> has negative entries 907 * @throws MathIllegalArgumentException if <code>expected</code> has entries that are 908 * not strictly positive 909 * @throws MathIllegalArgumentException if the arrays length is less than 2 910 */ 911 public static double chiSquare(final double[] expected, final long[] observed) 912 throws MathIllegalArgumentException { 913 return CHI_SQUARE_TEST.chiSquare(expected, observed); 914 } 915 916 /** 917 * Computes the Chi-Square statistic associated with a 918 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 919 * chi-square test of independence</a> based on the input <code>counts</code> 920 * array, viewed as a two-way table. 921 * <p> 922 * The rows of the 2-way table are 923 * <code>count[0], ... , count[count.length - 1] </code> 924 * <p> 925 * <strong>Preconditions</strong>: 926 * <ul> 927 * <li>All counts must be ≥ 0.</li> 928 * <li>The count array must be rectangular (i.e. 
all count[i] subarrays 929 * must have the same length).</li> 930 * <li>The 2-way table represented by <code>counts</code> must have at 931 * least 2 columns and at least 2 rows.</li> 932 * </ul> 933 * <p> 934 * If any of the preconditions are not met, a 935 * <code>MathIllegalArgumentException</code> is thrown. 936 * 937 * @param counts array representation of 2-way table 938 * @return chiSquare test statistic 939 * @throws NullArgumentException if the array is null 940 * @throws MathIllegalArgumentException if the array is not rectangular 941 * @throws MathIllegalArgumentException if {@code counts} has negative entries 942 */ 943 public static double chiSquare(final long[][] counts) 944 throws MathIllegalArgumentException, NullArgumentException { 945 return CHI_SQUARE_TEST.chiSquare(counts); 946 } 947 948 /** 949 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 950 * Chi-square goodness of fit test</a> evaluating the null hypothesis that the 951 * observed counts conform to the frequency distribution described by the expected 952 * counts, with significance level <code>alpha</code>. Returns true iff the null 953 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence. 954 * <p> 955 * <strong>Example:</strong><br> 956 * To test the hypothesis that <code>observed</code> follows 957 * <code>expected</code> at the 99% level, use 958 * <code>chiSquareTest(expected, observed, 0.01)</code> 959 * <p> 960 * <strong>Preconditions</strong>: 961 * <ul> 962 * <li>Expected counts must all be positive.</li> 963 * <li>Observed counts must all be ≥ 0.</li> 964 * <li>The observed and expected arrays must have the same length and 965 * their common length must be at least 2.</li> 966 * <li><code> 0 &lt; alpha &lt;= 0.5</code></li> 967 * </ul> 968 * <p> 969 * If any of the preconditions are not met, a 970 * <code>MathIllegalArgumentException</code> is thrown. 
971 * <p> 972 * <strong>Note: </strong>This implementation rescales the 973 * <code>expected</code> array if necessary to ensure that the sums of the 974 * expected and observed counts are equal. 975 * 976 * @param observed array of observed frequency counts 977 * @param expected array of expected frequency counts 978 * @param alpha significance level of the test 979 * @return true iff null hypothesis can be rejected with confidence 980 * 1 - alpha 981 * @throws MathIllegalArgumentException if <code>observed</code> has negative entries 982 * @throws MathIllegalArgumentException if <code>expected</code> has entries that are 983 * not strictly positive 984 * @throws MathIllegalArgumentException if the arrays length is less than 2 985 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 986 * @throws MathIllegalStateException if an error occurs computing the p-value 987 */ 988 public static boolean chiSquareTest(final double[] expected, final long[] observed, 989 final double alpha) 990 throws MathIllegalArgumentException, MathIllegalStateException { 991 return CHI_SQUARE_TEST.chiSquareTest(expected, observed, alpha); 992 } 993 994 /** 995 * Returns the <i>observed significance level</i>, or <a href= 996 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 997 * p-value</a>, associated with a 998 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm"> 999 * Chi-square goodness of fit test</a> comparing the <code>observed</code> 1000 * frequency counts to those in the <code>expected</code> array. 1001 * <p> 1002 * The number returned is the smallest significance level at which one can reject 1003 * the null hypothesis that the observed counts conform to the frequency distribution 1004 * described by the expected counts. 
1005 * <p> 1006 * <strong>Preconditions</strong>: 1007 * <ul> 1008 * <li>Expected counts must all be positive.</li> 1009 * <li>Observed counts must all be ≥ 0.</li> 1010 * <li>The observed and expected arrays must have the same length and 1011 * their common length must be at least 2.</li> 1012 * </ul> 1013 * <p> 1014 * If any of the preconditions are not met, a 1015 * <code>MathIllegalArgumentException</code> is thrown. 1016 * <p> 1017 * <strong>Note: </strong>This implementation rescales the 1018 * <code>expected</code> array if necessary to ensure that the sums of the 1019 * expected and observed counts are equal. 1020 * 1021 * @param observed array of observed frequency counts 1022 * @param expected array of expected frequency counts 1023 * @return p-value 1024 * @throws MathIllegalArgumentException if <code>observed</code> has negative entries 1025 * @throws MathIllegalArgumentException if <code>expected</code> has entries that are 1026 * not strictly positive 1027 * @throws MathIllegalArgumentException if the arrays length is less than 2 1028 * @throws MathIllegalStateException if an error occurs computing the p-value 1029 */ 1030 public static double chiSquareTest(final double[] expected, final long[] observed) 1031 throws MathIllegalArgumentException, MathIllegalStateException { 1032 return CHI_SQUARE_TEST.chiSquareTest(expected, observed); 1033 } 1034 1035 /** 1036 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 1037 * chi-square test of independence</a> evaluating the null hypothesis that the 1038 * classifications represented by the counts in the columns of the input 2-way table 1039 * are independent of the rows, with significance level <code>alpha</code>. 1040 * Returns true iff the null hypothesis can be rejected with 100 * (1 - alpha) percent 1041 * confidence. 1042 * <p> 1043 * The rows of the 2-way table are 1044 * <code>count[0], ... 
, count[count.length - 1] </code> 1045 * <p> 1046 * <strong>Example:</strong><br> 1047 * To test the null hypothesis that the counts in 1048 * <code>count[0], ... , count[count.length - 1] </code> 1049 * all correspond to the same underlying probability distribution at the 99% level, 1050 * use <code>chiSquareTest(counts, 0.01)</code>. 1051 * <p> 1052 * <strong>Preconditions</strong>: 1053 * <ul> 1054 * <li>All counts must be ≥ 0.</li> 1055 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the 1056 * same length).</li> 1057 * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and 1058 * at least 2 rows.</li> 1059 * </ul> 1060 * <p> 1061 * If any of the preconditions are not met, a 1062 * <code>MathIllegalArgumentException</code> is thrown. 1063 * 1064 * @param counts array representation of 2-way table 1065 * @param alpha significance level of the test 1066 * @return true iff null hypothesis can be rejected with confidence 1067 * 1 - alpha 1068 * @throws NullArgumentException if the array is null 1069 * @throws MathIllegalArgumentException if the array is not rectangular 1070 * @throws MathIllegalArgumentException if {@code counts} has any negative entries 1071 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 1072 * @throws MathIllegalStateException if an error occurs computing the p-value 1073 */ 1074 public static boolean chiSquareTest(final long[][] counts, final double alpha) 1075 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 1076 return CHI_SQUARE_TEST.chiSquareTest(counts, alpha); 1077 } 1078 1079 /** 1080 * Returns the <i>observed significance level</i>, or <a href= 1081 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 1082 * p-value</a>, associated with a 1083 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm"> 1084 * chi-square test of independence</a> based on the input 
<code>counts</code> 1085 * array, viewed as a two-way table. 1086 * <p> 1087 * The rows of the 2-way table are 1088 * <code>count[0], ... , count[count.length - 1] </code> 1089 * <p> 1090 * <strong>Preconditions</strong>: 1091 * <ul> 1092 * <li>All counts must be ≥ 0.</li> 1093 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have 1094 * the same length).</li> 1095 * <li>The 2-way table represented by <code>counts</code> must have at least 2 1096 * columns and at least 2 rows.</li> 1097 * </ul> 1098 * <p> 1099 * If any of the preconditions are not met, a 1100 * <code>MathIllegalArgumentException</code> is thrown. 1101 * 1102 * @param counts array representation of 2-way table 1103 * @return p-value 1104 * @throws NullArgumentException if the array is null 1105 * @throws MathIllegalArgumentException if the array is not rectangular 1106 * @throws MathIllegalArgumentException if {@code counts} has negative entries 1107 * @throws MathIllegalStateException if an error occurs computing the p-value 1108 */ 1109 public static double chiSquareTest(final long[][] counts) 1110 throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException { 1111 return CHI_SQUARE_TEST.chiSquareTest(counts); 1112 } 1113 1114 /** 1115 * Computes a 1116 * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm"> 1117 * Chi-Square two sample test statistic</a> comparing bin frequency counts 1118 * in <code>observed1</code> and <code>observed2</code>. 1119 * <p> 1120 * The sums of frequency counts in the two samples are not required to be the 1121 * same. 
The formula used to compute the test statistic is 1122 * </p> 1123 * <code> 1124 * ∑[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])] 1125 * </code> 1126 * <p> 1127 * where 1128 * </p> 1129 * <code>K = √[∑(observed2) / ∑(observed1)]</code> 1130 * <p> 1131 * This statistic can be used to perform a Chi-Square test evaluating the 1132 * null hypothesis that both observed counts follow the same distribution. 1133 * </p> 1134 * <p><strong>Preconditions</strong>:</p> 1135 * <ul> 1136 * <li>Observed counts must be non-negative.</li> 1137 * <li>Observed counts for a specific bin must not both be zero.</li> 1138 * <li>Observed counts for a specific sample must not all be 0.</li> 1139 * <li>The arrays <code>observed1</code> and <code>observed2</code> must have 1140 * the same length and their common length must be at least 2.</li> 1141 * </ul> 1142 * <p> 1143 * If any of the preconditions are not met, a 1144 * <code>MathIllegalArgumentException</code> is thrown. 1145 * </p> 1146 * 1147 * @param observed1 array of observed frequency counts of the first data set 1148 * @param observed2 array of observed frequency counts of the second data set 1149 * @return chiSquare test statistic 1150 * @throws MathIllegalArgumentException if the length of the arrays does not match 1151 * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or 1152 * <code>observed2</code> are negative 1153 * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or 1154 * <code>observed2</code> are zero, or if the count at some index is zero 1155 * for both arrays 1156 */ 1157 public static double chiSquareDataSetsComparison(final long[] observed1, 1158 final long[] observed2) 1159 throws MathIllegalArgumentException { 1160 return CHI_SQUARE_TEST.chiSquareDataSetsComparison(observed1, observed2); 1161 } 1162 1163 /** 1164 * Returns the <i>observed significance level</i>, or <a href= 1165 * 
"http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 1166 * p-value</a>, associated with a Chi-Square two sample test comparing 1167 * bin frequency counts in <code>observed1</code> and 1168 * <code>observed2</code>. 1169 * <p> 1170 * The number returned is the smallest significance level at which one 1171 * can reject the null hypothesis that the observed counts conform to the 1172 * same distribution. 1173 * <p> 1174 * See {@link #chiSquareDataSetsComparison(long[], long[])} for details 1175 * on the formula used to compute the test statistic. The number of degrees 1176 * of freedom used to perform the test is one less than the common length 1177 * of the input observed count arrays. 1178 * <p> 1179 * <strong>Preconditions</strong>: 1180 * <ul> 1181 * <li>Observed counts must be non-negative.</li> 1182 * <li>Observed counts for a specific bin must not both be zero.</li> 1183 * <li>Observed counts for a specific sample must not all be 0.</li> 1184 * <li>The arrays <code>observed1</code> and <code>observed2</code> must 1185 * have the same length and their common length must be at least 2.</li> 1186 * </ul> 1187 * <p> 1188 * If any of the preconditions are not met, a 1189 * <code>MathIllegalArgumentException</code> is thrown. 
1190 * 1191 * @param observed1 array of observed frequency counts of the first data set 1192 * @param observed2 array of observed frequency counts of the second data set 1193 * @return p-value 1194 * @throws MathIllegalArgumentException if the length of the arrays does not match 1195 * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or 1196 * <code>observed2</code> are negative 1197 * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or 1198 * <code>observed2</code> are zero, or if the count at the same index is zero 1199 * for both arrays 1200 * @throws MathIllegalStateException if an error occurs computing the p-value 1201 */ 1202 public static double chiSquareTestDataSetsComparison(final long[] observed1, 1203 final long[] observed2) 1204 throws MathIllegalArgumentException, 1205 MathIllegalStateException { 1206 return CHI_SQUARE_TEST.chiSquareTestDataSetsComparison(observed1, observed2); 1207 } 1208 1209 /** 1210 * Performs a Chi-Square two sample test comparing two binned data 1211 * sets. The test evaluates the null hypothesis that the two lists of 1212 * observed counts conform to the same frequency distribution, with 1213 * significance level <code>alpha</code>. Returns true iff the null 1214 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence. 1215 * <p> 1216 * See {@link #chiSquareDataSetsComparison(long[], long[])} for 1217 * details on the formula used to compute the Chi-Square statistic used 1218 * in the test. The number of degrees of freedom used to perform the test is 1219 * one less than the common length of the input observed count arrays. 
1220 * <p> 1221 * <strong>Preconditions</strong>: 1222 * <ul> 1223 * <li>Observed counts must be non-negative.</li> 1224 * <li>Observed counts for a specific bin must not both be zero.</li> 1225 * <li>Observed counts for a specific sample must not all be 0.</li> 1226 * <li>The arrays <code>observed1</code> and <code>observed2</code> must 1227 * have the same length and their common length must be at least 2.</li> 1228 * <li><code> 0 &lt; alpha &lt;= 0.5</code></li> 1229 * </ul> 1230 * <p> 1231 * If any of the preconditions are not met, a 1232 * <code>MathIllegalArgumentException</code> is thrown. 1233 * 1234 * @param observed1 array of observed frequency counts of the first data set 1235 * @param observed2 array of observed frequency counts of the second data set 1236 * @param alpha significance level of the test 1237 * @return true iff null hypothesis can be rejected with confidence 1238 * 1 - alpha 1239 * @throws MathIllegalArgumentException if the length of the arrays does not match 1240 * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or 1241 * <code>observed2</code> are negative 1242 * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or 1243 * <code>observed2</code> are zero, or if the count at the same index is zero 1244 * for both arrays 1245 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 1246 * @throws MathIllegalStateException if an error occurs performing the test 1247 */ 1248 public static boolean chiSquareTestDataSetsComparison(final long[] observed1, 1249 final long[] observed2, 1250 final double alpha) 1251 throws MathIllegalArgumentException, MathIllegalStateException { 1252 return CHI_SQUARE_TEST.chiSquareTestDataSetsComparison(observed1, observed2, alpha); 1253 } 1254 1255 /** 1256 * Computes the ANOVA F-value for a collection of <code>double[]</code> 1257 * arrays. 
1258 * 1259 * <p><strong>Preconditions</strong>:</p> 1260 * <ul> 1261 * <li>The categoryData <code>Collection</code> must contain 1262 * <code>double[]</code> arrays.</li> 1263 * <li> There must be at least two <code>double[]</code> arrays in the 1264 * <code>categoryData</code> collection and each of these arrays must 1265 * contain at least two values.</li></ul> 1266 * <p> 1267 * This implementation computes the F statistic using the definitional 1268 * formula</p> 1269 * <pre> 1270 * F = msbg/mswg</pre> 1271 * <p>where</p> 1272 * <pre> 1273 * msbg = between group mean square 1274 * mswg = within group mean square</pre> 1275 * <p> 1276 * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html"> 1277 * here</a></p> 1278 * 1279 * @param categoryData <code>Collection</code> of <code>double[]</code> 1280 * arrays each containing data for one category 1281 * @return the F-value 1282 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code> 1283 * @throws MathIllegalArgumentException if the size of the <code>categoryData</code> 1284 * collection is less than 2 or a contained <code>double[]</code> array does not have 1285 * at least two values 1286 */ 1287 public static double oneWayAnovaFValue(final Collection<double[]> categoryData) 1288 throws MathIllegalArgumentException, NullArgumentException { 1289 return ONE_WAY_ANANOVA.anovaFValue(categoryData); 1290 } 1291 1292 /** 1293 * Computes the ANOVA P-value for a collection of <code>double[]</code> 1294 * arrays. 
1295 * 1296 * <p><strong>Preconditions</strong>:</p> 1297 * <ul> 1298 * <li>The categoryData <code>Collection</code> must contain 1299 * <code>double[]</code> arrays.</li> 1300 * <li> There must be at least two <code>double[]</code> arrays in the 1301 * <code>categoryData</code> collection and each of these arrays must 1302 * contain at least two values.</li></ul> 1303 * <p> 1304 * This implementation uses the 1305 * {@link org.hipparchus.distribution.continuous.FDistribution 1306 * Hipparchus F Distribution implementation} to estimate the exact 1307 * p-value, using the formula</p> 1308 * <pre> 1309 * p = 1 - cumulativeProbability(F)</pre> 1310 * <p> 1311 * where <code>F</code> is the F value and <code>cumulativeProbability</code> 1312 * is the Hipparchus implementation of the F distribution.</p> 1313 * 1314 * @param categoryData <code>Collection</code> of <code>double[]</code> 1315 * arrays each containing data for one category 1316 * @return the p-value 1317 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code> 1318 * @throws MathIllegalArgumentException if the size of the <code>categoryData</code> 1319 * collection is less than 2 or a contained <code>double[]</code> array does not have 1320 * at least two values 1321 * @throws MathIllegalStateException if the p-value cannot be computed due to a convergence error 1322 * @throws MathIllegalStateException if the maximum number of iterations is exceeded 1323 */ 1324 public static double oneWayAnovaPValue(final Collection<double[]> categoryData) 1325 throws MathIllegalArgumentException, NullArgumentException, 1326 MathIllegalStateException { 1327 return ONE_WAY_ANANOVA.anovaPValue(categoryData); 1328 } 1329 1330 /** 1331 * Performs an ANOVA test, evaluating the null hypothesis that there 1332 * is no difference among the means of the data categories. 
1333 * 1334 * <p><strong>Preconditions</strong>:</p> 1335 * <ul> 1336 * <li>The categoryData <code>Collection</code> must contain 1337 * <code>double[]</code> arrays.</li> 1338 * <li> There must be at least two <code>double[]</code> arrays in the 1339 * <code>categoryData</code> collection and each of these arrays must 1340 * contain at least two values.</li> 1341 * <li>alpha must be strictly greater than 0 and less than or equal to 0.5. 1342 * </li></ul> 1343 * <p> 1344 * This implementation uses the 1345 * {@link org.hipparchus.distribution.continuous.FDistribution 1346 * Hipparchus F Distribution implementation} to estimate the exact 1347 * p-value, using the formula</p><pre> 1348 * p = 1 - cumulativeProbability(F)</pre> 1349 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code> 1350 * is the Hipparchus implementation of the F distribution.</p> 1351 * <p>True is returned iff the estimated p-value is less than alpha.</p> 1352 * 1353 * @param categoryData <code>Collection</code> of <code>double[]</code> 1354 * arrays each containing data for one category 1355 * @param alpha significance level of the test 1356 * @return true if the null hypothesis can be rejected with 1357 * confidence 1 - alpha 1358 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code> 1359 * @throws MathIllegalArgumentException if the size of the <code>categoryData</code> 1360 * collection is less than 2 or a contained <code>double[]</code> array does not have 1361 * at least two values 1362 * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5] 1363 * @throws MathIllegalStateException if the p-value cannot be computed due to a convergence error 1364 * @throws MathIllegalStateException if the maximum number of iterations is exceeded 1365 */ 1366 public static boolean oneWayAnovaTest(final Collection<double[]> categoryData, 1367 final double alpha) 1368 throws MathIllegalArgumentException, NullArgumentException, 
MathIllegalStateException { 1369 return ONE_WAY_ANANOVA.anovaTest(categoryData, alpha); 1370 } 1371 1372 /** 1373 * Computes the <a href="http://en.wikipedia.org/wiki/G-test">G statistic 1374 * for Goodness of Fit</a> comparing {@code observed} and {@code expected} 1375 * frequency counts. 1376 * <p> 1377 * This statistic can be used to perform a G test (Log-Likelihood Ratio 1378 * Test) evaluating the null hypothesis that the observed counts follow the 1379 * expected distribution. 1380 * <p> 1381 * <strong>Preconditions</strong>: 1382 * <ul> 1383 * <li>Expected counts must all be positive.</li> 1384 * <li>Observed counts must all be ≥ 0.</li> 1385 * <li>The observed and expected arrays must have the same length and their 1386 * common length must be at least 2. </li> 1387 * </ul> 1388 * <p> 1389 * If any of the preconditions are not met, a 1390 * {@code MathIllegalArgumentException} is thrown. 1391 * <p> 1392 * <strong>Note:</strong> This implementation rescales the 1393 * {@code expected} array if necessary to ensure that the sums of the 1394 * expected and observed counts are equal. 1395 * 1396 * @param observed array of observed frequency counts 1397 * @param expected array of expected frequency counts 1398 * @return G-Test statistic 1399 * @throws MathIllegalArgumentException if {@code observed} has negative entries 1400 * @throws MathIllegalArgumentException if {@code expected} has entries that 1401 * are not strictly positive 1402 * @throws MathIllegalArgumentException if the array lengths do not match or 1403 * are less than 2. 
1404 */ 1405 public static double g(final double[] expected, final long[] observed) 1406 throws MathIllegalArgumentException { 1407 return G_TEST.g(expected, observed); 1408 } 1409 1410 /** 1411 * Returns the <i>observed significance level</i>, or <a href= 1412 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> p-value</a>, 1413 * associated with a G-Test for goodness of fit comparing the 1414 * {@code observed} frequency counts to those in the {@code expected} array. 1415 * 1416 * <p>The number returned is the smallest significance level at which one 1417 * can reject the null hypothesis that the observed counts conform to the 1418 * frequency distribution described by the expected counts.</p> 1419 * 1420 * <p>The probability returned is the tail probability beyond 1421 * {@link #g(double[], long[]) g(expected, observed)} 1422 * in the ChiSquare distribution with degrees of freedom one less than the 1423 * common length of {@code expected} and {@code observed}.</p> 1424 * 1425 * <p> <strong>Preconditions</strong>:</p> 1426 * <ul> 1427 * <li>Expected counts must all be positive. </li> 1428 * <li>Observed counts must all be ≥ 0. 
</li> 1429 * <li>The observed and expected arrays must have the 1430 * same length and their common length must be at least 2.</li> 1431 * </ul> 1432 * 1433 * <p>If any of the preconditions are not met, a 1434 * {@code MathIllegalArgumentException} is thrown.</p> 1435 * 1436 * <p><strong>Note:</strong>This implementation rescales the 1437 * {@code expected} array if necessary to ensure that the sum of the 1438 * expected and observed counts are equal.</p> 1439 * 1440 * @param observed array of observed frequency counts 1441 * @param expected array of expected frequency counts 1442 * @return p-value 1443 * @throws MathIllegalArgumentException if {@code observed} has negative entries 1444 * @throws MathIllegalArgumentException if {@code expected} has entries that 1445 * are not strictly positive 1446 * @throws MathIllegalArgumentException if the array lengths do not match or 1447 * are less than 2. 1448 * @throws MathIllegalStateException if an error occurs computing the 1449 * p-value. 1450 */ 1451 public static double gTest(final double[] expected, final long[] observed) 1452 throws MathIllegalArgumentException, MathIllegalStateException { 1453 return G_TEST.gTest(expected, observed); 1454 } 1455 1456 /** 1457 * Returns the intrinsic (Hardy-Weinberg proportions) p-Value, as described 1458 * in p64-69 of McDonald, J.H. 2009. Handbook of Biological Statistics 1459 * (2nd ed.). Sparky House Publishing, Baltimore, Maryland. 
1460 * 1461 * <p> The probability returned is the tail probability beyond 1462 * {@link #g(double[], long[]) g(expected, observed)} 1463 * in the ChiSquare distribution with degrees of freedom two less than the 1464 * common length of {@code expected} and {@code observed}.</p> 1465 * 1466 * @param observed array of observed frequency counts 1467 * @param expected array of expected frequency counts 1468 * @return p-value 1469 * @throws MathIllegalArgumentException if {@code observed} has negative entries 1470 * @throws MathIllegalArgumentException {@code expected} has entries that are 1471 * not strictly positive 1472 * @throws MathIllegalArgumentException if the array lengths do not match or 1473 * are less than 2. 1474 * @throws MathIllegalStateException if an error occurs computing the 1475 * p-value. 1476 */ 1477 public static double gTestIntrinsic(final double[] expected, final long[] observed) 1478 throws MathIllegalArgumentException, MathIllegalStateException { 1479 return G_TEST.gTestIntrinsic(expected, observed); 1480 } 1481 1482 /** 1483 * Performs a G-Test (Log-Likelihood Ratio Test) for goodness of fit 1484 * evaluating the null hypothesis that the observed counts conform to the 1485 * frequency distribution described by the expected counts, with 1486 * significance level {@code alpha}. Returns true iff the null 1487 * hypothesis can be rejected with {@code 100 * (1 - alpha)} percent confidence. 1488 * 1489 * <p><strong>Example:</strong><br> To test the hypothesis that 1490 * {@code observed} follows {@code expected} at the 99% level, 1491 * use </p><p> 1492 * {@code gTest(expected, observed, 0.01)}</p> 1493 * 1494 * <p>Returns true iff {@link #gTest(double[], long[]) 1495 * gTestGoodnessOfFitPValue(expected, observed)} > alpha</p> 1496 * 1497 * <p><strong>Preconditions</strong>:</p> 1498 * <ul> 1499 * <li>Expected counts must all be positive. </li> 1500 * <li>Observed counts must all be ≥ 0. 
</li> 1501 * <li>The observed and expected arrays must have the same length and their 1502 * common length must be at least 2. 1503 * <li> {@code 0 < alpha < 0.5} </li></ul> 1504 * 1505 * <p>If any of the preconditions are not met, a 1506 * {@code MathIllegalArgumentException} is thrown.</p> 1507 * 1508 * <p><strong>Note:</strong>This implementation rescales the 1509 * {@code expected} array if necessary to ensure that the sum of the 1510 * expected and observed counts are equal.</p> 1511 * 1512 * @param observed array of observed frequency counts 1513 * @param expected array of expected frequency counts 1514 * @param alpha significance level of the test 1515 * @return true iff null hypothesis can be rejected with confidence 1 - 1516 * alpha 1517 * @throws MathIllegalArgumentException if {@code observed} has negative entries 1518 * @throws MathIllegalArgumentException if {@code expected} has entries that 1519 * are not strictly positive 1520 * @throws MathIllegalArgumentException if the array lengths do not match or 1521 * are less than 2. 1522 * @throws MathIllegalStateException if an error occurs computing the 1523 * p-value. 1524 * @throws MathIllegalArgumentException if alpha is not strictly greater than zero 1525 * and less than or equal to 0.5 1526 */ 1527 public static boolean gTest(final double[] expected, final long[] observed, 1528 final double alpha) 1529 throws MathIllegalArgumentException, MathIllegalStateException { 1530 return G_TEST.gTest(expected, observed, alpha); 1531 } 1532 1533 /** 1534 * <p>Computes a G (Log-Likelihood Ratio) two sample test statistic for 1535 * independence comparing frequency counts in 1536 * {@code observed1} and {@code observed2}. The sums of frequency 1537 * counts in the two samples are not required to be the same. 
The formula
     * used to compute the test statistic is </p>
     *
     * <p>{@code 2 * totalSum * [H(rowSums) + H(colSums) - H(k)]}</p>
     *
     * <p> where {@code H} is the
     * <a href="http://en.wikipedia.org/wiki/Entropy_%28information_theory%29">
     * Shannon Entropy</a> of the random variable formed by viewing the elements
     * of the argument array as incidence counts; <br>
     * {@code k} is a matrix with rows {@code [observed1, observed2]}; <br>
     * {@code rowSums, colSums} are the row/col sums of {@code k}; <br>
     * and {@code totalSum} is the overall sum of all entries in {@code k}.</p>
     *
     * <p>This statistic can be used to perform a G test evaluating the null
     * hypothesis that both observed counts are independent </p>
     *
     * <p> <strong>Preconditions</strong>:</p>
     * <ul>
     * <li>Observed counts must be non-negative. </li>
     * <li>Observed counts for a specific bin must not both be zero. </li>
     * <li>Observed counts for a specific sample must not all be 0. </li>
     * <li>The arrays {@code observed1} and {@code observed2} must have
     * the same length and their common length must be at least 2. </li></ul>
     *
     * <p>If any of the preconditions are not met, a
     * {@code MathIllegalArgumentException} is thrown.</p>
     *
     * @param observed1 array of observed frequency counts of the first data set
     * @param observed2 array of observed frequency counts of the second data
     * set
     * @return G-Test statistic
     * @throws MathIllegalArgumentException if the lengths of the arrays do not
     * match or their common length is less than 2
     * @throws MathIllegalArgumentException if any entry in {@code observed1} or
     * {@code observed2} is negative
     * @throws MathIllegalArgumentException if either all counts of
     * {@code observed1} or {@code observed2} are zero, or if the count
     * at the same index is zero for both arrays.
1575 */ 1576 public static double gDataSetsComparison(final long[] observed1, 1577 final long[] observed2) 1578 throws MathIllegalArgumentException { 1579 return G_TEST.gDataSetsComparison(observed1, observed2); 1580 } 1581 1582 /** 1583 * Calculates the root log-likelihood ratio for 2 state Datasets. See 1584 * {@link #gDataSetsComparison(long[], long[] )}. 1585 * 1586 * <p>Given two events A and B, let k11 be the number of times both events 1587 * occur, k12 the incidence of B without A, k21 the count of A without B, 1588 * and k22 the number of times neither A nor B occurs. What is returned 1589 * by this method is </p> 1590 * 1591 * <p>{@code (sgn) sqrt(gValueDataSetsComparison({k11, k12}, {k21, k22})}</p> 1592 * 1593 * <p>where {@code sgn} is -1 if {@code k11 / (k11 + k12) < k21 / (k21 + k22))};<br> 1594 * 1 otherwise.</p> 1595 * 1596 * <p>Signed root LLR has two advantages over the basic LLR: a) it is positive 1597 * where k11 is bigger than expected, negative where it is lower b) if there is 1598 * no difference it is asymptotically normally distributed. This allows one 1599 * to talk about "number of standard deviations" which is a more common frame 1600 * of reference than the chi^2 distribution.</p> 1601 * 1602 * @param k11 number of times the two events occurred together (AB) 1603 * @param k12 number of times the second event occurred WITHOUT the 1604 * first event (notA,B) 1605 * @param k21 number of times the first event occurred WITHOUT the 1606 * second event (A, notB) 1607 * @param k22 number of times something else occurred (i.e. 
was neither 1608 * of these events (notA, notB) 1609 * @return root log-likelihood ratio 1610 * 1611 */ 1612 public static double rootLogLikelihoodRatio(final long k11, final long k12, final long k21, final long k22) 1613 throws MathIllegalArgumentException { 1614 return G_TEST.rootLogLikelihoodRatio(k11, k12, k21, k22); 1615 } 1616 1617 1618 /** 1619 * <p>Returns the <i>observed significance level</i>, or <a href= 1620 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue"> 1621 * p-value</a>, associated with a G-Value (Log-Likelihood Ratio) for two 1622 * sample test comparing bin frequency counts in {@code observed1} and 1623 * {@code observed2}.</p> 1624 * 1625 * <p>The number returned is the smallest significance level at which one 1626 * can reject the null hypothesis that the observed counts conform to the 1627 * same distribution. </p> 1628 * 1629 * <p>See {@link #gTest(double[], long[])} for details 1630 * on how the p-value is computed. The degrees of of freedom used to 1631 * perform the test is one less than the common length of the input observed 1632 * count arrays.</p> 1633 * 1634 * <p><strong>Preconditions</strong>:</p> 1635 * <ul> <li>Observed counts must be non-negative. </li> 1636 * <li>Observed counts for a specific bin must not both be zero. </li> 1637 * <li>Observed counts for a specific sample must not all be 0. </li> 1638 * <li>The arrays {@code observed1} and {@code observed2} must 1639 * have the same length and their common length must be at least 2. 
</li> 1640 * </ul> 1641 * <p> If any of the preconditions are not met, a 1642 * {@code MathIllegalArgumentException} is thrown.</p> 1643 * 1644 * @param observed1 array of observed frequency counts of the first data set 1645 * @param observed2 array of observed frequency counts of the second data 1646 * set 1647 * @return p-value 1648 * @throws MathIllegalArgumentException the the length of the arrays does not 1649 * match or their common length is less than 2 1650 * @throws MathIllegalArgumentException if any of the entries in {@code observed1} or 1651 * {@code observed2} are negative 1652 * @throws MathIllegalArgumentException if either all counts of {@code observed1} or 1653 * {@code observed2} are zero, or if the count at some index is 1654 * zero for both arrays 1655 * @throws MathIllegalStateException if an error occurs computing the 1656 * p-value. 1657 */ 1658 public static double gTestDataSetsComparison(final long[] observed1, 1659 final long[] observed2) 1660 throws MathIllegalArgumentException, 1661 MathIllegalStateException { 1662 return G_TEST.gTestDataSetsComparison(observed1, observed2); 1663 } 1664 1665 /** 1666 * <p>Performs a G-Test (Log-Likelihood Ratio Test) comparing two binned 1667 * data sets. The test evaluates the null hypothesis that the two lists 1668 * of observed counts conform to the same frequency distribution, with 1669 * significance level {@code alpha}. Returns true iff the null 1670 * hypothesis can be rejected with 100 * (1 - alpha) percent confidence. 1671 * </p> 1672 * <p>See {@link #gDataSetsComparison(long[], long[])} for details 1673 * on the formula used to compute the G (LLR) statistic used in the test and 1674 * {@link #gTest(double[], long[])} for information on how 1675 * the observed significance level is computed. The degrees of of freedom used 1676 * to perform the test is one less than the common length of the input observed 1677 * count arrays. 
</p> 1678 * 1679 * <p><strong>Preconditions</strong>:</p> 1680 * <ul> 1681 * <li>Observed counts must be non-negative. </li> 1682 * <li>Observed counts for a specific bin must not both be zero. </li> 1683 * <li>Observed counts for a specific sample must not all be 0. </li> 1684 * <li>The arrays {@code observed1} and {@code observed2} must 1685 * have the same length and their common length must be at least 2. </li> 1686 * <li>{@code 0 < alpha < 0.5} </li></ul> 1687 * 1688 * <p>If any of the preconditions are not met, a 1689 * {@code MathIllegalArgumentException} is thrown.</p> 1690 * 1691 * @param observed1 array of observed frequency counts of the first data set 1692 * @param observed2 array of observed frequency counts of the second data 1693 * set 1694 * @param alpha significance level of the test 1695 * @return true iff null hypothesis can be rejected with confidence 1 - 1696 * alpha 1697 * @throws MathIllegalArgumentException the the length of the arrays does not 1698 * match 1699 * @throws MathIllegalArgumentException if any of the entries in {@code observed1} or 1700 * {@code observed2} are negative 1701 * @throws MathIllegalArgumentException if either all counts of {@code observed1} or 1702 * {@code observed2} are zero, or if the count at some index is 1703 * zero for both arrays 1704 * @throws MathIllegalArgumentException if {@code alpha} is not in the range 1705 * (0, 0.5] 1706 * @throws MathIllegalStateException if an error occurs performing the test 1707 */ 1708 public static boolean gTestDataSetsComparison(final long[] observed1, 1709 final long[] observed2, 1710 final double alpha) 1711 throws MathIllegalArgumentException, MathIllegalStateException { 1712 return G_TEST.gTestDataSetsComparison(observed1, observed2, alpha); 1713 } 1714 1715 /** 1716 * Computes the one-sample Kolmogorov-Smirnov test statistic, \(D_n=\sup_x |F_n(x)-F(x)|\) where 1717 * \(F\) is the distribution (cdf) function associated with {@code distribution}, \(n\) is the 1718 * 
length of {@code data} and \(F_n\) is the empirical distribution that puts mass \(1/n\) at 1719 * each of the values in {@code data}. 1720 * 1721 * @param dist reference distribution 1722 * @param data sample being evaluated 1723 * @return Kolmogorov-Smirnov statistic \(D_n\) 1724 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2 1725 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null 1726 */ 1727 public static double kolmogorovSmirnovStatistic(RealDistribution dist, double[] data) 1728 throws MathIllegalArgumentException, NullArgumentException { 1729 return KS_TEST.kolmogorovSmirnovStatistic(dist, data); 1730 } 1731 1732 /** 1733 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a 1734 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a> 1735 * evaluating the null hypothesis that {@code data} conforms to {@code distribution}. 1736 * 1737 * @param dist reference distribution 1738 * @param data sample being being evaluated 1739 * @return the p-value associated with the null hypothesis that {@code data} is a sample from 1740 * {@code distribution} 1741 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2 1742 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null 1743 */ 1744 public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data) 1745 throws MathIllegalArgumentException, NullArgumentException { 1746 return KS_TEST.kolmogorovSmirnovTest(dist, data); 1747 } 1748 1749 /** 1750 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a 1751 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a> 1752 * evaluating the null hypothesis that {@code data} conforms to {@code distribution}. 
If 1753 * {@code exact} is true, the distribution used to compute the p-value is computed using 1754 * extended precision. See {@link KolmogorovSmirnovTest#cdfExact(double, int)}. 1755 * 1756 * @param dist reference distribution 1757 * @param data sample being being evaluated 1758 * @param strict whether or not to force exact computation of the p-value 1759 * @return the p-value associated with the null hypothesis that {@code data} is a sample from 1760 * {@code distribution} 1761 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2 1762 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null 1763 */ 1764 public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data, boolean strict) 1765 throws MathIllegalArgumentException, NullArgumentException { 1766 return KS_TEST.kolmogorovSmirnovTest(dist, data, strict); 1767 } 1768 1769 /** 1770 * Performs a <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov 1771 * test</a> evaluating the null hypothesis that {@code data} conforms to {@code distribution}. 
1772 * 1773 * @param dist reference distribution 1774 * @param data sample being being evaluated 1775 * @param alpha significance level of the test 1776 * @return true iff the null hypothesis that {@code data} is a sample from {@code distribution} 1777 * can be rejected with confidence 1 - {@code alpha} 1778 * @throws MathIllegalArgumentException if {@code data} does not have length at least 2 1779 * @throws org.hipparchus.exception.NullArgumentException if {@code data} is null 1780 */ 1781 public static boolean kolmogorovSmirnovTest(RealDistribution dist, double[] data, double alpha) 1782 throws MathIllegalArgumentException, NullArgumentException { 1783 return KS_TEST.kolmogorovSmirnovTest(dist, data, alpha); 1784 } 1785 1786 /** 1787 * Computes the two-sample Kolmogorov-Smirnov test statistic, \(D_{n,m}=\sup_x |F_n(x)-F_m(x)|\) 1788 * where \(n\) is the length of {@code x}, \(m\) is the length of {@code y}, \(F_n\) is the 1789 * empirical distribution that puts mass \(1/n\) at each of the values in {@code x} and \(F_m\) 1790 * is the empirical distribution of the {@code y} values. 
1791 * 1792 * @param x first sample 1793 * @param y second sample 1794 * @return test statistic \(D_{n,m}\) used to evaluate the null hypothesis that {@code x} and 1795 * {@code y} represent samples from the same underlying distribution 1796 * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at 1797 * least 2 1798 * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null 1799 */ 1800 public static double kolmogorovSmirnovStatistic(double[] x, double[] y) 1801 throws MathIllegalArgumentException, NullArgumentException { 1802 return KS_TEST.kolmogorovSmirnovStatistic(x, y); 1803 } 1804 1805 /** 1806 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a 1807 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a> 1808 * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same 1809 * probability distribution. Assumes the strict form of the inequality used to compute the 1810 * p-value. See {@link KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[], boolean)}. 
1811 * 1812 * @param x first sample dataset 1813 * @param y second sample dataset 1814 * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent 1815 * samples from the same distribution 1816 * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at 1817 * least 2 1818 * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null 1819 */ 1820 public static double kolmogorovSmirnovTest(double[] x, double[] y) 1821 throws MathIllegalArgumentException, NullArgumentException { 1822 return KS_TEST.kolmogorovSmirnovTest(x, y); 1823 } 1824 1825 /** 1826 * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a two-sample <a 1827 * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a> 1828 * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same 1829 * probability distribution. Specifically, what is returned is an estimate of the probability 1830 * that the {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} associated with a randomly 1831 * selected partition of the combined sample into subsamples of sizes {@code x.length} and 1832 * {@code y.length} will strictly exceed (if {@code strict} is {@code true}) or be at least as 1833 * large as {@code strict = false}) as {@code kolmogorovSmirnovStatistic(x, y)}. 1834 * <ul> 1835 * <li>For small samples (where the product of the sample sizes is less than 1836 * {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT}), the exact p-value is computed using the method presented 1837 * in [4], implemented in {@link #exactP(double, int, int, boolean)}. </li> 1838 * <li>When the product of the sample sizes exceeds {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT}, the 1839 * asymptotic distribution of \(D_{n,m}\) is used. 
See {@link #approximateP(double, int, int)} 1840 * for details on the approximation.</li> 1841 * </ul><p> 1842 * If {@code x.length * y.length} < {@link KolmogorovSmirnovTest#LARGE_SAMPLE_PRODUCT} and the combined set of values in 1843 * {@code x} and {@code y} contains ties, random jitter is added to {@code x} and {@code y} to 1844 * break ties before computing \(D_{n,m}\) and the p-value. The jitter is uniformly distributed 1845 * on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between 1846 * values in the combined sample.</p> 1847 * <p> 1848 * If ties are known to be present in the data, {@link KolmogorovSmirnovTest#bootstrap(double[], double[], int, boolean)} 1849 * may be used as an alternative method for estimating the p-value.</p> 1850 * 1851 * @param x first sample dataset 1852 * @param y second sample dataset 1853 * @param strict whether or not the probability to compute is expressed as a strict inequality 1854 * (ignored for large samples) 1855 * @return p-value associated with the null hypothesis that {@code x} and {@code y} represent 1856 * samples from the same distribution 1857 * @throws MathIllegalArgumentException if either {@code x} or {@code y} does not have length at 1858 * least 2 1859 * @throws org.hipparchus.exception.NullArgumentException if either {@code x} or {@code y} is null 1860 * @see KolmogorovSmirnovTest#bootstrap(double[], double[], int, boolean) 1861 */ 1862 public static double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) 1863 throws MathIllegalArgumentException, NullArgumentException { 1864 return KS_TEST.kolmogorovSmirnovTest(x, y, strict); 1865 } 1866 1867 /** 1868 * Computes \(P(D_{n,m} > d)\) if {@code strict} is {@code true}; otherwise \(P(D_{n,m} \ge 1869 * d)\), where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic. See 1870 * {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\). 
1871 * <p> 1872 * The returned probability is exact, implemented by unwinding the recursive function 1873 * definitions presented in [4] from the class javadoc. 1874 * </p> 1875 * 1876 * @param d D-statistic value 1877 * @param n first sample size 1878 * @param m second sample size 1879 * @param strict whether or not the probability to compute is expressed as a strict inequality 1880 * @return probability that a randomly selected m-n partition of m + n generates \(D_{n,m}\) 1881 * greater than (resp. greater than or equal to) {@code d} 1882 */ 1883 public static double exactP(double d, int m, int n, boolean strict) { 1884 return KS_TEST.exactP(d, n, m, strict); 1885 } 1886 1887 /** 1888 * Uses the Kolmogorov-Smirnov distribution to approximate \(P(D_{n,m} > d)\) where \(D_{n,m}\) 1889 * is the 2-sample Kolmogorov-Smirnov statistic. See 1890 * {@link KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(double[], double[])} for the definition of \(D_{n,m}\). 1891 * <p> 1892 * Specifically, what is returned is \(1 - k(d \sqrt{mn / (m + n)})\) where \(k(t) = 1 + 2 1893 * \sum_{i=1}^\infty (-1)^i e^{-2 i^2 t^2}\). See {@link KolmogorovSmirnovTest#ksSum(double, double, int)} for 1894 * details on how convergence of the sum is determined. This implementation passes {@code ksSum} 1895 * {@link KolmogorovSmirnovTest#KS_SUM_CAUCHY_CRITERION} as {@code tolerance} and 1896 * {@link KolmogorovSmirnovTest#MAXIMUM_PARTIAL_SUM_COUNT} as {@code maxIterations}. 1897 * </p> 1898 * 1899 * @param d D-statistic value 1900 * @param n first sample size 1901 * @param m second sample size 1902 * @return approximate probability that a randomly selected m-n partition of m + n generates 1903 * \(D_{n,m}\) greater than {@code d} 1904 */ 1905 public static double approximateP(double d, int n, int m) { 1906 return KS_TEST.approximateP(d, n, m); 1907 } 1908 1909 }