View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.inference;
23  
24  import org.hipparchus.distribution.continuous.TDistribution;
25  import org.hipparchus.exception.LocalizedCoreFormats;
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.exception.MathIllegalStateException;
28  import org.hipparchus.exception.NullArgumentException;
29  import org.hipparchus.stat.LocalizedStatFormats;
30  import org.hipparchus.stat.StatUtils;
31  import org.hipparchus.stat.descriptive.StatisticalSummary;
32  import org.hipparchus.util.FastMath;
33  import org.hipparchus.util.MathUtils;
34  
35  /**
36   * An implementation for Student's t-tests.
37   * <p>
38   * Tests can be:
39   * <ul>
40   * <li>One-sample or two-sample</li>
41   * <li>One-sided or two-sided</li>
42   * <li>Paired or unpaired (for two-sample tests)</li>
43   * <li>Homoscedastic (equal variance assumption) or heteroscedastic
44   * (for two sample tests)</li>
45   * <li>Fixed significance level (boolean-valued) or returning p-values.</li>
46   * </ul>
47   * <p>
48   * Test statistics are available for all tests.  Methods including "Test" in
49   * their names perform tests, all other methods return t-statistics.  Among
50   * the "Test" methods, <code>double-</code>valued methods return p-values;
51   * <code>boolean-</code>valued methods perform fixed significance level tests.
52   * Significance levels are always specified as numbers between 0 and 0.5
53   * (e.g. tests at the 95% level  use <code>alpha=0.05</code>).
54   * <p>
55   * Input to tests can be either <code>double[]</code> arrays or
56   * {@link StatisticalSummary} instances.
57   * <p>
58   * Uses Hipparchus {@link org.hipparchus.distribution.continuous.TDistribution}
59   * implementation to estimate exact p-values.
60   */
61  public class TTest { // NOPMD - this is not a Junit test class, PMD false positive here
62  
63      /** Empty constructor.
64       * <p>
65       * This constructor is not strictly necessary, but it prevents spurious
66       * javadoc warnings with JDK 18 and later.
67       * </p>
68       * @since 3.0
69       */
70      public TTest() { // NOPMD - unnecessary constructor added intentionally to make javadoc happy
71          // nothing to do
72      }
73  
74      /**
75       * Computes a paired, 2-sample t-statistic based on the data in the input
76       * arrays.  The t-statistic returned is equivalent to what would be returned by
77       * computing the one-sample t-statistic {@link #t(double, double[])}, with
78       * <code>mu = 0</code> and the sample array consisting of the (signed)
79       * differences between corresponding entries in <code>sample1</code> and
80       * <code>sample2.</code>
81       * <p><strong>Preconditions</strong>:</p>
82       * <ul>
83       * <li>The input arrays must have the same length and their common length
84       * must be at least 2.
85       * </li></ul>
86       *
87       * @param sample1 array of sample data values
88       * @param sample2 array of sample data values
89       * @return t statistic
90       * @throws NullArgumentException if the arrays are <code>null</code>
91       * @throws MathIllegalArgumentException if the arrays are empty
92       * @throws MathIllegalArgumentException if the length of the arrays is not equal
93       * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
94       */
95      public double pairedT(final double[] sample1, final double[] sample2)
96          throws MathIllegalArgumentException, NullArgumentException {
97  
98          checkSampleData(sample1);
99          checkSampleData(sample2);
100         double meanDifference = StatUtils.meanDifference(sample1, sample2);
101         return t(meanDifference, 0,
102                  StatUtils.varianceDifference(sample1, sample2, meanDifference),
103                  sample1.length);
104     }
105 
106     /**
107      * Returns the <i>observed significance level</i>, or
108      * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
109      * based on the data in the input arrays.
110      * <p>
111      * The number returned is the smallest significance level
112      * at which one can reject the null hypothesis that the mean of the paired
113      * differences is 0 in favor of the two-sided alternative that the mean paired
114      * difference is not equal to 0. For a one-sided test, divide the returned
115      * value by 2.</p>
116      * <p>
117      * This test is equivalent to a one-sample t-test computed using
118      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
119      * array consisting of the signed differences between corresponding elements of
120      * <code>sample1</code> and <code>sample2.</code></p>
121      * <p>
122      * <strong>Usage Note:</strong><br>
123      * The validity of the p-value depends on the assumptions of the parametric
124      * t-test procedure, as discussed
125      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
126      * here</a></p>
127      * <p><strong>Preconditions</strong>:</p>
128      * <ul>
129      * <li>The input array lengths must be the same and their common length must
130      * be at least 2.
131      * </li></ul>
132      *
133      * @param sample1 array of sample data values
134      * @param sample2 array of sample data values
135      * @return p-value for t-test
136      * @throws NullArgumentException if the arrays are <code>null</code>
137      * @throws MathIllegalArgumentException if the arrays are empty
138      * @throws MathIllegalArgumentException if the length of the arrays is not equal
139      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
140      * @throws MathIllegalStateException if an error occurs computing the p-value
141      */
142     public double pairedTTest(final double[] sample1, final double[] sample2)
143         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
144 
145         double meanDifference = StatUtils.meanDifference(sample1, sample2);
146         return tTest(meanDifference, 0,
147                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
148                 sample1.length);
149     }
150 
151     /**
152      * Performs a paired t-test evaluating the null hypothesis that the
153      * mean of the paired differences between <code>sample1</code> and
154      * <code>sample2</code> is 0 in favor of the two-sided alternative that the
155      * mean paired difference is not equal to 0, with significance level
156      * <code>alpha</code>.
157      * <p>
158      * Returns <code>true</code> iff the null hypothesis can be rejected with
159      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
160      * <code>alpha * 2</code></p>
161      * <p>
162      * <strong>Usage Note:</strong><br>
163      * The validity of the test depends on the assumptions of the parametric
164      * t-test procedure, as discussed
165      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
166      * here</a></p>
167      * <p><strong>Preconditions</strong>:</p>
168      * <ul>
169      * <li>The input array lengths must be the same and their common length
170      * must be at least 2.
171      * </li>
172      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
173      * </li></ul>
174      *
175      * @param sample1 array of sample data values
176      * @param sample2 array of sample data values
177      * @param alpha significance level of the test
178      * @return true if the null hypothesis can be rejected with
179      * confidence 1 - alpha
180      * @throws NullArgumentException if the arrays are <code>null</code>
181      * @throws MathIllegalArgumentException if the arrays are empty
182      * @throws MathIllegalArgumentException if the length of the arrays is not equal
183      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
184      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
185      * @throws MathIllegalStateException if an error occurs computing the p-value
186      */
187     public boolean pairedTTest(final double[] sample1, final double[] sample2,
188                                final double alpha)
189         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
190 
191         checkSignificanceLevel(alpha);
192         return pairedTTest(sample1, sample2) < alpha;
193 
194     }
195 
196     /**
197      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
198      * t statistic </a> given observed values and a comparison constant.
199      * <p>
200      * This statistic can be used to perform a one sample t-test for the mean.
201      * </p>
202      * <p><strong>Preconditions</strong>:</p>
203      * <ul>
204      * <li>The observed array length must be at least 2.
205      * </li></ul>
206      *
207      * @param mu comparison constant
208      * @param observed array of values
209      * @return t statistic
210      * @throws NullArgumentException if <code>observed</code> is <code>null</code>
211      * @throws MathIllegalArgumentException if the length of <code>observed</code> is &lt; 2
212      */
213     public double t(final double mu, final double[] observed)
214         throws MathIllegalArgumentException, NullArgumentException {
215 
216         checkSampleData(observed);
217         // No try-catch or advertised exception because args have just been checked
218         return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
219                  observed.length);
220     }
221 
222     /**
223      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
224      * t statistic </a> to use in comparing the mean of the dataset described by
225      * <code>sampleStats</code> to <code>mu</code>.
226      * <p>
227      * This statistic can be used to perform a one sample t-test for the mean.
228      * </p>
229      * <p><strong>Preconditions</strong>:</p>
230      * <ul>
231      * <li><code>sampleStats.getN() &ge; 2</code>.
232      * </li></ul>
233      *
234      * @param mu comparison constant
235      * @param sampleStats StatisticalSummary holding sample summary statistics
236      * @return t statistic
237      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
238      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
239      */
240     public double t(final double mu, final StatisticalSummary sampleStats)
241         throws MathIllegalArgumentException, NullArgumentException {
242 
243         checkSampleData(sampleStats);
244         return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
245                  sampleStats.getN());
246     }
247 
248     /**
249      * Computes a 2-sample t statistic,  under the hypothesis of equal
250      * subpopulation variances.  To compute a t-statistic without the
251      * equal variances hypothesis, use {@link #t(double[], double[])}.
252      * <p>
253      * This statistic can be used to perform a (homoscedastic) two-sample
254      * t-test to compare sample means.</p>
255      * <p>
256      * The t-statistic is</p>
257      * <p>
258      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
259      * </p><p>
260      * where <strong><code>n1</code></strong> is the size of first sample;
261      * <strong><code> n2</code></strong> is the size of second sample;
262      * <strong><code> m1</code></strong> is the mean of first sample;
263      * <strong><code> m2</code></strong> is the mean of second sample
264      * and <strong><code>var</code></strong> is the pooled variance estimate:
265      * </p><p>
266      * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
267      * </p><p>
268      * with <strong><code>var1</code></strong> the variance of the first sample and
269      * <strong><code>var2</code></strong> the variance of the second sample.
270      * </p>
271      * <p><strong>Preconditions</strong>:</p>
272      * <ul>
273      * <li>The observed array lengths must both be at least 2.
274      * </li></ul>
275      *
276      * @param sample1 array of sample data values
277      * @param sample2 array of sample data values
278      * @return t statistic
279      * @throws NullArgumentException if the arrays are <code>null</code>
280      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
281      */
282     public double homoscedasticT(final double[] sample1, final double[] sample2)
283         throws MathIllegalArgumentException, NullArgumentException {
284 
285         checkSampleData(sample1);
286         checkSampleData(sample2);
287         // No try-catch or advertised exception because args have just been checked
288         return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
289                               StatUtils.variance(sample1), StatUtils.variance(sample2),
290                               sample1.length, sample2.length);
291     }
292 
293     /**
294      * Computes a 2-sample t statistic, without the hypothesis of equal
295      * subpopulation variances.  To compute a t-statistic assuming equal
296      * variances, use {@link #homoscedasticT(double[], double[])}.
297      * <p>
298      * This statistic can be used to perform a two-sample t-test to compare
299      * sample means.</p>
300      * <p>
301      * The t-statistic is</p>
302      * <p>
303      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
304      * </p><p>
305      * where <strong><code>n1</code></strong> is the size of the first sample;
306      * <strong><code> n2</code></strong> is the size of the second sample;
307      * <strong><code> m1</code></strong> is the mean of the first sample;
308      * <strong><code> m2</code></strong> is the mean of the second sample;
309      * <strong><code> var1</code></strong> is the variance of the first sample;
310      * <strong><code> var2</code></strong> is the variance of the second sample;
311      * </p>
312      * <p><strong>Preconditions</strong>:</p>
313      * <ul>
314      * <li>The observed array lengths must both be at least 2.
315      * </li></ul>
316      *
317      * @param sample1 array of sample data values
318      * @param sample2 array of sample data values
319      * @return t statistic
320      * @throws NullArgumentException if the arrays are <code>null</code>
321      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
322      */
323     public double t(final double[] sample1, final double[] sample2)
324         throws MathIllegalArgumentException, NullArgumentException {
325 
326         checkSampleData(sample1);
327         checkSampleData(sample2);
328         // No try-catch or advertised exception because args have just been checked
329         return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
330                  StatUtils.variance(sample1), StatUtils.variance(sample2),
331                  sample1.length, sample2.length);
332     }
333 
334     /**
335      * Computes a 2-sample t statistic, comparing the means of the datasets
336      * described by two {@link StatisticalSummary} instances, without the
337      * assumption of equal subpopulation variances.  Use
338      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
339      * compute a t-statistic under the equal variances assumption.
340      * <p>
341      * This statistic can be used to perform a two-sample t-test to compare
342      * sample means.</p>
343      * <p>
344       * The returned  t-statistic is</p>
345      * <p>
346      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
347      * </p><p>
348      * where <strong><code>n1</code></strong> is the size of the first sample;
349      * <strong><code> n2</code></strong> is the size of the second sample;
350      * <strong><code> m1</code></strong> is the mean of the first sample;
351      * <strong><code> m2</code></strong> is the mean of the second sample
352      * <strong><code> var1</code></strong> is the variance of the first sample;
353      * <strong><code> var2</code></strong> is the variance of the second sample
354      * </p>
355      * <p><strong>Preconditions</strong>:</p>
356      * <ul>
357      * <li>The datasets described by the two Univariates must each contain
358      * at least 2 observations.
359      * </li></ul>
360      *
361      * @param sampleStats1 StatisticalSummary describing data from the first sample
362      * @param sampleStats2 StatisticalSummary describing data from the second sample
363      * @return t statistic
364      * @throws NullArgumentException if the sample statistics are <code>null</code>
365      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
366      */
367     public double t(final StatisticalSummary sampleStats1,
368                     final StatisticalSummary sampleStats2)
369         throws MathIllegalArgumentException, NullArgumentException {
370 
371         checkSampleData(sampleStats1);
372         checkSampleData(sampleStats2);
373         return t(sampleStats1.getMean(), sampleStats2.getMean(),
374                  sampleStats1.getVariance(), sampleStats2.getVariance(),
375                  sampleStats1.getN(), sampleStats2.getN());
376     }
377 
378     /**
379      * Computes a 2-sample t statistic, comparing the means of the datasets
380      * described by two {@link StatisticalSummary} instances, under the
381      * assumption of equal subpopulation variances.  To compute a t-statistic
382      * without the equal variances assumption, use
383      * {@link #t(StatisticalSummary, StatisticalSummary)}.
384      * <p>
385      * This statistic can be used to perform a (homoscedastic) two-sample
386      * t-test to compare sample means.</p>
387      * <p>
388      * The t-statistic returned is</p>
389      * <p>
390      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
391      * </p><p>
392      * where <strong><code>n1</code></strong> is the size of first sample;
393      * <strong><code> n2</code></strong> is the size of second sample;
394      * <strong><code> m1</code></strong> is the mean of first sample;
395      * <strong><code> m2</code></strong> is the mean of second sample
396      * and <strong><code>var</code></strong> is the pooled variance estimate:
397      * </p><p>
398      * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
399      * </p><p>
400      * with <strong><code>var1</code></strong> the variance of the first sample and
401      * <strong><code>var2</code></strong> the variance of the second sample.
402      * </p>
403      * <p><strong>Preconditions</strong>:</p>
404      * <ul>
405      * <li>The datasets described by the two Univariates must each contain
406      * at least 2 observations.
407      * </li></ul>
408      *
409      * @param sampleStats1 StatisticalSummary describing data from the first sample
410      * @param sampleStats2 StatisticalSummary describing data from the second sample
411      * @return t statistic
412      * @throws NullArgumentException if the sample statistics are <code>null</code>
413      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
414      */
415     public double homoscedasticT(final StatisticalSummary sampleStats1,
416                                  final StatisticalSummary sampleStats2)
417         throws MathIllegalArgumentException, NullArgumentException {
418 
419         checkSampleData(sampleStats1);
420         checkSampleData(sampleStats2);
421         return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
422                               sampleStats1.getVariance(), sampleStats2.getVariance(),
423                               sampleStats1.getN(), sampleStats2.getN());
424     }
425 
426     /**
427      * Returns the <i>observed significance level</i>, or
428      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
429      * comparing the mean of the input array with the constant <code>mu</code>.
430      * <p>
431      * The number returned is the smallest significance level
432      * at which one can reject the null hypothesis that the mean equals
433      * <code>mu</code> in favor of the two-sided alternative that the mean
434      * is different from <code>mu</code>. For a one-sided test, divide the
435      * returned value by 2.</p>
436      * <p>
437      * <strong>Usage Note:</strong><br>
438      * The validity of the test depends on the assumptions of the parametric
439      * t-test procedure, as discussed
440      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
441      * </p>
442      * <p><strong>Preconditions</strong>:</p>
443      * <ul>
444      * <li>The observed array length must be at least 2.
445      * </li></ul>
446      *
447      * @param mu constant value to compare sample mean against
448      * @param sample array of sample data values
449      * @return p-value
450      * @throws NullArgumentException if the sample array is <code>null</code>
451      * @throws MathIllegalArgumentException if the length of the array is &lt; 2
452      * @throws MathIllegalStateException if an error occurs computing the p-value
453      */
454     public double tTest(final double mu, final double[] sample)
455         throws MathIllegalArgumentException, NullArgumentException,
456         MathIllegalStateException {
457 
458         checkSampleData(sample);
459         // No try-catch or advertised exception because args have just been checked
460         return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
461                      sample.length);
462     }
463 
464     /**
465      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
466      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
467      * which <code>sample</code> is drawn equals <code>mu</code>.
468      * <p>
469      * Returns <code>true</code> iff the null hypothesis can be
470      * rejected with confidence <code>1 - alpha</code>.  To
471      * perform a 1-sided test, use <code>alpha * 2</code></p>
472      * <p><strong>Examples:</strong></p>
473      * <ol>
474      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
475      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
476      * </li>
477      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
478      * at the 99% level, first verify that the measured sample mean is less
479      * than <code>mu</code> and then use
480      * <br><code>tTest(mu, sample, 0.02) </code>
481      * </li></ol>
482      * <p>
483      * <strong>Usage Note:</strong><br>
484      * The validity of the test depends on the assumptions of the one-sample
485      * parametric t-test procedure, as discussed
486      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
487      * </p>
488      * <p><strong>Preconditions</strong>:</p>
489      * <ul>
490      * <li>The observed array length must be at least 2.
491      * </li></ul>
492      *
493      * @param mu constant value to compare sample mean against
494      * @param sample array of sample data values
495      * @param alpha significance level of the test
496      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
497      * @throws NullArgumentException if the sample array is <code>null</code>
498      * @throws MathIllegalArgumentException if the length of the array is &lt; 2
499      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
500      * @throws MathIllegalStateException if an error occurs computing the p-value
501      */
502     public boolean tTest(final double mu, final double[] sample, final double alpha)
503         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
504 
505         checkSignificanceLevel(alpha);
506         return tTest(mu, sample) < alpha;
507     }
508 
509     /**
510      * Returns the <i>observed significance level</i>, or
511      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
512      * comparing the mean of the dataset described by <code>sampleStats</code>
513      * with the constant <code>mu</code>.
514      * <p>
515      * The number returned is the smallest significance level
516      * at which one can reject the null hypothesis that the mean equals
517      * <code>mu</code> in favor of the two-sided alternative that the mean
518      * is different from <code>mu</code>. For a one-sided test, divide the
519      * returned value by 2.</p>
520      * <p>
521      * <strong>Usage Note:</strong><br>
522      * The validity of the test depends on the assumptions of the parametric
523      * t-test procedure, as discussed
524      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
525      * here</a></p>
526      * <p><strong>Preconditions</strong>:</p>
527      * <ul>
528      * <li>The sample must contain at least 2 observations.
529      * </li></ul>
530      *
531      * @param mu constant value to compare sample mean against
532      * @param sampleStats StatisticalSummary describing sample data
533      * @return p-value
534      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
535      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
536      * @throws MathIllegalStateException if an error occurs computing the p-value
537      */
538     public double tTest(final double mu, final StatisticalSummary sampleStats)
539         throws MathIllegalArgumentException, NullArgumentException,
540         MathIllegalStateException {
541 
542         checkSampleData(sampleStats);
543         return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
544                      sampleStats.getN());
545     }
546 
547     /**
548      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
549      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
550      * population from which the dataset described by <code>stats</code> is
551      * drawn equals <code>mu</code>.
552      * <p>
553      * Returns <code>true</code> iff the null hypothesis can be rejected with
554      * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
555      * <code>alpha * 2.</code></p>
556      * <p><strong>Examples:</strong></p>
557      * <ol>
558      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
559      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
560      * </li>
561      * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
562      * at the 99% level, first verify that the measured sample mean is less
563      * than <code>mu</code> and then use
564      * <br><code>tTest(mu, sampleStats, 0.02) </code>
565      * </li></ol>
566      * <p>
567      * <strong>Usage Note:</strong><br>
568      * The validity of the test depends on the assumptions of the one-sample
569      * parametric t-test procedure, as discussed
570      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
571      * </p>
572      * <p><strong>Preconditions</strong>:</p>
573      * <ul>
574      * <li>The sample must include at least 2 observations.
575      * </li></ul>
576      *
577      * @param mu constant value to compare sample mean against
578      * @param sampleStats StatisticalSummary describing sample data values
579      * @param alpha significance level of the test
580      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
581      * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
582      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
583      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
584      * @throws MathIllegalStateException if an error occurs computing the p-value
585      */
586     public boolean tTest(final double mu, final StatisticalSummary sampleStats,
587                          final double alpha)
588         throws MathIllegalArgumentException, NullArgumentException,
589                MathIllegalStateException {
590 
591         checkSignificanceLevel(alpha);
592         return tTest(mu, sampleStats) < alpha;
593     }
594 
595     /**
596      * Returns the <i>observed significance level</i>, or
597      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
598      * comparing the means of the input arrays.
599      * <p>
600      * The number returned is the smallest significance level
601      * at which one can reject the null hypothesis that the two means are
602      * equal in favor of the two-sided alternative that they are different.
603      * For a one-sided test, divide the returned value by 2.</p>
604      * <p>
605      * The test does not assume that the underlying population variances are
606      * equal  and it uses approximated degrees of freedom computed from the
607      * sample data to compute the p-value.  The t-statistic used is as defined in
608      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
609      * to the degrees of freedom is used,
610      * as described
611      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
612      * here.</a>  To perform the test under the assumption of equal subpopulation
613      * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
614      * <p>
615      * <strong>Usage Note:</strong><br>
616      * The validity of the p-value depends on the assumptions of the parametric
617      * t-test procedure, as discussed
618      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
619      * here</a></p>
620      * <p><strong>Preconditions</strong>:</p>
621      * <ul>
622      * <li>The observed array lengths must both be at least 2.
623      * </li></ul>
624      *
625      * @param sample1 array of sample data values
626      * @param sample2 array of sample data values
627      * @return p-value for t-test
628      * @throws NullArgumentException if the arrays are <code>null</code>
629      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
630      * @throws MathIllegalStateException if an error occurs computing the p-value
631      */
632     public double tTest(final double[] sample1, final double[] sample2)
633         throws MathIllegalArgumentException, NullArgumentException,
634                MathIllegalStateException {
635 
636         checkSampleData(sample1);
637         checkSampleData(sample2);
638         // No try-catch or advertised exception because args have just been checked
639         return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
640                      StatUtils.variance(sample1), StatUtils.variance(sample2),
641                      sample1.length, sample2.length);
642     }
643 
644     /**
645      * Returns the <i>observed significance level</i>, or
646      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
647      * comparing the means of the input arrays, under the assumption that
648      * the two samples are drawn from subpopulations with equal variances.
649      * To perform the test without the equal variances assumption, use
650      * {@link #tTest(double[], double[])}.
651      * <p>
652      * The number returned is the smallest significance level
653      * at which one can reject the null hypothesis that the two means are
654      * equal in favor of the two-sided alternative that they are different.
655      * For a one-sided test, divide the returned value by 2.</p>
656      * <p>
657      * A pooled variance estimate is used to compute the t-statistic.  See
658      * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
659      * minus 2 is used as the degrees of freedom.</p>
660      * <p>
661      * <strong>Usage Note:</strong><br>
662      * The validity of the p-value depends on the assumptions of the parametric
663      * t-test procedure, as discussed
664      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
665      * here</a></p>
666      * <p><strong>Preconditions</strong>:</p>
667      * <ul>
668      * <li>The observed array lengths must both be at least 2.
669      * </li></ul>
670      *
671      * @param sample1 array of sample data values
672      * @param sample2 array of sample data values
673      * @return p-value for t-test
674      * @throws NullArgumentException if the arrays are <code>null</code>
675      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
676      * @throws MathIllegalStateException if an error occurs computing the p-value
677      */
678     public double homoscedasticTTest(final double[] sample1, final double[] sample2)
679         throws MathIllegalArgumentException, NullArgumentException,
680         MathIllegalStateException {
681 
682         checkSampleData(sample1);
683         checkSampleData(sample2);
684         // No try-catch or advertised exception because args have just been checked
685         return homoscedasticTTest(StatUtils.mean(sample1),
686                                   StatUtils.mean(sample2),
687                                   StatUtils.variance(sample1),
688                                   StatUtils.variance(sample2),
689                                   sample1.length, sample2.length);
690     }
691 
692     /**
693      * Performs a
694      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
695      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
696      * and <code>sample2</code> are drawn from populations with the same mean,
697      * with significance level <code>alpha</code>.  This test does not assume
698      * that the subpopulation variances are equal.  To perform the test assuming
699      * equal variances, use
700      * {@link #homoscedasticTTest(double[], double[], double)}.
701      * <p>
702      * Returns <code>true</code> iff the null hypothesis that the means are
703      * equal can be rejected with confidence <code>1 - alpha</code>.  To
704      * perform a 1-sided test, use <code>alpha * 2</code></p>
705      * <p>
706      * See {@link #t(double[], double[])} for the formula used to compute the
707      * t-statistic.  Degrees of freedom are approximated using the
708      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
709      * Welch-Satterthwaite approximation.</a></p>
710      * <p>* <strong>Examples:</strong></p>
711      * <ol>
712      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
713      * the 95% level,  use
714      * <br><code>tTest(sample1, sample2, 0.05). </code>
715      * </li>
716      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
717      * at the 99% level, first verify that the measured  mean of <code>sample 1</code>
718      * is less than the mean of <code>sample 2</code> and then use
719      * <br><code>tTest(sample1, sample2, 0.02) </code>
720      * </li></ol>
721      * <p>
722      * <strong>Usage Note:</strong><br>
723      * The validity of the test depends on the assumptions of the parametric
724      * t-test procedure, as discussed
725      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
726      * here</a></p>
727      * <p><strong>Preconditions</strong>:</p>
728      * <ul>
729      * <li>The observed array lengths must both be at least 2.
730      * </li>
731      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
732      * </li></ul>
733      *
734      * @param sample1 array of sample data values
735      * @param sample2 array of sample data values
736      * @param alpha significance level of the test
737      * @return true if the null hypothesis can be rejected with
738      * confidence 1 - alpha
739      * @throws NullArgumentException if the arrays are <code>null</code>
740      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
741      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
742      * @throws MathIllegalStateException if an error occurs computing the p-value
743      */
744     public boolean tTest(final double[] sample1, final double[] sample2,
745                          final double alpha)
746         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
747 
748         checkSignificanceLevel(alpha);
749         return tTest(sample1, sample2) < alpha;
750     }
751 
752     /**
753      * Performs a
754      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
755      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
756      * and <code>sample2</code> are drawn from populations with the same mean,
757      * with significance level <code>alpha</code>,  assuming that the
758      * subpopulation variances are equal.  Use
759      * {@link #tTest(double[], double[], double)} to perform the test without
760      * the assumption of equal variances.
761      * <p>
762      * Returns <code>true</code> iff the null hypothesis that the means are
763      * equal can be rejected with confidence <code>1 - alpha</code>.  To
764      * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
765      * without the assumption of equal subpopulation variances, use
766      * {@link #tTest(double[], double[], double)}.</p>
767      * <p>
768      * A pooled variance estimate is used to compute the t-statistic. See
769      * {@link #t(double[], double[])} for the formula. The sum of the sample
770      * sizes minus 2 is used as the degrees of freedom.</p>
771      * <p><strong>Examples:</strong></p>
772      * <ol>
773      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
774      * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
775      * </li>
776      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
777      * at the 99% level, first verify that the measured mean of
778      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
779      * and then use
780      * <br><code>tTest(sample1, sample2, 0.02) </code>
781      * </li></ol>
782      * <p>
783      * <strong>Usage Note:</strong><br>
784      * The validity of the test depends on the assumptions of the parametric
785      * t-test procedure, as discussed
786      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
787      * here</a></p>
788      * <p><strong>Preconditions</strong>:</p>
789      * <ul>
790      * <li>The observed array lengths must both be at least 2.
791      * </li>
792      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
793      * </li></ul>
794      *
795      * @param sample1 array of sample data values
796      * @param sample2 array of sample data values
797      * @param alpha significance level of the test
798      * @return true if the null hypothesis can be rejected with
799      * confidence 1 - alpha
800      * @throws NullArgumentException if the arrays are <code>null</code>
801      * @throws MathIllegalArgumentException if the length of the arrays is &lt; 2
802      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
803      * @throws MathIllegalStateException if an error occurs computing the p-value
804      */
805     public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
806                                       final double alpha)
807         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
808 
809         checkSignificanceLevel(alpha);
810         return homoscedasticTTest(sample1, sample2) < alpha;
811     }
812 
813     /**
814      * Returns the <i>observed significance level</i>, or
815      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
816      * comparing the means of the datasets described by two StatisticalSummary
817      * instances.
818      * <p>
819      * The number returned is the smallest significance level
820      * at which one can reject the null hypothesis that the two means are
821      * equal in favor of the two-sided alternative that they are different.
822      * For a one-sided test, divide the returned value by 2.</p>
823      * <p>
824      * The test does not assume that the underlying population variances are
825      * equal  and it uses approximated degrees of freedom computed from the
826      * sample data to compute the p-value.   To perform the test assuming
827      * equal variances, use
828      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
829      * <p>
830      * <strong>Usage Note:</strong><br>
831      * The validity of the p-value depends on the assumptions of the parametric
832      * t-test procedure, as discussed
833      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
834      * here</a></p>
835      * <p><strong>Preconditions</strong>:</p>
836      * <ul>
837      * <li>The datasets described by the two Univariates must each contain
838      * at least 2 observations.
839      * </li></ul>
840      *
841      * @param sampleStats1  StatisticalSummary describing data from the first sample
842      * @param sampleStats2  StatisticalSummary describing data from the second sample
843      * @return p-value for t-test
844      * @throws NullArgumentException if the sample statistics are <code>null</code>
845      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
846      * @throws MathIllegalStateException if an error occurs computing the p-value
847      */
848     public double tTest(final StatisticalSummary sampleStats1,
849                         final StatisticalSummary sampleStats2)
850         throws MathIllegalArgumentException, NullArgumentException,
851         MathIllegalStateException {
852 
853         checkSampleData(sampleStats1);
854         checkSampleData(sampleStats2);
855         return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
856                      sampleStats1.getVariance(), sampleStats2.getVariance(),
857                      sampleStats1.getN(), sampleStats2.getN());
858     }
859 
860     /**
861      * Returns the <i>observed significance level</i>, or
862      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
863      * comparing the means of the datasets described by two StatisticalSummary
864      * instances, under the hypothesis of equal subpopulation variances. To
865      * perform a test without the equal variances assumption, use
866      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
867      * <p>
868      * The number returned is the smallest significance level
869      * at which one can reject the null hypothesis that the two means are
870      * equal in favor of the two-sided alternative that they are different.
871      * For a one-sided test, divide the returned value by 2.</p>
872      * <p>
873      * See {@link #homoscedasticT(double[], double[])} for the formula used to
874      * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
875      * the degrees of freedom.</p>
876      * <p>
877      * <strong>Usage Note:</strong><br>
878      * The validity of the p-value depends on the assumptions of the parametric
879      * t-test procedure, as discussed
880      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
881      * </p><p><strong>Preconditions</strong>:</p>
882      * <ul>
883      * <li>The datasets described by the two Univariates must each contain
884      * at least 2 observations.
885      * </li></ul>
886      *
887      * @param sampleStats1  StatisticalSummary describing data from the first sample
888      * @param sampleStats2  StatisticalSummary describing data from the second sample
889      * @return p-value for t-test
890      * @throws NullArgumentException if the sample statistics are <code>null</code>
891      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
892      * @throws MathIllegalStateException if an error occurs computing the p-value
893      */
894     public double homoscedasticTTest(final StatisticalSummary sampleStats1,
895                                      final StatisticalSummary sampleStats2)
896         throws MathIllegalArgumentException, NullArgumentException,
897         MathIllegalStateException {
898 
899         checkSampleData(sampleStats1);
900         checkSampleData(sampleStats2);
901         return homoscedasticTTest(sampleStats1.getMean(),
902                                   sampleStats2.getMean(),
903                                   sampleStats1.getVariance(),
904                                   sampleStats2.getVariance(),
905                                   sampleStats1.getN(), sampleStats2.getN());
906     }
907 
908     /**
909      * Performs a
910      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
911      * two-sided t-test</a> evaluating the null hypothesis that
912      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
913      * datasets drawn from populations with the same mean, with significance
914      * level <code>alpha</code>.   This test does not assume that the
915      * subpopulation variances are equal.  To perform the test under the equal
916      * variances assumption, use
917      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
918      * <p>
919      * Returns <code>true</code> iff the null hypothesis that the means are
920      * equal can be rejected with confidence <code>1 - alpha</code>.  To
921      * perform a 1-sided test, use <code>alpha * 2</code></p>
922      * <p>
923      * See {@link #t(double[], double[])} for the formula used to compute the
924      * t-statistic.  Degrees of freedom are approximated using the
925      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
926      * Welch-Satterthwaite approximation.</a></p>
927      * <p>* <strong>Examples:</strong></p>
928      * <ol>
929      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
930      * the 95%, use
931      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
932      * </li>
933      * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
934      * at the 99% level,  first verify that the measured mean of
935      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
936      * and then use
937      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
938      * </li></ol>
939      * <p>
940      * <strong>Usage Note:</strong><br>
941      * The validity of the test depends on the assumptions of the parametric
942      * t-test procedure, as discussed
943      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
944      * here</a></p>
945      * <p><strong>Preconditions</strong>:</p>
946      * <ul>
947      * <li>The datasets described by the two Univariates must each contain
948      * at least 2 observations.
949      * </li>
950      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
951      * </li></ul>
952      *
953      * @param sampleStats1 StatisticalSummary describing sample data values
954      * @param sampleStats2 StatisticalSummary describing sample data values
955      * @param alpha significance level of the test
956      * @return true if the null hypothesis can be rejected with
957      * confidence 1 - alpha
958      * @throws NullArgumentException if the sample statistics are <code>null</code>
959      * @throws MathIllegalArgumentException if the number of samples is &lt; 2
960      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
961      * @throws MathIllegalStateException if an error occurs computing the p-value
962      */
963     public boolean tTest(final StatisticalSummary sampleStats1,
964                          final StatisticalSummary sampleStats2,
965                          final double alpha)
966         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {
967 
968         checkSignificanceLevel(alpha);
969         return tTest(sampleStats1, sampleStats2) < alpha;
970     }
971 
972     //----------------------------------------------- Protected methods
973 
974     /**
975      * Computes approximate degrees of freedom for 2-sample t-test.
976      *
977      * @param v1 first sample variance
978      * @param v2 second sample variance
979      * @param n1 first sample n
980      * @param n2 second sample n
981      * @return approximate degrees of freedom
982      */
983     protected double df(double v1, double v2, double n1, double n2) {
984         return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
985         ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
986                 (n2 * n2 * (n2 - 1d)));
987     }
988 
989     /**
990      * Computes t test statistic for 1-sample t-test.
991      *
992      * @param m sample mean
993      * @param mu constant to test against
994      * @param v sample variance
995      * @param n sample n
996      * @return t test statistic
997      */
998     protected double t(final double m, final double mu,
999                        final double v, final double n) {
1000         return (m - mu) / FastMath.sqrt(v / n);
1001     }
1002 
1003     /**
1004      * Computes t test statistic for 2-sample t-test.
1005      * <p>
1006      * Does not assume that subpopulation variances are equal.</p>
1007      *
1008      * @param m1 first sample mean
1009      * @param m2 second sample mean
1010      * @param v1 first sample variance
1011      * @param v2 second sample variance
1012      * @param n1 first sample n
1013      * @param n2 second sample n
1014      * @return t test statistic
1015      */
1016     protected double t(final double m1, final double m2,
1017                        final double v1, final double v2,
1018                        final double n1, final double n2)  {
1019         return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1020     }
1021 
1022     /**
1023      * Computes t test statistic for 2-sample t-test under the hypothesis
1024      * of equal subpopulation variances.
1025      *
1026      * @param m1 first sample mean
1027      * @param m2 second sample mean
1028      * @param v1 first sample variance
1029      * @param v2 second sample variance
1030      * @param n1 first sample n
1031      * @param n2 second sample n
1032      * @return t test statistic
1033      */
1034     protected double homoscedasticT(final double m1, final double m2,
1035                                     final double v1, final double v2,
1036                                     final double n1, final double n2)  {
1037         final double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1038         return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1039     }
1040 
1041     /**
1042      * Computes p-value for 2-sided, 1-sample t-test.
1043      *
1044      * @param m sample mean
1045      * @param mu constant to test against
1046      * @param v sample variance
1047      * @param n sample n
1048      * @return p-value
1049      * @throws MathIllegalStateException if an error occurs computing the p-value
1050      * @throws MathIllegalArgumentException if n is not greater than 1
1051      */
1052     protected double tTest(final double m, final double mu,
1053                            final double v, final double n)
1054         throws MathIllegalArgumentException, MathIllegalStateException {
1055 
1056         final double t = FastMath.abs(t(m, mu, v, n));
1057         final TDistribution distribution = new TDistribution( n - 1);
1058         return 2.0 * distribution.cumulativeProbability(-t);
1059 
1060     }
1061 
1062     /**
1063      * Computes p-value for 2-sided, 2-sample t-test.
1064      * <p>
1065      * Does not assume subpopulation variances are equal. Degrees of freedom
1066      * are estimated from the data.</p>
1067      *
1068      * @param m1 first sample mean
1069      * @param m2 second sample mean
1070      * @param v1 first sample variance
1071      * @param v2 second sample variance
1072      * @param n1 first sample n
1073      * @param n2 second sample n
1074      * @return p-value
1075      * @throws MathIllegalStateException if an error occurs computing the p-value
1076      * @throws MathIllegalArgumentException if the estimated degrees of freedom is not
1077      * strictly positive
1078      */
1079     protected double tTest(final double m1, final double m2,
1080                            final double v1, final double v2,
1081                            final double n1, final double n2)
1082         throws MathIllegalArgumentException, MathIllegalStateException {
1083 
1084         final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1085         final double degreesOfFreedom = df(v1, v2, n1, n2);
1086         final TDistribution distribution = new TDistribution(degreesOfFreedom);
1087         return 2.0 * distribution.cumulativeProbability(-t);
1088 
1089     }
1090 
1091     /**
1092      * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1093      * of equal subpopulation variances.
1094      * <p>
1095      * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1096      *
1097      * @param m1 first sample mean
1098      * @param m2 second sample mean
1099      * @param v1 first sample variance
1100      * @param v2 second sample variance
1101      * @param n1 first sample n
1102      * @param n2 second sample n
1103      * @return p-value
1104      * @throws MathIllegalStateException if an error occurs computing the p-value
1105      * @throws MathIllegalArgumentException if the estimated degrees of freedom is not
1106      * strictly positive
1107      */
1108     protected double homoscedasticTTest(double m1, double m2,
1109                                         double v1, double v2,
1110                                         double n1, double n2)
1111         throws MathIllegalArgumentException, MathIllegalStateException {
1112 
1113         final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1114         final double degreesOfFreedom = n1 + n2 - 2;
1115         final TDistribution distribution = new TDistribution(degreesOfFreedom);
1116         return 2.0 * distribution.cumulativeProbability(-t);
1117 
1118     }
1119 
1120     /**
1121      * Check significance level.
1122      *
1123      * @param alpha significance level
1124      * @throws MathIllegalArgumentException if the significance level is out of bounds.
1125      */
1126     private void checkSignificanceLevel(final double alpha)
1127         throws MathIllegalArgumentException {
1128 
1129         if (alpha <= 0 || alpha > 0.5) {
1130             throw new MathIllegalArgumentException(LocalizedStatFormats.SIGNIFICANCE_LEVEL,
1131                                           alpha, 0.0, 0.5);
1132         }
1133 
1134     }
1135 
1136     /**
1137      * Check sample data.
1138      *
1139      * @param data Sample data.
1140      * @throws NullArgumentException if {@code data} is {@code null}.
1141      * @throws MathIllegalArgumentException if there is not enough sample data.
1142      */
1143     private void checkSampleData(final double[] data)
1144         throws MathIllegalArgumentException, NullArgumentException {
1145 
1146         MathUtils.checkNotNull(data, LocalizedCoreFormats.INPUT_ARRAY);
1147         if (data.length < 2) {
1148             throw new MathIllegalArgumentException(
1149                     LocalizedStatFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1150                     data.length, 2, true);
1151         }
1152 
1153     }
1154 
1155     /**
1156      * Check sample data.
1157      *
1158      * @param stat Statistical summary.
1159      * @throws NullArgumentException if {@code data} is {@code null}.
1160      * @throws MathIllegalArgumentException if there is not enough sample data.
1161      */
1162     private void checkSampleData(final StatisticalSummary stat)
1163         throws MathIllegalArgumentException, NullArgumentException {
1164 
1165         MathUtils.checkNotNull(stat);
1166         if (stat.getN() < 2) {
1167             throw new MathIllegalArgumentException(
1168                     LocalizedStatFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1169                     stat.getN(), 2, true);
1170         }
1171 
1172     }
1173 
1174 }