ChiSquareTest.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.inference;

  22. import org.hipparchus.distribution.continuous.ChiSquaredDistribution;
  23. import org.hipparchus.exception.LocalizedCoreFormats;
  24. import org.hipparchus.exception.MathIllegalArgumentException;
  25. import org.hipparchus.exception.MathIllegalStateException;
  26. import org.hipparchus.exception.NullArgumentException;
  27. import org.hipparchus.stat.LocalizedStatFormats;
  28. import org.hipparchus.util.FastMath;
  29. import org.hipparchus.util.MathArrays;
  30. import org.hipparchus.util.MathUtils;

  31. /**
  32.  * Implements Chi-Square test statistics.
  33.  * <p>
  34.  * This implementation handles both known and unknown distributions.
  35.  * <p>
  36.  * Two samples tests can be used when the distribution is unknown <i>a priori</i>
  37.  * but provided by one sample, or when the hypothesis under test is that the two
  38.  * samples come from the same underlying distribution.
  39.  */
  40. public class ChiSquareTest { // NOPMD - this is not a Junit test class, PMD false positive here

  41.     /** Empty constructor.
  42.      * <p>
  43.      * This constructor is not strictly necessary, but it prevents spurious
  44.      * javadoc warnings with JDK 18 and later.
  45.      * </p>
  46.      * @since 3.0
  47.      */
  48.     public ChiSquareTest() { // NOPMD - unnecessary constructor added intentionally to make javadoc happy
  49.         // nothing to do
  50.     }

  51.     /**
  52.      * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
  53.      * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
  54.      * frequency counts.
  55.      * <p>
  56.      * This statistic can be used to perform a Chi-Square test evaluating the null
  57.      * hypothesis that the observed counts follow the expected distribution.
  58.      * <p>
  59.      * <strong>Preconditions</strong>:
  60.      * <ul>
  61.      * <li>Expected counts must all be positive.</li>
  62.      * <li>Observed counts must all be &ge; 0.</li>
  63.      * <li>The observed and expected arrays must have the same length and
  64.      * their common length must be at least 2.</li>
  65.      * </ul>
  66.      * <p>
  67.      * If any of the preconditions are not met, an
  68.      * <code>IllegalArgumentException</code> is thrown.
  69.      * <p>
  70.      * <strong>Note: </strong>This implementation rescales the
  71.      * <code>expected</code> array if necessary to ensure that the sum of the
  72.      * expected and observed counts are equal.
  73.      *
  74.      * @param observed array of observed frequency counts
  75.      * @param expected array of expected frequency counts
  76.      * @return chiSquare test statistic
  77.      * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
  78.      * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
  79.      * not strictly positive
  80.      * @throws MathIllegalArgumentException if the arrays length is less than 2
  81.      */
  82.     public double chiSquare(final double[] expected, final long[] observed)
  83.         throws MathIllegalArgumentException {

  84.         if (expected.length < 2) {
  85.             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
  86.                                                    expected.length, 2);
  87.         }
  88.         MathUtils.checkDimension(expected.length, observed.length);
  89.         MathArrays.checkPositive(expected);
  90.         MathArrays.checkNonNegative(observed);

  91.         double sumExpected = 0d;
  92.         double sumObserved = 0d;
  93.         for (int i = 0; i < observed.length; i++) {
  94.             sumExpected += expected[i];
  95.             sumObserved += observed[i];
  96.         }
  97.         double ratio = 1.0d;
  98.         boolean rescale = false;
  99.         if (FastMath.abs(sumExpected - sumObserved) > 10E-6) {
  100.             ratio = sumObserved / sumExpected;
  101.             rescale = true;
  102.         }
  103.         double sumSq = 0.0d;
  104.         for (int i = 0; i < observed.length; i++) {
  105.             if (rescale) {
  106.                 final double dev = observed[i] - ratio * expected[i];
  107.                 sumSq += dev * dev / (ratio * expected[i]);
  108.             } else {
  109.                 final double dev = observed[i] - expected[i];
  110.                 sumSq += dev * dev / expected[i];
  111.             }
  112.         }
  113.         return sumSq;
  114.     }

  115.     /**
  116.      * Returns the <i>observed significance level</i>, or <a href=
  117.      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
  118.      * p-value</a>, associated with a
  119.      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
  120.      * Chi-square goodness of fit test</a> comparing the <code>observed</code>
  121.      * frequency counts to those in the <code>expected</code> array.
  122.      * <p>
  123.      * The number returned is the smallest significance level at which one can reject
  124.      * the null hypothesis that the observed counts conform to the frequency distribution
  125.      * described by the expected counts.
  126.      * <p>
  127.      * <strong>Preconditions</strong>:
  128.      * <ul>
  129.      * <li>Expected counts must all be positive.</li>
  130.      * <li>Observed counts must all be &ge; 0.</li>
  131.      * <li>The observed and expected arrays must have the same length and
  132.      * their common length must be at least 2.</li>
  133.      * </ul>
  134.      * <p>
  135.      * If any of the preconditions are not met, an
  136.      * <code>IllegalArgumentException</code> is thrown.
  137.      * <p>
  138.      * <strong>Note: </strong>This implementation rescales the
  139.      * <code>expected</code> array if necessary to ensure that the sum of the
  140.      * expected and observed counts are equal.
  141.      *
  142.      * @param observed array of observed frequency counts
  143.      * @param expected array of expected frequency counts
  144.      * @return p-value
  145.      * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
  146.      * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
  147.      * not strictly positive
  148.      * @throws MathIllegalArgumentException if the arrays length is less than 2
  149.      * @throws MathIllegalStateException if an error occurs computing the p-value
  150.      */
  151.     public double chiSquareTest(final double[] expected, final long[] observed)
  152.         throws MathIllegalArgumentException, MathIllegalStateException {

  153.         final ChiSquaredDistribution distribution = new ChiSquaredDistribution(expected.length - 1.0);
  154.         return 1.0 - distribution.cumulativeProbability(chiSquare(expected, observed));
  155.     }

  156.     /**
  157.      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
  158.      * Chi-square goodness of fit test</a> evaluating the null hypothesis that the
  159.      * observed counts conform to the frequency distribution described by the expected
  160.      * counts, with significance level <code>alpha</code>.  Returns true iff the null
  161.      * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
  162.      * <p>
  163.      * <strong>Example:</strong><br>
  164.      * To test the hypothesis that <code>observed</code> follows
  165.      * <code>expected</code> at the 99% level, use
  166.      * <code>chiSquareTest(expected, observed, 0.01)</code>
  167.      * <p>
  168.      * <strong>Preconditions</strong>:
  169.      * <ul>
  170.      * <li>Expected counts must all be positive.</li>
  171.      * <li>Observed counts must all be &ge; 0.</li>
  172.      * <li>The observed and expected arrays must have the same length and
  173.      * their common length must be at least 2.</li>
  174.      * <li><code> 0 &lt; alpha &lt; 0.5</code></li>
  175.      * </ul>
  176.      * <p>
  177.      * If any of the preconditions are not met, an
  178.      * <code>IllegalArgumentException</code> is thrown.
  179.      * <p>
  180.      * <strong>Note: </strong>This implementation rescales the
  181.      * <code>expected</code> array if necessary to ensure that the sum of the
  182.      * expected and observed counts are equal.
  183.      *
  184.      * @param observed array of observed frequency counts
  185.      * @param expected array of expected frequency counts
  186.      * @param alpha significance level of the test
  187.      * @return true iff null hypothesis can be rejected with confidence
  188.      * 1 - alpha
  189.      * @throws MathIllegalArgumentException if <code>observed</code> has negative entries
  190.      * @throws MathIllegalArgumentException if <code>expected</code> has entries that are
  191.      * not strictly positive
  192.      * @throws MathIllegalArgumentException if the arrays length is less than 2
  193.      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
  194.      * @throws MathIllegalStateException if an error occurs computing the p-value
  195.      */
  196.     public boolean chiSquareTest(final double[] expected, final long[] observed,
  197.                                  final double alpha)
  198.         throws MathIllegalArgumentException, MathIllegalStateException {

  199.         if ((alpha <= 0) || (alpha > 0.5)) {
  200.             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
  201.                                           alpha, 0, 0.5);
  202.         }
  203.         return chiSquareTest(expected, observed) < alpha;

  204.     }

  205.     /**
  206.      * Computes the Chi-Square statistic associated with a
  207.      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
  208.      * chi-square test of independence</a> based on the input <code>counts</code>
  209.      * array, viewed as a two-way table.
  210.      * <p>
  211.      * The rows of the 2-way table are
  212.      * <code>count[0], ... , count[count.length - 1] </code>
  213.      * <p>
  214.      * <strong>Preconditions</strong>:
  215.      * <ul>
  216.      * <li>All counts must be &ge; 0.</li>
  217.      * <li>The count array must be rectangular (i.e. all count[i] subarrays
  218.      * must have the same length).</li>
  219.      * <li>The 2-way table represented by <code>counts</code> must have at
  220.      * least 2 columns and at least 2 rows.</li>
  221.      * </ul>
  222.      * <p>
  223.      * If any of the preconditions are not met, an
  224.      * <code>IllegalArgumentException</code> is thrown.
  225.      *
  226.      * @param counts array representation of 2-way table
  227.      * @return chiSquare test statistic
  228.      * @throws NullArgumentException if the array is null
  229.      * @throws MathIllegalArgumentException if the array is not rectangular
  230.      * @throws MathIllegalArgumentException if {@code counts} has negative entries
  231.      */
  232.     public double chiSquare(final long[][] counts)
  233.         throws MathIllegalArgumentException, NullArgumentException {

  234.         checkArray(counts);
  235.         int nRows = counts.length;
  236.         int nCols = counts[0].length;

  237.         // compute row, column and total sums
  238.         double[] rowSum = new double[nRows];
  239.         double[] colSum = new double[nCols];
  240.         double total = 0.0d;
  241.         for (int row = 0; row < nRows; row++) {
  242.             for (int col = 0; col < nCols; col++) {
  243.                 rowSum[row] += counts[row][col];
  244.                 colSum[col] += counts[row][col];
  245.                 total += counts[row][col];
  246.             }
  247.         }

  248.         // compute expected counts and chi-square
  249.         double sumSq = 0.0d;
  250.         for (int row = 0; row < nRows; row++) {
  251.             for (int col = 0; col < nCols; col++) {
  252.                 final double expected = (rowSum[row] * colSum[col]) / total;
  253.                 sumSq += ((counts[row][col] - expected) *
  254.                         (counts[row][col] - expected)) / expected;
  255.             }
  256.         }
  257.         return sumSq;
  258.     }

  259.     /**
  260.      * Returns the <i>observed significance level</i>, or <a href=
  261.      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
  262.      * p-value</a>, associated with a
  263.      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
  264.      * chi-square test of independence</a> based on the input <code>counts</code>
  265.      * array, viewed as a two-way table.
  266.      * <p>
  267.      * The rows of the 2-way table are
  268.      * <code>count[0], ... , count[count.length - 1] </code>
  269.      * <p>
  270.      * <strong>Preconditions</strong>:
  271.      * <ul>
  272.      * <li>All counts must be &ge; 0.</li>
  273.      * <li>The count array must be rectangular (i.e. all count[i] subarrays must have
  274.      * the same length).</li>
  275.      * <li>The 2-way table represented by <code>counts</code> must have at least 2
  276.      * columns and at least 2 rows.</li>
  277.      * </ul>
  278.      * <p>
  279.      * If any of the preconditions are not met, an
  280.      * <code>IllegalArgumentException</code> is thrown.
  281.      *
  282.      * @param counts array representation of 2-way table
  283.      * @return p-value
  284.      * @throws NullArgumentException if the array is null
  285.      * @throws MathIllegalArgumentException if the array is not rectangular
  286.      * @throws MathIllegalArgumentException if {@code counts} has negative entries
  287.      * @throws MathIllegalStateException if an error occurs computing the p-value
  288.      */
  289.     public double chiSquareTest(final long[][] counts)
  290.         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {

  291.         checkArray(counts);
  292.         double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
  293.         final ChiSquaredDistribution distribution = new ChiSquaredDistribution(df);
  294.         return 1 - distribution.cumulativeProbability(chiSquare(counts));
  295.     }

  296.     /**
  297.      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
  298.      * chi-square test of independence</a> evaluating the null hypothesis that the
  299.      * classifications represented by the counts in the columns of the input 2-way table
  300.      * are independent of the rows, with significance level <code>alpha</code>.
  301.      * Returns true iff the null hypothesis can be rejected with 100 * (1 - alpha) percent
  302.      * confidence.
  303.      * <p>
  304.      * The rows of the 2-way table are
  305.      * <code>count[0], ... , count[count.length - 1] </code>
  306.      * <p>
  307.      * <strong>Example:</strong><br>
  308.      * To test the null hypothesis that the counts in
  309.      * <code>count[0], ... , count[count.length - 1] </code>
  310.      * all correspond to the same underlying probability distribution at the 99% level,
  311.      * use <code>chiSquareTest(counts, 0.01)</code>.
  312.      * <p>
  313.      * <strong>Preconditions</strong>:
  314.      * <ul>
  315.      * <li>All counts must be &ge; 0.</li>
  316.      * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the
  317.      * same length).</li>
  318.      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
  319.      * at least 2 rows.</li>
  320.      * </ul>
  321.      * <p>
  322.      * If any of the preconditions are not met, an
  323.      * <code>IllegalArgumentException</code> is thrown.
  324.      *
  325.      * @param counts array representation of 2-way table
  326.      * @param alpha significance level of the test
  327.      * @return true iff null hypothesis can be rejected with confidence
  328.      * 1 - alpha
  329.      * @throws NullArgumentException if the array is null
  330.      * @throws MathIllegalArgumentException if the array is not rectangular
  331.      * @throws MathIllegalArgumentException if {@code counts} has any negative entries
  332.      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
  333.      * @throws MathIllegalStateException if an error occurs computing the p-value
  334.      */
  335.     public boolean chiSquareTest(final long[][] counts, final double alpha)
  336.         throws MathIllegalArgumentException, NullArgumentException, MathIllegalStateException {

  337.         if ((alpha <= 0) || (alpha > 0.5)) {
  338.             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
  339.                                           alpha, 0, 0.5);
  340.         }
  341.         return chiSquareTest(counts) < alpha;
  342.     }

  343.     /**
  344.      * Computes a
  345.      * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
  346.      * Chi-Square two sample test statistic</a> comparing bin frequency counts
  347.      * in <code>observed1</code> and <code>observed2</code>.
  348.      * <p>
  349.      * The sums of frequency counts in the two samples are not required to be the
  350.      * same. The formula used to compute the test statistic is
  351.      * </p>
  352.      * <code>
  353.      * &sum;[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
  354.      * </code>
  355.      * <p>
  356.      * where
  357.      * </p>
  358.      * <code>K = √[&sum;(observed2 / &sum;(observed1)]</code>
  359.      * <p>
  360.      * This statistic can be used to perform a Chi-Square test evaluating the
  361.      * null hypothesis that both observed counts follow the same distribution.
  362.      * </p>
  363.      * <p><strong>Preconditions</strong>:</p>
  364.      * <ul>
  365.      * <li>Observed counts must be non-negative.</li>
  366.      * <li>Observed counts for a specific bin must not both be zero.</li>
  367.      * <li>Observed counts for a specific sample must not all be 0.</li>
  368.      * <li>The arrays <code>observed1</code> and <code>observed2</code> must have
  369.      * the same length and their common length must be at least 2.</li>
  370.      * </ul>
  371.      * <p>
  372.      * If any of the preconditions are not met, an
  373.      * <code>IllegalArgumentException</code> is thrown.
  374.      * </p>
  375.      *
  376.      * @param observed1 array of observed frequency counts of the first data set
  377.      * @param observed2 array of observed frequency counts of the second data set
  378.      * @return chiSquare test statistic
  379.      * @throws MathIllegalArgumentException the the length of the arrays does not match
  380.      * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
  381.      * <code>observed2</code> are negative
  382.      * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
  383.      * <code>observed2</code> are zero, or if the count at some index is zero
  384.      * for both arrays
  385.      */
  386.     public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
  387.         throws MathIllegalArgumentException {

  388.         // Make sure lengths are same
  389.         if (observed1.length < 2) {
  390.             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
  391.                                                    observed1.length, 2);
  392.         }
  393.         MathUtils.checkDimension(observed1.length, observed2.length);

  394.         // Ensure non-negative counts
  395.         MathArrays.checkNonNegative(observed1);
  396.         MathArrays.checkNonNegative(observed2);

  397.         // Compute and compare count sums
  398.         long countSum1 = 0;
  399.         long countSum2 = 0;
  400.         for (int i = 0; i < observed1.length; i++) {
  401.             countSum1 += observed1[i];
  402.             countSum2 += observed2[i];
  403.         }
  404.         // Ensure neither sample is uniformly 0
  405.         if (countSum1 == 0 || countSum2 == 0) {
  406.             throw new MathIllegalArgumentException(LocalizedCoreFormats.ZERO_NOT_ALLOWED);
  407.         }
  408.         // Compare and compute weight only if different
  409.         double weight = 0.0;
  410.         boolean unequalCounts = countSum1 != countSum2;
  411.         if (unequalCounts) {
  412.             weight = FastMath.sqrt((double) countSum1 / (double) countSum2);
  413.         }
  414.         // Compute ChiSquare statistic
  415.         double sumSq = 0.0d;
  416.         for (int i = 0; i < observed1.length; i++) {
  417.             if (observed1[i] == 0 && observed2[i] == 0) {
  418.                 throw new MathIllegalArgumentException(LocalizedCoreFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
  419.             } else {
  420.                 final double obs1 = observed1[i];
  421.                 final double obs2 = observed2[i];
  422.                 final double dev;
  423.                 if (unequalCounts) { // apply weights
  424.                     dev = obs1/weight - obs2 * weight;
  425.                 } else {
  426.                     dev = obs1 - obs2;
  427.                 }
  428.                 sumSq += (dev * dev) / (obs1 + obs2);
  429.             }
  430.         }
  431.         return sumSq;
  432.     }

  433.     /**
  434.      * Returns the <i>observed significance level</i>, or <a href=
  435.      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
  436.      * p-value</a>, associated with a Chi-Square two sample test comparing
  437.      * bin frequency counts in <code>observed1</code> and
  438.      * <code>observed2</code>.
  439.      * <p>
  440.      * The number returned is the smallest significance level at which one
  441.      * can reject the null hypothesis that the observed counts conform to the
  442.      * same distribution.
  443.      * <p>
  444.      * See {@link #chiSquareDataSetsComparison(long[], long[])} for details
  445.      * on the formula used to compute the test statistic. The degrees of
  446.      * of freedom used to perform the test is one less than the common length
  447.      * of the input observed count arrays.
  448.      * <p>
  449.      * <strong>Preconditions</strong>:
  450.      * <ul>
  451.      * <li>Observed counts must be non-negative.</li>
  452.      * <li>Observed counts for a specific bin must not both be zero.</li>
  453.      * <li>Observed counts for a specific sample must not all be 0.</li>
  454.      * <li>The arrays <code>observed1</code> and <code>observed2</code> must
  455.      * have the same length and their common length must be at least 2.</li>
  456.      * </ul>
  457.      * <p>
  458.      * If any of the preconditions are not met, an
  459.      * <code>IllegalArgumentException</code> is thrown.
  460.      *
  461.      * @param observed1 array of observed frequency counts of the first data set
  462.      * @param observed2 array of observed frequency counts of the second data set
  463.      * @return p-value
  464.      * @throws MathIllegalArgumentException the the length of the arrays does not match
  465.      * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
  466.      * <code>observed2</code> are negative
  467.      * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
  468.      * <code>observed2</code> are zero, or if the count at the same index is zero
  469.      * for both arrays
  470.      * @throws MathIllegalStateException if an error occurs computing the p-value
  471.      */
  472.     public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
  473.         throws MathIllegalArgumentException,
  474.         MathIllegalStateException {

  475.         final ChiSquaredDistribution distribution =
  476.                 new ChiSquaredDistribution((double) observed1.length - 1);
  477.         return 1 - distribution.cumulativeProbability(
  478.                 chiSquareDataSetsComparison(observed1, observed2));
  479.     }

  480.     /**
  481.      * Performs a Chi-Square two sample test comparing two binned data
  482.      * sets. The test evaluates the null hypothesis that the two lists of
  483.      * observed counts conform to the same frequency distribution, with
  484.      * significance level <code>alpha</code>.  Returns true iff the null
  485.      * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
  486.      * <p>
  487.      * See {@link #chiSquareDataSetsComparison(long[], long[])} for
  488.      * details on the formula used to compute the Chisquare statistic used
  489.      * in the test. The degrees of of freedom used to perform the test is
  490.      * one less than the common length of the input observed count arrays.
  491.      * <p>
  492.      * <strong>Preconditions</strong>:
  493.      * <ul>
  494.      * <li>Observed counts must be non-negative.</li>
  495.      * <li>Observed counts for a specific bin must not both be zero.</li>
  496.      * <li>Observed counts for a specific sample must not all be 0.</li>
  497.      * <li>The arrays <code>observed1</code> and <code>observed2</code> must
  498.      * have the same length and their common length must be at least 2.</li>
  499.      * <li><code> 0 &lt; alpha &lt; 0.5</code></li>
  500.      * </ul>
  501.      * <p>
  502.      * If any of the preconditions are not met, an
  503.      * <code>IllegalArgumentException</code> is thrown.
  504.      *
  505.      * @param observed1 array of observed frequency counts of the first data set
  506.      * @param observed2 array of observed frequency counts of the second data set
  507.      * @param alpha significance level of the test
  508.      * @return true iff null hypothesis can be rejected with confidence
  509.      * 1 - alpha
  510.      * @throws MathIllegalArgumentException the the length of the arrays does not match
  511.      * @throws MathIllegalArgumentException if any entries in <code>observed1</code> or
  512.      * <code>observed2</code> are negative
  513.      * @throws MathIllegalArgumentException if either all counts of <code>observed1</code> or
  514.      * <code>observed2</code> are zero, or if the count at the same index is zero
  515.      * for both arrays
  516.      * @throws MathIllegalArgumentException if <code>alpha</code> is not in the range (0, 0.5]
  517.      * @throws MathIllegalStateException if an error occurs performing the test
  518.      */
  519.     public boolean chiSquareTestDataSetsComparison(final long[] observed1,
  520.                                                    final long[] observed2,
  521.                                                    final double alpha)
  522.         throws MathIllegalArgumentException, MathIllegalStateException {

  523.         if (alpha <= 0 ||
  524.             alpha > 0.5) {
  525.             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
  526.                                           alpha, 0, 0.5);
  527.         }
  528.         return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;

  529.     }

  530.     /**
  531.      * Checks to make sure that the input long[][] array is rectangular,
  532.      * has at least 2 rows and 2 columns, and has all non-negative entries.
  533.      *
  534.      * @param in input 2-way table to check
  535.      * @throws NullArgumentException if the array is null
  536.      * @throws MathIllegalArgumentException if the array is not valid
  537.      * @throws MathIllegalArgumentException if the array contains any negative entries
  538.      */
  539.     private void checkArray(final long[][] in)
  540.         throws MathIllegalArgumentException, NullArgumentException {

  541.         if (in.length < 2) {
  542.             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
  543.                                                    in.length, 2);
  544.         }

  545.         if (in[0].length < 2) {
  546.             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
  547.                                                    in[0].length, 2);
  548.         }

  549.         MathArrays.checkRectangular(in);
  550.         MathArrays.checkNonNegative(in);
  551.     }

  552. }