BinomialProportion.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.interval;

  22. import org.hipparchus.distribution.continuous.FDistribution;
  23. import org.hipparchus.distribution.continuous.NormalDistribution;
  24. import org.hipparchus.exception.LocalizedCoreFormats;
  25. import org.hipparchus.exception.MathIllegalArgumentException;
  26. import org.hipparchus.stat.LocalizedStatFormats;
  27. import org.hipparchus.util.FastMath;
  28. import org.hipparchus.util.MathUtils;

  29. /**
  30.  * Utility methods to generate confidence intervals for a binomial proportion.
  31.  *
  32.  * @see
  33.  * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">
  34.  * Binomial proportion confidence interval (Wikipedia)</a>
  35.  */
  36. public class BinomialProportion {

  37.     /**
  38.      * The standard normal distribution to calculate the inverse cumulative probability.
  39.      * Accessed and used in a thread-safe way.
  40.      */
  41.     private static final NormalDistribution NORMAL_DISTRIBUTION = new NormalDistribution(0, 1);

  42.     /** Utility class, prevent instantiation. */
  43.     private BinomialProportion() {}

  44.     /**
  45.      * Create an Agresti-Coull binomial confidence interval for the true
  46.      * probability of success of an unknown binomial distribution with
  47.      * the given observed number of trials, probability of success and
  48.      * confidence level.
  49.      * <p>
  50.      * Preconditions:
  51.      * <ul>
  52.      * <li>{@code numberOfTrials} must be positive</li>
  53.      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
  54.      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
  55.      * </ul>
  56.      *
  57.      * @see
  58.      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti-Coull_Interval">
  59.      * Agresti-Coull interval (Wikipedia)</a>
  60.      *
  61.      * @param numberOfTrials number of trials
  62.      * @param probabilityOfSuccess observed probability of success
  63.      * @param confidenceLevel desired probability that the true probability of
  64.      * success falls within the returned interval
  65.      * @return Confidence interval containing the probability of success with
  66.      * probability {@code confidenceLevel}
  67.      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
  68.      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
  69.      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
  70.      */
  71.     public static ConfidenceInterval getAgrestiCoullInterval(int numberOfTrials,
  72.                                                              double probabilityOfSuccess,
  73.                                                              double confidenceLevel)
  74.         throws MathIllegalArgumentException {

  75.         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);

  76.         final int numberOfSuccesses = (int) (numberOfTrials * probabilityOfSuccess);

  77.         final double alpha = (1.0 - confidenceLevel) / 2;
  78.         final double z = NORMAL_DISTRIBUTION.inverseCumulativeProbability(1 - alpha);
  79.         final double zSquared = FastMath.pow(z, 2);
  80.         final double modifiedNumberOfTrials = numberOfTrials + zSquared;
  81.         final double modifiedSuccessesRatio = (1.0 / modifiedNumberOfTrials) *
  82.                                               (numberOfSuccesses + 0.5 * zSquared);
  83.         final double difference = z * FastMath.sqrt(1.0 / modifiedNumberOfTrials *
  84.                                                     modifiedSuccessesRatio *
  85.                                                     (1 - modifiedSuccessesRatio));

  86.         return new ConfidenceInterval(modifiedSuccessesRatio - difference,
  87.                                       modifiedSuccessesRatio + difference,
  88.                                       confidenceLevel);
  89.     }

  90.     /**
  91.      * Create a Clopper-Pearson binomial confidence interval for the true
  92.      * probability of success of an unknown binomial distribution with
  93.      * the given observed number of trials, probability of success and
  94.      * confidence level.
  95.      * <p>
  96.      * Preconditions:
  97.      * <ul>
  98.      * <li>{@code numberOfTrials} must be positive</li>
  99.      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
  100.      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
  101.      * </ul>
  102.      *
  103.      * @see
  104.      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper-Pearson_interval">
  105.      * Clopper-Pearson interval (Wikipedia)</a>
  106.      *
  107.      * @param numberOfTrials number of trials
  108.      * @param probabilityOfSuccess observed probability of success
  109.      * @param confidenceLevel desired probability that the true probability of
  110.      * success falls within the returned interval
  111.      * @return Confidence interval containing the probability of success with
  112.      * probability {@code confidenceLevel}
  113.      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
  114.      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
  115.      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
  116.      */
  117.     public static ConfidenceInterval getClopperPearsonInterval(int numberOfTrials,
  118.                                                                double probabilityOfSuccess,
  119.                                                                double confidenceLevel)
  120.         throws MathIllegalArgumentException {

  121.         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);

  122.         double lowerBound = 0;
  123.         double upperBound = 0;
  124.         final int numberOfSuccesses = (int) (numberOfTrials * probabilityOfSuccess);

  125.         if (numberOfSuccesses > 0) {
  126.             final double alpha = (1.0 - confidenceLevel) / 2.0;

  127.             final FDistribution distributionLowerBound =
  128.                     new FDistribution(2 * (numberOfTrials - numberOfSuccesses + 1),
  129.                                       2 * numberOfSuccesses);

  130.             final double fValueLowerBound =
  131.                     distributionLowerBound.inverseCumulativeProbability(1 - alpha);
  132.             lowerBound = numberOfSuccesses /
  133.                          (numberOfSuccesses + (numberOfTrials - numberOfSuccesses + 1) * fValueLowerBound);

  134.             final FDistribution distributionUpperBound =
  135.                     new FDistribution(2 * (numberOfSuccesses + 1),
  136.                                       2 * (numberOfTrials - numberOfSuccesses));

  137.             final double fValueUpperBound =
  138.                     distributionUpperBound.inverseCumulativeProbability(1 - alpha);
  139.             upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
  140.                          (numberOfTrials - numberOfSuccesses + (numberOfSuccesses + 1) * fValueUpperBound);
  141.         }

  142.         return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
  143.     }

  144.     /**
  145.      * Create a binomial confidence interval using normal approximation
  146.      * for the true probability of success of an unknown binomial distribution
  147.      * with the given observed number of trials, probability of success and
  148.      * confidence level.
  149.      * <p>
  150.      * Preconditions:
  151.      * <ul>
  152.      * <li>{@code numberOfTrials} must be positive</li>
  153.      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
  154.      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
  155.      * </ul>
  156.      *
  157.      * @see
  158.      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Normal_approximation_interval">
  159.      * Normal approximation interval (Wikipedia)</a>
  160.      *
  161.      * @param numberOfTrials number of trials
  162.      * @param probabilityOfSuccess observed probability of success
  163.      * @param confidenceLevel desired probability that the true probability of
  164.      * success falls within the returned interval
  165.      * @return Confidence interval containing the probability of success with
  166.      * probability {@code confidenceLevel}
  167.      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
  168.      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
  169.      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
  170.      */
  171.     public static ConfidenceInterval getNormalApproximationInterval(int numberOfTrials,
  172.                                                                     double probabilityOfSuccess,
  173.                                                                     double confidenceLevel)
  174.         throws MathIllegalArgumentException {

  175.         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);

  176.         final double mean = probabilityOfSuccess;
  177.         final double alpha = (1.0 - confidenceLevel) / 2;

  178.         final double difference = NORMAL_DISTRIBUTION.inverseCumulativeProbability(1 - alpha) *
  179.                                   FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean));
  180.         return new ConfidenceInterval(mean - difference, mean + difference, confidenceLevel);
  181.     }

  182.     /**
  183.      * Create an Wilson score binomial confidence interval for the true
  184.      * probability of success of an unknown binomial distribution with
  185.      * the given observed number of trials, probability of success and
  186.      * confidence level.
  187.      * <p>
  188.      * Preconditions:
  189.      * <ul>
  190.      * <li>{@code numberOfTrials} must be positive</li>
  191.      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
  192.      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
  193.      * </ul>
  194.      *
  195.      * @see
  196.      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval">
  197.      * Wilson score interval (Wikipedia)</a>
  198.      *
  199.      * @param numberOfTrials number of trials
  200.      * @param probabilityOfSuccess observed probability of success
  201.      * @param confidenceLevel desired probability that the true probability of
  202.      * success falls within the returned interval
  203.      * @return Confidence interval containing the probability of success with
  204.      * probability {@code confidenceLevel}
  205.      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
  206.      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
  207.      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
  208.      */
  209.     public static ConfidenceInterval getWilsonScoreInterval(int numberOfTrials,
  210.                                                             double probabilityOfSuccess,
  211.                                                             double confidenceLevel)
  212.         throws MathIllegalArgumentException {

  213.         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);

  214.         final double alpha = (1.0 - confidenceLevel) / 2;
  215.         final double z = NORMAL_DISTRIBUTION.inverseCumulativeProbability(1 - alpha);
  216.         final double zSquared = FastMath.pow(z, 2);
  217.         final double mean = probabilityOfSuccess;

  218.         final double factor = 1.0 / (1 + (1.0 / numberOfTrials) * zSquared);
  219.         final double modifiedSuccessRatio = mean + (1.0 / (2 * numberOfTrials)) * zSquared;
  220.         final double difference =
  221.                 z * FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean) +
  222.                                   (1.0 / (4 * FastMath.pow(numberOfTrials, 2)) * zSquared));

  223.         final double lowerBound = factor * (modifiedSuccessRatio - difference);
  224.         final double upperBound = factor * (modifiedSuccessRatio + difference);
  225.         return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
  226.     }

  227.     /**
  228.      * Verifies that parameters satisfy preconditions.
  229.      *
  230.      * @param numberOfTrials number of trials (must be positive)
  231.      * @param probabilityOfSuccess probability of successes (must be between 0 and 1)
  232.      * @param confidenceLevel confidence level (must be strictly between 0 and 1)
  233.      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
  234.      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess is not in the interval [0, 1]}.
  235.      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1)}.
  236.      */
  237.     private static void checkParameters(int numberOfTrials,
  238.                                         double probabilityOfSuccess,
  239.                                         double confidenceLevel) {
  240.         if (numberOfTrials <= 0) {
  241.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_TRIALS,
  242.                                                    numberOfTrials);
  243.         }
  244.         MathUtils.checkRangeInclusive(probabilityOfSuccess, 0, 1);
  245.         if (confidenceLevel <= 0 || confidenceLevel >= 1) {
  246.             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL,
  247.                                                    confidenceLevel, 0, 1);
  248.         }
  249.     }

  250. }