View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.distribution.discrete;
24  
25  import org.hipparchus.exception.LocalizedCoreFormats;
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.util.FastMath;
28  
29  /**
30   * Implementation of the hypergeometric distribution.
31   *
32   * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution (Wikipedia)</a>
33   * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric distribution (MathWorld)</a>
34   */
35  public class HypergeometricDistribution extends AbstractIntegerDistribution {
36      /** Serializable version identifier. */
37      private static final long serialVersionUID = 20160320L;
38      /** The number of successes in the population. */
39      private final int numberOfSuccesses;
40      /** The population size. */
41      private final int populationSize;
42      /** The sample size. */
43      private final int sampleSize;
44      /** Cached numerical variance */
45      private final double numericalVariance;
46  
47      /**
48       * Construct a new hypergeometric distribution with the specified population
49       * size, number of successes in the population, and sample size.
50       *
51       * @param populationSize Population size.
52       * @param numberOfSuccesses Number of successes in the population.
53       * @param sampleSize Sample size.
54       * @throws MathIllegalArgumentException if {@code numberOfSuccesses < 0}.
55       * @throws MathIllegalArgumentException if {@code populationSize <= 0}.
56       * @throws MathIllegalArgumentException if {@code numberOfSuccesses > populationSize},
57       * or {@code sampleSize > populationSize}.
58       */
59      public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize)
60          throws MathIllegalArgumentException {
61          if (populationSize <= 0) {
62              throw new MathIllegalArgumentException(LocalizedCoreFormats.POPULATION_SIZE,
63                                                     populationSize);
64          }
65          if (numberOfSuccesses < 0) {
66              throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SUCCESSES,
67                                                     numberOfSuccesses);
68          }
69          if (sampleSize < 0) {
70              throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SAMPLES,
71                                                     sampleSize);
72          }
73  
74          if (numberOfSuccesses > populationSize) {
75              throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
76                                                     numberOfSuccesses, populationSize, true);
77          }
78          if (sampleSize > populationSize) {
79              throw new MathIllegalArgumentException(LocalizedCoreFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
80                                                     sampleSize, populationSize, true);
81          }
82  
83          this.numberOfSuccesses = numberOfSuccesses;
84          this.populationSize = populationSize;
85          this.sampleSize = sampleSize;
86          this.numericalVariance = calculateNumericalVariance();
87      }
88  
89      /** {@inheritDoc} */
90      @Override
91      public double cumulativeProbability(int x) {
92          double ret;
93  
94          int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
95          if (x < domain[0]) {
96              ret = 0.0;
97          } else if (x >= domain[1]) {
98              ret = 1.0;
99          } else {
100             ret = innerCumulativeProbability(domain[0], x, 1);
101         }
102 
103         return ret;
104     }
105 
106     /**
107      * Return the domain for the given hypergeometric distribution parameters.
108      *
109      * @param n Population size.
110      * @param m Number of successes in the population.
111      * @param k Sample size.
112      * @return a two element array containing the lower and upper bounds of the
113      * hypergeometric distribution.
114      */
115     private int[] getDomain(int n, int m, int k) {
116         return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
117     }
118 
119     /**
120      * Return the lowest domain value for the given hypergeometric distribution
121      * parameters.
122      *
123      * @param n Population size.
124      * @param m Number of successes in the population.
125      * @param k Sample size.
126      * @return the lowest domain value of the hypergeometric distribution.
127      */
128     private int getLowerDomain(int n, int m, int k) {
129         return FastMath.max(0, m - (n - k));
130     }
131 
132     /**
133      * Access the number of successes.
134      *
135      * @return the number of successes.
136      */
137     public int getNumberOfSuccesses() {
138         return numberOfSuccesses;
139     }
140 
141     /**
142      * Access the population size.
143      *
144      * @return the population size.
145      */
146     public int getPopulationSize() {
147         return populationSize;
148     }
149 
150     /**
151      * Access the sample size.
152      *
153      * @return the sample size.
154      */
155     public int getSampleSize() {
156         return sampleSize;
157     }
158 
159     /**
160      * Return the highest domain value for the given hypergeometric distribution
161      * parameters.
162      *
163      * @param m Number of successes in the population.
164      * @param k Sample size.
165      * @return the highest domain value of the hypergeometric distribution.
166      */
167     private int getUpperDomain(int m, int k) {
168         return FastMath.min(k, m);
169     }
170 
171     /** {@inheritDoc} */
172     @Override
173     public double probability(int x) {
174         final double logProbability = logProbability(x);
175         return logProbability == Double.NEGATIVE_INFINITY ? 0 : FastMath.exp(logProbability);
176     }
177 
178     /** {@inheritDoc} */
179     @Override
180     public double logProbability(int x) {
181         double ret;
182 
183         int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
184         if (x < domain[0] || x > domain[1]) {
185             ret = Double.NEGATIVE_INFINITY;
186         } else {
187             double p = (double) sampleSize / (double) populationSize;
188             double q = (double) (populationSize - sampleSize) / (double) populationSize;
189             double p1 = SaddlePointExpansion.logBinomialProbability(x,
190                     numberOfSuccesses, p, q);
191             double p2 =
192                     SaddlePointExpansion.logBinomialProbability(sampleSize - x,
193                             populationSize - numberOfSuccesses, p, q);
194             double p3 =
195                     SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
196             ret = p1 + p2 - p3;
197         }
198 
199         return ret;
200     }
201 
202     /**
203      * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
204      *
205      * @param x Value at which the CDF is evaluated.
206      * @return the upper tail CDF for this distribution.
207      */
208     public double upperCumulativeProbability(int x) {
209         double ret;
210 
211         final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
212         if (x <= domain[0]) {
213             ret = 1.0;
214         } else if (x > domain[1]) {
215             ret = 0.0;
216         } else {
217             ret = innerCumulativeProbability(domain[1], x, -1);
218         }
219 
220         return ret;
221     }
222 
223     /**
224      * For this distribution, {@code X}, this method returns
225      * {@code P(x0 <= X <= x1)}.
226      * This probability is computed by summing the point probabilities for the
227      * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
228      * {@code dx}.
229      *
230      * @param x0 Inclusive lower bound.
231      * @param x1 Inclusive upper bound.
232      * @param dx Direction of summation (1 indicates summing from x0 to x1, and
233      * 0 indicates summing from x1 to x0).
234      * @return {@code P(x0 <= X <= x1)}.
235      */
236     private double innerCumulativeProbability(int x0, int x1, int dx) {
237         double ret = probability(x0);
238         while (x0 != x1) {
239             x0 += dx;
240             ret += probability(x0);
241         }
242         return ret;
243     }
244 
245     /**
246      * {@inheritDoc}
247      *
248      * For population size {@code N}, number of successes {@code m}, and sample
249      * size {@code n}, the mean is {@code n * m / N}.
250      */
251     @Override
252     public double getNumericalMean() {
253         return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize());
254     }
255 
256     /**
257      * {@inheritDoc}
258      *
259      * For population size {@code N}, number of successes {@code m}, and sample
260      * size {@code n}, the variance is
261      * {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}.
262      */
263     @Override
264     public double getNumericalVariance() {
265         return numericalVariance;
266     }
267 
268     /**
269      * Calculate the numerical variance.
270      *
271      * @return the variance of this distribution
272      */
273     private double calculateNumericalVariance() {
274         final double N = getPopulationSize();
275         final double m = getNumberOfSuccesses();
276         final double n = getSampleSize();
277         return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
278     }
279 
280     /**
281      * {@inheritDoc}
282      *
283      * For population size {@code N}, number of successes {@code m}, and sample
284      * size {@code n}, the lower bound of the support is
285      * {@code max(0, n + m - N)}.
286      *
287      * @return lower bound of the support
288      */
289     @Override
290     public int getSupportLowerBound() {
291         return FastMath.max(0,
292                             getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
293     }
294 
295     /**
296      * {@inheritDoc}
297      *
298      * For number of successes {@code m} and sample size {@code n}, the upper
299      * bound of the support is {@code min(m, n)}.
300      *
301      * @return upper bound of the support
302      */
303     @Override
304     public int getSupportUpperBound() {
305         return FastMath.min(getNumberOfSuccesses(), getSampleSize());
306     }
307 
308     /**
309      * {@inheritDoc}
310      *
311      * The support of this distribution is connected.
312      *
313      * @return {@code true}
314      */
315     @Override
316     public boolean isSupportConnected() {
317         return true;
318     }
319 }