1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22
23 package org.hipparchus.distribution.discrete;
24
25 import org.hipparchus.exception.LocalizedCoreFormats;
26 import org.hipparchus.exception.MathIllegalArgumentException;
27 import org.hipparchus.util.FastMath;
28
29 /**
30 * Implementation of the hypergeometric distribution.
31 *
32 * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution (Wikipedia)</a>
33 * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric distribution (MathWorld)</a>
34 */
35 public class HypergeometricDistribution extends AbstractIntegerDistribution {
36 /** Serializable version identifier. */
37 private static final long serialVersionUID = 20160320L;
38 /** The number of successes in the population. */
39 private final int numberOfSuccesses;
40 /** The population size. */
41 private final int populationSize;
42 /** The sample size. */
43 private final int sampleSize;
44 /** Cached numerical variance */
45 private final double numericalVariance;
46
47 /**
48 * Construct a new hypergeometric distribution with the specified population
49 * size, number of successes in the population, and sample size.
50 *
51 * @param populationSize Population size.
52 * @param numberOfSuccesses Number of successes in the population.
53 * @param sampleSize Sample size.
54 * @throws MathIllegalArgumentException if {@code numberOfSuccesses < 0}.
55 * @throws MathIllegalArgumentException if {@code populationSize <= 0}.
56 * @throws MathIllegalArgumentException if {@code numberOfSuccesses > populationSize},
57 * or {@code sampleSize > populationSize}.
58 */
59 public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize)
60 throws MathIllegalArgumentException {
61 if (populationSize <= 0) {
62 throw new MathIllegalArgumentException(LocalizedCoreFormats.POPULATION_SIZE,
63 populationSize);
64 }
65 if (numberOfSuccesses < 0) {
66 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SUCCESSES,
67 numberOfSuccesses);
68 }
69 if (sampleSize < 0) {
70 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SAMPLES,
71 sampleSize);
72 }
73
74 if (numberOfSuccesses > populationSize) {
75 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
76 numberOfSuccesses, populationSize, true);
77 }
78 if (sampleSize > populationSize) {
79 throw new MathIllegalArgumentException(LocalizedCoreFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
80 sampleSize, populationSize, true);
81 }
82
83 this.numberOfSuccesses = numberOfSuccesses;
84 this.populationSize = populationSize;
85 this.sampleSize = sampleSize;
86 this.numericalVariance = calculateNumericalVariance();
87 }
88
89 /** {@inheritDoc} */
90 @Override
91 public double cumulativeProbability(int x) {
92 double ret;
93
94 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
95 if (x < domain[0]) {
96 ret = 0.0;
97 } else if (x >= domain[1]) {
98 ret = 1.0;
99 } else {
100 ret = innerCumulativeProbability(domain[0], x, 1);
101 }
102
103 return ret;
104 }
105
106 /**
107 * Return the domain for the given hypergeometric distribution parameters.
108 *
109 * @param n Population size.
110 * @param m Number of successes in the population.
111 * @param k Sample size.
112 * @return a two element array containing the lower and upper bounds of the
113 * hypergeometric distribution.
114 */
115 private int[] getDomain(int n, int m, int k) {
116 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
117 }
118
119 /**
120 * Return the lowest domain value for the given hypergeometric distribution
121 * parameters.
122 *
123 * @param n Population size.
124 * @param m Number of successes in the population.
125 * @param k Sample size.
126 * @return the lowest domain value of the hypergeometric distribution.
127 */
128 private int getLowerDomain(int n, int m, int k) {
129 return FastMath.max(0, m - (n - k));
130 }
131
132 /**
133 * Access the number of successes.
134 *
135 * @return the number of successes.
136 */
137 public int getNumberOfSuccesses() {
138 return numberOfSuccesses;
139 }
140
141 /**
142 * Access the population size.
143 *
144 * @return the population size.
145 */
146 public int getPopulationSize() {
147 return populationSize;
148 }
149
150 /**
151 * Access the sample size.
152 *
153 * @return the sample size.
154 */
155 public int getSampleSize() {
156 return sampleSize;
157 }
158
159 /**
160 * Return the highest domain value for the given hypergeometric distribution
161 * parameters.
162 *
163 * @param m Number of successes in the population.
164 * @param k Sample size.
165 * @return the highest domain value of the hypergeometric distribution.
166 */
167 private int getUpperDomain(int m, int k) {
168 return FastMath.min(k, m);
169 }
170
171 /** {@inheritDoc} */
172 @Override
173 public double probability(int x) {
174 final double logProbability = logProbability(x);
175 return logProbability == Double.NEGATIVE_INFINITY ? 0 : FastMath.exp(logProbability);
176 }
177
178 /** {@inheritDoc} */
179 @Override
180 public double logProbability(int x) {
181 double ret;
182
183 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
184 if (x < domain[0] || x > domain[1]) {
185 ret = Double.NEGATIVE_INFINITY;
186 } else {
187 double p = ((double) sampleSize) / populationSize;
188 double q = ((double) (populationSize - sampleSize)) / populationSize;
189 double p1 = SaddlePointExpansion.logBinomialProbability(x, numberOfSuccesses, p, q);
190 double p2 = SaddlePointExpansion.logBinomialProbability(sampleSize - x, populationSize - numberOfSuccesses, p, q);
191 double p3 = SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
192 ret = p1 + p2 - p3;
193 }
194
195 return ret;
196 }
197
198 /**
199 * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
200 *
201 * @param x Value at which the CDF is evaluated.
202 * @return the upper tail CDF for this distribution.
203 */
204 public double upperCumulativeProbability(int x) {
205 double ret;
206
207 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
208 if (x <= domain[0]) {
209 ret = 1.0;
210 } else if (x > domain[1]) {
211 ret = 0.0;
212 } else {
213 ret = innerCumulativeProbability(domain[1], x, -1);
214 }
215
216 return ret;
217 }
218
219 /**
220 * For this distribution, {@code X}, this method returns
221 * {@code P(x0 <= X <= x1)}.
222 * This probability is computed by summing the point probabilities for the
223 * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
224 * {@code dx}.
225 *
226 * @param x0 Inclusive lower bound.
227 * @param x1 Inclusive upper bound.
228 * @param dx Direction of summation (1 indicates summing from x0 to x1, and
229 * 0 indicates summing from x1 to x0).
230 * @return {@code P(x0 <= X <= x1)}.
231 */
232 private double innerCumulativeProbability(int x0, int x1, int dx) {
233 double ret = probability(x0);
234 while (x0 != x1) {
235 x0 += dx;
236 ret += probability(x0);
237 }
238 return ret;
239 }
240
241 /**
242 * {@inheritDoc}
243 *
244 * For population size {@code N}, number of successes {@code m}, and sample
245 * size {@code n}, the mean is {@code n * m / N}.
246 */
247 @Override
248 public double getNumericalMean() {
249 return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize());
250 }
251
252 /**
253 * {@inheritDoc}
254 *
255 * For population size {@code N}, number of successes {@code m}, and sample
256 * size {@code n}, the variance is
257 * {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}.
258 */
259 @Override
260 public double getNumericalVariance() {
261 return numericalVariance;
262 }
263
264 /**
265 * Calculate the numerical variance.
266 *
267 * @return the variance of this distribution
268 */
269 private double calculateNumericalVariance() {
270 final double N = getPopulationSize();
271 final double m = getNumberOfSuccesses();
272 final double n = getSampleSize();
273 return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
274 }
275
276 /**
277 * {@inheritDoc}
278 *
279 * For population size {@code N}, number of successes {@code m}, and sample
280 * size {@code n}, the lower bound of the support is
281 * {@code max(0, n + m - N)}.
282 *
283 * @return lower bound of the support
284 */
285 @Override
286 public int getSupportLowerBound() {
287 return FastMath.max(0,
288 getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
289 }
290
291 /**
292 * {@inheritDoc}
293 *
294 * For number of successes {@code m} and sample size {@code n}, the upper
295 * bound of the support is {@code min(m, n)}.
296 *
297 * @return upper bound of the support
298 */
299 @Override
300 public int getSupportUpperBound() {
301 return FastMath.min(getNumberOfSuccesses(), getSampleSize());
302 }
303
304 /**
305 * {@inheritDoc}
306 *
307 * The support of this distribution is connected.
308 *
309 * @return {@code true}
310 */
311 @Override
312 public boolean isSupportConnected() {
313 return true;
314 }
315 }