1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.random;
23
24 import java.io.Serializable;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collection;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.concurrent.ConcurrentHashMap;
31
32 import org.hipparchus.distribution.EnumeratedDistribution;
33 import org.hipparchus.distribution.IntegerDistribution;
34 import org.hipparchus.distribution.RealDistribution;
35 import org.hipparchus.distribution.continuous.BetaDistribution;
36 import org.hipparchus.distribution.continuous.EnumeratedRealDistribution;
37 import org.hipparchus.distribution.continuous.ExponentialDistribution;
38 import org.hipparchus.distribution.continuous.GammaDistribution;
39 import org.hipparchus.distribution.continuous.LogNormalDistribution;
40 import org.hipparchus.distribution.continuous.NormalDistribution;
41 import org.hipparchus.distribution.continuous.UniformRealDistribution;
42 import org.hipparchus.distribution.discrete.EnumeratedIntegerDistribution;
43 import org.hipparchus.distribution.discrete.PoissonDistribution;
44 import org.hipparchus.distribution.discrete.UniformIntegerDistribution;
45 import org.hipparchus.distribution.discrete.ZipfDistribution;
46 import org.hipparchus.exception.LocalizedCoreFormats;
47 import org.hipparchus.exception.MathIllegalArgumentException;
48 import org.hipparchus.util.CombinatoricsUtils;
49 import org.hipparchus.util.FastMath;
50 import org.hipparchus.util.MathArrays;
51 import org.hipparchus.util.MathUtils;
52 import org.hipparchus.util.Pair;
53 import org.hipparchus.util.Precision;
54 import org.hipparchus.util.ResizableDoubleArray;
55
56 /**
57 * A class for generating random data.
58 */
59 public class RandomDataGenerator extends ForwardingRandomGenerator
60 implements RandomGenerator, Serializable { // NOPMD - this class has a high number of methods, it is normal
61
62 /** Serializable version identifier. */
63 private static final long serialVersionUID = 20160529L;
64
65 /**
66 * Used when generating Exponential samples.
67 * Table containing the constants
68 * q_i = sum_{j=1}^i (ln 2)^j/j! = ln 2 + (ln 2)^2/2 + ... + (ln 2)^i/i!
69 * until the largest representable fraction below 1 is exceeded.
70 *
71 * Note that
72 * 1 = 2 - 1 = exp(ln 2) - 1 = sum_{n=1}^infty (ln 2)^n / n!
73 * thus q_i -> 1 as i -> +inf,
74 * so the higher i, the closer to one we get (the series is not alternating).
75 *
76 * By trying, n = 16 in Java is enough to reach 1.0.
77 */
78 private static final double[] EXPONENTIAL_SA_QI;
79
80 /** Map of <classname, switch constant> for continuous distributions */
81 private static final Map<Class<? extends RealDistribution>, RealDistributionSampler> CONTINUOUS_SAMPLERS = new ConcurrentHashMap<>();
82 /** Map of <classname, switch constant> for discrete distributions */
83 private static final Map<Class<? extends IntegerDistribution>, IntegerDistributionSampler> DISCRETE_SAMPLERS = new ConcurrentHashMap<>();
84
85 /** The default sampler for continuous distributions using the inversion technique. */
86 private static final RealDistributionSampler DEFAULT_REAL_SAMPLER =
87 (generator, dist) -> dist.inverseCumulativeProbability(generator.nextDouble());
88
89 /** The default sampler for discrete distributions using the inversion technique. */
90 private static final IntegerDistributionSampler DEFAULT_INTEGER_SAMPLER =
91 (generator, dist) -> dist.inverseCumulativeProbability(generator.nextDouble());
92
93 /** Source of random data */
94 private final RandomGenerator randomGenerator;
95
96 /** The sampler to be used for the nextZipF method */
97 private transient ZipfRejectionInversionSampler zipfSampler;
98
99 /**
100 * Interface for samplers of continuous distributions.
101 */
102 @FunctionalInterface
103 private interface RealDistributionSampler {
104 /**
105 * Return the next sample following the given distribution.
106 *
107 * @param generator the random data generator to use
108 * @param distribution the distribution to use
109 * @return the next sample
110 */
111 double nextSample(RandomDataGenerator generator, RealDistribution distribution);
112 }
113
114 /**
115 * Interface for samplers of discrete distributions.
116 */
117 @FunctionalInterface
118 private interface IntegerDistributionSampler {
119 /**
120 * Return the next sample following the given distribution.
121 *
122 * @param generator the random data generator to use
123 * @param distribution the distribution to use
124 * @return the next sample
125 */
126 int nextSample(RandomDataGenerator generator, IntegerDistribution distribution);
127 }
128
129 /**
130 * Initialize tables.
131 */
132 static {
133 /**
134 * Filling EXPONENTIAL_SA_QI table.
135 * Note that we don't want qi = 0 in the table.
136 */
137 final double LN2 = FastMath.log(2);
138 double qi = 0;
139 int i = 1;
140
141 /**
142 * ArithmeticUtils provides factorials up to 20, so let's use that
143 * limit together with Precision.EPSILON to generate the following
144 * code (a priori, we know that there will be 16 elements, but it is
145 * better to not hardcode it).
146 */
147 final ResizableDoubleArray ra = new ResizableDoubleArray(20);
148
149 while (qi < 1) {
150 qi += FastMath.pow(LN2, i) / CombinatoricsUtils.factorial(i);
151 ra.addElement(qi);
152 ++i;
153 }
154
155 EXPONENTIAL_SA_QI = ra.getElements();
156
157 // Continuous samplers
158
159 CONTINUOUS_SAMPLERS.put(BetaDistribution.class,
160 (generator, dist) -> {
161 BetaDistribution beta = (BetaDistribution) dist;
162 return generator.nextBeta(beta.getAlpha(), beta.getBeta());
163 });
164
165 CONTINUOUS_SAMPLERS.put(ExponentialDistribution.class,
166 (generator, dist) -> generator.nextExponential(dist.getNumericalMean()));
167
168 CONTINUOUS_SAMPLERS.put(GammaDistribution.class,
169 (generator, dist) -> {
170 GammaDistribution gamma = (GammaDistribution) dist;
171 return generator.nextGamma(gamma.getShape(), gamma.getScale());
172 });
173
174 CONTINUOUS_SAMPLERS.put(NormalDistribution.class,
175 (generator, dist) -> {
176 NormalDistribution normal = (NormalDistribution) dist;
177 return generator.nextNormal(normal.getMean(),
178 normal.getStandardDeviation());
179 });
180
181 CONTINUOUS_SAMPLERS.put(LogNormalDistribution.class,
182 (generator, dist) -> {
183 LogNormalDistribution logNormal = (LogNormalDistribution) dist;
184 return generator.nextLogNormal(logNormal.getShape(),
185 logNormal.getLocation());
186 });
187
188 CONTINUOUS_SAMPLERS.put(UniformRealDistribution.class,
189 (generator, dist) -> generator.nextUniform(dist.getSupportLowerBound(),
190 dist.getSupportUpperBound()));
191
192 CONTINUOUS_SAMPLERS.put(EnumeratedRealDistribution.class,
193 (generator, dist) -> {
194 final EnumeratedRealDistribution edist =
195 (EnumeratedRealDistribution) dist;
196 EnumeratedDistributionSampler<Double> sampler =
197 generator.new EnumeratedDistributionSampler<>(edist.getPmf());
198 return sampler.sample();
199 });
200
201 // Discrete samplers
202
203 DISCRETE_SAMPLERS.put(PoissonDistribution.class,
204 (generator, dist) -> generator.nextPoisson(dist.getNumericalMean()));
205
206 DISCRETE_SAMPLERS.put(UniformIntegerDistribution.class,
207 (generator, dist) -> generator.nextInt(dist.getSupportLowerBound(),
208 dist.getSupportUpperBound()));
209 DISCRETE_SAMPLERS.put(ZipfDistribution.class,
210 (generator, dist) -> {
211 ZipfDistribution zipfDist = (ZipfDistribution) dist;
212 return generator.nextZipf(zipfDist.getNumberOfElements(),
213 zipfDist.getExponent());
214 });
215
216 DISCRETE_SAMPLERS.put(EnumeratedIntegerDistribution.class,
217 (generator, dist) -> {
218 final EnumeratedIntegerDistribution edist =
219 (EnumeratedIntegerDistribution) dist;
220 EnumeratedDistributionSampler<Integer> sampler =
221 generator.new EnumeratedDistributionSampler<>(edist.getPmf());
222 return sampler.sample();
223 });
224 }
225
226 /**
227 * Construct a RandomDataGenerator with a default RandomGenerator as its source of random data.
228 */
229 public RandomDataGenerator() {
230 this(new Well19937c());
231 }
232
233 /**
234 * Construct a RandomDataGenerator with a default RandomGenerator as its source of random data, initialized
235 * with the given seed value.
236 *
237 * @param seed seed value
238 */
239 public RandomDataGenerator(long seed) {
240 this(new Well19937c(seed));
241 }
242
243 /**
244 * Construct a RandomDataGenerator using the given RandomGenerator as its source of random data.
245 *
246 * @param randomGenerator the underlying PRNG
247 * @throws MathIllegalArgumentException if randomGenerator is null
248 */
249 private RandomDataGenerator(RandomGenerator randomGenerator) {
250 MathUtils.checkNotNull(randomGenerator);
251 this.randomGenerator = randomGenerator;
252 }
253
254 /**
255 * Factory method to create a {@code RandomData} instance using the supplied
256 * {@code RandomGenerator}.
257 *
258 * @param randomGenerator source of random bits
259 * @return a RandomData using the given RandomGenerator to source bits
260 * @throws MathIllegalArgumentException if randomGenerator is null
261 */
262 public static RandomDataGenerator of(RandomGenerator randomGenerator) {
263 return new RandomDataGenerator(randomGenerator);
264 }
265
266 /** {@inheritDoc} */
267 @Override
268 protected RandomGenerator delegate() {
269 return randomGenerator;
270 }
271
272 /**
273 * Returns the next pseudo-random beta-distributed value with the given
274 * shape and scale parameters.
275 *
276 * @param alpha First shape parameter (must be positive).
277 * @param beta Second shape parameter (must be positive).
278 * @return beta-distributed random deviate
279 */
280 public double nextBeta(double alpha, double beta) {
281 return ChengBetaSampler.sample(randomGenerator, alpha, beta);
282 }
283
284 /**
285 * Returns the next pseudo-random, exponentially distributed deviate.
286 *
287 * @param mean mean of the exponential distribution
288 * @return exponentially distributed deviate about the given mean
289 */
290 public double nextExponential(double mean) {
291 if (mean <= 0) {
292 throw new MathIllegalArgumentException(LocalizedCoreFormats.MEAN, mean);
293 }
294 // Step 1:
295 double a = 0;
296 double u = randomGenerator.nextDouble();
297
298 // Step 2 and 3:
299 while (u < 0.5) {
300 a += EXPONENTIAL_SA_QI[0];
301 u *= 2;
302 }
303
304 // Step 4 (now u >= 0.5):
305 u += u - 1;
306
307 // Step 5:
308 if (u <= EXPONENTIAL_SA_QI[0]) {
309 return mean * (a + u);
310 }
311
312 // Step 6:
313 int i = 0; // Should be 1, be we iterate before it in while using 0
314 double u2 = randomGenerator.nextDouble();
315 double umin = u2;
316
317 // Step 7 and 8:
318 do {
319 ++i;
320 u2 = randomGenerator.nextDouble();
321
322 if (u2 < umin) {
323 umin = u2;
324 }
325
326 // Step 8:
327 } while (u > EXPONENTIAL_SA_QI[i]); // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1
328
329 return mean * (a + umin * EXPONENTIAL_SA_QI[0]);
330 }
331
332 /**
333 * Returns the next pseudo-random gamma-distributed value with the given shape and scale parameters.
334 *
335 * @param shape shape parameter of the distribution
336 * @param scale scale parameter of the distribution
337 * @return gamma-distributed random deviate
338 */
339 public double nextGamma(double shape, double scale) {
340 if (shape < 1) {
341 // [1]: p. 228, Algorithm GS
342
343 while (true) {
344 // Step 1:
345 final double u = randomGenerator.nextDouble();
346 final double bGS = 1 + shape / FastMath.E;
347 final double p = bGS * u;
348
349 if (p <= 1) {
350 // Step 2:
351
352 final double x = FastMath.pow(p, 1 / shape);
353 final double u2 = randomGenerator.nextDouble();
354
355 if (u2 > FastMath.exp(-x)) {
356 // Reject
357 continue;
358 } else {
359 return scale * x;
360 }
361 } else {
362 // Step 3:
363
364 final double x = -1 * FastMath.log((bGS - p) / shape);
365 final double u2 = randomGenerator.nextDouble();
366
367 if (u2 > FastMath.pow(x, shape - 1)) {
368 // Reject
369 continue;
370 } else {
371 return scale * x;
372 }
373 }
374 }
375 }
376
377 // Now shape >= 1
378
379 final double d = shape - 0.333333333333333333;
380 final double c = 1 / (3 * FastMath.sqrt(d));
381
382 while (true) {
383 final double x = randomGenerator.nextGaussian();
384 final double v = (1 + c * x) * (1 + c * x) * (1 + c * x);
385
386 if (v <= 0) {
387 continue;
388 }
389
390 final double x2 = x * x;
391 final double u = randomGenerator.nextDouble();
392
393 // Squeeze
394 if (u < 1 - 0.0331 * x2 * x2) {
395 return scale * d * v;
396 }
397
398 if (FastMath.log(u) < 0.5 * x2 + d * (1 - v + FastMath.log(v))) {
399 return scale * d * v;
400 }
401 }
402 }
403
404 /**
405 * Returns the next normally-distributed pseudo-random deviate.
406 *
407 * @param mean mean of the normal distribution
408 * @param standardDeviation standard deviation of the normal distribution
409 * @return a random value, normally distributed with the given mean and standard deviation
410 */
411 public double nextNormal(double mean, double standardDeviation) {
412 if (standardDeviation <= 0) {
413 throw new MathIllegalArgumentException (LocalizedCoreFormats.NUMBER_TOO_SMALL, standardDeviation, 0);
414 }
415 return standardDeviation * nextGaussian() + mean;
416 }
417
418 /**
419 * Returns the next log-normally-distributed pseudo-random deviate.
420 *
421 * @param shape shape parameter of the log-normal distribution
422 * @param scale scale parameter of the log-normal distribution
423 * @return a random value, normally distributed with the given mean and standard deviation
424 */
425 public double nextLogNormal(double shape, double scale) {
426 if (shape <= 0) {
427 throw new MathIllegalArgumentException (LocalizedCoreFormats.NUMBER_TOO_SMALL, shape, 0);
428 }
429 return FastMath.exp(scale + shape * nextGaussian());
430 }
431
432 /**
433 * Returns a poisson-distributed deviate with the given mean.
434 *
435 * @param mean expected value
436 * @return poisson deviate
437 * @throws MathIllegalArgumentException if mean is not strictly positive
438 */
439 public int nextPoisson(double mean) {
440 if (mean <= 0) {
441 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL, mean, 0);
442 }
443 final double pivot = 40.0d;
444 if (mean < pivot) {
445 double p = FastMath.exp(-mean);
446 long n = 0;
447 double r = 1.0d;
448 double rnd;
449
450 while (n < 1000 * mean) {
451 rnd = randomGenerator.nextDouble();
452 r *= rnd;
453 if (r >= p) {
454 n++;
455 } else {
456 return (int) FastMath.min(n, Integer.MAX_VALUE);
457 }
458 }
459 return (int) FastMath.min(n, Integer.MAX_VALUE);
460 } else {
461 final double lambda = FastMath.floor(mean);
462 final double lambdaFractional = mean - lambda;
463 final double logLambda = FastMath.log(lambda);
464 final double logLambdaFactorial = CombinatoricsUtils.factorialLog((int) lambda);
465 final long y2 = lambdaFractional < Double.MIN_VALUE ? 0 : nextPoisson(lambdaFractional);
466 final double delta = FastMath.sqrt(lambda * FastMath.log(32 * lambda / FastMath.PI + 1));
467 final double halfDelta = delta / 2;
468 final double twolpd = 2 * lambda + delta;
469 final double a1 = FastMath.sqrt(FastMath.PI * twolpd) * FastMath.exp(1 / (8 * lambda));
470 final double a2 = (twolpd / delta) * FastMath.exp(-delta * (1 + delta) / twolpd);
471 final double aSum = a1 + a2 + 1;
472 final double p1 = a1 / aSum;
473 final double p2 = a2 / aSum;
474 final double c1 = 1 / (8 * lambda);
475
476 double x;
477 double y = 0;
478 double v;
479 int a;
480 double t;
481 double qr;
482 double qa;
483 for (;;) {
484 final double u = randomGenerator.nextDouble();
485 if (u <= p1) {
486 final double n = randomGenerator.nextGaussian();
487 x = n * FastMath.sqrt(lambda + halfDelta) - 0.5d;
488 if (x > delta || x < -lambda) {
489 continue;
490 }
491 y = x < 0 ? FastMath.floor(x) : FastMath.ceil(x);
492 final double e = nextExponential(1);
493 v = -e - (n * n / 2) + c1;
494 } else {
495 if (u > p1 + p2) {
496 y = lambda;
497 break;
498 } else {
499 x = delta + (twolpd / delta) * nextExponential(1);
500 y = FastMath.ceil(x);
501 v = -nextExponential(1) - delta * (x + 1) / twolpd;
502 }
503 }
504 a = x < 0 ? 1 : 0;
505 t = y * (y + 1) / (2 * lambda);
506 if (v < -t && a == 0) {
507 y = lambda + y;
508 break;
509 }
510 qr = t * ((2 * y + 1) / (6 * lambda) - 1);
511 qa = qr - (t * t) / (3 * (lambda + a * (y + 1)));
512 if (v < qa) {
513 y = lambda + y;
514 break;
515 }
516 if (v > qr) {
517 continue;
518 }
519 if (v < y * logLambda - CombinatoricsUtils.factorialLog((int) (y + lambda)) + logLambdaFactorial) {
520 y = lambda + y;
521 break;
522 }
523 }
524 return (int) FastMath.min(y2 + (long) y, Integer.MAX_VALUE);
525 }
526 }
527
528 /**
529 * Returns a random deviate from the given distribution.
530 *
531 * @param dist the distribution to sample from
532 * @return a random value following the given distribution
533 */
534 public double nextDeviate(RealDistribution dist) {
535 return getSampler(dist).nextSample(this, dist);
536 }
537
538 /**
539 * Returns an array of random deviates from the given distribution.
540 *
541 * @param dist the distribution to sample from
542 * @param size the number of values to return
543 *
544 * @return an array of {@code size} values following the given distribution
545 */
546 public double[] nextDeviates(RealDistribution dist, int size) {
547 //TODO: check parameters
548
549 RealDistributionSampler sampler = getSampler(dist);
550 double[] out = new double[size];
551 for (int i = 0; i < size; i++) {
552 out[i] = sampler.nextSample(this, dist);
553 }
554 return out;
555 }
556
557 /**
558 * Returns a random deviate from the given distribution.
559 *
560 * @param dist the distribution to sample from
561 * @return a random value following the given distribution
562 */
563 public int nextDeviate(IntegerDistribution dist) {
564 return getSampler(dist).nextSample(this, dist);
565 }
566
567 /**
568 * Returns an array of random deviates from the given distribution.
569 *
570 * @param dist the distribution to sample from
571 * @param size the number of values to return
572 *
573 * @return an array of {@code size }values following the given distribution
574 */
575 public int[] nextDeviates(IntegerDistribution dist, int size) {
576 //TODO: check parameters
577
578 IntegerDistributionSampler sampler = getSampler(dist);
579 int[] out = new int[size];
580 for (int i = 0; i < size; i++) {
581 out[i] = sampler.nextSample(this, dist);
582 }
583 return out;
584 }
585
586 /**
587 * Returns a sampler for the given continuous distribution.
588 * @param dist the distribution
589 * @return a sampler for the distribution
590 */
591 private RealDistributionSampler getSampler(RealDistribution dist) {
592 RealDistributionSampler sampler = CONTINUOUS_SAMPLERS.get(dist.getClass());
593 if (sampler != null) {
594 return sampler;
595 }
596 return DEFAULT_REAL_SAMPLER;
597 }
598
599 /**
600 * Returns a sampler for the given discrete distribution.
601 * @param dist the distribution
602 * @return a sampler for the distribution
603 */
604 private IntegerDistributionSampler getSampler(IntegerDistribution dist) {
605 IntegerDistributionSampler sampler = DISCRETE_SAMPLERS.get(dist.getClass());
606 if (sampler != null) {
607 return sampler;
608 }
609 return DEFAULT_INTEGER_SAMPLER;
610 }
611
612 /**
613 * Returns a uniformly distributed random integer between lower and upper (inclusive).
614 *
615 * @param lower lower bound for the generated value
616 * @param upper upper bound for the generated value
617 * @return a random integer value within the given bounds
618 * @throws MathIllegalArgumentException if lower is not strictly less than or equal to upper
619 */
620 public int nextInt(int lower, int upper) {
621 if (lower >= upper) {
622 throw new MathIllegalArgumentException(LocalizedCoreFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND,
623 lower, upper);
624 }
625 final int max = (upper - lower) + 1;
626 if (max <= 0) {
627 // The range is too wide to fit in a positive int (larger
628 // than 2^31); as it covers more than half the integer range,
629 // we use a simple rejection method.
630 while (true) {
631 final int r = nextInt();
632 if (r >= lower &&
633 r <= upper) {
634 return r;
635 }
636 }
637 } else {
638 // We can shift the range and directly generate a positive int.
639 return lower + nextInt(max);
640 }
641 }
642
643 /**
644 * Returns a uniformly distributed random long integer between lower and upper (inclusive).
645 *
646 * @param lower lower bound for the generated value
647 * @param upper upper bound for the generated value
648 * @return a random long integer value within the given bounds
649 * @throws MathIllegalArgumentException if lower is not strictly less than or equal to upper
650 */
651 public long nextLong(final long lower, final long upper) throws MathIllegalArgumentException {
652 if (lower >= upper) {
653 throw new MathIllegalArgumentException(LocalizedCoreFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND,
654 lower, upper);
655 }
656 final long max = (upper - lower) + 1;
657 if (max <= 0) {
658 // the range is too wide to fit in a positive long (larger than 2^63); as it covers
659 // more than half the long range, we use directly a simple rejection method
660 while (true) {
661 final long r = randomGenerator.nextLong();
662 if (r >= lower && r <= upper) {
663 return r;
664 }
665 }
666 } else if (max < Integer.MAX_VALUE){
667 // we can shift the range and generate directly a positive int
668 return lower + randomGenerator.nextInt((int) max);
669 } else {
670 // we can shift the range and generate directly a positive long
671 return lower + nextLong(max);
672 }
673 }
674
675 /**
676 * Returns a double value uniformly distributed over [lower, upper]
677 * @param lower lower bound
678 * @param upper upper bound
679 * @return uniform deviate
680 * @throws MathIllegalArgumentException if upper is less than or equal to upper
681 */
682 public double nextUniform(double lower, double upper) {
683 if (upper <= lower) {
684 throw new MathIllegalArgumentException(LocalizedCoreFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, lower, upper);
685 }
686 if (Double.isInfinite(lower) || Double.isInfinite(upper)) {
687 throw new MathIllegalArgumentException(LocalizedCoreFormats.INFINITE_BOUND);
688 }
689 if (Double.isNaN(lower) || Double.isNaN(upper)) {
690 throw new MathIllegalArgumentException(LocalizedCoreFormats.NAN_NOT_ALLOWED);
691 }
692 final double u = randomGenerator.nextDouble();
693 return u * upper + (1 - u) * lower;
694 }
695
696 /**
697 * Returns an integer value following a Zipf distribution with the given parameter.
698 *
699 * @param numberOfElements number of elements of the distribution
700 * @param exponent exponent of the distribution
701 * @return random Zipf value
702 */
703 public int nextZipf(int numberOfElements, double exponent) {
704 if (zipfSampler == null || zipfSampler.getExponent() != exponent || zipfSampler.getNumberOfElements() != numberOfElements) {
705 zipfSampler = new ZipfRejectionInversionSampler(numberOfElements, exponent);
706 }
707 return zipfSampler.sample(randomGenerator);
708 }
709
710 /**
711 * Generates a random string of hex characters of length {@code len}.
712 * <p>
713 * The generated string will be random, but not cryptographically secure.
714 * <p>
715 * <strong>Algorithm Description:</strong> hex strings are generated using a
716 * 2-step process.
717 * </p>
718 * <ol>
719 * <li>{@code len / 2 + 1} binary bytes are generated using the underlying
720 * Random</li>
721 * <li>Each binary byte is translated into 2 hex digits</li>
722 * </ol>
723 *
724 * @param len the desired string length.
725 * @return the random string.
726 * @throws MathIllegalArgumentException if {@code len <= 0}.
727 */
728 public String nextHexString(int len) throws MathIllegalArgumentException {
729 if (len <= 0) {
730 throw new MathIllegalArgumentException(LocalizedCoreFormats.LENGTH, len);
731 }
732
733 // Initialize output buffer
734 StringBuilder outBuffer = new StringBuilder();
735
736 // Get int(len/2)+1 random bytes
737 byte[] randomBytes = new byte[(len / 2) + 1];
738 randomGenerator.nextBytes(randomBytes);
739
740 // Convert each byte to 2 hex digits
741 for (byte randomByte : randomBytes) {
742 Integer c = Integer.valueOf(randomByte);
743
744 /*
745 * Add 128 to byte value to make interval 0-255 before doing hex
746 * conversion. This guarantees <= 2 hex digits from toHexString()
747 * toHexString would otherwise add 2^32 to negative arguments.
748 */
749 String hex = Integer.toHexString(c + 128);
750
751 // Make sure we add 2 hex digits for each byte
752 if (hex.length() == 1) {
753 outBuffer.append('0');
754 }
755 outBuffer.append(hex);
756 }
757 return outBuffer.toString().substring(0, len);
758 }
759
760 /**
761 * Generates an integer array of length {@code k} whose entries are selected
762 * randomly, without repetition, from the integers {@code 0, ..., n - 1}
763 * (inclusive).
764 * <p>
765 * Generated arrays represent permutations of {@code n} taken {@code k} at a
766 * time.</p>
767 * This method calls {@link MathArrays#shuffle(int[],RandomGenerator)
768 * MathArrays.shuffle} in order to create a random shuffle of the set
769 * of natural numbers {@code { 0, 1, ..., n - 1 }}.
770 *
771 *
772 * @param n the domain of the permutation
773 * @param k the size of the permutation
774 * @return a random {@code k}-permutation of {@code n}, as an array of
775 * integers
776 * @throws MathIllegalArgumentException if {@code k > n}.
777 * @throws MathIllegalArgumentException if {@code k <= 0}.
778 */
779 public int[] nextPermutation(int n, int k)
780 throws MathIllegalArgumentException {
781 if (k > n) {
782 throw new MathIllegalArgumentException(LocalizedCoreFormats.PERMUTATION_EXCEEDS_N,
783 k, n, true);
784 }
785 if (k <= 0) {
786 throw new MathIllegalArgumentException(LocalizedCoreFormats.PERMUTATION_SIZE,
787 k);
788 }
789
790 final int[] index = MathArrays.natural(n);
791 MathArrays.shuffle(index, randomGenerator);
792
793 // Return a new array containing the first "k" entries of "index".
794 return Arrays.copyOf(index, k);
795 }
796
797 /**
798 * Returns an array of {@code k} objects selected randomly from the
799 * Collection {@code c}.
800 * <p>
801 * Sampling from {@code c} is without replacement; but if {@code c} contains
802 * identical objects, the sample may include repeats. If all elements of
803 * {@code c} are distinct, the resulting object array represents a
804 * <a href="http://rkb.home.cern.ch/rkb/AN16pp/node250.html#SECTION0002500000000000000000">
805 * Simple Random Sample</a> of size {@code k} from the elements of
806 * {@code c}.</p>
807 * <p>This method calls {@link #nextPermutation(int,int) nextPermutation(c.size(), k)}
808 * in order to sample the collection.
809 * </p>
810 *
811 * @param c the collection to be sampled
812 * @param k the size of the sample
813 * @return a random sample of {@code k} elements from {@code c}
814 * @throws MathIllegalArgumentException if {@code k > c.size()}.
815 * @throws MathIllegalArgumentException if {@code k <= 0}.
816 */
817 public Object[] nextSample(Collection<?> c, int k) throws MathIllegalArgumentException {
818
819 int len = c.size();
820 if (k > len) {
821 throw new MathIllegalArgumentException(LocalizedCoreFormats.SAMPLE_SIZE_EXCEEDS_COLLECTION_SIZE,
822 k, len, true);
823 }
824 if (k <= 0) {
825 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SAMPLES, k);
826 }
827
828 Object[] objects = c.toArray();
829 int[] index = nextPermutation(len, k);
830 Object[] result = new Object[k];
831 for (int i = 0; i < k; i++) {
832 result[i] = objects[index[i]];
833 }
834 return result;
835 }
836
837 /**
838 * Returns an array of {@code k} double values selected randomly from the
839 * double array {@code a}.
840 * <p>
841 * Sampling from {@code a} is without replacement; but if {@code a} contains
842 * identical objects, the sample may include repeats. If all elements of
843 * {@code a} are distinct, the resulting object array represents a
844 * <a href="http://rkb.home.cern.ch/rkb/AN16pp/node250.html#SECTION0002500000000000000000">
845 * Simple Random Sample</a> of size {@code k} from the elements of
846 * {@code a}.</p>
847 *
848 * @param a the array to be sampled
849 * @param k the size of the sample
850 * @return a random sample of {@code k} elements from {@code a}
851 * @throws MathIllegalArgumentException if {@code k > c.size()}.
852 * @throws MathIllegalArgumentException if {@code k <= 0}.
853 */
854 public double[] nextSample(double[] a, int k) throws MathIllegalArgumentException {
855 int len = a.length;
856 if (k > len) {
857 throw new MathIllegalArgumentException(LocalizedCoreFormats.SAMPLE_SIZE_EXCEEDS_COLLECTION_SIZE,
858 k, len, true);
859 }
860 if (k <= 0) {
861 throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SAMPLES, k);
862 }
863 int[] index = nextPermutation(len, k);
864 double[] result = new double[k];
865 for (int i = 0; i < k; i++) {
866 result[i] = a[index[i]];
867 }
868 return result;
869 }
870
871 /**
872 * Generates a random sample of size sampleSize from {0, 1, ... , weights.length - 1},
873 * using weights as probabilities.
874 * <p>
875 * For 0 < i < weights.length, the probability that i is selected (on any draw) is weights[i].
876 * If necessary, the weights array is normalized to sum to 1 so that weights[i] is a probability
877 * and the array sums to 1.
878 * <p>
879 * Weights can be 0, but must not be negative, infinite or NaN.
880 * At least one weight must be positive.
881 *
882 * @param sampleSize size of sample to generate
883 * @param weights probability sampling weights
884 * @return an array of integers between 0 and weights.length - 1
885 * @throws MathIllegalArgumentException if weights contains negative, NaN or infinite values or only 0s or sampleSize is less than 0
886 */
887 public int[] nextSampleWithReplacement(int sampleSize, double[] weights) {
888
889 // Check sample size
890 if (sampleSize < 0) {
891 throw new MathIllegalArgumentException(LocalizedCoreFormats.NOT_POSITIVE_NUMBER_OF_SAMPLES);
892 }
893
894 // Check and normalize weights
895 double[] normWt = EnumeratedDistribution.checkAndNormalize(weights);
896
897 // Generate sample values by dividing [0,1] into subintervals corresponding to weights.
898 final int[] out = new int[sampleSize];
899 final int len = normWt.length;
900 for (int i = 0; i < sampleSize; i++) {
901 final double u = randomGenerator.nextDouble();
902 double cum = normWt[0];
903 int j = 1;
904 while (cum < u && j < len) {
905 cum += normWt[j++];
906 }
907 out[i] = --j;
908 }
909 return out;
910 }
911
912 /**
913 * Utility class implementing Cheng's algorithms for beta distribution sampling.
914 *
915 * <blockquote>
916 * <pre>
917 * R. C. H. Cheng,
918 * "Generating beta variates with nonintegral shape parameters",
919 * Communications of the ACM, 21, 317-322, 1978.
920 * </pre>
921 * </blockquote>
922 */
923 private static class ChengBetaSampler {
924
925 /** Private constructor for utility class. */
926 private ChengBetaSampler() { // NOPMD - PMD fails to detect this is a utility class
927 // not called
928 }
929
930 /**
931 * Returns the next sample following a beta distribution
932 * with given alpha and beta parameters.
933 *
934 * @param generator the random generator to use
935 * @param alpha the alpha parameter
936 * @param beta the beta parameter
937 * @return the next sample
938 */
939 public static double sample(RandomGenerator generator,
940 double alpha,
941 double beta) {
942 // TODO: validate parameters
943 final double a = FastMath.min(alpha, beta);
944 final double b = FastMath.max(alpha, beta);
945
946 if (a > 1) {
947 return algorithmBB(generator, alpha, a, b);
948 } else {
949 return algorithmBC(generator, alpha, b, a);
950 }
951 }
952
953 /**
954 * Returns one Beta sample using Cheng's BB algorithm,
955 * when both α and β are greater than 1.
956 *
957 * @param generator the random generator to use
958 * @param a0 distribution first shape parameter (α)
959 * @param a min(α, β) where α, β are the two distribution shape parameters
960 * @param b max(α, β) where α, β are the two distribution shape parameters
961 * @return sampled value
962 */
963 private static double algorithmBB(final RandomGenerator generator,
964 final double a0,
965 final double a,
966 final double b) {
967 final double alpha = a + b;
968 final double beta = FastMath.sqrt((alpha - 2.) / (2. * a * b - alpha));
969 final double gamma = a + 1. / beta;
970
971 double r;
972 double w;
973 double t;
974 do {
975 final double u1 = generator.nextDouble();
976 final double u2 = generator.nextDouble();
977 final double v = beta * (FastMath.log(u1) - FastMath.log1p(-u1));
978 w = a * FastMath.exp(v);
979 final double z = u1 * u1 * u2;
980 r = gamma * v - 1.3862944;
981 final double s = a + r - w;
982 if (s + 2.609438 >= 5 * z) {
983 break;
984 }
985
986 t = FastMath.log(z);
987 if (s >= t) {
988 break;
989 }
990 } while (r + alpha * (FastMath.log(alpha) - FastMath.log(b + w)) < t);
991
992 w = FastMath.min(w, Double.MAX_VALUE);
993 return Precision.equals(a, a0) ? w / (b + w) : b / (b + w);
994 }
995
996 /**
997 * Returns a Beta-distribute value using Cheng's BC algorithm,
998 * when at least one of α and β is smaller than 1.
999 *
1000 * @param generator the random generator to use
1001 * @param a0 distribution first shape parameter (α)
1002 * @param a max(α, β) where α, β are the two distribution shape parameters
1003 * @param b min(α, β) where α, β are the two distribution shape parameters
1004 * @return sampled value
1005 */
1006 private static double algorithmBC(final RandomGenerator generator,
1007 final double a0,
1008 final double a,
1009 final double b) {
1010 final double alpha = a + b;
1011 final double beta = 1. / b;
1012 final double delta = 1. + a - b;
1013 final double k1 = delta * (0.0138889 + 0.0416667 * b) / (a * beta - 0.777778);
1014 final double k2 = 0.25 + (0.5 + 0.25 / delta) * b;
1015
1016 double w;
1017 for (;;) {
1018 final double u1 = generator.nextDouble();
1019 final double u2 = generator.nextDouble();
1020 final double y = u1 * u2;
1021 final double z = u1 * y;
1022 if (u1 < 0.5) {
1023 if (0.25 * u2 + z - y >= k1) {
1024 continue;
1025 }
1026 } else {
1027 if (z <= 0.25) {
1028 final double v = beta * (FastMath.log(u1) - FastMath.log1p(-u1));
1029 w = a * FastMath.exp(v);
1030 break;
1031 }
1032
1033 if (z >= k2) {
1034 continue;
1035 }
1036 }
1037
1038 final double v = beta * (FastMath.log(u1) - FastMath.log1p(-u1));
1039 w = a * FastMath.exp(v);
1040 if (alpha * (FastMath.log(alpha) - FastMath.log(b + w) + v) - 1.3862944 >= FastMath.log(z)) {
1041 break;
1042 }
1043 }
1044
1045 w = FastMath.min(w, Double.MAX_VALUE);
1046 return Precision.equals(a, a0) ? w / (b + w) : b / (b + w);
1047 }
1048 }
1049
1050 /**
1051 * Utility class implementing a rejection inversion sampling method for a discrete,
1052 * bounded Zipf distribution that is based on the method described in
1053 * <p>
1054 * Wolfgang Hörmann and Gerhard Derflinger
1055 * "Rejection-inversion to generate variates from monotone discrete distributions."
1056 * ACM Transactions on Modeling and Computer Simulation (TOMACS) 6.3 (1996): 169-184.
1057 * <p>
1058 * The paper describes an algorithm for exponents larger than 1 (Algorithm ZRI).
1059 * The original method uses {@code H(x) := (v + x)^(1 - q) / (1 - q)}
1060 * as the integral of the hat function. This function is undefined for
1061 * q = 1, which is the reason for the limitation of the exponent.
1062 * If instead the integral function
1063 * {@code H(x) := ((v + x)^(1 - q) - 1) / (1 - q)} is used,
1064 * for which a meaningful limit exists for q = 1,
1065 * the method works for all positive exponents.
1066 * <p>
1067 * The following implementation uses v := 0 and generates integral numbers
1068 * in the range [1, numberOfElements]. This is different to the original method
1069 * where v is defined to be positive and numbers are taken from [0, i_max].
1070 * This explains why the implementation looks slightly different.
1071 *
1072 */
1073 static final class ZipfRejectionInversionSampler {
1074
1075 /** Exponent parameter of the distribution. */
1076 private final double exponent;
1077 /** Number of elements. */
1078 private final int numberOfElements;
1079 /** Constant equal to {@code hIntegral(1.5) - 1}. */
1080 private final double hIntegralX1;
1081 /** Constant equal to {@code hIntegral(numberOfElements + 0.5)}. */
1082 private final double hIntegralNumberOfElements;
1083 /** Constant equal to {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */
1084 private final double s;
1085
1086 /** Simple constructor.
1087 * @param numberOfElements number of elements
1088 * @param exponent exponent parameter of the distribution
1089 */
1090 ZipfRejectionInversionSampler(final int numberOfElements, final double exponent) {
1091 this.exponent = exponent;
1092 this.numberOfElements = numberOfElements;
1093 this.hIntegralX1 = hIntegral(1.5) - 1d;
1094 this.hIntegralNumberOfElements = hIntegral(numberOfElements + 0.5);
1095 this.s = 2d - hIntegralInverse(hIntegral(2.5) - h(2));
1096 }
1097
1098 /** Generate one integral number in the range [1, numberOfElements].
1099 * @param random random generator to use
1100 * @return generated integral number in the range [1, numberOfElements]
1101 */
1102 int sample(final RandomGenerator random) {
1103 while(true) {
1104
1105 final double u = hIntegralNumberOfElements + random.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements);
1106 // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements]
1107
1108 double x = hIntegralInverse(u);
1109
1110 int k = (int)(x + 0.5);
1111
1112 // Limit k to the range [1, numberOfElements]
1113 // (k could be outside due to numerical inaccuracies)
1114 if (k < 1) {
1115 k = 1;
1116 }
1117 else if (k > numberOfElements) {
1118 k = numberOfElements;
1119 }
1120
1121 // Here, the distribution of k is given by:
1122 //
1123 // P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C
1124 // P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2
1125 //
1126 // where C := 1 / (hIntegralNumberOfElements - hIntegralX1)
1127
1128 if (k - x <= s || u >= hIntegral(k + 0.5) - h(k)) {
1129
1130 // Case k = 1:
1131 //
1132 // The right inequality is always true, because replacing k by 1 gives
1133 // u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from
1134 // (hIntegralX1, hIntegralNumberOfElements].
1135 //
1136 // Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1
1137 // and the probability that 1 is returned as random value is
1138 // P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent
1139 //
1140 // Case k >= 2:
1141 //
1142 // The left inequality (k - x <= s) is just a short cut
1143 // to avoid the more expensive evaluation of the right inequality
1144 // (u >= hIntegral(k + 0.5) - h(k)) in many cases.
1145 //
1146 // If the left inequality is true, the right inequality is also true:
1147 // Theorem 2 in the paper is valid for all positive exponents, because
1148 // the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and
1149 // (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0
1150 // are both fulfilled.
1151 // Therefore, f(x) := x - hIntegralInverse(hIntegral(x + 0.5) - h(x))
1152 // is a non-decreasing function. If k - x <= s holds,
1153 // k - x <= s + f(k) - f(2) is obviously also true which is equivalent to
1154 // -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
1155 // -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)),
1156 // and finally u >= hIntegral(k + 0.5) - h(k).
1157 //
1158 // Hence, the right inequality determines the acceptance rate:
1159 // P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2))
1160 // The probability that m is returned is given by
1161 // P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / m^exponent.
1162 //
1163 // In both cases the probabilities are proportional to the probability mass function
1164 // of the Zipf distribution.
1165
1166 return k;
1167 }
1168 }
1169 }
1170
1171 /**
1172 * {@code H(x) :=}
1173 * <ul>
1174 * <li>{@code (x^(1-exponent) - 1)/(1 - exponent)}, if {@code exponent != 1}</li>
1175 * <li>{@code log(x)}, if {@code exponent == 1}</li>
1176 * </ul>
1177 * H(x) is an integral function of h(x),
1178 * the derivative of H(x) is h(x).
1179 *
1180 * @param x free parameter
1181 * @return {@code H(x)}
1182 */
1183 private double hIntegral(final double x) {
1184 final double logX = FastMath.log(x);
1185 return helper2((1d-exponent)*logX)*logX;
1186 }
1187
1188 /**
1189 * {@code h(x) := 1/x^exponent}
1190 *
1191 * @param x free parameter
1192 * @return h(x)
1193 */
1194 private double h(final double x) {
1195 return FastMath.exp(-exponent * FastMath.log(x));
1196 }
1197
1198 /**
1199 * The inverse function of H(x).
1200 *
1201 * @param x free parameter
1202 * @return y for which {@code H(y) = x}
1203 */
1204 private double hIntegralInverse(final double x) {
1205 double t = x*(1d-exponent);
1206 if (t < -1d) {
1207 // Limit value to the range [-1, +inf).
1208 // t could be smaller than -1 in some rare cases due to numerical errors.
1209 t = -1;
1210 }
1211 return FastMath.exp(helper1(t)*x);
1212 }
1213
1214 /**
1215 * @return the exponent of the distribution being sampled
1216 */
1217 public double getExponent() {
1218 return exponent;
1219 }
1220
1221 /**
1222 * @return the number of elements of the distribution being sampled
1223 */
1224 public int getNumberOfElements() {
1225 return numberOfElements;
1226 }
1227
1228 /**
1229 * Helper function that calculates {@code log(1+x)/x}.
1230 * <p>
1231 * A Taylor series expansion is used, if x is close to 0.
1232 *
1233 * @param x a value larger than or equal to -1
1234 * @return {@code log(1+x)/x}
1235 */
1236 static double helper1(final double x) {
1237 if (FastMath.abs(x)>1e-8) {
1238 return FastMath.log1p(x)/x;
1239 }
1240 else {
1241 return 1.-x*((1./2.)-x*((1./3.)-x*(1./4.)));
1242 }
1243 }
1244
1245 /**
1246 * Helper function to calculate {@code (exp(x)-1)/x}.
1247 * <p>
1248 * A Taylor series expansion is used, if x is close to 0.
1249 *
1250 * @param x free parameter
1251 * @return {@code (exp(x)-1)/x} if x is non-zero, or 1 if x=0
1252 */
1253 static double helper2(final double x) {
1254 if (FastMath.abs(x)>1e-8) {
1255 return FastMath.expm1(x)/x;
1256 }
1257 else {
1258 return 1.+x*(1./2.)*(1.+x*(1./3.)*(1.+x*(1./4.)));
1259 }
1260 }
1261 }
1262
1263 /**
1264 * Sampler for enumerated distributions.
1265 *
1266 * @param <T> type of sample space objects
1267 */
1268 private final class EnumeratedDistributionSampler<T> {
1269 /** Probabilities */
1270 private final double[] weights;
1271 /** Values */
1272 private final List<T> values;
1273 /**
1274 * Create an EnumeratedDistributionSampler from the provided pmf.
1275 *
1276 * @param pmf probability mass function describing the distribution
1277 */
1278 EnumeratedDistributionSampler(List<Pair<T, Double>> pmf) {
1279 final int numMasses = pmf.size();
1280 weights = new double[numMasses];
1281 values = new ArrayList<>();
1282 for (int i = 0; i < numMasses; i++) {
1283 weights[i] = pmf.get(i).getSecond();
1284 values.add(pmf.get(i).getFirst());
1285 }
1286 }
1287 /**
1288 * @return a random value from the distribution
1289 */
1290 public T sample() {
1291 int[] chosen = nextSampleWithReplacement(1, weights);
1292 return values.get(chosen[0]);
1293 }
1294 }
1295 }