1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.hipparchus.stat.fitting;
23
24 import java.io.BufferedReader;
25 import java.io.File;
26 import java.io.IOException;
27 import java.io.InputStreamReader;
28 import java.net.URISyntaxException;
29 import java.net.URL;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32
33 import org.hipparchus.UnitTestUtils;
34 import org.hipparchus.analysis.UnivariateFunction;
35 import org.hipparchus.analysis.integration.BaseAbstractUnivariateIntegrator;
36 import org.hipparchus.analysis.integration.IterativeLegendreGaussIntegrator;
37 import org.hipparchus.distribution.RealDistribution;
38 import org.hipparchus.distribution.continuous.ConstantRealDistribution;
39 import org.hipparchus.distribution.continuous.NormalDistribution;
40 import org.hipparchus.distribution.continuous.RealDistributionAbstractTest;
41 import org.hipparchus.distribution.continuous.UniformRealDistribution;
42 import org.hipparchus.exception.MathIllegalArgumentException;
43 import org.hipparchus.exception.MathIllegalStateException;
44 import org.hipparchus.exception.NullArgumentException;
45 import org.hipparchus.stat.descriptive.StreamingStatistics;
46 import org.hipparchus.util.FastMath;
47 import org.junit.Assert;
48 import org.junit.Before;
49 import org.junit.Test;
50
51
52
53
54 public final class EmpiricalDistributionTest extends RealDistributionAbstractTest {
55
56 protected EmpiricalDistribution empiricalDistribution = null;
57 protected EmpiricalDistribution empiricalDistribution2 = null;
58 protected File file = null;
59 protected URL url = null;
60 protected double[] dataArray = null;
61 protected final int n = 10000;
62
63 @Override
64 @Before
65 public void setUp() {
66 super.setUp();
67 empiricalDistribution = new EmpiricalDistribution(100);
68 url = getClass().getResource("testData.txt");
69 final ArrayList<Double> list = new ArrayList<>();
70 try {
71 empiricalDistribution2 = new EmpiricalDistribution(100);
72 BufferedReader in =
73 new BufferedReader(new InputStreamReader(
74 url.openStream()));
75 String str = null;
76 while ((str = in.readLine()) != null) {
77 list.add(Double.valueOf(str));
78 }
79 in.close();
80 in = null;
81 } catch (IOException ex) {
82 Assert.fail("IOException " + ex);
83 }
84
85 dataArray = new double[list.size()];
86 int i = 0;
87 for (Double data : list) {
88 dataArray[i] = data.doubleValue();
89 i++;
90 }
91 }
92
93
94 @Test(expected=MathIllegalArgumentException.class)
95 public void testPrecondition1() {
96 new EmpiricalDistribution(0);
97 }
98
99
100
101
102
103
104 @Test
105 public void testLoad() throws Exception {
106
107 empiricalDistribution.load(url);
108 checkDistribution();
109
110
111 File file = new File(url.toURI());
112 empiricalDistribution.load(file);
113 checkDistribution();
114 }
115
116 private void checkDistribution() {
117
118
119 Assert.assertEquals(empiricalDistribution.getSampleStats().getN(),1000,10E-7);
120
121 Assert.assertEquals(empiricalDistribution.getSampleStats().getMean(),
122 5.069831575018909,10E-7);
123 Assert.assertEquals(empiricalDistribution.getSampleStats().getStandardDeviation(),
124 1.0173699343977738,10E-7);
125 }
126
127 @Test
128 public void testLoadURLError() throws IOException {
129 try {
130 URL existing = getClass().getResource("testData.txt");
131 URL nonexistent = new URL(existing.toString() + "-nonexistent");
132 empiricalDistribution.load(nonexistent);
133 Assert.fail("an exception should have been thrown");
134 } catch (IOException ioe) {
135
136 }
137 }
138
139 @Test
140 public void testLoadFileError() throws IOException, URISyntaxException {
141 try {
142 File existing = new File(getClass().getResource("testData.txt").toURI());
143 File nonexistent = new File(existing.getAbsolutePath() + "-nonexistent");
144 empiricalDistribution.load(nonexistent);
145 Assert.fail("an exception should have been thrown");
146 } catch (IOException ioe) {
147
148 }
149 }
150
151
152
153
154
155
156
157 @Test
158 public void testDoubleLoad() throws Exception {
159 empiricalDistribution2.load(dataArray);
160
161
162 Assert.assertEquals(empiricalDistribution2.getSampleStats().getN(),1000,10E-7);
163
164 Assert.assertEquals(empiricalDistribution2.getSampleStats().getMean(),
165 5.069831575018909,10E-7);
166 Assert.assertEquals(empiricalDistribution2.getSampleStats().getStandardDeviation(),
167 1.0173699343977738,10E-7);
168
169 double[] bounds = empiricalDistribution2.getGeneratorUpperBounds();
170 Assert.assertEquals(bounds.length, 100);
171 Assert.assertEquals(bounds[99], 1.0, 10e-12);
172
173 }
174
175
176
177
178
179
180 @Test
181 public void testNext() throws Exception {
182 tstGen(0.1);
183 tstDoubleGen(0.1);
184 }
185
186
187
188
189
190 @Test
191 public void testNexFail() {
192 try {
193 empiricalDistribution.getNextValue();
194 empiricalDistribution2.getNextValue();
195 Assert.fail("Expecting MathIllegalStateException");
196 } catch (MathIllegalStateException ex) {
197
198 }
199 }
200
201
202
203
204 @Test
205 public void testGridTooFine() throws Exception {
206 empiricalDistribution = new EmpiricalDistribution(1001);
207 tstGen(0.1);
208 empiricalDistribution2 = new EmpiricalDistribution(1001);
209 tstDoubleGen(0.1);
210 }
211
212
213
214
215 @Test
216 public void testGridTooFat() throws Exception {
217 empiricalDistribution = new EmpiricalDistribution(1);
218 tstGen(5);
219
220 empiricalDistribution2 = new EmpiricalDistribution(1);
221 tstDoubleGen(5);
222 }
223
224
225
226
227 @Test
228 public void testBinIndexOverflow() throws Exception {
229 double[] x = new double[] {9474.94326071674, 2080107.8865462579};
230 new EmpiricalDistribution().load(x);
231 }
232
233 @Test
234 public void testSerialization() {
235
236 EmpiricalDistribution dist = new EmpiricalDistribution();
237 EmpiricalDistribution dist2 = (EmpiricalDistribution) UnitTestUtils.serializeAndRecover(dist);
238 verifySame(dist, dist2);
239
240
241 empiricalDistribution2.load(dataArray);
242 dist2 = (EmpiricalDistribution) UnitTestUtils.serializeAndRecover(empiricalDistribution2);
243 verifySame(empiricalDistribution2, dist2);
244 }
245
246 @Test(expected=NullArgumentException.class)
247 public void testLoadNullDoubleArray() {
248 new EmpiricalDistribution().load((double[]) null);
249 }
250
251 @Test(expected=NullArgumentException.class)
252 public void testLoadNullURL() throws Exception {
253 new EmpiricalDistribution().load((URL) null);
254 }
255
256 @Test(expected=NullArgumentException.class)
257 public void testLoadNullFile() throws Exception {
258 new EmpiricalDistribution().load((File) null);
259 }
260
261
262
263
264 @Test
265 public void testGetBinUpperBounds() {
266 double[] testData = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10};
267 EmpiricalDistribution dist = new EmpiricalDistribution(5);
268 dist.load(testData);
269 double[] expectedBinUpperBounds = {2, 4, 6, 8, 10};
270 double[] expectedGeneratorUpperBounds = {4d/13d, 7d/13d, 9d/13d, 11d/13d, 1};
271 double tol = 10E-12;
272 UnitTestUtils.assertEquals(expectedBinUpperBounds, dist.getUpperBounds(), tol);
273 UnitTestUtils.assertEquals(expectedGeneratorUpperBounds, dist.getGeneratorUpperBounds(), tol);
274 }
275
276 @Test
277 public void testReSeed() throws Exception {
278 empiricalDistribution.load(url);
279 empiricalDistribution.reSeed(100);
280 final double [] values = new double[10];
281 for (int i = 0; i < 10; i++) {
282 values[i] = empiricalDistribution.getNextValue();
283 }
284 empiricalDistribution.reSeed(100);
285 for (int i = 0; i < 10; i++) {
286 Assert.assertEquals(values[i],empiricalDistribution.getNextValue(), 0d);
287 }
288 }
289
290 private void verifySame(EmpiricalDistribution d1, EmpiricalDistribution d2) {
291 Assert.assertEquals(d1.isLoaded(), d2.isLoaded());
292 Assert.assertEquals(d1.getBinCount(), d2.getBinCount());
293 Assert.assertEquals(d1.getSampleStats(), d2.getSampleStats());
294 if (d1.isLoaded()) {
295 for (int i = 0; i < d1.getUpperBounds().length; i++) {
296 Assert.assertEquals(d1.getUpperBounds()[i], d2.getUpperBounds()[i], 0);
297 }
298 Assert.assertEquals(d1.getBinStats(), d2.getBinStats());
299 }
300 }
301
302 private void tstGen(double tolerance)throws Exception {
303 empiricalDistribution.load(url);
304 empiricalDistribution.reSeed(1000);
305 StreamingStatistics stats = new StreamingStatistics();
306 for (int i = 1; i < 1000; i++) {
307 stats.addValue(empiricalDistribution.getNextValue());
308 }
309 Assert.assertEquals("mean", 5.069831575018909, stats.getMean(),tolerance);
310 Assert.assertEquals("std dev", 1.0173699343977738, stats.getStandardDeviation(),tolerance);
311 }
312
313 private void tstDoubleGen(double tolerance)throws Exception {
314 empiricalDistribution2.load(dataArray);
315 empiricalDistribution2.reSeed(1000);
316 StreamingStatistics stats = new StreamingStatistics();
317 for (int i = 1; i < 1000; i++) {
318 stats.addValue(empiricalDistribution2.getNextValue());
319 }
320 Assert.assertEquals("mean", 5.069831575018909, stats.getMean(), tolerance);
321 Assert.assertEquals("std dev", 1.0173699343977738, stats.getStandardDeviation(), tolerance);
322 }
323
324
325
326 @Override
327 public RealDistribution makeDistribution() {
328
329 final double[] sourceData = new double[n + 1];
330 for (int i = 0; i < n + 1; i++) {
331 sourceData[i] = i;
332 }
333 EmpiricalDistribution dist = new EmpiricalDistribution();
334 dist.load(sourceData);
335 return dist;
336 }
337
338
339 private final double binMass = 10d / (n + 1);
340
341
342 private final double firstBinMass = 11d / (n + 1);
343
344 @Override
345 public double[] makeCumulativeTestPoints() {
346 final double[] testPoints = new double[] {9, 10, 15, 1000, 5004, 9999};
347 return testPoints;
348 }
349
350
351 @Override
352 public double[] makeCumulativeTestValues() {
353
354
355
356
357
358 final double[] testPoints = getCumulativeTestPoints();
359 final double[] cumValues = new double[testPoints.length];
360 final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
361 final double[] binBounds = empiricalDistribution.getUpperBounds();
362 for (int i = 0; i < testPoints.length; i++) {
363 final int bin = findBin(testPoints[i]);
364 final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
365 binBounds[bin - 1];
366 final double upper = binBounds[bin];
367
368
369 final double bMinus = bin == 0 ? 0 : (bin - 1) * binMass + firstBinMass;
370 final RealDistribution kernel = findKernel(lower, upper);
371 final double withinBinKernelMass = kernel.probability(lower, upper);
372 final double kernelCum = kernel.probability(lower, testPoints[i]);
373 cumValues[i] = bMinus + (bin == 0 ? firstBinMass : binMass) * kernelCum/withinBinKernelMass;
374 }
375 return cumValues;
376 }
377
378 @Override
379 public double[] makeDensityTestValues() {
380 final double[] testPoints = getCumulativeTestPoints();
381 final double[] densityValues = new double[testPoints.length];
382 final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
383 final double[] binBounds = empiricalDistribution.getUpperBounds();
384 for (int i = 0; i < testPoints.length; i++) {
385 final int bin = findBin(testPoints[i]);
386 final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
387 binBounds[bin - 1];
388 final double upper = binBounds[bin];
389 final RealDistribution kernel = findKernel(lower, upper);
390 final double withinBinKernelMass = kernel.probability(lower, upper);
391 final double density = kernel.density(testPoints[i]);
392 densityValues[i] = density * (bin == 0 ? firstBinMass : binMass) / withinBinKernelMass;
393 }
394 return densityValues;
395 }
396
397
398
399
400
401
402
403 @Override
404 @Test
405 public void testDensityIntegrals() {
406 final RealDistribution distribution = makeDistribution();
407 final double tol = 1.0e-9;
408 final BaseAbstractUnivariateIntegrator integrator =
409 new IterativeLegendreGaussIntegrator(5, 1.0e-12, 1.0e-10);
410 final UnivariateFunction d = new UnivariateFunction() {
411 @Override
412 public double value(double x) {
413 return distribution.density(x);
414 }
415 };
416 final double[] lower = {0, 5, 1000, 5001, 9995};
417 final double[] upper = {5, 12, 1030, 5010, 10000};
418 for (int i = 1; i < 5; i++) {
419 Assert.assertEquals(
420 distribution.probability(
421 lower[i], upper[i]),
422 integrator.integrate(
423 1000000,
424 d, lower[i], upper[i]), tol);
425 }
426 }
427
428
429
430
431
432 @Test
433 public void testSampleValuesRange() {
434
435
436 final double[] data = new double[100];
437 for (int i = 0; i < 50; i++) {
438 data[i] = 1 / ((double) i + 1);
439 }
440 for (int i = 51; i < 100; i++) {
441 data[i] = 1 - 1 / (100 - (double) i + 2);
442 }
443 EmpiricalDistribution dist = new EmpiricalDistribution(10);
444 dist.load(data);
445 dist.reseedRandomGenerator(1000);
446 for (int i = 0; i < 1000; i++) {
447 final double dev = dist.getNextValue();
448 Assert.assertTrue(dev < 1);
449 Assert.assertTrue(dev > 0);
450 }
451 }
452
453
454
455
456 @Test
457 public void testNoBinVariance() {
458 final double[] data = {0, 0, 1, 1};
459 EmpiricalDistribution dist = new EmpiricalDistribution(2);
460 dist.load(data);
461 dist.reseedRandomGenerator(1000);
462 for (int i = 0; i < 1000; i++) {
463 final double dev = dist.getNextValue();
464 Assert.assertTrue(dev == 0 || dev == 1);
465 }
466 Assert.assertEquals(0.5, dist.cumulativeProbability(0), Double.MIN_VALUE);
467 Assert.assertEquals(1.0, dist.cumulativeProbability(1), Double.MIN_VALUE);
468 Assert.assertEquals(0.5, dist.cumulativeProbability(0.5), Double.MIN_VALUE);
469 Assert.assertEquals(0.5, dist.cumulativeProbability(0.7), Double.MIN_VALUE);
470 }
471
472
473
474
475 private int findBin(double x) {
476
477 final double nMinus = FastMath.floor(x / 10);
478 final int bin = (int) FastMath.round(nMinus);
479
480 return FastMath.floor(x / 10) == x / 10 ? bin - 1 : bin;
481 }
482
483
484
485
486
487
488
489
490 private RealDistribution findKernel(double lower, double upper) {
491 if (lower < 1) {
492 return new NormalDistribution(5d, 3.3166247903554);
493 } else {
494 return new NormalDistribution((upper + lower + 1) / 2d, 3.0276503540974917);
495 }
496 }
497
498 @Test
499 public void testKernelOverrideConstant() {
500 final EmpiricalDistribution dist = new ConstantKernelEmpiricalDistribution(5);
501 final double[] data = {1d,2d,3d, 4d,5d,6d, 7d,8d,9d, 10d,11d,12d, 13d,14d,15d};
502 dist.load(data);
503
504 double[] values = {2d, 5d, 8d, 11d, 14d};
505 for (int i = 0; i < 20; i++) {
506 Assert.assertTrue(Arrays.binarySearch(values, dist.getNextValue()) >= 0);
507 }
508 final double tol = 10E-12;
509 Assert.assertEquals(0.0, dist.cumulativeProbability(1), tol);
510 Assert.assertEquals(0.2, dist.cumulativeProbability(2), tol);
511 Assert.assertEquals(0.6, dist.cumulativeProbability(10), tol);
512 Assert.assertEquals(0.8, dist.cumulativeProbability(12), tol);
513 Assert.assertEquals(0.8, dist.cumulativeProbability(13), tol);
514 Assert.assertEquals(1.0, dist.cumulativeProbability(15), tol);
515
516 Assert.assertEquals(2.0, dist.inverseCumulativeProbability(0.1), tol);
517 Assert.assertEquals(2.0, dist.inverseCumulativeProbability(0.2), tol);
518 Assert.assertEquals(5.0, dist.inverseCumulativeProbability(0.3), tol);
519 Assert.assertEquals(5.0, dist.inverseCumulativeProbability(0.4), tol);
520 Assert.assertEquals(8.0, dist.inverseCumulativeProbability(0.5), tol);
521 Assert.assertEquals(8.0, dist.inverseCumulativeProbability(0.6), tol);
522 }
523
524 @Test
525 public void testKernelOverrideUniform() {
526 final EmpiricalDistribution dist = new UniformKernelEmpiricalDistribution(5);
527 final double[] data = {1d,2d,3d, 4d,5d,6d, 7d,8d,9d, 10d,11d,12d, 13d,14d,15d};
528 dist.load(data);
529
530 final double[] bounds = {3d, 6d, 9d, 12d};
531 final double tol = 10E-12;
532 for (int i = 0; i < 20; i++) {
533 final double v = dist.getNextValue();
534
535 for (int j = 0; j < bounds.length; j++) {
536 Assert.assertFalse(v > bounds[j] + tol && v < bounds[j] + 1 - tol);
537 }
538 }
539 Assert.assertEquals(0.0, dist.cumulativeProbability(1), tol);
540 Assert.assertEquals(0.1, dist.cumulativeProbability(2), tol);
541 Assert.assertEquals(0.6, dist.cumulativeProbability(10), tol);
542 Assert.assertEquals(0.8, dist.cumulativeProbability(12), tol);
543 Assert.assertEquals(0.8, dist.cumulativeProbability(13), tol);
544 Assert.assertEquals(1.0, dist.cumulativeProbability(15), tol);
545
546 Assert.assertEquals(2.0, dist.inverseCumulativeProbability(0.1), tol);
547 Assert.assertEquals(3.0, dist.inverseCumulativeProbability(0.2), tol);
548 Assert.assertEquals(5.0, dist.inverseCumulativeProbability(0.3), tol);
549 Assert.assertEquals(6.0, dist.inverseCumulativeProbability(0.4), tol);
550 Assert.assertEquals(8.0, dist.inverseCumulativeProbability(0.5), tol);
551 Assert.assertEquals(9.0, dist.inverseCumulativeProbability(0.6), tol);
552 }
553
554 @Test
555 public void testEmptyBins() {
556 double[] data = new double[10];
557 for (int i = 0; i < 10; ++i) {
558 data[i] = i < 5 ? 0 : 1;
559 }
560 EmpiricalDistribution edist = new EmpiricalDistribution(100);
561 edist.load(data);
562 Assert.assertEquals(0.5, edist.cumulativeProbability(0), Double.MIN_VALUE);
563 Assert.assertEquals(0.5, edist.cumulativeProbability(0.3), Double.MIN_VALUE);
564 Assert.assertEquals(0.5, edist.cumulativeProbability(0.9), Double.MIN_VALUE);
565 Assert.assertEquals(1, edist.cumulativeProbability(1), Double.MIN_VALUE);
566 Assert.assertEquals(1, edist.cumulativeProbability(1.5), Double.MIN_VALUE);
567 }
568
569
570
571
572
573 private class ConstantKernelEmpiricalDistribution extends EmpiricalDistribution {
574 private static final long serialVersionUID = 1L;
575 public ConstantKernelEmpiricalDistribution(int i) {
576 super(i);
577 }
578
579 @Override
580 protected RealDistribution getKernel(StreamingStatistics bStats) {
581 return new ConstantRealDistribution(bStats.getMean());
582 }
583 }
584
585
586
587
588 private class UniformKernelEmpiricalDistribution extends EmpiricalDistribution {
589 private static final long serialVersionUID = 2963149194515159653L;
590 public UniformKernelEmpiricalDistribution(int i) {
591 super(i);
592 }
593 @Override
594 protected RealDistribution getKernel(StreamingStatistics bStats) {
595 return new UniformRealDistribution(bStats.getMin(), bStats.getMax());
596 }
597 }
598 }