1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.descriptive.rank;
23
24 import java.io.Serializable;
25 import java.util.Arrays;
26 import java.util.BitSet;
27
28 import org.hipparchus.exception.LocalizedCoreFormats;
29 import org.hipparchus.exception.MathIllegalArgumentException;
30 import org.hipparchus.exception.NullArgumentException;
31 import org.hipparchus.stat.LocalizedStatFormats;
32 import org.hipparchus.stat.descriptive.AbstractUnivariateStatistic;
33 import org.hipparchus.stat.ranking.NaNStrategy;
34 import org.hipparchus.util.FastMath;
35 import org.hipparchus.util.KthSelector;
36 import org.hipparchus.util.MathArrays;
37 import org.hipparchus.util.MathUtils;
38 import org.hipparchus.util.PivotingStrategy;
39 import org.hipparchus.util.Precision;
40
41 /**
42 * Provides percentile computation.
43 * <p>
44 * There are several commonly used methods for estimating percentiles (a.k.a.
45 * quantiles) based on sample data. For large samples, the different methods
46 * agree closely, but when sample sizes are small, different methods will give
47 * significantly different results. The algorithm implemented here works as follows:
48 * <ol>
49 * <li>Let <code>n</code> be the length of the (sorted) array and
50 * <code>0 < p <= 100</code> be the desired percentile.</li>
51 * <li>If <code> n = 1 </code> return the unique array element (regardless of
52 * the value of <code>p</code>); otherwise </li>
53 * <li>Compute the estimated percentile position
54 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
55 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
56 * part of <code>pos</code>).</li>
57 * <li> If <code>pos < 1</code> return the smallest element in the array.</li>
58 * <li> Else if <code>pos >= n</code> return the largest element in the array.</li>
59 * <li> Else let <code>lower</code> be the element in position
60 * <code>floor(pos)</code> in the array and let <code>upper</code> be the
61 * next element in the array. Return <code>lower + d * (upper - lower)</code>
62 * </li>
63 * </ol>
64 * <p>
65 * To compute percentiles, the data must be at least partially ordered. Input
66 * arrays are copied and recursively partitioned using an ordering definition.
67 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
68 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
69 * <code>Double.NaN</code> larger than any other value (including
70 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
71 * (50th percentile) of
72 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code>
73 * <p>
74 * Since percentile estimation usually involves interpolation between array
75 * elements, arrays containing <code>NaN</code> or infinite values will often
76 * result in <code>NaN</code> or infinite values returned.
77 * <p>
78 * Further, to include different estimation types such as R1, R2 as mentioned in
79 * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
80 * a type specific NaN handling strategy is used to closely match with the
81 * typically observed results from popular tools like R(R1-R9), Excel(R7).
82 * <p>
83 * Percentile uses only selection instead of complete sorting and caches selection
84 * algorithm state between calls to the various {@code evaluate} methods. This
85 * greatly improves efficiency, both for a single percentile and multiple percentile
86 * computations. To maximize performance when multiple percentiles are computed
87 * based on the same data, users should set the data array once using either one
88 * of the {@link #evaluate(double[], double)} or {@link #setData(double[])} methods
89 * and thereafter {@link #evaluate(double)} with just the percentile provided.
90 * <p>
91 * <strong>Note that this implementation is not synchronized.</strong> If
92 * multiple threads access an instance of this class concurrently, and at least
93 * one of the threads invokes the <code>increment()</code> or
94 * <code>clear()</code> method, it must be synchronized externally.
95 */
96 public class Percentile extends AbstractUnivariateStatistic implements Serializable {
97
98 /** Serializable version identifier */
99 private static final long serialVersionUID = 20150412L;
100
101 /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
102 private static final int MAX_CACHED_LEVELS = 10;
103
104 /** Maximum number of cached pivots in the pivots cached array */
105 private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;
106
107 /** Default KthSelector used with default pivoting strategy */
108 private final KthSelector kthSelector;
109
110 /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
111 private final EstimationType estimationType;
112
113 /** NaN Handling of the input as defined by {@link NaNStrategy} */
114 private final NaNStrategy nanStrategy;
115
116 /**
117 * Determines what percentile is computed when evaluate() is activated
118 * with no quantile argument.
119 */
120 private double quantile;
121
122 /** Cached pivots. */
123 private int[] cachedPivots;
124
125 /**
126 * Constructs a Percentile with the following defaults.
127 * <ul>
128 * <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
129 * <li>default estimation type: {@link EstimationType#LEGACY},
130 * can be reset with {@link #withEstimationType(EstimationType)}</li>
131 * <li>default NaN strategy: {@link NaNStrategy#REMOVED},
132 * can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
133 * <li>a KthSelector that makes use of {@link PivotingStrategy#MEDIAN_OF_3},
134 * can be reset with {@link #withKthSelector(KthSelector)}</li>
135 * </ul>
136 */
137 public Percentile() {
138 // No try-catch or advertised exception here - arg is valid
139 this(50.0);
140 }
141
142 /**
143 * Constructs a Percentile with the specific quantile value and the following
144 * <ul>
145 * <li>default method type: {@link EstimationType#LEGACY}</li>
146 * <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
147 * <li>a Kth Selector : {@link KthSelector}</li>
148 * </ul>
149 * @param quantile the quantile
150 * @throws MathIllegalArgumentException if p is not greater than 0 and less
151 * than or equal to 100
152 */
153 public Percentile(final double quantile) throws MathIllegalArgumentException {
154 this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
155 new KthSelector(PivotingStrategy.MEDIAN_OF_3));
156 }
157
158 /**
159 * Copy constructor, creates a new {@code Percentile} identical
160 * to the {@code original}
161 *
162 * @param original the {@code Percentile} instance to copy
163 * @throws NullArgumentException if original is null
164 */
165 public Percentile(final Percentile original) throws NullArgumentException {
166 super(original);
167 estimationType = original.getEstimationType();
168 nanStrategy = original.getNaNStrategy();
169 kthSelector = original.getKthSelector();
170
171 setData(original.getDataRef());
172 if (original.cachedPivots != null) {
173 System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
174 }
175 setQuantile(original.quantile);
176 }
177
178 /**
179 * Constructs a Percentile with the specific quantile value,
180 * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
181 *
182 * @param quantile the quantile to be computed
183 * @param estimationType one of the percentile {@link EstimationType estimation types}
184 * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs
185 * @param kthSelector a {@link KthSelector} to use for pivoting during search
186 * @throws MathIllegalArgumentException if p is not within (0,100]
187 * @throws NullArgumentException if type or NaNStrategy passed is null
188 */
189 protected Percentile(final double quantile,
190 final EstimationType estimationType,
191 final NaNStrategy nanStrategy,
192 final KthSelector kthSelector)
193 throws MathIllegalArgumentException {
194 setQuantile(quantile);
195 cachedPivots = null;
196 MathUtils.checkNotNull(estimationType);
197 MathUtils.checkNotNull(nanStrategy);
198 MathUtils.checkNotNull(kthSelector);
199 this.estimationType = estimationType;
200 this.nanStrategy = nanStrategy;
201 this.kthSelector = kthSelector;
202 }
203
204 /** {@inheritDoc} */
205 @Override
206 public void setData(final double[] values) {
207 if (values == null) {
208 cachedPivots = null;
209 } else {
210 cachedPivots = new int[PIVOTS_HEAP_LENGTH];
211 Arrays.fill(cachedPivots, -1);
212 }
213 super.setData(values);
214 }
215
216 /** {@inheritDoc} */
217 @Override
218 public void setData(final double[] values, final int begin, final int length)
219 throws MathIllegalArgumentException {
220 MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY);
221 cachedPivots = new int[PIVOTS_HEAP_LENGTH];
222 Arrays.fill(cachedPivots, -1);
223 super.setData(values, begin, length);
224 }
225
226 /**
227 * Returns the result of evaluating the statistic over the stored data.
228 * <p>
229 * The stored array is the one which was set by previous calls to
230 * {@link #setData(double[])}
231 *
232 * @param p the percentile value to compute
233 * @return the value of the statistic applied to the stored data
234 * @throws MathIllegalArgumentException if p is not a valid quantile value
235 * (p must be greater than 0 and less than or equal to 100)
236 */
237 public double evaluate(final double p) throws MathIllegalArgumentException {
238 return evaluate(getDataRef(), p);
239 }
240
241 /**
242 * Returns an estimate of the <code>quantile</code>th percentile of the
243 * designated values in the <code>values</code> array.
244 * <p>The quantile
245 * estimated is determined by the <code>quantile</code> property.
246 * </p>
247 * <ul>
248 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
249 * <li>Returns (for any value of <code>quantile</code>)
250 * <code>values[begin]</code> if <code>length = 1 </code></li>
251 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
252 * is null, or <code>start</code> or <code>length</code> is invalid</li>
253 * </ul>
254 * <p>
255 * See {@link Percentile} for a description of the percentile estimation
256 * algorithm used.
257 *
258 * @param values the input array
259 * @param start index of the first array element to include
260 * @param length the number of elements to include
261 * @return the percentile value
262 * @throws MathIllegalArgumentException if the parameters are not valid
263 *
264 */
265 @Override
266 public double evaluate(final double[] values, final int start, final int length)
267 throws MathIllegalArgumentException {
268 return evaluate(values, start, length, quantile);
269 }
270
271 /**
272 * Returns an estimate of the <code>p</code>th percentile of the values
273 * in the <code>values</code> array.
274 * <ul>
275 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
276 * <code>0</code></li>
277 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
278 * if <code>values</code> has length <code>1</code></li>
279 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
280 * is null or p is not a valid quantile value (p must be greater than 0
281 * and less than or equal to 100) </li>
282 * </ul>
283 * <p>
284 * The default implementation delegates to
285 * <code>evaluate(double[], int, int, double)</code> in the natural way.
286 *
287 * @param values input array of values
288 * @param p the percentile value to compute
289 * @return the percentile value or Double.NaN if the array is empty
290 * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
291 */
292 public double evaluate(final double[] values, final double p)
293 throws MathIllegalArgumentException {
294 MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY);
295 return evaluate(values, 0, values.length, p);
296 }
297
298 /**
299 * Returns an estimate of the <code>p</code>th percentile of the values
300 * in the <code>values</code> array, starting with the element in (0-based)
301 * position <code>begin</code> in the array and including <code>length</code>
302 * values.
303 * <p>
304 * Calls to this method do not modify the internal <code>quantile</code>
305 * state of this statistic.
306 * </p>
307 * <ul>
308 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
309 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
310 * if <code>length = 1 </code></li>
311 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
312 * is null , <code>begin</code> or <code>length</code> is invalid, or
313 * <code>p</code> is not a valid quantile value (p must be greater than 0
314 * and less than or equal to 100)</li>
315 * </ul>
316 * <p>
317 * See {@link Percentile} for a description of the percentile estimation
318 * algorithm used.
319 *
320 * @param values array of input values
321 * @param p the percentile to compute
322 * @param begin the first (0-based) element to include in the computation
323 * @param length the number of array elements to include
324 * @return the percentile value
325 * @throws MathIllegalArgumentException if the parameters are not valid or the
326 * input array is null
327 */
328 public double evaluate(final double[] values, final int begin,
329 final int length, final double p)
330 throws MathIllegalArgumentException {
331
332 MathArrays.verifyValues(values, begin, length);
333 if (p > 100 || p <= 0) {
334 throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
335 p, 0, 100);
336 }
337 if (length == 0) {
338 return Double.NaN;
339 }
340 if (length == 1) {
341 return values[begin]; // always return single value for n = 1
342 }
343
344 final double[] work = getWorkArray(values, begin, length);
345 final int[] pivotsHeap = getPivots(values);
346 return work.length == 0 ? Double.NaN :
347 estimationType.evaluate(work, pivotsHeap, p, kthSelector);
348 }
349
350 /**
351 * Returns the value of the quantile field (determines what percentile is
352 * computed when evaluate() is called with no quantile argument).
353 *
354 * @return quantile set while construction or {@link #setQuantile(double)}
355 */
356 public double getQuantile() {
357 return quantile;
358 }
359
360 /**
361 * Sets the value of the quantile field (determines what percentile is
362 * computed when evaluate() is called with no quantile argument).
363 *
364 * @param p a value between 0 < p <= 100
365 * @throws MathIllegalArgumentException if p is not greater than 0 and less
366 * than or equal to 100
367 */
368 public void setQuantile(final double p) throws MathIllegalArgumentException {
369 if (p <= 0 || p > 100) {
370 throw new MathIllegalArgumentException(
371 LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
372 }
373 quantile = p;
374 }
375
376 /** {@inheritDoc} */
377 @Override
378 public Percentile copy() {
379 return new Percentile(this);
380 }
381
382 /**
383 * Get the work array to operate. Makes use of prior {@code storedData} if
384 * it exists or else do a check on NaNs and copy a subset of the array
385 * defined by begin and length parameters. The set {@link #nanStrategy} will
386 * be used to either retain/remove/replace any NaNs present before returning
387 * the resultant array.
388 *
389 * @param values the array of numbers
390 * @param begin index to start reading the array
391 * @param length the length of array to be read from the begin index
392 * @return work array sliced from values in the range [begin,begin+length)
393 * @throws MathIllegalArgumentException if values or indices are invalid
394 */
395 protected double[] getWorkArray(final double[] values, final int begin, final int length) {
396 final double[] work;
397 if (values == getDataRef()) {
398 work = getDataRef();
399 } else {
400 switch (nanStrategy) {
401 case MAXIMAL:// Replace NaNs with +INFs
402 work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
403 break;
404 case MINIMAL:// Replace NaNs with -INFs
405 work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
406 break;
407 case REMOVED:// Drop NaNs from data
408 work = removeAndSlice(values, begin, length, Double.NaN);
409 break;
410 case FAILED:// just throw exception as NaN is un-acceptable
411 work = copyOf(values, begin, length);
412 MathArrays.checkNotNaN(work);
413 break;
414 default: //FIXED
415 work = copyOf(values, begin, length);
416 break;
417 }
418 }
419 return work;
420 }
421
422 /**
423 * Make a copy of the array for the slice defined by array part from
424 * [begin, begin+length)
425 * @param values the input array
426 * @param begin start index of the array to include
427 * @param length number of elements to include from begin
428 * @return copy of a slice of the original array
429 */
430 private static double[] copyOf(final double[] values, final int begin, final int length) {
431 MathArrays.verifyValues(values, begin, length);
432 return Arrays.copyOfRange(values, begin, begin + length);
433 }
434
435 /**
436 * Replace every occurrence of a given value with a replacement value in a
437 * copied slice of array defined by array part from [begin, begin+length).
438 * @param values the input array
439 * @param begin start index of the array to include
440 * @param length number of elements to include from begin
441 * @param original the value to be replaced with
442 * @param replacement the value to be used for replacement
443 * @return the copy of sliced array with replaced values
444 */
445 private static double[] replaceAndSlice(final double[] values,
446 final int begin, final int length,
447 final double original,
448 final double replacement) {
449 final double[] temp = copyOf(values, begin, length);
450 for(int i = 0; i < length; i++) {
451 temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
452 replacement : temp[i];
453 }
454 return temp;
455 }
456
457 /**
458 * Remove the occurrence of a given value in a copied slice of array
459 * defined by the array part from [begin, begin+length).
460 * @param values the input array
461 * @param begin start index of the array to include
462 * @param length number of elements to include from begin
463 * @param removedValue the value to be removed from the sliced array
464 * @return the copy of the sliced array after removing the removedValue
465 */
466 private static double[] removeAndSlice(final double[] values,
467 final int begin, final int length,
468 final double removedValue) {
469 MathArrays.verifyValues(values, begin, length);
470 final double[] temp;
471 //BitSet(length) to indicate where the removedValue is located
472 final BitSet bits = new BitSet(length);
473 for (int i = begin; i < begin+length; i++) {
474 if (Precision.equalsIncludingNaN(removedValue, values[i])) {
475 bits.set(i - begin);
476 }
477 }
478 //Check if empty then create a new copy
479 if (bits.isEmpty()) {
480 temp = copyOf(values, begin, length); // Nothing removed, just copy
481 } else if (bits.cardinality() == length) {
482 temp = new double[0]; // All removed, just empty
483 } else { // Some removable, so new
484 temp = new double[length - bits.cardinality()];
485 int start = begin; //start index from source array (i.e values)
486 int dest = 0; //dest index in destination array(i.e temp)
487 int bitSetPtr = 0; //bitSetPtr is start index pointer of bitset
488 for (int nextOne = bits.nextSetBit(bitSetPtr); nextOne != -1; nextOne = bits.nextSetBit(bitSetPtr)) {
489 final int lengthToCopy = nextOne - bitSetPtr;
490 System.arraycopy(values, start, temp, dest, lengthToCopy);
491 dest += lengthToCopy;
492 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
493 }
494 //Copy any residue past start index till begin+length
495 if (start < begin + length) {
496 System.arraycopy(values,start,temp,dest,begin + length - start);
497 }
498 }
499 return temp;
500 }
501
502 /**
503 * Get pivots which is either cached or a newly created one
504 *
505 * @param values array containing the input numbers
506 * @return cached pivots or a newly created one
507 */
508 private int[] getPivots(final double[] values) {
509 final int[] pivotsHeap;
510 if (values == getDataRef()) {
511 pivotsHeap = cachedPivots;
512 } else {
513 pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
514 Arrays.fill(pivotsHeap, -1);
515 }
516 return pivotsHeap;
517 }
518
519 /**
520 * Get the estimation {@link EstimationType type} used for computation.
521 *
522 * @return the {@code estimationType} set
523 */
524 public EstimationType getEstimationType() {
525 return estimationType;
526 }
527
528 /**
529 * Build a new instance similar to the current one except for the
530 * {@link EstimationType estimation type}.
531 * <p>
532 * This method is intended to be used as part of a fluent-type builder
533 * pattern. Building finely tune instances should be done as follows:
534 * <pre>
535 * Percentile customized = new Percentile(quantile).
536 * withEstimationType(estimationType).
537 * withNaNStrategy(nanStrategy).
538 * withKthSelector(kthSelector);
539 * </pre>
540 * <p>
541 * If any of the {@code withXxx} method is omitted, the default value for
542 * the corresponding customization parameter will be used.
543 *
544 * @param newEstimationType estimation type for the new instance
545 * @return a new instance, with changed estimation type
546 * @throws NullArgumentException when newEstimationType is null
547 */
548 public Percentile withEstimationType(final EstimationType newEstimationType) {
549 return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
550 }
551
552 /**
553 * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
554 * @return {@code NaN Handling} strategy set during construction
555 */
556 public NaNStrategy getNaNStrategy() {
557 return nanStrategy;
558 }
559
560 /**
561 * Build a new instance similar to the current one except for the
562 * {@link NaNStrategy NaN handling} strategy.
563 * <p>
564 * This method is intended to be used as part of a fluent-type builder
565 * pattern. Building finely tune instances should be done as follows:
566 * <pre>
567 * Percentile customized = new Percentile(quantile).
568 * withEstimationType(estimationType).
569 * withNaNStrategy(nanStrategy).
570 * withKthSelector(kthSelector);
571 * </pre>
572 * <p>
573 * If any of the {@code withXxx} method is omitted, the default value for
574 * the corresponding customization parameter will be used.
575 *
576 * @param newNaNStrategy NaN strategy for the new instance
577 * @return a new instance, with changed NaN handling strategy
578 * @throws NullArgumentException when newNaNStrategy is null
579 */
580 public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
581 return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
582 }
583
584 /**
585 * Get the {@link KthSelector kthSelector} used for computation.
586 * @return the {@code kthSelector} set
587 */
588 public KthSelector getKthSelector() {
589 return kthSelector;
590 }
591
592 /**
593 * Get the {@link PivotingStrategy} used in KthSelector for computation.
594 * @return the pivoting strategy set
595 */
596 public PivotingStrategy getPivotingStrategy() {
597 return kthSelector.getPivotingStrategy();
598 }
599
600 /**
601 * Build a new instance similar to the current one except for the
602 * {@link KthSelector kthSelector} instance specifically set.
603 * <p>
604 * This method is intended to be used as part of a fluent-type builder
605 * pattern. Building finely tune instances should be done as follows:
606 * <pre>
607 * Percentile customized = new Percentile(quantile).
608 * withEstimationType(estimationType).
609 * withNaNStrategy(nanStrategy).
610 * withKthSelector(newKthSelector);
611 * </pre>
612 * <p>
613 * If any of the {@code withXxx} method is omitted, the default value for
614 * the corresponding customization parameter will be used.
615 *
616 * @param newKthSelector KthSelector for the new instance
617 * @return a new instance, with changed KthSelector
618 * @throws NullArgumentException when newKthSelector is null
619 */
620 public Percentile withKthSelector(final KthSelector newKthSelector) {
621 return new Percentile(quantile, estimationType, nanStrategy, newKthSelector);
622 }
623
624 /**
625 * An enum for various estimation strategies of a percentile referred in
626 * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
627 * with the names of enum matching those of types mentioned in
628 * wikipedia.
629 * <p>
630 * Each enum corresponding to the specific type of estimation in wikipedia
631 * implements the respective formulae that specializes in the below aspects
632 * <ul>
633 * <li>An <b>index method</b> to calculate approximate index of the
634 * estimate</li>
635 * <li>An <b>estimate method</b> to estimate a value found at the earlier
636 * computed index</li>
637 * <li>A <b> minLimit</b> on the quantile for which first element of sorted
638 * input is returned as an estimate </li>
639 * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
640 * input is returned as an estimate </li>
641 * </ul>
642 * <p>
643 * Users can now create {@link Percentile} by explicitly passing this enum;
644 * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
645 * <p>
646 * References:
647 * <ol>
648 * <li>
649 * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
650 * </li>
651 * <li>
652 * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
653 * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
654 * packages, American Statistician 50, 361–365</a> </li>
655 * <li>
656 * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
657 * R-Manual </a></li>
658 * </ol>
659 */
660 public enum EstimationType {
661 /**
662 * This is the default type used in the {@link Percentile}.This method
663 * has the following formulae for index and estimates<br>
664 * \( \begin{align}
665 * &index = (N+1)p\ \\
666 * &estimate = x_{\lceil h\,-\,1/2 \rceil} \\
667 * &minLimit = 0 \\
668 * &maxLimit = 1 \\
669 * \end{align}\)
670 */
671 LEGACY("Legacy Hipparchus") {
672 /**
673 * {@inheritDoc}.This method in particular makes use of existing
674 * Hipparchus style of picking up the index.
675 */
676 @Override
677 protected double index(final double p, final int length) {
678 final double minLimit = 0d;
679 final double maxLimit = 1d;
680 return Double.compare(p, minLimit) == 0 ? 0 :
681 Double.compare(p, maxLimit) == 0 ?
682 length : p * (length + 1);
683 }
684 },
685 /**
686 * The method R_1 has the following formulae for index and estimates<br>
687 * \( \begin{align}
688 * &index= Np + 1/2\, \\
689 * &estimate= x_{\lceil h\,-\,1/2 \rceil} \\
690 * &minLimit = 0 \\
691 * \end{align}\)
692 */
693 R_1("R-1") {
694
695 @Override
696 protected double index(final double p, final int length) {
697 final double minLimit = 0d;
698 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
699 }
700
701 /**
702 * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
703 */
704 @Override
705 protected double estimate(final double[] values,
706 final int[] pivotsHeap, final double pos,
707 final int length, final KthSelector selector) {
708 return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
709 }
710
711 },
712 /**
713 * The method R_2 has the following formulae for index and estimates<br>
714 * \( \begin{align}
715 * &index= Np + 1/2\, \\
716 * &estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
717 * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
718 * &minLimit = 0 \\
719 * &maxLimit = 1 \\
720 * \end{align}\)
721 */
722 R_2("R-2") {
723
724 @Override
725 protected double index(final double p, final int length) {
726 final double minLimit = 0d;
727 final double maxLimit = 1d;
728 return Double.compare(p, maxLimit) == 0 ? length :
729 Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
730 }
731
732 /**
733 * {@inheritDoc}This method in particular for R_2 averages the
734 * values at ceil(p+0.5) and floor(p-0.5).
735 */
736 @Override
737 protected double estimate(final double[] values,
738 final int[] pivotsHeap, final double pos,
739 final int length, final KthSelector selector) {
740 final double low =
741 super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
742 final double high =
743 super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector);
744 return (low + high) / 2;
745 }
746
747 },
748 /**
749 * The method R_3 has the following formulae for index and estimates<br>
750 * \( \begin{align}
751 * &index= Np \\
752 * &estimate= x_{\lfloor h \rceil}\, \\
753 * &minLimit = 0.5/N \\
754 * \end{align}\)
755 */
756 R_3("R-3") {
757 @Override
758 protected double index(final double p, final int length) {
759 final double minLimit = 1d/2 / length;
760 return Double.compare(p, minLimit) <= 0 ?
761 0 : FastMath.rint(length * p);
762 }
763
764 },
765 /**
766 * The method R_4 has the following formulae for index and estimates<br>
767 * \( \begin{align}
768 * &index= Np\, \\
769 * &estimate= x_{\lfloor h \rfloor} + (h -
770 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
771 * \rfloor}) \\
772 * &minLimit = 1/N \\
773 * &maxLimit = 1 \\
774 * \end{align}\)
775 */
776 R_4("R-4") {
777 @Override
778 protected double index(final double p, final int length) {
779 final double minLimit = 1d / length;
780 final double maxLimit = 1d;
781 return Double.compare(p, minLimit) < 0 ? 0 :
782 Double.compare(p, maxLimit) == 0 ? length : length * p;
783 }
784
785 },
786 /**
787 * The method R_5 has the following formulae for index and estimates<br>
788 * \( \begin{align}
789 * &index= Np + 1/2\\
790 * &estimate= x_{\lfloor h \rfloor} + (h -
791 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
792 * \rfloor}) \\
793 * &minLimit = 0.5/N \\
794 * &maxLimit = (N-0.5)/N
795 * \end{align}\)
796 */
797 R_5("R-5") {
798
799 @Override
800 protected double index(final double p, final int length) {
801 final double minLimit = 1d/2 / length;
802 final double maxLimit = (length - 0.5) / length;
803 return Double.compare(p, minLimit) < 0 ? 0 :
804 Double.compare(p, maxLimit) >= 0 ?
805 length : length * p + 0.5;
806 }
807 },
808 /**
809 * The method R_6 has the following formulae for index and estimates<br>
810 * \( \begin{align}
811 * &index= (N + 1)p \\
812 * &estimate= x_{\lfloor h \rfloor} + (h -
813 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
814 * \rfloor}) \\
815 * &minLimit = 1/(N+1) \\
816 * &maxLimit = N/(N+1) \\
817 * \end{align}\)
818 * <p>
819 * <b>Note:</b> This method computes the index in a manner very close to
820 * the default Hipparchus Percentile existing implementation. However
821 * the difference to be noted is in picking up the limits with which
822 * first element (p<1(N+1)) and last elements (p>N/(N+1))are done.
823 * While in default case; these are done with p=0 and p=1 respectively.
824 */
825 R_6("R-6") {
826
827 @Override
828 protected double index(final double p, final int length) {
829 final double minLimit = 1d / (length + 1);
830 final double maxLimit = 1d * length / (length + 1);
831 return Double.compare(p, minLimit) < 0 ? 0 :
832 Double.compare(p, maxLimit) >= 0 ?
833 length : (length + 1) * p;
834 }
835 },
836
837 /**
838 * The method R_7 implements Microsoft Excel style computation has the
839 * following formulae for index and estimates.<br>
840 * \( \begin{align}
841 * &index = (N-1)p + 1 \\
842 * &estimate = x_{\lfloor h \rfloor} + (h -
843 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
844 * \rfloor}) \\
845 * &minLimit = 0 \\
846 * &maxLimit = 1 \\
847 * \end{align}\)
848 */
849 R_7("R-7") {
850 @Override
851 protected double index(final double p, final int length) {
852 final double minLimit = 0d;
853 final double maxLimit = 1d;
854 return Double.compare(p, minLimit) == 0 ? 0 :
855 Double.compare(p, maxLimit) == 0 ?
856 length : 1 + (length - 1) * p;
857 }
858
859 },
860
861 /**
862 * The method R_8 has the following formulae for index and estimates<br>
863 * \( \begin{align}
864 * &index = (N + 1/3)p + 1/3 \\
865 * &estimate = x_{\lfloor h \rfloor} + (h -
866 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
867 * \rfloor}) \\
868 * &minLimit = (2/3)/(N+1/3) \\
869 * &maxLimit = (N-1/3)/(N+1/3) \\
870 * \end{align}\)
871 * <p>
872 * As per Ref [2,3] this approach is most recommended as it provides
873 * an approximate median-unbiased estimate regardless of distribution.
874 */
875 R_8("R-8") {
876 @Override
877 protected double index(final double p, final int length) {
878 final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
879 final double maxLimit =
880 (length - 1d / 3) / (length + 1d / 3);
881 return Double.compare(p, minLimit) < 0 ? 0 :
882 Double.compare(p, maxLimit) >= 0 ? length :
883 (length + 1d / 3) * p + 1d / 3;
884 }
885 },
886
887 /**
888 * The method R_9 has the following formulae for index and estimates<br>
889 * \( \begin{align}
890 * &index = (N + 1/4)p + 3/8\\
891 * &estimate = x_{\lfloor h \rfloor} + (h -
892 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
893 * \rfloor}) \\
894 * &minLimit = (5/8)/(N+1/4) \\
895 * &maxLimit = (N-3/8)/(N+1/4) \\
896 * \end{align}\)
897 */
898 R_9("R-9") {
899 @Override
900 protected double index(final double p, final int length) {
901 final double minLimit = 5d/8 / (length + 0.25);
902 final double maxLimit = (length - 3d/8) / (length + 0.25);
903 return Double.compare(p, minLimit) < 0 ? 0 :
904 Double.compare(p, maxLimit) >= 0 ? length :
905 (length + 0.25) * p + 3d/8;
906 }
907
908 },
909 ;
910
911 /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
912 private final String name;
913
914 /**
915 * Constructor
916 *
917 * @param type name of estimation type as per wikipedia
918 */
919 EstimationType(final String type) {
920 this.name = type;
921 }
922
923 /**
924 * Finds the index of array that can be used as starting index to
925 * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
926 * percentile. The calculation of index calculation is specific to each
927 * {@link EstimationType}.
928 *
929 * @param p the p<sup>th</sup> quantile
930 * @param length the total number of array elements in the work array
931 * @return a computed real valued index as explained in the wikipedia
932 */
933 protected abstract double index(double p, int length);
934
935 /**
936 * Estimation based on K<sup>th</sup> selection. This may be overridden
937 * in specific enums to compute slightly different estimations.
938 *
939 * @param work array of numbers to be used for finding the percentile
940 * @param pos indicated positional index prior computed from calling
941 * {@link #index(double, int)}
942 * @param pivotsHeap an earlier populated cache if exists; will be used
943 * @param length size of array considered
944 * @param selector a {@link KthSelector} used for pivoting during search
945 * @return estimated percentile
946 */
947 protected double estimate(final double[] work, final int[] pivotsHeap,
948 final double pos, final int length,
949 final KthSelector selector) {
950
951 final double fpos = FastMath.floor(pos);
952 final int intPos = (int) fpos;
953 final double dif = pos - fpos;
954
955 if (pos < 1) {
956 return selector.select(work, pivotsHeap, 0);
957 }
958 if (pos >= length) {
959 return selector.select(work, pivotsHeap, length - 1);
960 }
961
962 final double lower = selector.select(work, pivotsHeap, intPos - 1);
963 final double upper = selector.select(work, pivotsHeap, intPos);
964 return lower + dif * (upper - lower);
965 }
966
967 /**
968 * Evaluate method to compute the percentile for a given bounded array
969 * using earlier computed pivots heap.<br>
970 * This basically calls the {@link #index(double, int) index} and then
971 * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
972 * functions to return the estimated percentile value.
973 *
974 * @param work array of numbers to be used for finding the percentile
975 * @param pivotsHeap a prior cached heap which can speed up estimation
976 * @param p the p<sup>th</sup> quantile to be computed
977 * @param selector a {@link KthSelector} used for pivoting during search
978 * @return estimated percentile
979 * @throws MathIllegalArgumentException if p is out of range
980 * @throws NullArgumentException if work array is null
981 */
982 protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
983 final KthSelector selector) {
984 MathUtils.checkNotNull(work);
985 if (p > 100 || p <= 0) {
986 throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
987 p, 0, 100);
988 }
989 return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector);
990 }
991
992 /**
993 * Evaluate method to compute the percentile for a given bounded array.
994 * This basically calls the {@link #index(double, int) index} and then
995 * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
996 * functions to return the estimated percentile value. Please
997 * note that this method does not make use of cached pivots.
998 *
999 * @param work array of numbers to be used for finding the percentile
1000 * @param p the p<sup>th</sup> quantile to be computed
1001 * @return estimated percentile
1002 * @param selector a {@link KthSelector} used for pivoting during search
1003 * @throws MathIllegalArgumentException if length or p is out of range
1004 * @throws NullArgumentException if work array is null
1005 */
1006 public double evaluate(final double[] work, final double p, final KthSelector selector) {
1007 return this.evaluate(work, null, p, selector);
1008 }
1009
1010 /**
1011 * Gets the name of the enum
1012 *
1013 * @return the name
1014 */
1015 String getName() {
1016 return name;
1017 }
1018 }
1019 }