1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * This is not the original file distributed by the Apache Software Foundation 20 * It has been modified by the Hipparchus project 21 */ 22 package org.hipparchus.stat.descriptive.rank; 23 24 import java.io.Serializable; 25 import java.util.Arrays; 26 import java.util.BitSet; 27 28 import org.hipparchus.exception.LocalizedCoreFormats; 29 import org.hipparchus.exception.MathIllegalArgumentException; 30 import org.hipparchus.exception.NullArgumentException; 31 import org.hipparchus.stat.LocalizedStatFormats; 32 import org.hipparchus.stat.descriptive.AbstractUnivariateStatistic; 33 import org.hipparchus.stat.ranking.NaNStrategy; 34 import org.hipparchus.util.FastMath; 35 import org.hipparchus.util.KthSelector; 36 import org.hipparchus.util.MathArrays; 37 import org.hipparchus.util.MathUtils; 38 import org.hipparchus.util.PivotingStrategy; 39 import org.hipparchus.util.Precision; 40 41 /** 42 * Provides percentile computation. 43 * <p> 44 * There are several commonly used methods for estimating percentiles (a.k.a. 45 * quantiles) based on sample data. For large samples, the different methods 46 * agree closely, but when sample sizes are small, different methods will give 47 * significantly different results. The algorithm implemented here works as follows: 48 * <ol> 49 * <li>Let <code>n</code> be the length of the (sorted) array and 50 * <code>0 < p <= 100</code> be the desired percentile.</li> 51 * <li>If <code> n = 1 </code> return the unique array element (regardless of 52 * the value of <code>p</code>); otherwise </li> 53 * <li>Compute the estimated percentile position 54 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> 55 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional 56 * part of <code>pos</code>).</li> 57 * <li> If <code>pos < 1</code> return the smallest element in the array.</li> 58 * <li> Else if <code>pos >= n</code> return the largest element in the array.</li> 59 * <li> Else let <code>lower</code> be the element in position 60 * <code>floor(pos)</code> in the array and let <code>upper</code> be the 61 * next element in the array. Return <code>lower + d * (upper - lower)</code> 62 * </li> 63 * </ol> 64 * <p> 65 * To compute percentiles, the data must be at least partially ordered. Input 66 * arrays are copied and recursively partitioned using an ordering definition. 67 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined 68 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes 69 * <code>Double.NaN</code> larger than any other value (including 70 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median 71 * (50th percentile) of 72 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code> 73 * <p> 74 * Since percentile estimation usually involves interpolation between array 75 * elements, arrays containing <code>NaN</code> or infinite values will often 76 * result in <code>NaN</code> or infinite values returned. 77 * <p> 78 * Further, to include different estimation types such as R1, R2 as mentioned in 79 * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>, 80 * a type specific NaN handling strategy is used to closely match with the 81 * typically observed results from popular tools like R(R1-R9), Excel(R7). 82 * <p> 83 * Percentile uses only selection instead of complete sorting and caches selection 84 * algorithm state between calls to the various {@code evaluate} methods. This 85 * greatly improves efficiency, both for a single percentile and multiple percentile 86 * computations. To maximize performance when multiple percentiles are computed 87 * based on the same data, users should set the data array once using either one 88 * of the {@link #evaluate(double[], double)} or {@link #setData(double[])} methods 89 * and thereafter {@link #evaluate(double)} with just the percentile provided. 90 * <p> 91 * <strong>Note that this implementation is not synchronized.</strong> If 92 * multiple threads access an instance of this class concurrently, and at least 93 * one of the threads invokes the <code>increment()</code> or 94 * <code>clear()</code> method, it must be synchronized externally. 95 */ 96 public class Percentile extends AbstractUnivariateStatistic implements Serializable { 97 98 /** Serializable version identifier */ 99 private static final long serialVersionUID = 20150412L; 100 101 /** Maximum number of partitioning pivots cached (each level double the number of pivots). */ 102 private static final int MAX_CACHED_LEVELS = 10; 103 104 /** Maximum number of cached pivots in the pivots cached array */ 105 private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1; 106 107 /** Default KthSelector used with default pivoting strategy */ 108 private final KthSelector kthSelector; 109 110 /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */ 111 private final EstimationType estimationType; 112 113 /** NaN Handling of the input as defined by {@link NaNStrategy} */ 114 private final NaNStrategy nanStrategy; 115 116 /** 117 * Determines what percentile is computed when evaluate() is activated 118 * with no quantile argument. 119 */ 120 private double quantile; 121 122 /** Cached pivots. */ 123 private int[] cachedPivots; 124 125 /** 126 * Constructs a Percentile with the following defaults. 127 * <ul> 128 * <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li> 129 * <li>default estimation type: {@link EstimationType#LEGACY}, 130 * can be reset with {@link #withEstimationType(EstimationType)}</li> 131 * <li>default NaN strategy: {@link NaNStrategy#REMOVED}, 132 * can be reset with {@link #withNaNStrategy(NaNStrategy)}</li> 133 * <li>a KthSelector that makes use of {@link PivotingStrategy#MEDIAN_OF_3}, 134 * can be reset with {@link #withKthSelector(KthSelector)}</li> 135 * </ul> 136 */ 137 public Percentile() { 138 // No try-catch or advertised exception here - arg is valid 139 this(50.0); 140 } 141 142 /** 143 * Constructs a Percentile with the specific quantile value and the following 144 * <ul> 145 * <li>default method type: {@link EstimationType#LEGACY}</li> 146 * <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li> 147 * <li>a Kth Selector : {@link KthSelector}</li> 148 * </ul> 149 * @param quantile the quantile 150 * @throws MathIllegalArgumentException if p is not greater than 0 and less 151 * than or equal to 100 152 */ 153 public Percentile(final double quantile) throws MathIllegalArgumentException { 154 this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED, 155 new KthSelector(PivotingStrategy.MEDIAN_OF_3)); 156 } 157 158 /** 159 * Copy constructor, creates a new {@code Percentile} identical 160 * to the {@code original} 161 * 162 * @param original the {@code Percentile} instance to copy 163 * @throws NullArgumentException if original is null 164 */ 165 public Percentile(final Percentile original) throws NullArgumentException { 166 super(original); 167 estimationType = original.getEstimationType(); 168 nanStrategy = original.getNaNStrategy(); 169 kthSelector = original.getKthSelector(); 170 171 setData(original.getDataRef()); 172 if (original.cachedPivots != null) { 173 System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length); 174 } 175 setQuantile(original.quantile); 176 } 177 178 /** 179 * Constructs a Percentile with the specific quantile value, 180 * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}. 181 * 182 * @param quantile the quantile to be computed 183 * @param estimationType one of the percentile {@link EstimationType estimation types} 184 * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs 185 * @param kthSelector a {@link KthSelector} to use for pivoting during search 186 * @throws MathIllegalArgumentException if p is not within (0,100] 187 * @throws NullArgumentException if type or NaNStrategy passed is null 188 */ 189 protected Percentile(final double quantile, 190 final EstimationType estimationType, 191 final NaNStrategy nanStrategy, 192 final KthSelector kthSelector) 193 throws MathIllegalArgumentException { 194 setQuantile(quantile); 195 cachedPivots = null; 196 MathUtils.checkNotNull(estimationType); 197 MathUtils.checkNotNull(nanStrategy); 198 MathUtils.checkNotNull(kthSelector); 199 this.estimationType = estimationType; 200 this.nanStrategy = nanStrategy; 201 this.kthSelector = kthSelector; 202 } 203 204 /** {@inheritDoc} */ 205 @Override 206 public void setData(final double[] values) { 207 if (values == null) { 208 cachedPivots = null; 209 } else { 210 cachedPivots = new int[PIVOTS_HEAP_LENGTH]; 211 Arrays.fill(cachedPivots, -1); 212 } 213 super.setData(values); 214 } 215 216 /** {@inheritDoc} */ 217 @Override 218 public void setData(final double[] values, final int begin, final int length) 219 throws MathIllegalArgumentException { 220 MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY); 221 cachedPivots = new int[PIVOTS_HEAP_LENGTH]; 222 Arrays.fill(cachedPivots, -1); 223 super.setData(values, begin, length); 224 } 225 226 /** 227 * Returns the result of evaluating the statistic over the stored data. 228 * <p> 229 * The stored array is the one which was set by previous calls to 230 * {@link #setData(double[])} 231 * 232 * @param p the percentile value to compute 233 * @return the value of the statistic applied to the stored data 234 * @throws MathIllegalArgumentException if p is not a valid quantile value 235 * (p must be greater than 0 and less than or equal to 100) 236 */ 237 public double evaluate(final double p) throws MathIllegalArgumentException { 238 return evaluate(getDataRef(), p); 239 } 240 241 /** 242 * Returns an estimate of the <code>quantile</code>th percentile of the 243 * designated values in the <code>values</code> array. 244 * <p>The quantile 245 * estimated is determined by the <code>quantile</code> property. 246 * </p> 247 * <ul> 248 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 249 * <li>Returns (for any value of <code>quantile</code>) 250 * <code>values[begin]</code> if <code>length = 1 </code></li> 251 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 252 * is null, or <code>start</code> or <code>length</code> is invalid</li> 253 * </ul> 254 * <p> 255 * See {@link Percentile} for a description of the percentile estimation 256 * algorithm used. 257 * 258 * @param values the input array 259 * @param start index of the first array element to include 260 * @param length the number of elements to include 261 * @return the percentile value 262 * @throws MathIllegalArgumentException if the parameters are not valid 263 * 264 */ 265 @Override 266 public double evaluate(final double[] values, final int start, final int length) 267 throws MathIllegalArgumentException { 268 return evaluate(values, start, length, quantile); 269 } 270 271 /** 272 * Returns an estimate of the <code>p</code>th percentile of the values 273 * in the <code>values</code> array. 274 * <ul> 275 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 276 * <code>0</code></li> 277 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 278 * if <code>values</code> has length <code>1</code></li> 279 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 280 * is null or p is not a valid quantile value (p must be greater than 0 281 * and less than or equal to 100) </li> 282 * </ul> 283 * <p> 284 * The default implementation delegates to 285 * <code>evaluate(double[], int, int, double)</code> in the natural way. 286 * 287 * @param values input array of values 288 * @param p the percentile value to compute 289 * @return the percentile value or Double.NaN if the array is empty 290 * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid 291 */ 292 public double evaluate(final double[] values, final double p) 293 throws MathIllegalArgumentException { 294 MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY); 295 return evaluate(values, 0, values.length, p); 296 } 297 298 /** 299 * Returns an estimate of the <code>p</code>th percentile of the values 300 * in the <code>values</code> array, starting with the element in (0-based) 301 * position <code>begin</code> in the array and including <code>length</code> 302 * values. 303 * <p> 304 * Calls to this method do not modify the internal <code>quantile</code> 305 * state of this statistic. 306 * </p> 307 * <ul> 308 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 309 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 310 * if <code>length = 1 </code></li> 311 * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> 312 * is null , <code>begin</code> or <code>length</code> is invalid, or 313 * <code>p</code> is not a valid quantile value (p must be greater than 0 314 * and less than or equal to 100)</li> 315 * </ul> 316 * <p> 317 * See {@link Percentile} for a description of the percentile estimation 318 * algorithm used. 319 * 320 * @param values array of input values 321 * @param p the percentile to compute 322 * @param begin the first (0-based) element to include in the computation 323 * @param length the number of array elements to include 324 * @return the percentile value 325 * @throws MathIllegalArgumentException if the parameters are not valid or the 326 * input array is null 327 */ 328 public double evaluate(final double[] values, final int begin, 329 final int length, final double p) 330 throws MathIllegalArgumentException { 331 332 MathArrays.verifyValues(values, begin, length); 333 if (p > 100 || p <= 0) { 334 throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, 335 p, 0, 100); 336 } 337 if (length == 0) { 338 return Double.NaN; 339 } 340 if (length == 1) { 341 return values[begin]; // always return single value for n = 1 342 } 343 344 final double[] work = getWorkArray(values, begin, length); 345 final int[] pivotsHeap = getPivots(values); 346 return work.length == 0 ? Double.NaN : 347 estimationType.evaluate(work, pivotsHeap, p, kthSelector); 348 } 349 350 /** 351 * Returns the value of the quantile field (determines what percentile is 352 * computed when evaluate() is called with no quantile argument). 353 * 354 * @return quantile set while construction or {@link #setQuantile(double)} 355 */ 356 public double getQuantile() { 357 return quantile; 358 } 359 360 /** 361 * Sets the value of the quantile field (determines what percentile is 362 * computed when evaluate() is called with no quantile argument). 363 * 364 * @param p a value between 0 < p <= 100 365 * @throws MathIllegalArgumentException if p is not greater than 0 and less 366 * than or equal to 100 367 */ 368 public void setQuantile(final double p) throws MathIllegalArgumentException { 369 if (p <= 0 || p > 100) { 370 throw new MathIllegalArgumentException( 371 LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100); 372 } 373 quantile = p; 374 } 375 376 /** {@inheritDoc} */ 377 @Override 378 public Percentile copy() { 379 return new Percentile(this); 380 } 381 382 /** 383 * Get the work array to operate. Makes use of prior {@code storedData} if 384 * it exists or else do a check on NaNs and copy a subset of the array 385 * defined by begin and length parameters. The set {@link #nanStrategy} will 386 * be used to either retain/remove/replace any NaNs present before returning 387 * the resultant array. 388 * 389 * @param values the array of numbers 390 * @param begin index to start reading the array 391 * @param length the length of array to be read from the begin index 392 * @return work array sliced from values in the range [begin,begin+length) 393 * @throws MathIllegalArgumentException if values or indices are invalid 394 */ 395 protected double[] getWorkArray(final double[] values, final int begin, final int length) { 396 final double[] work; 397 if (values == getDataRef()) { 398 work = getDataRef(); 399 } else { 400 switch (nanStrategy) { 401 case MAXIMAL:// Replace NaNs with +INFs 402 work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY); 403 break; 404 case MINIMAL:// Replace NaNs with -INFs 405 work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY); 406 break; 407 case REMOVED:// Drop NaNs from data 408 work = removeAndSlice(values, begin, length, Double.NaN); 409 break; 410 case FAILED:// just throw exception as NaN is un-acceptable 411 work = copyOf(values, begin, length); 412 MathArrays.checkNotNaN(work); 413 break; 414 default: //FIXED 415 work = copyOf(values, begin, length); 416 break; 417 } 418 } 419 return work; 420 } 421 422 /** 423 * Make a copy of the array for the slice defined by array part from 424 * [begin, begin+length) 425 * @param values the input array 426 * @param begin start index of the array to include 427 * @param length number of elements to include from begin 428 * @return copy of a slice of the original array 429 */ 430 private static double[] copyOf(final double[] values, final int begin, final int length) { 431 MathArrays.verifyValues(values, begin, length); 432 return Arrays.copyOfRange(values, begin, begin + length); 433 } 434 435 /** 436 * Replace every occurrence of a given value with a replacement value in a 437 * copied slice of array defined by array part from [begin, begin+length). 438 * @param values the input array 439 * @param begin start index of the array to include 440 * @param length number of elements to include from begin 441 * @param original the value to be replaced with 442 * @param replacement the value to be used for replacement 443 * @return the copy of sliced array with replaced values 444 */ 445 private static double[] replaceAndSlice(final double[] values, 446 final int begin, final int length, 447 final double original, 448 final double replacement) { 449 final double[] temp = copyOf(values, begin, length); 450 for(int i = 0; i < length; i++) { 451 temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ? 452 replacement : temp[i]; 453 } 454 return temp; 455 } 456 457 /** 458 * Remove the occurrence of a given value in a copied slice of array 459 * defined by the array part from [begin, begin+length). 460 * @param values the input array 461 * @param begin start index of the array to include 462 * @param length number of elements to include from begin 463 * @param removedValue the value to be removed from the sliced array 464 * @return the copy of the sliced array after removing the removedValue 465 */ 466 private static double[] removeAndSlice(final double[] values, 467 final int begin, final int length, 468 final double removedValue) { 469 MathArrays.verifyValues(values, begin, length); 470 final double[] temp; 471 //BitSet(length) to indicate where the removedValue is located 472 final BitSet bits = new BitSet(length); 473 for (int i = begin; i < begin+length; i++) { 474 if (Precision.equalsIncludingNaN(removedValue, values[i])) { 475 bits.set(i - begin); 476 } 477 } 478 //Check if empty then create a new copy 479 if (bits.isEmpty()) { 480 temp = copyOf(values, begin, length); // Nothing removed, just copy 481 } else if (bits.cardinality() == length) { 482 temp = new double[0]; // All removed, just empty 483 } else { // Some removable, so new 484 temp = new double[length - bits.cardinality()]; 485 int start = begin; //start index from source array (i.e values) 486 int dest = 0; //dest index in destination array(i.e temp) 487 int bitSetPtr = 0; //bitSetPtr is start index pointer of bitset 488 for (int nextOne = bits.nextSetBit(bitSetPtr); nextOne != -1; nextOne = bits.nextSetBit(bitSetPtr)) { 489 final int lengthToCopy = nextOne - bitSetPtr; 490 System.arraycopy(values, start, temp, dest, lengthToCopy); 491 dest += lengthToCopy; 492 start = begin + (bitSetPtr = bits.nextClearBit(nextOne)); 493 } 494 //Copy any residue past start index till begin+length 495 if (start < begin + length) { 496 System.arraycopy(values,start,temp,dest,begin + length - start); 497 } 498 } 499 return temp; 500 } 501 502 /** 503 * Get pivots which is either cached or a newly created one 504 * 505 * @param values array containing the input numbers 506 * @return cached pivots or a newly created one 507 */ 508 private int[] getPivots(final double[] values) { 509 final int[] pivotsHeap; 510 if (values == getDataRef()) { 511 pivotsHeap = cachedPivots; 512 } else { 513 pivotsHeap = new int[PIVOTS_HEAP_LENGTH]; 514 Arrays.fill(pivotsHeap, -1); 515 } 516 return pivotsHeap; 517 } 518 519 /** 520 * Get the estimation {@link EstimationType type} used for computation. 521 * 522 * @return the {@code estimationType} set 523 */ 524 public EstimationType getEstimationType() { 525 return estimationType; 526 } 527 528 /** 529 * Build a new instance similar to the current one except for the 530 * {@link EstimationType estimation type}. 531 * <p> 532 * This method is intended to be used as part of a fluent-type builder 533 * pattern. Building finely tune instances should be done as follows: 534 * <pre> 535 * Percentile customized = new Percentile(quantile). 536 * withEstimationType(estimationType). 537 * withNaNStrategy(nanStrategy). 538 * withKthSelector(kthSelector); 539 * </pre> 540 * <p> 541 * If any of the {@code withXxx} method is omitted, the default value for 542 * the corresponding customization parameter will be used. 543 * 544 * @param newEstimationType estimation type for the new instance 545 * @return a new instance, with changed estimation type 546 * @throws NullArgumentException when newEstimationType is null 547 */ 548 public Percentile withEstimationType(final EstimationType newEstimationType) { 549 return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector); 550 } 551 552 /** 553 * Get the {@link NaNStrategy NaN Handling} strategy used for computation. 554 * @return {@code NaN Handling} strategy set during construction 555 */ 556 public NaNStrategy getNaNStrategy() { 557 return nanStrategy; 558 } 559 560 /** 561 * Build a new instance similar to the current one except for the 562 * {@link NaNStrategy NaN handling} strategy. 563 * <p> 564 * This method is intended to be used as part of a fluent-type builder 565 * pattern. Building finely tune instances should be done as follows: 566 * <pre> 567 * Percentile customized = new Percentile(quantile). 568 * withEstimationType(estimationType). 569 * withNaNStrategy(nanStrategy). 570 * withKthSelector(kthSelector); 571 * </pre> 572 * <p> 573 * If any of the {@code withXxx} method is omitted, the default value for 574 * the corresponding customization parameter will be used. 575 * 576 * @param newNaNStrategy NaN strategy for the new instance 577 * @return a new instance, with changed NaN handling strategy 578 * @throws NullArgumentException when newNaNStrategy is null 579 */ 580 public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) { 581 return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector); 582 } 583 584 /** 585 * Get the {@link KthSelector kthSelector} used for computation. 586 * @return the {@code kthSelector} set 587 */ 588 public KthSelector getKthSelector() { 589 return kthSelector; 590 } 591 592 /** 593 * Get the {@link PivotingStrategy} used in KthSelector for computation. 594 * @return the pivoting strategy set 595 */ 596 public PivotingStrategy getPivotingStrategy() { 597 return kthSelector.getPivotingStrategy(); 598 } 599 600 /** 601 * Build a new instance similar to the current one except for the 602 * {@link KthSelector kthSelector} instance specifically set. 603 * <p> 604 * This method is intended to be used as part of a fluent-type builder 605 * pattern. Building finely tune instances should be done as follows: 606 * <pre> 607 * Percentile customized = new Percentile(quantile). 608 * withEstimationType(estimationType). 609 * withNaNStrategy(nanStrategy). 610 * withKthSelector(newKthSelector); 611 * </pre> 612 * <p> 613 * If any of the {@code withXxx} method is omitted, the default value for 614 * the corresponding customization parameter will be used. 615 * 616 * @param newKthSelector KthSelector for the new instance 617 * @return a new instance, with changed KthSelector 618 * @throws NullArgumentException when newKthSelector is null 619 */ 620 public Percentile withKthSelector(final KthSelector newKthSelector) { 621 return new Percentile(quantile, estimationType, nanStrategy, newKthSelector); 622 } 623 624 /** 625 * An enum for various estimation strategies of a percentile referred in 626 * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a> 627 * with the names of enum matching those of types mentioned in 628 * wikipedia. 629 * <p> 630 * Each enum corresponding to the specific type of estimation in wikipedia 631 * implements the respective formulae that specializes in the below aspects 632 * <ul> 633 * <li>An <b>index method</b> to calculate approximate index of the 634 * estimate</li> 635 * <li>An <b>estimate method</b> to estimate a value found at the earlier 636 * computed index</li> 637 * <li>A <b> minLimit</b> on the quantile for which first element of sorted 638 * input is returned as an estimate </li> 639 * <li>A <b> maxLimit</b> on the quantile for which last element of sorted 640 * input is returned as an estimate </li> 641 * </ul> 642 * <p> 643 * Users can now create {@link Percentile} by explicitly passing this enum; 644 * such as by invoking {@link Percentile#withEstimationType(EstimationType)} 645 * <p> 646 * References: 647 * <ol> 648 * <li> 649 * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a> 650 * </li> 651 * <li> 652 * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf"> 653 * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical 654 * packages, American Statistician 50, 361–365</a> </li> 655 * <li> 656 * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html"> 657 * R-Manual </a></li> 658 * </ol> 659 */ 660 public enum EstimationType { 661 /** 662 * This is the default type used in the {@link Percentile}.This method 663 * has the following formulae for index and estimates<br> 664 * \( \begin{align} 665 * &index = (N+1)p\ \\ 666 * &estimate = x_{\lceil h\,-\,1/2 \rceil} \\ 667 * &minLimit = 0 \\ 668 * &maxLimit = 1 \\ 669 * \end{align}\) 670 */ 671 LEGACY("Legacy Hipparchus") { 672 /** 673 * {@inheritDoc}.This method in particular makes use of existing 674 * Hipparchus style of picking up the index. 675 */ 676 @Override 677 protected double index(final double p, final int length) { 678 final double minLimit = 0d; 679 final double maxLimit = 1d; 680 return Double.compare(p, minLimit) == 0 ? 0 : 681 Double.compare(p, maxLimit) == 0 ? 682 length : p * (length + 1); 683 } 684 }, 685 /** 686 * The method R_1 has the following formulae for index and estimates<br> 687 * \( \begin{align} 688 * &index= Np + 1/2\, \\ 689 * &estimate= x_{\lceil h\,-\,1/2 \rceil} \\ 690 * &minLimit = 0 \\ 691 * \end{align}\) 692 */ 693 R_1("R-1") { 694 695 @Override 696 protected double index(final double p, final int length) { 697 final double minLimit = 0d; 698 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; 699 } 700 701 /** 702 * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5) 703 */ 704 @Override 705 protected double estimate(final double[] values, 706 final int[] pivotsHeap, final double pos, 707 final int length, final KthSelector selector) { 708 return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector); 709 } 710 711 }, 712 /** 713 * The method R_2 has the following formulae for index and estimates<br> 714 * \( \begin{align} 715 * &index= Np + 1/2\, \\ 716 * &estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} + 717 * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\ 718 * &minLimit = 0 \\ 719 * &maxLimit = 1 \\ 720 * \end{align}\) 721 */ 722 R_2("R-2") { 723 724 @Override 725 protected double index(final double p, final int length) { 726 final double minLimit = 0d; 727 final double maxLimit = 1d; 728 return Double.compare(p, maxLimit) == 0 ? length : 729 Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; 730 } 731 732 /** 733 * {@inheritDoc}This method in particular for R_2 averages the 734 * values at ceil(p+0.5) and floor(p-0.5). 735 */ 736 @Override 737 protected double estimate(final double[] values, 738 final int[] pivotsHeap, final double pos, 739 final int length, final KthSelector selector) { 740 final double low = 741 super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector); 742 final double high = 743 super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector); 744 return (low + high) / 2; 745 } 746 747 }, 748 /** 749 * The method R_3 has the following formulae for index and estimates<br> 750 * \( \begin{align} 751 * &index= Np \\ 752 * &estimate= x_{\lfloor h \rceil}\, \\ 753 * &minLimit = 0.5/N \\ 754 * \end{align}\) 755 */ 756 R_3("R-3") { 757 @Override 758 protected double index(final double p, final int length) { 759 final double minLimit = 1d/2 / length; 760 return Double.compare(p, minLimit) <= 0 ? 761 0 : FastMath.rint(length * p); 762 } 763 764 }, 765 /** 766 * The method R_4 has the following formulae for index and estimates<br> 767 * \( \begin{align} 768 * &index= Np\, \\ 769 * &estimate= x_{\lfloor h \rfloor} + (h - 770 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 771 * \rfloor}) \\ 772 * &minLimit = 1/N \\ 773 * &maxLimit = 1 \\ 774 * \end{align}\) 775 */ 776 R_4("R-4") { 777 @Override 778 protected double index(final double p, final int length) { 779 final double minLimit = 1d / length; 780 final double maxLimit = 1d; 781 return Double.compare(p, minLimit) < 0 ? 0 : 782 Double.compare(p, maxLimit) == 0 ? length : length * p; 783 } 784 785 }, 786 /** 787 * The method R_5 has the following formulae for index and estimates<br> 788 * \( \begin{align} 789 * &index= Np + 1/2\\ 790 * &estimate= x_{\lfloor h \rfloor} + (h - 791 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 792 * \rfloor}) \\ 793 * &minLimit = 0.5/N \\ 794 * &maxLimit = (N-0.5)/N 795 * \end{align}\) 796 */ 797 R_5("R-5") { 798 799 @Override 800 protected double index(final double p, final int length) { 801 final double minLimit = 1d/2 / length; 802 final double maxLimit = (length - 0.5) / length; 803 return Double.compare(p, minLimit) < 0 ? 0 : 804 Double.compare(p, maxLimit) >= 0 ? 805 length : length * p + 0.5; 806 } 807 }, 808 /** 809 * The method R_6 has the following formulae for index and estimates<br> 810 * \( \begin{align} 811 * &index= (N + 1)p \\ 812 * &estimate= x_{\lfloor h \rfloor} + (h - 813 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 814 * \rfloor}) \\ 815 * &minLimit = 1/(N+1) \\ 816 * &maxLimit = N/(N+1) \\ 817 * \end{align}\) 818 * <p> 819 * <b>Note:</b> This method computes the index in a manner very close to 820 * the default Hipparchus Percentile existing implementation. However 821 * the difference to be noted is in picking up the limits with which 822 * first element (p<1(N+1)) and last elements (p>N/(N+1))are done. 823 * While in default case; these are done with p=0 and p=1 respectively. 824 */ 825 R_6("R-6") { 826 827 @Override 828 protected double index(final double p, final int length) { 829 final double minLimit = 1d / (length + 1); 830 final double maxLimit = 1d * length / (length + 1); 831 return Double.compare(p, minLimit) < 0 ? 0 : 832 Double.compare(p, maxLimit) >= 0 ? 833 length : (length + 1) * p; 834 } 835 }, 836 837 /** 838 * The method R_7 implements Microsoft Excel style computation has the 839 * following formulae for index and estimates.<br> 840 * \( \begin{align} 841 * &index = (N-1)p + 1 \\ 842 * &estimate = x_{\lfloor h \rfloor} + (h - 843 * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 844 * \rfloor}) \\ 845 * &minLimit = 0 \\ 846 * &maxLimit = 1 \\ 847 * \end{align}\) 848 */ 849 R_7("R-7") { 850 @Override 851 protected double index(final double p, final int length) { 852 final double minLimit = 0d; 853 final double maxLimit = 1d; 854 return Double.compare(p, minLimit) == 0 ? 0 : 855 Double.compare(p, maxLimit) == 0 ? 856 length : 1 + (length - 1) * p; 857 } 858 859 }, 860 861 /** 862 * The method R_8 has the following formulae for index and estimates<br> 863 * \( \begin{align} 864 * &index = (N + 1/3)p + 1/3 \\ 865 * &estimate = x_{\lfloor h \rfloor} + (h - 866 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 867 * \rfloor}) \\ 868 * &minLimit = (2/3)/(N+1/3) \\ 869 * &maxLimit = (N-1/3)/(N+1/3) \\ 870 * \end{align}\) 871 * <p> 872 * As per Ref [2,3] this approach is most recommended as it provides 873 * an approximate median-unbiased estimate regardless of distribution. 874 */ 875 R_8("R-8") { 876 @Override 877 protected double index(final double p, final int length) { 878 final double minLimit = 2 * (1d / 3) / (length + 1d / 3); 879 final double maxLimit = 880 (length - 1d / 3) / (length + 1d / 3); 881 return Double.compare(p, minLimit) < 0 ? 0 : 882 Double.compare(p, maxLimit) >= 0 ? length : 883 (length + 1d / 3) * p + 1d / 3; 884 } 885 }, 886 887 /** 888 * The method R_9 has the following formulae for index and estimates<br> 889 * \( \begin{align} 890 * &index = (N + 1/4)p + 3/8\\ 891 * &estimate = x_{\lfloor h \rfloor} + (h - 892 \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h 893 * \rfloor}) \\ 894 * &minLimit = (5/8)/(N+1/4) \\ 895 * &maxLimit = (N-3/8)/(N+1/4) \\ 896 * \end{align}\) 897 */ 898 R_9("R-9") { 899 @Override 900 protected double index(final double p, final int length) { 901 final double minLimit = 5d/8 / (length + 0.25); 902 final double maxLimit = (length - 3d/8) / (length + 0.25); 903 return Double.compare(p, minLimit) < 0 ? 0 : 904 Double.compare(p, maxLimit) >= 0 ? length : 905 (length + 0.25) * p + 3d/8; 906 } 907 908 }, 909 ; 910 911 /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */ 912 private final String name; 913 914 /** 915 * Constructor 916 * 917 * @param type name of estimation type as per wikipedia 918 */ 919 EstimationType(final String type) { 920 this.name = type; 921 } 922 923 /** 924 * Finds the index of array that can be used as starting index to 925 * {@link #estimate(double[], int[], double, int, KthSelector) estimate} 926 * percentile. The calculation of index calculation is specific to each 927 * {@link EstimationType}. 928 * 929 * @param p the p<sup>th</sup> quantile 930 * @param length the total number of array elements in the work array 931 * @return a computed real valued index as explained in the wikipedia 932 */ 933 protected abstract double index(double p, int length); 934 935 /** 936 * Estimation based on K<sup>th</sup> selection. This may be overridden 937 * in specific enums to compute slightly different estimations. 938 * 939 * @param work array of numbers to be used for finding the percentile 940 * @param pos indicated positional index prior computed from calling 941 * {@link #index(double, int)} 942 * @param pivotsHeap an earlier populated cache if exists; will be used 943 * @param length size of array considered 944 * @param selector a {@link KthSelector} used for pivoting during search 945 * @return estimated percentile 946 */ 947 protected double estimate(final double[] work, final int[] pivotsHeap, 948 final double pos, final int length, 949 final KthSelector selector) { 950 951 final double fpos = FastMath.floor(pos); 952 final int intPos = (int) fpos; 953 final double dif = pos - fpos; 954 955 if (pos < 1) { 956 return selector.select(work, pivotsHeap, 0); 957 } 958 if (pos >= length) { 959 return selector.select(work, pivotsHeap, length - 1); 960 } 961 962 final double lower = selector.select(work, pivotsHeap, intPos - 1); 963 final double upper = selector.select(work, pivotsHeap, intPos); 964 return lower + dif * (upper - lower); 965 } 966 967 /** 968 * Evaluate method to compute the percentile for a given bounded array 969 * using earlier computed pivots heap.<br> 970 * This basically calls the {@link #index(double, int) index} and then 971 * {@link #estimate(double[], int[], double, int, KthSelector) estimate} 972 * functions to return the estimated percentile value. 973 * 974 * @param work array of numbers to be used for finding the percentile 975 * @param pivotsHeap a prior cached heap which can speed up estimation 976 * @param p the p<sup>th</sup> quantile to be computed 977 * @param selector a {@link KthSelector} used for pivoting during search 978 * @return estimated percentile 979 * @throws MathIllegalArgumentException if p is out of range 980 * @throws NullArgumentException if work array is null 981 */ 982 protected double evaluate(final double[] work, final int[] pivotsHeap, final double p, 983 final KthSelector selector) { 984 MathUtils.checkNotNull(work); 985 if (p > 100 || p <= 0) { 986 throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, 987 p, 0, 100); 988 } 989 return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector); 990 } 991 992 /** 993 * Evaluate method to compute the percentile for a given bounded array. 994 * This basically calls the {@link #index(double, int) index} and then 995 * {@link #estimate(double[], int[], double, int, KthSelector) estimate} 996 * functions to return the estimated percentile value. Please 997 * note that this method does not make use of cached pivots. 998 * 999 * @param work array of numbers to be used for finding the percentile 1000 * @param p the p<sup>th</sup> quantile to be computed 1001 * @return estimated percentile 1002 * @param selector a {@link KthSelector} used for pivoting during search 1003 * @throws MathIllegalArgumentException if length or p is out of range 1004 * @throws NullArgumentException if work array is null 1005 */ 1006 public double evaluate(final double[] work, final double p, final KthSelector selector) { 1007 return this.evaluate(work, null, p, selector); 1008 } 1009 1010 /** 1011 * Gets the name of the enum 1012 * 1013 * @return the name 1014 */ 1015 String getName() { 1016 return name; 1017 } 1018 } 1019 }