Percentile.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.descriptive.rank;

  22. import java.io.Serializable;
  23. import java.util.Arrays;
  24. import java.util.BitSet;

  25. import org.hipparchus.exception.LocalizedCoreFormats;
  26. import org.hipparchus.exception.MathIllegalArgumentException;
  27. import org.hipparchus.exception.NullArgumentException;
  28. import org.hipparchus.stat.LocalizedStatFormats;
  29. import org.hipparchus.stat.descriptive.AbstractUnivariateStatistic;
  30. import org.hipparchus.stat.ranking.NaNStrategy;
  31. import org.hipparchus.util.FastMath;
  32. import org.hipparchus.util.KthSelector;
  33. import org.hipparchus.util.MathArrays;
  34. import org.hipparchus.util.MathUtils;
  35. import org.hipparchus.util.PivotingStrategy;
  36. import org.hipparchus.util.Precision;

  37. /**
  38.  * Provides percentile computation.
  39.  * <p>
  40.  * There are several commonly used methods for estimating percentiles (a.k.a.
  41.  * quantiles) based on sample data.  For large samples, the different methods
  42.  * agree closely, but when sample sizes are small, different methods will give
  43.  * significantly different results.  The algorithm implemented here works as follows:
  44.  * <ol>
  45.  * <li>Let <code>n</code> be the length of the (sorted) array and
  46.  * <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
  47.  * <li>If <code> n = 1 </code> return the unique array element (regardless of
  48.  * the value of <code>p</code>); otherwise </li>
  49.  * <li>Compute the estimated percentile position
  50.  * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
  51.  * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
  52.  * part of <code>pos</code>).</li>
  53.  * <li> If <code>pos &lt; 1</code> return the smallest element in the array.</li>
  54.  * <li> Else if <code>pos &gt;= n</code> return the largest element in the array.</li>
  55.  * <li> Else let <code>lower</code> be the element in position
  56.  * <code>floor(pos)</code> in the array and let <code>upper</code> be the
  57.  * next element in the array.  Return <code>lower + d * (upper - lower)</code>
  58.  * </li>
  59.  * </ol>
  60.  * <p>
  61.  * To compute percentiles, the data must be at least partially ordered.  Input
  62.  * arrays are copied and recursively partitioned using an ordering definition.
  63.  * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
  64.  * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes
  65.  * <code>Double.NaN</code> larger than any other value (including
  66.  * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
  67.  * (50th percentile) of
  68.  * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code>
  69.  * <p>
  70.  * Since percentile estimation usually involves interpolation between array
  71.  * elements, arrays containing  <code>NaN</code> or infinite values will often
  72.  * result in <code>NaN</code> or infinite values returned.
  73.  * <p>
  74.  * Further, to include different estimation types such as R1, R2 as mentioned in
  75.  * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
  76.  * a type specific NaN handling strategy is used to closely match with the
  77.  * typically observed results from popular tools like R(R1-R9), Excel(R7).
  78.  * <p>
  79.  * Percentile uses only selection instead of complete sorting and caches selection
  80.  * algorithm state between calls to the various {@code evaluate} methods. This
  81.  * greatly improves efficiency, both for a single percentile and multiple percentile
  82.  * computations. To maximize performance when multiple percentiles are computed
  83.  * based on the same data, users should set the data array once using either one
  84.  * of the {@link #evaluate(double[], double)} or {@link #setData(double[])} methods
  85.  * and thereafter {@link #evaluate(double)} with just the percentile provided.
  86.  * <p>
  87.  * <strong>Note that this implementation is not synchronized.</strong> If
  88.  * multiple threads access an instance of this class concurrently, and at least
  89.  * one of the threads invokes the <code>increment()</code> or
  90.  * <code>clear()</code> method, it must be synchronized externally.
  91.  */
  92. public class Percentile extends AbstractUnivariateStatistic implements Serializable {

  93.     /** Serializable version identifier */
  94.     private static final long serialVersionUID = 20150412L;

  95.     /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
  96.     private static final int MAX_CACHED_LEVELS = 10;

  97.     /** Maximum number of cached pivots in the pivots cached array */
  98.     private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;

  99.     /** Default KthSelector used with default pivoting strategy */
  100.     private final KthSelector kthSelector;

  101.     /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
  102.     private final EstimationType estimationType;

  103.     /** NaN Handling of the input as defined by {@link NaNStrategy} */
  104.     private final NaNStrategy nanStrategy;

  105.     /**
  106.      * Determines what percentile is computed when evaluate() is activated
  107.      * with no quantile argument.
  108.      */
  109.     private double quantile;

  110.     /** Cached pivots. */
  111.     private int[] cachedPivots;

  112.     /**
  113.      * Constructs a Percentile with the following defaults.
  114.      * <ul>
  115.      *   <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
  116.      *   <li>default estimation type: {@link EstimationType#LEGACY},
  117.      *   can be reset with {@link #withEstimationType(EstimationType)}</li>
  118.      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED},
  119.      *   can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
  120.      *   <li>a KthSelector that makes use of {@link PivotingStrategy#MEDIAN_OF_3},
  121.      *   can be reset with {@link #withKthSelector(KthSelector)}</li>
  122.      * </ul>
  123.      */
  124.     public Percentile() {
  125.         // No try-catch or advertised exception here - arg is valid
  126.         this(50.0);
  127.     }

  128.     /**
  129.      * Constructs a Percentile with the specific quantile value and the following
  130.      * <ul>
  131.      *   <li>default method type: {@link EstimationType#LEGACY}</li>
  132.      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
  133.      *   <li>a Kth Selector : {@link KthSelector}</li>
  134.      * </ul>
  135.      * @param quantile the quantile
  136.      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
  137.      * than or equal to 100
  138.      */
  139.     public Percentile(final double quantile) throws MathIllegalArgumentException {
  140.         this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
  141.              new KthSelector(PivotingStrategy.MEDIAN_OF_3));
  142.     }

  143.     /**
  144.      * Copy constructor, creates a new {@code Percentile} identical
  145.      * to the {@code original}
  146.      *
  147.      * @param original the {@code Percentile} instance to copy
  148.      * @throws NullArgumentException if original is null
  149.      */
  150.     public Percentile(final Percentile original) throws NullArgumentException {
  151.         super(original);
  152.         estimationType   = original.getEstimationType();
  153.         nanStrategy      = original.getNaNStrategy();
  154.         kthSelector      = original.getKthSelector();

  155.         setData(original.getDataRef());
  156.         if (original.cachedPivots != null) {
  157.             System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
  158.         }
  159.         setQuantile(original.quantile);
  160.     }

  161.     /**
  162.      * Constructs a Percentile with the specific quantile value,
  163.      * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
  164.      *
  165.      * @param quantile the quantile to be computed
  166.      * @param estimationType one of the percentile {@link EstimationType  estimation types}
  167.      * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs
  168.      * @param kthSelector a {@link KthSelector} to use for pivoting during search
  169.      * @throws MathIllegalArgumentException if p is not within (0,100]
  170.      * @throws NullArgumentException if type or NaNStrategy passed is null
  171.      */
  172.     protected Percentile(final double quantile,
  173.                          final EstimationType estimationType,
  174.                          final NaNStrategy nanStrategy,
  175.                          final KthSelector kthSelector)
  176.         throws MathIllegalArgumentException {
  177.         setQuantile(quantile);
  178.         cachedPivots = null;
  179.         MathUtils.checkNotNull(estimationType);
  180.         MathUtils.checkNotNull(nanStrategy);
  181.         MathUtils.checkNotNull(kthSelector);
  182.         this.estimationType = estimationType;
  183.         this.nanStrategy = nanStrategy;
  184.         this.kthSelector = kthSelector;
  185.     }

  186.     /** {@inheritDoc} */
  187.     @Override
  188.     public void setData(final double[] values) {
  189.         if (values == null) {
  190.             cachedPivots = null;
  191.         } else {
  192.             cachedPivots = new int[PIVOTS_HEAP_LENGTH];
  193.             Arrays.fill(cachedPivots, -1);
  194.         }
  195.         super.setData(values);
  196.     }

  197.     /** {@inheritDoc} */
  198.     @Override
  199.     public void setData(final double[] values, final int begin, final int length)
  200.         throws MathIllegalArgumentException {
  201.         MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY);
  202.         cachedPivots = new int[PIVOTS_HEAP_LENGTH];
  203.         Arrays.fill(cachedPivots, -1);
  204.         super.setData(values, begin, length);
  205.     }

  206.     /**
  207.      * Returns the result of evaluating the statistic over the stored data.
  208.      * <p>
  209.      * The stored array is the one which was set by previous calls to
  210.      * {@link #setData(double[])}
  211.      *
  212.      * @param p the percentile value to compute
  213.      * @return the value of the statistic applied to the stored data
  214.      * @throws MathIllegalArgumentException if p is not a valid quantile value
  215.      * (p must be greater than 0 and less than or equal to 100)
  216.      */
  217.     public double evaluate(final double p) throws MathIllegalArgumentException {
  218.         return evaluate(getDataRef(), p);
  219.     }

  220.     /**
  221.      * Returns an estimate of the <code>quantile</code>th percentile of the
  222.      * designated values in the <code>values</code> array.
  223.      * <p>The quantile
  224.      * estimated is determined by the <code>quantile</code> property.
  225.      * </p>
  226.      * <ul>
  227.      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
  228.      * <li>Returns (for any value of <code>quantile</code>)
  229.      * <code>values[begin]</code> if <code>length = 1 </code></li>
  230.      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
  231.      * is null, or <code>start</code> or <code>length</code> is invalid</li>
  232.      * </ul>
  233.      * <p>
  234.      * See {@link Percentile} for a description of the percentile estimation
  235.      * algorithm used.
  236.      *
  237.      * @param values the input array
  238.      * @param start index of the first array element to include
  239.      * @param length the number of elements to include
  240.      * @return the percentile value
  241.      * @throws MathIllegalArgumentException if the parameters are not valid
  242.      *
  243.      */
  244.     @Override
  245.     public double evaluate(final double[] values, final int start, final int length)
  246.         throws MathIllegalArgumentException {
  247.         return evaluate(values, start, length, quantile);
  248.     }

  249.     /**
  250.      * Returns an estimate of the <code>p</code>th percentile of the values
  251.      * in the <code>values</code> array.
  252.      * <ul>
  253.      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
  254.      * <code>0</code></li>
  255.      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
  256.      *  if <code>values</code> has length <code>1</code></li>
  257.      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
  258.      * is null or p is not a valid quantile value (p must be greater than 0
  259.      * and less than or equal to 100) </li>
  260.      * </ul>
  261.      * <p>
  262.      * The default implementation delegates to
  263.      * <code>evaluate(double[], int, int, double)</code> in the natural way.
  264.      *
  265.      * @param values input array of values
  266.      * @param p the percentile value to compute
  267.      * @return the percentile value or Double.NaN if the array is empty
  268.      * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
  269.      */
  270.     public double evaluate(final double[] values, final double p)
  271.         throws MathIllegalArgumentException {
  272.         MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY);
  273.         return evaluate(values, 0, values.length, p);
  274.     }

  275.     /**
  276.      * Returns an estimate of the <code>p</code>th percentile of the values
  277.      * in the <code>values</code> array, starting with the element in (0-based)
  278.      * position <code>begin</code> in the array and including <code>length</code>
  279.      * values.
  280.      * <p>
  281.      * Calls to this method do not modify the internal <code>quantile</code>
  282.      * state of this statistic.
  283.      * </p>
  284.      * <ul>
  285.      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
  286.      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
  287.      *  if <code>length = 1 </code></li>
  288.      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
  289.      *  is null , <code>begin</code> or <code>length</code> is invalid, or
  290.      * <code>p</code> is not a valid quantile value (p must be greater than 0
  291.      * and less than or equal to 100)</li>
  292.      * </ul>
  293.      * <p>
  294.      * See {@link Percentile} for a description of the percentile estimation
  295.      * algorithm used.
  296.      *
  297.      * @param values array of input values
  298.      * @param p  the percentile to compute
  299.      * @param begin  the first (0-based) element to include in the computation
  300.      * @param length  the number of array elements to include
  301.      * @return  the percentile value
  302.      * @throws MathIllegalArgumentException if the parameters are not valid or the
  303.      * input array is null
  304.      */
  305.     public double evaluate(final double[] values, final int begin,
  306.                            final int length, final double p)
  307.         throws MathIllegalArgumentException {

  308.         MathArrays.verifyValues(values, begin, length);
  309.         if (p > 100 || p <= 0) {
  310.             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
  311.                                                    p, 0, 100);
  312.         }
  313.         if (length == 0) {
  314.             return Double.NaN;
  315.         }
  316.         if (length == 1) {
  317.             return values[begin]; // always return single value for n = 1
  318.         }

  319.         final double[] work = getWorkArray(values, begin, length);
  320.         final int[] pivotsHeap = getPivots(values);
  321.         return work.length == 0 ? Double.NaN :
  322.                     estimationType.evaluate(work, pivotsHeap, p, kthSelector);
  323.     }

  324.     /**
  325.      * Returns the value of the quantile field (determines what percentile is
  326.      * computed when evaluate() is called with no quantile argument).
  327.      *
  328.      * @return quantile set while construction or {@link #setQuantile(double)}
  329.      */
  330.     public double getQuantile() {
  331.         return quantile;
  332.     }

  333.     /**
  334.      * Sets the value of the quantile field (determines what percentile is
  335.      * computed when evaluate() is called with no quantile argument).
  336.      *
  337.      * @param p a value between 0 &lt; p &lt;= 100
  338.      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
  339.      * than or equal to 100
  340.      */
  341.     public void setQuantile(final double p) throws MathIllegalArgumentException {
  342.         if (p <= 0 || p > 100) {
  343.             throw new MathIllegalArgumentException(
  344.                     LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
  345.         }
  346.         quantile = p;
  347.     }

  348.     /** {@inheritDoc} */
  349.     @Override
  350.     public Percentile copy() {
  351.         return new Percentile(this);
  352.     }

  353.     /**
  354.      * Get the work array to operate. Makes use of prior {@code storedData} if
  355.      * it exists or else do a check on NaNs and copy a subset of the array
  356.      * defined by begin and length parameters. The set {@link #nanStrategy} will
  357.      * be used to either retain/remove/replace any NaNs present before returning
  358.      * the resultant array.
  359.      *
  360.      * @param values the array of numbers
  361.      * @param begin index to start reading the array
  362.      * @param length the length of array to be read from the begin index
  363.      * @return work array sliced from values in the range [begin,begin+length)
  364.      * @throws MathIllegalArgumentException if values or indices are invalid
  365.      */
  366.     protected double[] getWorkArray(final double[] values, final int begin, final int length) {
  367.         final double[] work;
  368.         if (values == getDataRef()) {
  369.             work = getDataRef();
  370.         } else {
  371.             switch (nanStrategy) {
  372.                 case MAXIMAL:// Replace NaNs with +INFs
  373.                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
  374.                     break;
  375.                 case MINIMAL:// Replace NaNs with -INFs
  376.                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
  377.                     break;
  378.                 case REMOVED:// Drop NaNs from data
  379.                     work = removeAndSlice(values, begin, length, Double.NaN);
  380.                     break;
  381.                 case FAILED:// just throw exception as NaN is un-acceptable
  382.                     work = copyOf(values, begin, length);
  383.                     MathArrays.checkNotNaN(work);
  384.                     break;
  385.                 default: //FIXED
  386.                     work = copyOf(values, begin, length);
  387.                     break;
  388.             }
  389.         }
  390.         return work;
  391.     }

  392.     /**
  393.      * Make a copy of the array for the slice defined by array part from
  394.      * [begin, begin+length)
  395.      * @param values the input array
  396.      * @param begin start index of the array to include
  397.      * @param length number of elements to include from begin
  398.      * @return copy of a slice of the original array
  399.      */
  400.     private static double[] copyOf(final double[] values, final int begin, final int length) {
  401.         MathArrays.verifyValues(values, begin, length);
  402.         return Arrays.copyOfRange(values, begin, begin + length);
  403.     }

  404.     /**
  405.      * Replace every occurrence of a given value with a replacement value in a
  406.      * copied slice of array defined by array part from [begin, begin+length).
  407.      * @param values the input array
  408.      * @param begin start index of the array to include
  409.      * @param length number of elements to include from begin
  410.      * @param original the value to be replaced with
  411.      * @param replacement the value to be used for replacement
  412.      * @return the copy of sliced array with replaced values
  413.      */
  414.     private static double[] replaceAndSlice(final double[] values,
  415.                                             final int begin, final int length,
  416.                                             final double original,
  417.                                             final double replacement) {
  418.         final double[] temp = copyOf(values, begin, length);
  419.         for(int i = 0; i < length; i++) {
  420.             temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
  421.                       replacement : temp[i];
  422.         }
  423.         return temp;
  424.     }

  425.     /**
  426.      * Remove the occurrence of a given value in a copied slice of array
  427.      * defined by the array part from [begin, begin+length).
  428.      * @param values the input array
  429.      * @param begin start index of the array to include
  430.      * @param length number of elements to include from begin
  431.      * @param removedValue the value to be removed from the sliced array
  432.      * @return the copy of the sliced array after removing the removedValue
  433.      */
  434.     private static double[] removeAndSlice(final double[] values,
  435.                                            final int begin, final int length,
  436.                                            final double removedValue) {
  437.         MathArrays.verifyValues(values, begin, length);
  438.         final double[] temp;
  439.         //BitSet(length) to indicate where the removedValue is located
  440.         final BitSet bits = new BitSet(length);
  441.         for (int i = begin; i < begin+length; i++) {
  442.             if (Precision.equalsIncludingNaN(removedValue, values[i])) {
  443.                 bits.set(i - begin);
  444.             }
  445.         }
  446.         //Check if empty then create a new copy
  447.         if (bits.isEmpty()) {
  448.             temp = copyOf(values, begin, length); // Nothing removed, just copy
  449.         } else if (bits.cardinality() == length) {
  450.             temp = new double[0];                 // All removed, just empty
  451.         } else {                                   // Some removable, so new
  452.             temp = new double[length - bits.cardinality()];
  453.             int start = begin;  //start index from source array (i.e values)
  454.             int dest = 0;       //dest index in destination array(i.e temp)
  455.             int bitSetPtr = 0;  //bitSetPtr is start index pointer of bitset
  456.             for (int nextOne = bits.nextSetBit(bitSetPtr); nextOne != -1; nextOne = bits.nextSetBit(bitSetPtr)) {
  457.                 final int lengthToCopy = nextOne - bitSetPtr;
  458.                 System.arraycopy(values, start, temp, dest, lengthToCopy);
  459.                 dest += lengthToCopy;
  460.                 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
  461.             }
  462.             //Copy any residue past start index till begin+length
  463.             if (start < begin + length) {
  464.                 System.arraycopy(values,start,temp,dest,begin + length - start);
  465.             }
  466.         }
  467.         return temp;
  468.     }

  469.     /**
  470.      * Get pivots which is either cached or a newly created one
  471.      *
  472.      * @param values array containing the input numbers
  473.      * @return cached pivots or a newly created one
  474.      */
  475.     private int[] getPivots(final double[] values) {
  476.         final int[] pivotsHeap;
  477.         if (values == getDataRef()) {
  478.             pivotsHeap = cachedPivots;
  479.         } else {
  480.             pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
  481.             Arrays.fill(pivotsHeap, -1);
  482.         }
  483.         return pivotsHeap;
  484.     }

  485.     /**
  486.      * Get the estimation {@link EstimationType type} used for computation.
  487.      *
  488.      * @return the {@code estimationType} set
  489.      */
  490.     public EstimationType getEstimationType() {
  491.         return estimationType;
  492.     }

  493.     /**
  494.      * Build a new instance similar to the current one except for the
  495.      * {@link EstimationType estimation type}.
  496.      * <p>
  497.      * This method is intended to be used as part of a fluent-type builder
  498.      * pattern. Building finely tune instances should be done as follows:
  499.      * <pre>
  500.      *   Percentile customized = new Percentile(quantile).
  501.      *                           withEstimationType(estimationType).
  502.      *                           withNaNStrategy(nanStrategy).
  503.      *                           withKthSelector(kthSelector);
  504.      * </pre>
  505.      * <p>
  506.      * If any of the {@code withXxx} method is omitted, the default value for
  507.      * the corresponding customization parameter will be used.
  508.      *
  509.      * @param newEstimationType estimation type for the new instance
  510.      * @return a new instance, with changed estimation type
  511.      * @throws NullArgumentException when newEstimationType is null
  512.      */
  513.     public Percentile withEstimationType(final EstimationType newEstimationType) {
  514.         return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
  515.     }

  516.     /**
  517.      * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
  518.      * @return {@code NaN Handling} strategy set during construction
  519.      */
  520.     public NaNStrategy getNaNStrategy() {
  521.         return nanStrategy;
  522.     }

  523.     /**
  524.      * Build a new instance similar to the current one except for the
  525.      * {@link NaNStrategy NaN handling} strategy.
  526.      * <p>
  527.      * This method is intended to be used as part of a fluent-type builder
  528.      * pattern. Building finely tune instances should be done as follows:
  529.      * <pre>
  530.      *   Percentile customized = new Percentile(quantile).
  531.      *                           withEstimationType(estimationType).
  532.      *                           withNaNStrategy(nanStrategy).
  533.      *                           withKthSelector(kthSelector);
  534.      * </pre>
  535.      * <p>
  536.      * If any of the {@code withXxx} method is omitted, the default value for
  537.      * the corresponding customization parameter will be used.
  538.      *
  539.      * @param newNaNStrategy NaN strategy for the new instance
  540.      * @return a new instance, with changed NaN handling strategy
  541.      * @throws NullArgumentException when newNaNStrategy is null
  542.      */
  543.     public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
  544.         return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
  545.     }

  546.     /**
  547.      * Get the {@link KthSelector kthSelector} used for computation.
  548.      * @return the {@code kthSelector} set
  549.      */
  550.     public KthSelector getKthSelector() {
  551.         return kthSelector;
  552.     }

  553.     /**
  554.      * Get the {@link PivotingStrategy} used in KthSelector for computation.
  555.      * @return the pivoting strategy set
  556.      */
  557.     public PivotingStrategy getPivotingStrategy() {
  558.         return kthSelector.getPivotingStrategy();
  559.     }

  560.     /**
  561.      * Build a new instance similar to the current one except for the
  562.      * {@link KthSelector kthSelector} instance specifically set.
  563.      * <p>
  564.      * This method is intended to be used as part of a fluent-type builder
  565.      * pattern. Building finely tune instances should be done as follows:
  566.      * <pre>
  567.      *   Percentile customized = new Percentile(quantile).
  568.      *                           withEstimationType(estimationType).
  569.      *                           withNaNStrategy(nanStrategy).
  570.      *                           withKthSelector(newKthSelector);
  571.      * </pre>
  572.      * <p>
  573.      * If any of the {@code withXxx} method is omitted, the default value for
  574.      * the corresponding customization parameter will be used.
  575.      *
  576.      * @param newKthSelector KthSelector for the new instance
  577.      * @return a new instance, with changed KthSelector
  578.      * @throws NullArgumentException when newKthSelector is null
  579.      */
  580.     public Percentile withKthSelector(final KthSelector newKthSelector) {
  581.         return new Percentile(quantile, estimationType, nanStrategy, newKthSelector);
  582.     }

  583.     /**
  584.      * An enum for various estimation strategies of a percentile referred in
  585.      * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
  586.      * with the names of enum matching those of types mentioned in
  587.      * wikipedia.
  588.      * <p>
  589.      * Each enum corresponding to the specific type of estimation in wikipedia
  590.      * implements  the respective formulae that specializes in the below aspects
  591.      * <ul>
  592.      * <li>An <b>index method</b> to calculate approximate index of the
  593.      * estimate</li>
  594.      * <li>An <b>estimate method</b> to estimate a value found at the earlier
  595.      * computed index</li>
  596.      * <li>A <b> minLimit</b> on the quantile for which first element of sorted
  597.      * input is returned as an estimate </li>
  598.      * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
  599.      * input is returned as an estimate </li>
  600.      * </ul>
  601.      * <p>
  602.      * Users can now create {@link Percentile} by explicitly passing this enum;
  603.      * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
  604.      * <p>
  605.      * References:
  606.      * <ol>
  607.      * <li>
  608.      * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
  609.      * </li>
  610.      * <li>
  611.      * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
  612.      * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
  613.      * packages, American Statistician 50, 361–365</a> </li>
  614.      * <li>
  615.      * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
  616.      * R-Manual </a></li>
  617.      * </ol>
  618.      */
  619.     public enum EstimationType {
  620.         /**
  621.          * This is the default type used in the {@link Percentile}.This method
  622.          * has the following formulae for index and estimates<br>
  623.          * \( \begin{align}
  624.          * &amp;index    = (N+1)p\ \\
  625.          * &amp;estimate = x_{\lceil h\,-\,1/2 \rceil} \\
  626.          * &amp;minLimit = 0 \\
  627.          * &amp;maxLimit = 1 \\
  628.          * \end{align}\)
  629.          */
  630.         LEGACY("Legacy Hipparchus") {
  631.             /**
  632.              * {@inheritDoc}.This method in particular makes use of existing
  633.              * Hipparchus style of picking up the index.
  634.              */
  635.             @Override
  636.             protected double index(final double p, final int length) {
  637.                 final double minLimit = 0d;
  638.                 final double maxLimit = 1d;
  639.                 return Double.compare(p, minLimit) == 0 ? 0 :
  640.                        Double.compare(p, maxLimit) == 0 ?
  641.                                length : p * (length + 1);
  642.             }
  643.         },
  644.         /**
  645.          * The method R_1 has the following formulae for index and estimates<br>
  646.          * \( \begin{align}
  647.          * &amp;index= Np + 1/2\,  \\
  648.          * &amp;estimate= x_{\lceil h\,-\,1/2 \rceil} \\
  649.          * &amp;minLimit = 0 \\
  650.          * \end{align}\)
  651.          */
  652.         R_1("R-1") {

  653.             @Override
  654.             protected double index(final double p, final int length) {
  655.                 final double minLimit = 0d;
  656.                 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
  657.             }

  658.             /**
  659.              * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
  660.              */
  661.             @Override
  662.             protected double estimate(final double[] values,
  663.                                       final int[] pivotsHeap, final double pos,
  664.                                       final int length, final KthSelector selector) {
  665.                 return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
  666.             }

  667.         },
  668.         /**
  669.          * The method R_2 has the following formulae for index and estimates<br>
  670.          * \( \begin{align}
  671.          * &amp;index= Np + 1/2\, \\
  672.          * &amp;estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
  673.          * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
  674.          * &amp;minLimit = 0 \\
  675.          * &amp;maxLimit = 1 \\
  676.          * \end{align}\)
  677.          */
  678.         R_2("R-2") {

  679.             @Override
  680.             protected double index(final double p, final int length) {
  681.                 final double minLimit = 0d;
  682.                 final double maxLimit = 1d;
  683.                 return Double.compare(p, maxLimit) == 0 ? length :
  684.                        Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
  685.             }

  686.             /**
  687.              * {@inheritDoc}This method in particular for R_2 averages the
  688.              * values at ceil(p+0.5) and floor(p-0.5).
  689.              */
  690.             @Override
  691.             protected double estimate(final double[] values,
  692.                                       final int[] pivotsHeap, final double pos,
  693.                                       final int length, final KthSelector selector) {
  694.                 final double low =
  695.                         super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
  696.                 final double high =
  697.                         super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector);
  698.                 return (low + high) / 2;
  699.             }

  700.         },
  701.         /**
  702.          * The method R_3 has the following formulae for index and estimates<br>
  703.          * \( \begin{align}
  704.          * &amp;index= Np \\
  705.          * &amp;estimate= x_{\lfloor h \rceil}\, \\
  706.          * &amp;minLimit = 0.5/N \\
  707.          * \end{align}\)
  708.          */
  709.         R_3("R-3") {
  710.             @Override
  711.             protected double index(final double p, final int length) {
  712.                 final double minLimit = 1d/2 / length;
  713.                 return Double.compare(p, minLimit) <= 0 ?
  714.                         0 : FastMath.rint(length * p);
  715.             }

  716.         },
  717.         /**
  718.          * The method R_4 has the following formulae for index and estimates<br>
  719.          * \( \begin{align}
  720.          * &amp;index= Np\, \\
  721.          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
  722.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  723.          * \rfloor}) \\
  724.          * &amp;minLimit = 1/N \\
  725.          * &amp;maxLimit = 1 \\
  726.          * \end{align}\)
  727.          */
  728.         R_4("R-4") {
  729.             @Override
  730.             protected double index(final double p, final int length) {
  731.                 final double minLimit = 1d / length;
  732.                 final double maxLimit = 1d;
  733.                 return Double.compare(p, minLimit) < 0 ? 0 :
  734.                        Double.compare(p, maxLimit) == 0 ? length : length * p;
  735.             }

  736.         },
  737.         /**
  738.          * The method R_5 has the following formulae for index and estimates<br>
  739.          * \( \begin{align}
  740.          * &amp;index= Np + 1/2\\
  741.          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
  742.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  743.          * \rfloor}) \\
  744.          * &amp;minLimit = 0.5/N \\
  745.          * &amp;maxLimit = (N-0.5)/N
  746.          * \end{align}\)
  747.          */
  748.         R_5("R-5") {

  749.             @Override
  750.             protected double index(final double p, final int length) {
  751.                 final double minLimit = 1d/2 / length;
  752.                 final double maxLimit = (length - 0.5) / length;
  753.                 return Double.compare(p, minLimit) < 0 ? 0 :
  754.                        Double.compare(p, maxLimit) >= 0 ?
  755.                                length : length * p + 0.5;
  756.             }
  757.         },
  758.         /**
  759.          * The method R_6 has the following formulae for index and estimates<br>
  760.          * \( \begin{align}
  761.          * &amp;index= (N + 1)p \\
  762.          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
  763.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  764.          * \rfloor}) \\
  765.          * &amp;minLimit = 1/(N+1) \\
  766.          * &amp;maxLimit = N/(N+1) \\
  767.          * \end{align}\)
  768.          * <p>
  769.          * <b>Note:</b> This method computes the index in a manner very close to
  770.          * the default Hipparchus Percentile existing implementation. However
  771.          * the difference to be noted is in picking up the limits with which
  772.          * first element (p&lt;1(N+1)) and last elements (p&gt;N/(N+1))are done.
  773.          * While in default case; these are done with p=0 and p=1 respectively.
  774.          */
  775.         R_6("R-6") {

  776.             @Override
  777.             protected double index(final double p, final int length) {
  778.                 final double minLimit = 1d / (length + 1);
  779.                 final double maxLimit = 1d * length / (length + 1);
  780.                 return Double.compare(p, minLimit) < 0 ? 0 :
  781.                        Double.compare(p, maxLimit) >= 0 ?
  782.                                length : (length + 1) * p;
  783.             }
  784.         },

  785.         /**
  786.          * The method R_7 implements Microsoft Excel style computation has the
  787.          * following formulae for index and estimates.<br>
  788.          * \( \begin{align}
  789.          * &amp;index = (N-1)p + 1 \\
  790.          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
  791.          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  792.          * \rfloor}) \\
  793.          * &amp;minLimit = 0 \\
  794.          * &amp;maxLimit = 1 \\
  795.          * \end{align}\)
  796.          */
  797.         R_7("R-7") {
  798.             @Override
  799.             protected double index(final double p, final int length) {
  800.                 final double minLimit = 0d;
  801.                 final double maxLimit = 1d;
  802.                 return Double.compare(p, minLimit) == 0 ? 0 :
  803.                        Double.compare(p, maxLimit) == 0 ?
  804.                                length : 1 + (length - 1) * p;
  805.             }

  806.         },

  807.         /**
  808.          * The method R_8 has the following formulae for index and estimates<br>
  809.          * \( \begin{align}
  810.          * &amp;index = (N + 1/3)p + 1/3  \\
  811.          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
  812.            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  813.          * \rfloor}) \\
  814.          * &amp;minLimit = (2/3)/(N+1/3) \\
  815.          * &amp;maxLimit = (N-1/3)/(N+1/3) \\
  816.          * \end{align}\)
  817.          * <p>
  818.          * As per Ref [2,3] this approach is most recommended as it provides
  819.          * an approximate median-unbiased estimate regardless of distribution.
  820.          */
  821.         R_8("R-8") {
  822.             @Override
  823.             protected double index(final double p, final int length) {
  824.                 final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
  825.                 final double maxLimit =
  826.                         (length - 1d / 3) / (length + 1d / 3);
  827.                 return Double.compare(p, minLimit) < 0 ? 0 :
  828.                        Double.compare(p, maxLimit) >= 0 ? length :
  829.                            (length + 1d / 3) * p + 1d / 3;
  830.             }
  831.         },

  832.         /**
  833.          * The method R_9 has the following formulae for index and estimates<br>
  834.          * \( \begin{align}
  835.          * &amp;index = (N + 1/4)p + 3/8\\
  836.          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
  837.            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
  838.          * \rfloor}) \\
  839.          * &amp;minLimit = (5/8)/(N+1/4) \\
  840.          * &amp;maxLimit = (N-3/8)/(N+1/4) \\
  841.          * \end{align}\)
  842.          */
  843.         R_9("R-9") {
  844.             @Override
  845.             protected double index(final double p, final int length) {
  846.                 final double minLimit = 5d/8 / (length + 0.25);
  847.                 final double maxLimit = (length - 3d/8) / (length + 0.25);
  848.                 return Double.compare(p, minLimit) < 0 ? 0 :
  849.                        Double.compare(p, maxLimit) >= 0 ? length :
  850.                                (length + 0.25) * p + 3d/8;
  851.             }

  852.         },
  853.         ;

  854.         /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
  855.         private final String name;

  856.         /**
  857.          * Constructor
  858.          *
  859.          * @param type name of estimation type as per wikipedia
  860.          */
  861.         EstimationType(final String type) {
  862.             this.name = type;
  863.         }

  864.         /**
  865.          * Finds the index of array that can be used as starting index to
  866.          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
  867.          * percentile. The calculation of index calculation is specific to each
  868.          * {@link EstimationType}.
  869.          *
  870.          * @param p the p<sup>th</sup> quantile
  871.          * @param length the total number of array elements in the work array
  872.          * @return a computed real valued index as explained in the wikipedia
  873.          */
  874.         protected abstract double index(double p, int length);

  875.         /**
  876.          * Estimation based on K<sup>th</sup> selection. This may be overridden
  877.          * in specific enums to compute slightly different estimations.
  878.          *
  879.          * @param work array of numbers to be used for finding the percentile
  880.          * @param pos indicated positional index prior computed from calling
  881.          *            {@link #index(double, int)}
  882.          * @param pivotsHeap an earlier populated cache if exists; will be used
  883.          * @param length size of array considered
  884.          * @param selector a {@link KthSelector} used for pivoting during search
  885.          * @return estimated percentile
  886.          */
  887.         protected double estimate(final double[] work, final int[] pivotsHeap,
  888.                                   final double pos, final int length,
  889.                                   final KthSelector selector) {

  890.             final double fpos = FastMath.floor(pos);
  891.             final int intPos = (int) fpos;
  892.             final double dif = pos - fpos;

  893.             if (pos < 1) {
  894.                 return selector.select(work, pivotsHeap, 0);
  895.             }
  896.             if (pos >= length) {
  897.                 return selector.select(work, pivotsHeap, length - 1);
  898.             }

  899.             final double lower = selector.select(work, pivotsHeap, intPos - 1);
  900.             final double upper = selector.select(work, pivotsHeap, intPos);
  901.             return lower + dif * (upper - lower);
  902.         }

  903.         /**
  904.          * Evaluate method to compute the percentile for a given bounded array
  905.          * using earlier computed pivots heap.<br>
  906.          * This basically calls the {@link #index(double, int) index} and then
  907.          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
  908.          * functions to return the estimated percentile value.
  909.          *
  910.          * @param work array of numbers to be used for finding the percentile
  911.          * @param pivotsHeap a prior cached heap which can speed up estimation
  912.          * @param p the p<sup>th</sup> quantile to be computed
  913.          * @param selector a {@link KthSelector} used for pivoting during search
  914.          * @return estimated percentile
  915.          * @throws MathIllegalArgumentException if p is out of range
  916.          * @throws NullArgumentException if work array is null
  917.          */
  918.         protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
  919.                                   final KthSelector selector) {
  920.             MathUtils.checkNotNull(work);
  921.             if (p > 100 || p <= 0) {
  922.                 throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
  923.                                               p, 0, 100);
  924.             }
  925.             return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector);
  926.         }

  927.         /**
  928.          * Evaluate method to compute the percentile for a given bounded array.
  929.          * This basically calls the {@link #index(double, int) index} and then
  930.          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
  931.          * functions to return the estimated percentile value. Please
  932.          * note that this method does not make use of cached pivots.
  933.          *
  934.          * @param work array of numbers to be used for finding the percentile
  935.          * @param p the p<sup>th</sup> quantile to be computed
  936.          * @return estimated percentile
  937.          * @param selector a {@link KthSelector} used for pivoting during search
  938.          * @throws MathIllegalArgumentException if length or p is out of range
  939.          * @throws NullArgumentException if work array is null
  940.          */
  941.         public double evaluate(final double[] work, final double p, final KthSelector selector) {
  942.             return this.evaluate(work, null, p, selector);
  943.         }

  944.         /**
  945.          * Gets the name of the enum
  946.          *
  947.          * @return the name
  948.          */
  949.         String getName() {
  950.             return name;
  951.         }
  952.     }
  953. }