RandomPercentile.java

  1. /*
  2.  * Licensed to the Hipparchus project under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The Hipparchus project licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.hipparchus.stat.descriptive.rank;

  18. import java.io.Serializable;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.Collection;
  22. import java.util.HashMap;
  23. import java.util.Iterator;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.NoSuchElementException;
  27. import java.util.UUID;

  28. import org.hipparchus.exception.LocalizedCoreFormats;
  29. import org.hipparchus.exception.MathIllegalArgumentException;
  30. import org.hipparchus.exception.MathIllegalStateException;
  31. import org.hipparchus.exception.NullArgumentException;
  32. import org.hipparchus.random.RandomGenerator;
  33. import org.hipparchus.random.Well19937c;
  34. import org.hipparchus.stat.StatUtils;
  35. import org.hipparchus.stat.descriptive.AbstractStorelessUnivariateStatistic;
  36. import org.hipparchus.stat.descriptive.AggregatableStatistic;
  37. import org.hipparchus.stat.descriptive.StorelessUnivariateStatistic;
  38. import org.hipparchus.util.FastMath;
  39. import org.hipparchus.util.MathArrays;

  40. /**
  41.  * A {@link StorelessUnivariateStatistic} estimating percentiles using the
  42.  * <a href=http://dimacs.rutgers.edu/~graham/pubs/papers/nquantiles.pdf>RANDOM</a>
  43.  * Algorithm.
  44.  * <p>
  45.  * Storage requirements for the RANDOM algorithm depend on the desired
  46.  * accuracy of quantile estimates. Quantile estimate accuracy is defined as follows.
  47.  * <p>
  48.  * Let \(X\) be the set of all data values consumed from the stream and let \(q\)
  49.  * be a quantile (measured between 0 and 1) to be estimated. If <ul>
  50.  * <li>\(\epsilon\) is the configured accuracy</li>
  51.  * <li> \(\hat{q}\) is a RandomPercentile estimate for \(q\) (what is returned
  52.  *      by {@link #getResult()} or {@link #getResult(double)}) with \(100q\) as
  53.  *      actual parameter)</li>
  54.  * <li> \(rank(\hat{q}) = |\{x \in X : x &lt; \hat{q}\}|\) is the actual rank of
  55.  *      \(\hat{q}\) in the full data stream</li>
  56.  * <li>\(n = |X|\) is the number of observations</li></ul>
  57.  * then we can expect \((q - \epsilon)n &lt; rank(\hat{q}) &lt; (q + \epsilon)n\).
  58.  * <p>
  59.  * The algorithm maintains \(\left\lceil {log_{2}(1/\epsilon)}\right\rceil + 1\) buffers
  60.  * of size \(\left\lceil {1/\epsilon \sqrt{log_2(1/\epsilon)}}\right\rceil\).  When
  61.  * {@code epsilon} is set to the default value of \(10^{-4}\), this makes 15 buffers
  62.  * of size 36,453.
  63.  * <p>
  64.  * The algorithm uses the buffers to maintain samples of data from the stream.  Until
  65.  * all buffers are full, the entire sample is stored in the buffers.
  66.  * If one of the {@code getResult} methods is called when all data are available in memory
  67.  * and there is room to make a copy of the data (meaning the combined set of buffers is
  68.  * less than half full), the {@code getResult} method delegates to a {@link Percentile}
  69.  * instance to compute and return the exact value for the desired quantile.
  70.  * For default {@code epsilon}, this means exact values will be returned whenever fewer than
  71.  * \(\left\lceil {15 \times 36453 / 2} \right\rceil = 273,398\) values have been consumed
  72.  * from the data stream.
  73.  * <p>
  74.  * When buffers become full, the algorithm merges buffers so that they effectively represent
  75.  * a larger set of values than they can hold. Subsequently, data values are sampled from the
  76.  * stream to fill buffers freed by merge operations.  Both the merging and the sampling
  77.  * require random selection, which is done using a {@code RandomGenerator}.  To get
  78.  * repeatable results for large data streams, users should provide {@code RandomGenerator}
  79.  * instances with fixed seeds. {@code RandomPercentile} itself does not reseed or otherwise
  80.  * initialize the {@code RandomGenerator} provided to it.  By default, it uses a
  81.  * {@link Well19937c} generator with the default seed.
  82.  * <p>
  83.  * Note: This implementation is not thread-safe.
  84.  */
  85. public class RandomPercentile
  86.     extends AbstractStorelessUnivariateStatistic implements StorelessUnivariateStatistic,
  87.     AggregatableStatistic<RandomPercentile>, Serializable {

  88.     /** Default quantile estimation error setting */
  89.     public static final double DEFAULT_EPSILON = 1e-4;
  90.     /** Serialization version id */
  91.     private static final long serialVersionUID = 1L;
  92.     /** Storage size of each buffer */
  93.     private final int s;
  94.     /** Maximum number of buffers minus 1 */
  95.     private final int h;
  96.     /** Data structure used to manage buffers */
  97.     private final BufferMap bufferMap;
  98.     /** Bound on the quantile estimation error */
  99.     private final double epsilon;
  100.     /** Source of random data */
  101.     private final RandomGenerator randomGenerator;
  102.     /** Number of elements consumed from the input data stream */
  103.     private long n;
  104.     /** Buffer currently being filled */
  105.     private Buffer currentBuffer;

  106.     /**
  107.      * Constructs a {@code RandomPercentile} with quantile estimation error
  108.      * {@code epsilon} using {@code randomGenerator} as its source of random data.
  109.      *
  110.      * @param epsilon bound on quantile estimation error (see class javadoc)
  111.      * @param randomGenerator PRNG used in sampling and merge operations
  112.      * @throws MathIllegalArgumentException if percentile is not in the range [0, 100]
  113.      */
  114.     public RandomPercentile(double epsilon, RandomGenerator randomGenerator) {
  115.         if (epsilon <= 0) {
  116.             throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_TOO_SMALL,
  117.                                                    epsilon, 0);
  118.         }
  119.         this.h = (int) FastMath.ceil(log2(1/epsilon));
  120.         this.s = (int) FastMath.ceil(FastMath.sqrt(log2(1/epsilon)) / epsilon);
  121.         this.randomGenerator = randomGenerator;
  122.         bufferMap = new BufferMap(h + 1, s, randomGenerator);
  123.         currentBuffer = bufferMap.create(0);
  124.         this.epsilon = epsilon;
  125.     }

  126.     /**
  127.      * Constructs a {@code RandomPercentile} with default estimation error
  128.      * using {@code randomGenerator} as its source of random data.
  129.      *
  130.      * @param randomGenerator PRNG used in sampling and merge operations
  131.      * @throws MathIllegalArgumentException if percentile is not in the range [0, 100]
  132.      */
  133.     public RandomPercentile(RandomGenerator randomGenerator) {
  134.         this(DEFAULT_EPSILON, randomGenerator);
  135.     }

  136.     /**
  137.      * Constructs a {@code RandomPercentile} with quantile estimation error
  138.      * {@code epsilon} using the default PRNG as source of random data.
  139.      *
  140.      * @param epsilon bound on quantile estimation error (see class javadoc)
  141.      * @throws MathIllegalArgumentException if percentile is not in the range [0, 100]
  142.      */
  143.     public RandomPercentile(double epsilon) {
  144.         this(epsilon, new Well19937c());
  145.     }

  146.     /**
  147.      * Constructs a {@code RandomPercentile} with quantile estimation error
  148.      * set to the default ({@link #DEFAULT_EPSILON}), using the default PRNG
  149.      * as source of random data.
  150.      */
  151.     public RandomPercentile() {
  152.         this(DEFAULT_EPSILON, new Well19937c());
  153.     }

  154.     /**
  155.      * Copy constructor, creates a new {@code RandomPercentile} identical
  156.      * to the {@code original}.  Note: the RandomGenerator used by the new
  157.      * instance is referenced, not copied - i.e., the new instance shares
  158.      * a generator with the original.
  159.      *
  160.      * @param original the {@code PSquarePercentile} instance to copy
  161.      */
  162.     public RandomPercentile(RandomPercentile original) {
  163.         super();
  164.         this.h = original.h;
  165.         this.n = original.n;
  166.         this.s = original.s;
  167.         this.epsilon = original.epsilon;
  168.         this.bufferMap = new BufferMap(original.bufferMap);
  169.         this.randomGenerator = original.randomGenerator;
  170.         Iterator<Buffer> iterator = bufferMap.iterator();
  171.         Buffer current = null;
  172.         Buffer curr = null;
  173.         // See if there is a partially filled buffer - that will be currentBuffer
  174.         while (current == null && iterator.hasNext()) {
  175.             curr = iterator.next();
  176.             if (curr.hasCapacity()) {
  177.                 current = curr;
  178.             }
  179.         }
  180.         // If there is no partially filled buffer, just assign the last one.
  181.         // Next increment() will find no capacity and create a new one or trigger
  182.         // a merge.
  183.         this.currentBuffer = current == null ? curr : current;
  184.     }

  185.     @Override
  186.     public long getN() {
  187.         return n;
  188.     }

  189.     /**
  190.      * Returns an estimate of the given percentile, computed using the designated
  191.      * array segment as input data.
  192.      *
  193.      * @param values source of input data
  194.      * @param begin position of the first element of the values array to include
  195.      * @param length number of array elements to include
  196.      * @param percentile desired percentile (scaled 0 - 100)
  197.      *
  198.      * @return estimated percentile
  199.      * @throws MathIllegalArgumentException if percentile is out of the range [0, 100]
  200.      */
  201.     public double evaluate(final double percentile, final double[] values, final int begin, final int length)
  202.         throws MathIllegalArgumentException {
  203.         if (MathArrays.verifyValues(values, begin, length)) {
  204.             RandomPercentile randomPercentile = new RandomPercentile(this.epsilon,
  205.                                                                      this.randomGenerator);
  206.             randomPercentile.incrementAll(values, begin, length);
  207.             return randomPercentile.getResult(percentile);
  208.         }
  209.         return Double.NaN;
  210.     }

  211.     /**
  212.      * Returns an estimate of the median, computed using the designated
  213.      * array segment as input data.
  214.      *
  215.      * @param values source of input data
  216.      * @param begin position of the first element of the values array to include
  217.      * @param length number of array elements to include
  218.      *
  219.      * @return estimated percentile
  220.      * @throws MathIllegalArgumentException if percentile is out of the range [0, 100]
  221.      */
  222.     @Override
  223.     public double evaluate(final double[] values, final int begin, final int length) {
  224.         return evaluate(50d, values, begin, length);
  225.     }

  226.     /**
  227.      * Returns an estimate of percentile over the given array.
  228.      *
  229.      * @param values source of input data
  230.      * @param percentile desired percentile (scaled 0 - 100)
  231.      *
  232.      * @return estimated percentile
  233.      * @throws MathIllegalArgumentException if percentile is out of the range [0, 100]
  234.      */
  235.     public double evaluate(final double percentile, final double[] values) {
  236.         return evaluate(percentile, values, 0, values.length);
  237.     }

  238.     @Override
  239.     public RandomPercentile copy() {
  240.         return new RandomPercentile(this);
  241.     }

  242.     @Override
  243.     public void clear() {
  244.         n = 0;
  245.         bufferMap.clear();
  246.         currentBuffer = bufferMap.create(0);
  247.     }

  248.     /**
  249.      * Returns an estimate of the median.
  250.      */
  251.     @Override
  252.     public double getResult() {
  253.         return getResult(50d);

  254.     }

  255.     /**
  256.      * Returns an estimate of the given percentile.
  257.      *
  258.      * @param percentile desired percentile (scaled 0 - 100)
  259.      * @return estimated percentile
  260.      * @throws MathIllegalArgumentException if percentile is out of the range [0, 100]
  261.      */
  262.     public double getResult(double percentile) {
  263.         if (percentile > 100 || percentile < 0) {
  264.             throw new MathIllegalArgumentException(LocalizedCoreFormats.OUT_OF_RANGE,
  265.                                                    percentile, 0, 100);
  266.         }
  267.         // Convert to internal quantile scale
  268.         final double q = percentile / 100;
  269.         // First get global min and max to bound search.
  270.         double min = Double.POSITIVE_INFINITY;
  271.         double max = Double.NEGATIVE_INFINITY;
  272.         double bMin;
  273.         double bMax;
  274.         Iterator<Buffer> bufferIterator = bufferMap.iterator();
  275.         while (bufferIterator.hasNext()) {
  276.             Buffer buffer = bufferIterator.next();
  277.             bMin = buffer.min();
  278.             if (bMin < min) {
  279.                 min = bMin;
  280.             }
  281.             bMax = buffer.max();
  282.             if (bMax > max) {
  283.                 max = bMax;
  284.             }
  285.         }

  286.         // Handle degenerate cases
  287.         if (Double.compare(q, 0d) == 0 || n == 1) {
  288.             return min;
  289.         }
  290.         if (Double.compare(q, 1) == 0) {
  291.             return max;
  292.         }
  293.         if (n == 0) {
  294.             return Double.NaN;
  295.         }

  296.         // See if we have all data in memory and enough free memory to copy.
  297.         // If so, use Percentile to perform exact computation.
  298.         if (bufferMap.halfEmpty()) {
  299.             return new Percentile(percentile).evaluate(bufferMap.levelZeroData());
  300.         }

  301.         // Compute target rank
  302.         final double targetRank = q * n;

  303.         // Start with initial guess min + quantile * (max - min).
  304.         double estimate = min + q * (max - min);
  305.         double estimateRank = getRank(estimate);
  306.         double lower;
  307.         double upper;
  308.         if (estimateRank > targetRank) {
  309.             upper = estimate;
  310.             lower = min;
  311.         } else if (estimateRank < targetRank) {
  312.             lower = estimate;
  313.             upper = max;
  314.         } else {
  315.             return estimate;
  316.         }
  317.         final double eps = epsilon / 2;
  318.         final double rankTolerance = eps * n;
  319.         final double minWidth = eps / n;
  320.         double intervalWidth = FastMath.abs(upper - lower);
  321.         while (FastMath.abs(estimateRank - targetRank) > rankTolerance && intervalWidth > minWidth) {
  322.             if (estimateRank > targetRank) {
  323.                 upper = estimate;
  324.             } else {
  325.                 lower = estimate;
  326.             }
  327.             intervalWidth = upper - lower;
  328.             estimate = lower + intervalWidth / 2;
  329.             estimateRank = getRank(estimate);
  330.         }
  331.         return estimate;
  332.     }

  333.     /**
  334.      * Gets the estimated rank of {@code value}, i.e.  \(|\{x \in X : x &lt; value\}|\)
  335.      * where \(X\) is the set of values that have been consumed from the stream.
  336.      *
  337.      * @param value value whose overall rank is sought
  338.      * @return estimated number of sample values that are strictly less than {@code value}
  339.      */
  340.     public double getRank(double value) {
  341.         double rankSum = 0;
  342.         Iterator<Buffer> bufferIterator = bufferMap.iterator();
  343.         while (bufferIterator.hasNext()) {
  344.             Buffer buffer = bufferIterator.next();
  345.             rankSum += buffer.rankOf(value) * FastMath.pow(2, buffer.level);
  346.         }
  347.         return rankSum;
  348.     }

  349.     /**
  350.      * Returns the estimated quantile position of value in the dataset.
  351.      * Specifically, what is returned is an estimate of \(|\{x \in X : x &lt; value\}| / |X|\)
  352.      * where \(X\) is the set of values that have been consumed from the stream.
  353.      *
  354.      * @param value value whose quantile rank is sought.
  355.      * @return estimated proportion of sample values that are strictly less than {@code value}
  356.      */
  357.     public double getQuantileRank(double value) {
  358.         return getRank(value) / getN();
  359.     }

  360.     @Override
  361.     public void increment(double d) {
  362.         n++;
  363.         if (!currentBuffer.hasCapacity()) { // Need to get a new buffer to fill
  364.             // First see if we have not yet created all the buffers
  365.             if (bufferMap.canCreate()) {
  366.                 final int level = (int) Math.ceil(Math.max(0, log2(n/(s * FastMath.pow(2, h - 1)))));
  367.                 currentBuffer = bufferMap.create(level);
  368.             } else { // All buffers have been created - need to merge to free one
  369.                 currentBuffer = bufferMap.merge();
  370.             }
  371.         }
  372.         currentBuffer.consume(d);
  373.     }

  374.     /**
  375.      * Maintains a buffer of values sampled from the input data stream.
  376.      * <p>
  377.      * The {@link #level} of a buffer determines its sampling frequency.
  378.      * The {@link #consume(double)} method retains 1 out of every 2^level values
  379.      * read in from the stream.
  380.      * <p>
  381.      * The {@link #size} of the buffer is the number of values that it can store
  382.      * The buffer is considered full when it has consumed 2^level * size values.
  383.      * <p>
  384.      * The {@link #blockSize} of a buffer is 2^level.
  385.      * The consume method starts each block by generating a random integer in
  386.      * [0, blockSize - 1].  It then skips over all but the element with that offset
  387.      * in the block, retaining only the selected value. So after 2^level * size
  388.      * elements have been consumed, it will have retained size elements - one
  389.      * from each 2^level block.
  390.      * <p>
  391.      * The {@link #mergeWith(Buffer)} method merges this buffer with another one,
  392.      * The merge operation merges the data from the other buffer into this and clears
  393.      * the other buffer (so it can consume data). Both buffers have their level
  394.      * incremented by the merge. This operation is only used on full buffers.
  395.      */
  396.     private static class Buffer implements Serializable {
  397.         /** Serialization version id */
  398.         private static final long serialVersionUID = 1L;
  399.         /** Number of values actually stored in the buffer */
  400.         private final int size;
  401.         /** Data sampled from the stream */
  402.         private final double[] data;
  403.         /** PRNG used for merges and stream sampling */
  404.         private final RandomGenerator randomGenerator;
  405.         /** Level of the buffer */
  406.         private int level;
  407.         /** Block size  = 2^level */
  408.         private long blockSize;
  409.         /** Next location in backing array for stored (taken) value */
  410.         private int next;
  411.         /** Number of values consumed in current 2^level block of values from the stream */
  412.         private long consumed;
  413.         /** Index of next value to take in current 2^level block */
  414.         private long nextToTake;
  415.         /** ID */
  416.         private final UUID id;

  417.         /**
  418.          * Creates a new buffer capable of retaining size values with the given level.
  419.          *
  420.          * @param size number of values the buffer can retain
  421.          * @param level the base 2 log of the sampling frequency
  422.          *        (one out of every 2^level values is retained)
  423.          * @param randomGenerator PRNG used for sampling and merge operations
  424.          */
  425.         Buffer(int size, int level, RandomGenerator randomGenerator) {
  426.             this.size = size;
  427.             data = new double[size];
  428.             this.level = level;
  429.             this.randomGenerator = randomGenerator;
  430.             this.id = UUID.randomUUID();
  431.             computeBlockSize();
  432.         }

  433.         /**
  434.          * Sets blockSize and nextToTake based on level.
  435.          */
  436.         private void computeBlockSize() {
  437.             if (level == 0) {
  438.                 blockSize = 1;
  439.             } else {
  440.                 long product = 1;
  441.                 for (int i = 0; i < level; i++) {
  442.                     product *= 2;
  443.                 }
  444.                 blockSize = product;
  445.             }
  446.             if (blockSize > 1) {
  447.                 nextToTake = randomGenerator.nextLong(blockSize);
  448.             }
  449.         }

  450.         /**
  451.          * Consumes a value from the input stream.
  452.          * <p>
  453.          * For each 2^level values consumed, one is added to the buffer.
  454.          * The buffer is not considered full until 2^level * size values
  455.          * have been consumed.
  456.          * <p>
  457.          * Sorts the data array if the consumption renders the buffer full.
  458.          * <p>
  459.          * There is no capacity check in this method.  Clients are expected
  460.          * to use {@link #hasCapacity()} before invoking this method.  If
  461.          * it is invoked on a full buffer, an ArrayIndexOutOfBounds exception
  462.          * will result.
  463.          *
  464.          * @param value value to consume from the stream
  465.          */
  466.         public void consume(double value) {
  467.             if (consumed == nextToTake) {
  468.                 data[next] = value;
  469.                 next++;
  470.             }
  471.             consumed++;
  472.             if (consumed == blockSize) {
  473.                 if (next == size) {   // Buffer is full
  474.                     Arrays.sort(data);
  475.                 } else {              // Reset in-block counter and nextToTake
  476.                     consumed = 0;
  477.                     if (blockSize > 1) {
  478.                         nextToTake = randomGenerator.nextLong(blockSize);
  479.                     }
  480.                 }
  481.             }
  482.         }

  483.         /**
  484.          * Merges this with other.
  485.          * <p>
  486.          * After the merge, this will be the merged buffer and other will be free.
  487.          * Both will have level+1.
  488.          * Post-merge, other can be used to accept new data.
  489.          * <p>
  490.          * The contents of the merged buffer (this after the merge) are determined
  491.          * by randomly choosing one of the two retained elements in each of the
  492.          * [0...size - 1] positions in the two buffers.
  493.          * <p>
  494.          * This and other must have the same level and both must be full.
  495.          *
  496.          * @param other initially full other buffer at the same level as this.
  497.          * @throws MathIllegalArgumentException if either buffer is not full or they
  498.          * have different levels
  499.          */
  500.         public void mergeWith(Buffer other) {
  501.             // Make sure both this and other are full and have the same level
  502.             if (this.hasCapacity() || other.hasCapacity() || other.level != this.level) {
  503.                 throw new MathIllegalArgumentException(LocalizedCoreFormats.INTERNAL_ERROR);
  504.             }
  505.             // Randomly select one of the two entries for each slot
  506.             for (int i = 0; i < size; i++) {
  507.                 if (randomGenerator.nextBoolean()) {
  508.                     data[i] = other.data[i];
  509.                 }
  510.             }
  511.             // Re-sort data
  512.             Arrays.sort(data);
  513.             // Bump level of both buffers
  514.             other.setLevel(level + 1);
  515.             this.setLevel(level + 1);
  516.             // Clear the free one (and compute new blocksize)
  517.             other.clear();
  518.         }

  519.         /**
  520.          * Merge this into a higher-level buffer.
  521.          * <p>
  522.          * Does not alter this; but after the merge, higher may have some of its
  523.          * data replaced by data from this.  Levels are not changed for either buffer.
  524.          * <p>
  525.          * Probability of selection into the newly constituted higher buffer is weighted
  526.          * according to level. So for example, if this has level 0 and higher has level
  527.          * 2, the ith element of higher is 4 times more likely than the corresponding
  528.          * element of this to retain its spot.
  529.          * <p>
  530.          * This method is only used when aggregating RandomPercentile instances.
  531.          * <p>
  532.          * Preconditions:
  533.          * <ol><li> this.level < higher.level </li>
  534.          *     <li> this.size = higher.size </li>
  535.          *     <li> Both buffers are full </li>
  536.          * </ol>
  537.          *
  538.          * @param higher higher-level buffer to merge this into
  539.          * @throws MathIllegalArgumentException if the buffers have different sizes,
  540.          * either buffer is not full or this has level greater than or equal to higher
  541.          */
  542.         public void mergeInto(Buffer higher) {
  543.             // Check preconditions
  544.             if (this.size != higher.size || this.hasCapacity() || higher.hasCapacity() ||
  545.                     this.level >= higher.level) {
  546.                 throw new MathIllegalArgumentException(LocalizedCoreFormats.INTERNAL_ERROR);
  547.             }
  548.             final int levelDifference = higher.level - this.level;
  549.             int m = 1;
  550.             for (int i = 0; i < levelDifference; i++) {
  551.                 m *= 2;
  552.             }
  553.             // Randomly select one of the two entries for each slot in higher, giving
  554.             // m-times higher weight to the entries of higher.
  555.             for (int i = 0; i < size; i++) {
  556.                 // data[i] <-> {0}, higher.data[i] <-> {1, ..., m}
  557.                 if (randomGenerator.nextInt(m + 1) == 0) {
  558.                     higher.data[i] = data[i];
  559.                 }
  560.             }
  561.             // Resort higher's data
  562.             Arrays.sort(higher.data);
  563.         }

  564.         /**
  565.          * @return true if the buffer has capacity; false if it is full
  566.          */
  567.         public boolean hasCapacity() {
  568.             // Buffer has capacity if it has not yet set all of its data
  569.             // values or if it has but still has not finished its last block
  570.             return next < size || consumed < blockSize;
  571.         }

  572.         /**
  573.          * Sets the level of the buffer.
  574.          *
  575.          * @param level new level value
  576.          */
  577.         public void setLevel(int level) {
  578.             this.level = level;
  579.         }

  580.         /**
  581.          * Clears data, recomputes blockSize and resets consumed and nextToTake.
  582.          */
  583.         public void clear() {
  584.             consumed = 0;
  585.             next = 0;
  586.             computeBlockSize();
  587.         }

  588.         /**
  589.          * Returns a copy of the data that has been added to the buffer
  590.          *
  591.          * @return possibly unsorted copy of the portion of the buffer that has been filled
  592.          */
  593.         public double[] getData() {
  594.             final double[] out = new double[next];
  595.             System.arraycopy(data, 0, out, 0, next);
  596.             return out;
  597.         }

  598.         /**
  599.          * Returns the ordinal rank of value among the sampled values in this buffer.
  600.          *
  601.          * @param value value whose rank is sought
  602.          * @return |{v in data : v < value}|
  603.          */
  604.         public int rankOf(double value) {
  605.             int ret = 0;
  606.             if (!hasCapacity()) { // Full sorted buffer, can do binary search
  607.                 ret = Arrays.binarySearch(data, value);
  608.                 if (ret < 0) {
  609.                     return -ret - 1;
  610.                 } else {
  611.                     return ret;
  612.                 }
  613.             } else { // have to count - not sorted yet and can't sort yet
  614.                 for (int i = 0; i < next; i++) {
  615.                     if (data[i] < value) {
  616.                         ret++;
  617.                     }
  618.                 }
  619.                 return ret;
  620.             }
  621.         }

  622.         /**
  623.          * @return the smallest value held in this buffer
  624.          */
  625.         public double min() {
  626.             if (!hasCapacity()) {
  627.                 return data[0];
  628.             } else {
  629.                 return StatUtils.min(getData());
  630.             }
  631.         }

  632.         /**
  633.          * @return the largest value held in this buffer
  634.          */
  635.         public double max() {
  636.             if (!hasCapacity()) {
  637.                 return data[data.length - 1];
  638.             } else {
  639.                 return StatUtils.max(getData());
  640.             }
  641.         }

  642.         /**
  643.          * @return the level of this buffer
  644.          */
  645.         public int getLevel() {
  646.             return level;
  647.         }

  648.         /**
  649.          * @return the id
  650.          */
  651.         public UUID getId() {
  652.             return id;
  653.         }
  654.     }

  655.     /**
  656.      * Computes base 2 log of the argument.
  657.      *
  658.      * @param x input value
  659.      * @return the value y such that 2^y = x
  660.      */
  661.     private static double log2(double x) {
  662.         return Math.log(x) / Math.log(2);
  663.     }

  664.     /**
  665.      * A map structure to hold the buffers.
  666.      * Keys are levels and values are lists of buffers at the given level.
  667.      * Overall capacity is limited by the total number of buffers.
  668.      */
  669.     private static class BufferMap implements Iterable<Buffer>, Serializable {
  670.         /** Serialization version ID */
  671.         private static final long serialVersionUID = 1L;
  672.         /** Total number of buffers that can be created - cap for count */
  673.         private final int capacity;
  674.         /** PRNG used in merges */
  675.         private final RandomGenerator randomGenerator;
  676.         /** Total count of all buffers */
  677.         private int count;
  678.         /** Uniform buffer size */
  679.         private final int bufferSize;
  680.         /** Backing store for the buffer map. Keys are levels, values are lists of registered buffers. */
  681.         private final Map<Integer,List<Buffer>> registry;
  682.         /** Maximum buffer level */
  683.         private int maxLevel;

  684.         /**
  685.          * Creates a BufferMap that can manage up to capacity buffers.
  686.          * Buffers created by the pool with have size = buffersize.
  687.          *
  688.          * @param capacity cap on the number of buffers
  689.          * @param bufferSize size of each buffer
  690.          * @param randomGenerator RandomGenerator to use in merges
  691.          */
  692.         BufferMap(int capacity, int bufferSize, RandomGenerator randomGenerator) {
  693.             this.bufferSize = bufferSize;
  694.             this.capacity = capacity;
  695.             this.randomGenerator = randomGenerator;
  696.             this.registry = new HashMap<>();
  697.         }

  698.         /**
  699.          * Copy constructor.
  700.          *
  701.          * @param original BufferMap to copy
  702.          */
  703.         BufferMap(BufferMap original) {
  704.             super();
  705.             this.bufferSize = original.bufferSize;
  706.             this.capacity = original.capacity;
  707.             this.count = 0;
  708.             this.randomGenerator = original.randomGenerator;
  709.             this.registry = new HashMap<>();
  710.             Iterator<Buffer> iterator = original.iterator();
  711.             while (iterator.hasNext()) {
  712.                 final Buffer current = iterator.next();
  713.                 // Create and register a new buffer at the same level
  714.                 final Buffer newCopy = create(current.getLevel());
  715.                 // Consume the data
  716.                 final double[] data = current.getData();
  717.                 for (double value : data) {
  718.                     newCopy.consume(value);
  719.                 }
  720.             }
  721.         }

  722.         /**
  723.          * Tries to create a buffer with the given level.
  724.          * <p>
  725.          * If there is capacity to create a new buffer (i.e., fewer than
  726.          * count have been created), a new buffer is created with the given
  727.          * level, registered and returned.  If capacity has been reached,
  728.          * null is returned.
  729.          *
  730.          * @param level level of the new buffer
  731.          * @return an empty buffer or null if a buffer can't be provided
  732.          */
  733.         public Buffer create(int level) {
  734.             if (!canCreate()) {
  735.                 return null;
  736.             }
  737.             count++;
  738.             Buffer buffer = new Buffer(bufferSize, level, randomGenerator);
  739.             List<Buffer> bufferList = registry.get(level);
  740.             if (bufferList == null) {
  741.                 bufferList = new ArrayList<>();
  742.                 registry.put(level, bufferList);
  743.             }
  744.             bufferList.add(buffer);
  745.             if (level > maxLevel) {
  746.                 maxLevel = level;
  747.             }
  748.             return buffer;
  749.         }

  750.         /**
  751.          * Returns true if there is capacity to create a new buffer.
  752.          *
  753.          * @return true if fewer than capacity buffers have been created.
  754.          */
  755.         public boolean canCreate() {
  756.             return count < capacity;
  757.         }

  758.         /**
  759.          * Returns true if we have used less than half of the allocated storage.
  760.          * <p>
  761.          * Includes a check to make sure all buffers have level 0;
  762.          * but this should always be the case.
  763.          * <p>
  764.          * When this method returns true, we have all consumed data in storage
  765.          * and enough space to make a copy of the combined dataset.
  766.          *
  767.          * @return true if all buffers have level 0 and less than half of the
  768.          * available storage has been used
  769.          */
  770.         public boolean halfEmpty() {
  771.             return count * 2 < capacity &&
  772.                     registry.size() == 1 &&
  773.                     registry.containsKey(0);
  774.         }

  775.         /**
  776.          * Returns a fresh copy of all data from level 0 buffers.
  777.          *
  778.          * @return combined data stored in all level 0 buffers
  779.          */
  780.         public double[] levelZeroData() {
  781.             List<Buffer> levelZeroBuffers = registry.get(0);
  782.             // First determine the combined size of the data
  783.             int length = 0;
  784.             for (Buffer buffer : levelZeroBuffers) {
  785.                 if (!buffer.hasCapacity()) { // full buffer
  786.                     length += buffer.size;
  787.                 } else {
  788.                     length += buffer.next;  // filled amount
  789.                 }
  790.             }
  791.             // Copy the data
  792.             int pos = 0;
  793.             int currLen;
  794.             final double[] out = new double[length];
  795.             for (Buffer buffer : levelZeroBuffers) {
  796.                 if (!buffer.hasCapacity()) {
  797.                     currLen = buffer.size;
  798.                 } else {
  799.                     currLen =  buffer.next;
  800.                 }
  801.                 System.arraycopy(buffer.data, 0, out, pos, currLen);
  802.                 pos += currLen;
  803.             }
  804.             return out;
  805.         }

  806.         /**
  807.          * Finds the lowest level l where there exist at least two buffers,
  808.          * merges them to create a new buffer with level l+1 and returns
  809.          * a free buffer with level l+1.
  810.          *
  811.          * @return free buffer that can accept data
  812.          */
  813.         public Buffer merge() {
  814.             int l = 0;
  815.             List<Buffer> mergeCandidates = null;
  816.             // Find the lowest level containing at least two buffers
  817.             while (mergeCandidates == null && l <= maxLevel) {
  818.                 final List<Buffer> bufferList = registry.get(l);
  819.                 if (bufferList != null && bufferList.size() > 1) {
  820.                     mergeCandidates = bufferList;
  821.                 } else {
  822.                     l++;
  823.                 }
  824.             }
  825.             if (mergeCandidates == null) {
  826.                 // Should never happen
  827.                 throw new MathIllegalStateException(LocalizedCoreFormats.INTERNAL_ERROR);
  828.             }
  829.             Buffer buffer1 = mergeCandidates.get(0);
  830.             Buffer buffer2 = mergeCandidates.get(1);
  831.             // Remove buffers to be merged
  832.             mergeCandidates.remove(0);
  833.             mergeCandidates.remove(0);
  834.             // If these are the last level-l buffers, remove the empty list
  835.             if (registry.get(l).isEmpty()) {
  836.                 registry.remove(l);
  837.             }
  838.             // Merge the buffers
  839.             buffer1.mergeWith(buffer2);
  840.             // Now both buffers have level l+1; buffer1 is full and buffer2 is free.
  841.             // Register both buffers
  842.             register(buffer1);
  843.             register(buffer2);

  844.             // Return the free one
  845.             return buffer2;
  846.         }

  847.         /**
  848.          * Clears the buffer map.
  849.          */
  850.         public void clear() {
  851.             for (List<Buffer> bufferList : registry.values()) {
  852.                 bufferList.clear();
  853.             }
  854.             registry.clear();
  855.             count = 0;
  856.         }

  857.         /**
  858.          * Registers a buffer.
  859.          *
  860.          * @param buffer Buffer to be registered.
  861.          */
  862.         public void register(Buffer buffer) {
  863.             final int level = buffer.getLevel();
  864.             List<Buffer> list = registry.get(level);
  865.             if (list == null) {
  866.                 list = new ArrayList<>();
  867.                 registry.put(level, list);
  868.                 if (level > maxLevel) {
  869.                     maxLevel = level;
  870.                 }
  871.             }
  872.             list.add(buffer);
  873.         }

  874.         /**
  875.          * De-register a buffer, without clearing it.
  876.          *
  877.          * @param buffer Buffer to be de-registered
  878.          * @throws IllegalStateException if the buffer is not registered
  879.          */
  880.         public void deRegister(Buffer buffer) {
  881.             final Iterator<Buffer> iterator = registry.get(buffer.getLevel()).iterator();
  882.             while (iterator.hasNext()) {
  883.                 if (iterator.next().getId().equals(buffer.getId())) {
  884.                     iterator.remove();
  885.                     return;
  886.                 }
  887.             }
  888.             throw new MathIllegalStateException(LocalizedCoreFormats.INTERNAL_ERROR);
  889.         }

  890.         /**
  891.          * Returns an iterator over all of the buffers. Iteration goes by level, with
  892.          * level 0 first.  Assumes there are no empty buffer lists.
  893.          */
  894.         @Override
  895.         public Iterator<Buffer> iterator() {
  896.             return new Iterator<Buffer>() {

  897.                 /** Outer loop iterator, from level to level. */
  898.                 private final Iterator<Integer> levelIterator = registry.keySet().iterator();

  899.                 /** List of buffers at current level. */
  900.                 private List<Buffer> currentList = registry.get(levelIterator.next());

  901.                 /** Inner loop iterator, from buffer to buffer. */
  902.                 private Iterator<Buffer> bufferIterator =
  903.                         currentList == null ? null : currentList.iterator();

  904.                 @Override
  905.                 public boolean hasNext() {
  906.                     if (bufferIterator == null) {
  907.                         return false;
  908.                     }
  909.                     if (bufferIterator.hasNext()) {
  910.                         return true;
  911.                     }
  912.                     // The current level iterator has just finished, try to bump level
  913.                     if (levelIterator.hasNext()) {
  914.                         List<Buffer> currentList = registry.get(levelIterator.next());
  915.                         bufferIterator = currentList.iterator();
  916.                         return true;
  917.                     } else {
  918.                         // Nothing left, signal this by nulling bufferIterator
  919.                         bufferIterator = null;
  920.                         return false;
  921.                     }
  922.                 }

  923.                 @Override
  924.                 public Buffer next() {
  925.                      if (hasNext()) {
  926.                          return bufferIterator.next();
  927.                      }
  928.                      throw new NoSuchElementException();
  929.                 }

  930.                 @Override
  931.                 public void remove() {
  932.                     throw new UnsupportedOperationException();
  933.                 }
  934.             };
  935.         }

  936.         /**
  937.          * Absorbs the data in other into this, merging buffers as necessary to trim
  938.          * the aggregate down to capacity. This method is only used when aggregating
  939.          * RandomPercentile instances.
  940.          *
  941.          * @param other other BufferMap to merge in
  942.          */
  943.         public void absorb(BufferMap other) {
  944.             // Add all of other's buffers to the map - possibly exceeding cap
  945.             boolean full = true;
  946.             Iterator<Buffer> otherIterator = other.iterator();
  947.             while (otherIterator.hasNext()) {
  948.                 Buffer buffer = otherIterator.next();
  949.                 if (buffer.hasCapacity()) {
  950.                     full = false;
  951.                 }
  952.                 register(buffer);
  953.                 count++;
  954.             }
  955.             // Now eliminate the excess by merging
  956.             while (count > capacity || (count == capacity && full)) {
  957.                 mergeUp();
  958.                 count--;
  959.             }
  960.         }

  961.         /**
  962.          * Find two buffers, first and second, of minimal level. Then merge
  963.          * first into second and discard first.
  964.          * <p>
  965.          * If the buffers have different levels, make second the higher level
  966.          * buffer and make probability of selection in the merge proportional
  967.          * to level weight ratio.
  968.          * <p>
  969.          * This method is only used when aggregating RandomPercentile instances.
  970.          */
  971.         public void mergeUp() {
  972.             // Find two minimum-level buffers to merge
  973.             // Loop depends on two invariants:
  974.             //   0) iterator goes in level order
  975.             //   1) there are no empty lists in the registry
  976.             Iterator<Buffer> bufferIterator = iterator();
  977.             Buffer first = null;
  978.             Buffer second = null;
  979.             while ((first == null || second == null) && bufferIterator.hasNext()) {
  980.                 Buffer buffer = bufferIterator.next();
  981.                 if (!buffer.hasCapacity()) { // Skip not full buffers
  982.                     if (first == null) {
  983.                         first = buffer;
  984.                     } else {
  985.                         second = buffer;
  986.                     }
  987.                 }
  988.             }
  989.             if (first == null || second == null || first.level > second.level) {
  990.                 throw new MathIllegalStateException(LocalizedCoreFormats.INTERNAL_ERROR);
  991.             }
  992.             // Merge first into second and deregister first.
  993.             // Assumes that first has level <= second (checked above).
  994.             if (first.getLevel() == second.getLevel()) {
  995.                 deRegister(first);
  996.                 deRegister(second);
  997.                 second.mergeWith(first);
  998.                 register(second);
  999.             } else {
  1000.                 deRegister(first);
  1001.                 first.mergeInto(second);
  1002.             }
  1003.         }
  1004.     }

  1005.     /**
  1006.      * Computes the given percentile by combining the data from the collection
  1007.      * of aggregates. The result describes the combined sample of all data added
  1008.      * to any of the aggregates.
  1009.      *
  1010.      * @param percentile desired percentile (scaled 0-100)
  1011.      * @param aggregates RandomPercentile instances to combine data from
  1012.      * @return estimate of the given percentile using combined data from the aggregates
  1013.      * @throws MathIllegalArgumentException if percentile is out of the range [0, 100]
  1014.      */
  1015.     public double reduce(double percentile, Collection<RandomPercentile> aggregates) {
  1016.         if (percentile > 100 || percentile < 0) {
  1017.             throw new MathIllegalArgumentException(LocalizedCoreFormats.OUT_OF_RANGE,
  1018.                                                    percentile, 0, 100);
  1019.         }

  1020.         // First see if we can copy all data and just compute exactly.
  1021.         // The following could be improved to verify that all have only level 0 buffers
  1022.         // and the sum of the data sizes is less than 1/2 total capacity.  Here we
  1023.         // just check that each of the aggregates is less than half full.
  1024.         Iterator<RandomPercentile> iterator = aggregates.iterator();
  1025.         boolean small = true;
  1026.         while (small && iterator.hasNext()) {
  1027.             small = iterator.next().bufferMap.halfEmpty();
  1028.         }
  1029.         if (small) {
  1030.             iterator = aggregates.iterator();
  1031.             double[] combined = {};
  1032.             while (iterator.hasNext()) {
  1033.                combined = MathArrays.concatenate(combined, iterator.next().bufferMap.levelZeroData());
  1034.             }
  1035.             final Percentile exactP = new Percentile(percentile);
  1036.             return exactP.evaluate(combined);
  1037.         }

  1038.         // Below largely duplicates code in getResult(percentile).
  1039.         // Common binary search code with function parameter should be factored out.

  1040.         // Get global max and min to bound binary search and total N
  1041.         double min = Double.POSITIVE_INFINITY;
  1042.         double max = Double.NEGATIVE_INFINITY;
  1043.         double combinedN = 0;
  1044.         iterator = aggregates.iterator();
  1045.         while (iterator.hasNext()) {
  1046.             final RandomPercentile curr = iterator.next();
  1047.             final double curMin = curr.getResult(0);
  1048.             final double curMax = curr.getResult(100);
  1049.             if (curMin < min) {
  1050.                 min = curMin;
  1051.             }
  1052.             if (curMax > max) {
  1053.                 max = curMax;
  1054.             }
  1055.             combinedN += curr.getN();
  1056.         }

  1057.         final double q = percentile / 100;
  1058.         // Handle degenerate cases
  1059.         if (Double.compare(q, 0d) == 0) {
  1060.             return min;
  1061.         }
  1062.         if (Double.compare(q, 1) == 0) {
  1063.             return max;
  1064.         }

  1065.         // Compute target rank
  1066.         final double targetRank = q * combinedN;

  1067.         // Perform binary search using aggregated rank computation
  1068.         // Start with initial guess min + quantile * (max - min).
  1069.         double estimate = min + q * (max - min);
  1070.         double estimateRank = getAggregateRank(estimate, aggregates);
  1071.         double lower;
  1072.         double upper;
  1073.         if (estimateRank > targetRank) {
  1074.             upper = estimate;
  1075.             lower = min;
  1076.         } else if (estimateRank < targetRank) {
  1077.             lower = estimate;
  1078.             upper = max;
  1079.         } else {
  1080.             return estimate;
  1081.         }
  1082.         final double eps = epsilon / 2;
  1083.         double intervalWidth = FastMath.abs(upper - lower);
  1084.         while (FastMath.abs(estimateRank / combinedN - q) > eps && intervalWidth > eps / combinedN) {
  1085.             if (estimateRank == targetRank) {
  1086.                 return estimate;
  1087.             }
  1088.             if (estimateRank > targetRank) {
  1089.                 upper = estimate;
  1090.             } else {
  1091.                 lower = estimate;
  1092.             }
  1093.             intervalWidth = FastMath.abs(upper - lower);
  1094.             estimate = lower + intervalWidth / 2;
  1095.             estimateRank = getAggregateRank(estimate, aggregates);
  1096.         }
  1097.         return estimate;
  1098.     }

  1099.     /**
  1100.      * Computes the estimated rank of value in the combined dataset of the aggregates.
  1101.      * Sums the values from {@link #getRank(double)}.
  1102.      *
  1103.      * @param value value whose rank is sought
  1104.      * @param aggregates collection to aggregate rank over
  1105.      * @return estimated number of elements in the combined dataset that are less than value
  1106.      */
  1107.     public double getAggregateRank(double value, Collection<RandomPercentile> aggregates) {
  1108.         double result = 0;
  1109.         final Iterator<RandomPercentile> iterator = aggregates.iterator();
  1110.         while (iterator.hasNext()) {
  1111.             result += iterator.next().getRank(value);
  1112.         }
  1113.         return result;
  1114.     }

  1115.     /**
  1116.      * Returns the estimated quantile position of value in the combined dataset of the aggregates.
  1117.      * Specifically, what is returned is an estimate of \(|\{x \in X : x &lt; value\}| / |X|\)
  1118.      * where \(X\) is the set of values that have been consumed from all of the datastreams
  1119.      * feeding the aggregates.
  1120.      *
  1121.      * @param value value whose quantile rank is sought.
  1122.      * @param aggregates collection of RandomPercentile instances being combined
  1123.      * @return estimated proportion of combined sample values that are strictly less than {@code value}
  1124.      */
  1125.     public double getAggregateQuantileRank(double value, Collection<RandomPercentile> aggregates) {
  1126.         return getAggregateRank(value, aggregates) / getAggregateN(aggregates);
  1127.     }

  1128.     /**
  1129.      * Returns the total number of values that have been consumed by the aggregates.
  1130.      *
  1131.      * @param aggregates collection of RandomPercentile instances whose combined sample size is sought
  1132.      * @return total number of values that have been consumed by the aggregates
  1133.      */
  1134.     public double getAggregateN(Collection<RandomPercentile> aggregates) {
  1135.         double result = 0;
  1136.         final Iterator<RandomPercentile> iterator = aggregates.iterator();
  1137.         while (iterator.hasNext()) {
  1138.             result += iterator.next().getN();
  1139.         }
  1140.         return result;
  1141.     }

  1142.     /**
  1143.      * Aggregates the provided instance into this instance.
  1144.      * <p>
  1145.      * Other must have the same buffer size as this. If the combined data size
  1146.      * exceeds the maximum storage configured for this instance, buffers are
  1147.      * merged to create capacity. If all that is needed is computation of
  1148.      * aggregate results, {@link #reduce(double, Collection)} is faster,
  1149.      * may be more accurate and does not require the buffer sizes to be the same.
  1150.      *
  1151.      * @param other the instance to aggregate into this instance
  1152.      * @throws NullArgumentException if the input is null
  1153.      * @throws IllegalArgumentException if other has different buffer size than this
  1154.      */
  1155.     @Override
  1156.     public void aggregate(RandomPercentile other)
  1157.         throws NullArgumentException {
  1158.         if (other == null) {
  1159.             throw new NullArgumentException();
  1160.         }
  1161.         if (other.s != s) {
  1162.             throw new MathIllegalArgumentException(LocalizedCoreFormats.INTERNAL_ERROR);
  1163.         }
  1164.         bufferMap.absorb(other.bufferMap);
  1165.         n += other.n;
  1166.     }

  1167.     /**
  1168.      * Returns the maximum number of {@code double} values that a {@code RandomPercentile}
  1169.      * instance created with the given {@code epsilon} value will retain in memory.
  1170.      * <p>
  1171.      * If the number of values that have been consumed from the stream is less than 1/2
  1172.      * of this value, reported statistics are exact.
  1173.      *
  1174.      * @param epsilon bound on the relative quantile error (see class javadoc)
  1175.      * @return upper bound on the total number of primitive double values retained in memory
  1176.      * @throws MathIllegalArgumentException if epsilon is not in the interval (0,1)
  1177.      */
  1178.     public static long maxValuesRetained(double epsilon) {
  1179.         if (epsilon >= 1) {
  1180.             throw new MathIllegalArgumentException(
  1181.                     LocalizedCoreFormats.NUMBER_TOO_LARGE_BOUND_EXCLUDED, epsilon, 1);
  1182.         }
  1183.         if (epsilon <= 0) {
  1184.             throw new MathIllegalArgumentException(
  1185.                     LocalizedCoreFormats.NUMBER_TOO_SMALL_BOUND_EXCLUDED, epsilon, 0);
  1186.         }
  1187.         final long h = (long) FastMath.ceil(log2(1/epsilon));
  1188.         final long s = (long) FastMath.ceil(FastMath.sqrt(log2(1/epsilon)) / epsilon);
  1189.         return (h+1) * s;
  1190.     }
  1191. }