NaturalRanking.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */

  21. package org.hipparchus.stat.ranking;

  22. import java.util.ArrayList;
  23. import java.util.Arrays;
  24. import java.util.Iterator;
  25. import java.util.List;

  26. import org.hipparchus.exception.LocalizedCoreFormats;
  27. import org.hipparchus.exception.MathIllegalArgumentException;
  28. import org.hipparchus.exception.MathRuntimeException;
  29. import org.hipparchus.random.RandomDataGenerator;
  30. import org.hipparchus.random.RandomGenerator;
  31. import org.hipparchus.util.FastMath;


  32. /**
  33.  * <p> Ranking based on the natural ordering on doubles.</p>
  34.  * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
  35.  * are handled using the selected {@link TiesStrategy}.
  36.  * Configuration settings are supplied in optional constructor arguments.
  37.  * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
  38.  * respectively. When using {@link TiesStrategy#RANDOM}, a
  39.  * {@link RandomGenerator} may be supplied as a constructor argument.</p>
  40.  * <table border="1">
  41.  * <caption>Examples</caption>
  42.  * <tr><th colspan="3">
  43.  * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
  44.  * </th></tr>
  45.  * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
  46.  * <th><code>rank(data)</code></th>
  47.  * <tr>
  48.  * <td>default (NaNs maximal)</td>
  49.  * <td>default (ties averaged)</td>
  50.  * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
  51.  * <tr>
  52.  * <td>default (NaNs maximal)</td>
  53.  * <td>MINIMUM</td>
  54.  * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
  55.  * <tr>
  56.  * <td>MINIMAL</td>
  57.  * <td>default (ties averaged)</td>
  58.  * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
  59.  * <tr>
  60.  * <td>REMOVED</td>
  61.  * <td>SEQUENTIAL</td>
  62.  * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
  63.  * <tr>
  64.  * <td>MINIMAL</td>
  65.  * <td>MAXIMUM</td>
  66.  * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
  67.  *
  68.  */
  69. public class NaturalRanking implements RankingAlgorithm {

  70.     /** default NaN strategy */
  71.     public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;

  72.     /** default ties strategy */
  73.     public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;

  74.     /** NaN strategy - defaults to NaNs maximal */
  75.     private final NaNStrategy nanStrategy;

  76.     /** Ties strategy - defaults to ties averaged */
  77.     private final TiesStrategy tiesStrategy;

  78.     /** Source of random data - used only when ties strategy is RANDOM */
  79.     private final RandomDataGenerator randomData;

  80.     /**
  81.      * Create a NaturalRanking with default strategies for handling ties and NaNs.
  82.      */
  83.     public NaturalRanking() {
  84.         super();
  85.         tiesStrategy = DEFAULT_TIES_STRATEGY;
  86.         nanStrategy = DEFAULT_NAN_STRATEGY;
  87.         randomData = null;
  88.     }

  89.     /**
  90.      * Create a NaturalRanking with the given TiesStrategy.
  91.      *
  92.      * @param tiesStrategy the TiesStrategy to use
  93.      */
  94.     public NaturalRanking(TiesStrategy tiesStrategy) {
  95.         super();
  96.         this.tiesStrategy = tiesStrategy;
  97.         nanStrategy = DEFAULT_NAN_STRATEGY;
  98.         randomData = new RandomDataGenerator();
  99.     }

  100.     /**
  101.      * Create a NaturalRanking with the given NaNStrategy.
  102.      *
  103.      * @param nanStrategy the NaNStrategy to use
  104.      */
  105.     public NaturalRanking(NaNStrategy nanStrategy) {
  106.         super();
  107.         this.nanStrategy = nanStrategy;
  108.         tiesStrategy = DEFAULT_TIES_STRATEGY;
  109.         randomData = null;
  110.     }

  111.     /**
  112.      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
  113.      *
  114.      * @param nanStrategy NaNStrategy to use
  115.      * @param tiesStrategy TiesStrategy to use
  116.      */
  117.     public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
  118.         super();
  119.         this.nanStrategy = nanStrategy;
  120.         this.tiesStrategy = tiesStrategy;
  121.         randomData = new RandomDataGenerator();
  122.     }

  123.     /**
  124.      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
  125.      * RandomGenerator as the source of random data.
  126.      *
  127.      * @param randomGenerator source of random data
  128.      */
  129.     public NaturalRanking(RandomGenerator randomGenerator) {
  130.         super();
  131.         this.tiesStrategy = TiesStrategy.RANDOM;
  132.         nanStrategy = DEFAULT_NAN_STRATEGY;
  133.         randomData = RandomDataGenerator.of(randomGenerator);
  134.     }


  135.     /**
  136.      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
  137.      * and the given source of random data.
  138.      *
  139.      * @param nanStrategy NaNStrategy to use
  140.      * @param randomGenerator source of random data
  141.      */
  142.     public NaturalRanking(NaNStrategy nanStrategy,
  143.             RandomGenerator randomGenerator) {
  144.         super();
  145.         this.nanStrategy = nanStrategy;
  146.         this.tiesStrategy = TiesStrategy.RANDOM;
  147.         randomData = RandomDataGenerator.of(randomGenerator);
  148.     }

  149.     /**
  150.      * Return the NaNStrategy
  151.      *
  152.      * @return returns the NaNStrategy
  153.      */
  154.     public NaNStrategy getNanStrategy() {
  155.         return nanStrategy;
  156.     }

  157.     /**
  158.      * Return the TiesStrategy
  159.      *
  160.      * @return the TiesStrategy
  161.      */
  162.     public TiesStrategy getTiesStrategy() {
  163.         return tiesStrategy;
  164.     }

  165.     /**
  166.      * Rank <code>data</code> using the natural ordering on Doubles, with
  167.      * NaN values handled according to <code>nanStrategy</code> and ties
  168.      * resolved using <code>tiesStrategy.</code>
  169.      *
  170.      * @param data array to be ranked
  171.      * @return array of ranks
  172.      * @throws MathIllegalArgumentException if the selected {@link NaNStrategy} is {@code FAILED}
  173.      * and a {@link Double#NaN} is encountered in the input data
  174.      */
  175.     @Override
  176.     public double[] rank(double[] data) {

  177.         // Array recording initial positions of data to be ranked
  178.         IntDoublePair[] ranks = new IntDoublePair[data.length];
  179.         for (int i = 0; i < data.length; i++) {
  180.             ranks[i] = new IntDoublePair(data[i], i);
  181.         }

  182.         // Recode, remove or record positions of NaNs
  183.         List<Integer> nanPositions = null;
  184.         switch (nanStrategy) {
  185.             case MAXIMAL: // Replace NaNs with +INFs
  186.                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
  187.                 break;
  188.             case MINIMAL: // Replace NaNs with -INFs
  189.                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
  190.                 break;
  191.             case REMOVED: // Drop NaNs from data
  192.                 ranks = removeNaNs(ranks);
  193.                 break;
  194.             case FIXED:   // Record positions of NaNs
  195.                 nanPositions = getNanPositions(ranks);
  196.                 break;
  197.             case FAILED:
  198.                 nanPositions = getNanPositions(ranks);
  199.                 if (!nanPositions.isEmpty()) {
  200.                     throw new MathIllegalArgumentException(LocalizedCoreFormats.NAN_NOT_ALLOWED);
  201.                 }
  202.                 break;
  203.             default: // this should not happen unless NaNStrategy enum is changed
  204.                 throw MathRuntimeException.createInternalError();
  205.         }

  206.         // Sort the IntDoublePairs
  207.         Arrays.sort(ranks, (p1, p2) -> Double.compare(p1.value, p2.value));

  208.         // Walk the sorted array, filling output array using sorted positions,
  209.         // resolving ties as we go
  210.         double[] out = new double[ranks.length];
  211.         int pos = 1;  // position in sorted array
  212.         out[ranks[0].getPosition()] = pos;
  213.         List<Integer> tiesTrace = new ArrayList<>();
  214.         tiesTrace.add(ranks[0].getPosition());
  215.         for (int i = 1; i < ranks.length; i++) {
  216.             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
  217.                 // tie sequence has ended (or had length 1)
  218.                 pos = i + 1;
  219.                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
  220.                     resolveTie(out, tiesTrace);
  221.                 }
  222.                 tiesTrace = new ArrayList<>();
  223.                 tiesTrace.add(ranks[i].getPosition());
  224.             } else {
  225.                 // tie sequence continues
  226.                 tiesTrace.add(ranks[i].getPosition());
  227.             }
  228.             out[ranks[i].getPosition()] = pos;
  229.         }
  230.         if (tiesTrace.size() > 1) {  // handle tie sequence at end
  231.             resolveTie(out, tiesTrace);
  232.         }
  233.         if (nanStrategy == NaNStrategy.FIXED) {
  234.             restoreNaNs(out, nanPositions);
  235.         }
  236.         return out;
  237.     }

  238.     /**
  239.      * Returns an array that is a copy of the input array with IntDoublePairs
  240.      * having NaN values removed.
  241.      *
  242.      * @param ranks input array
  243.      * @return array with NaN-valued entries removed
  244.      */
  245.     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
  246.         if (!containsNaNs(ranks)) {
  247.             return ranks;
  248.         }
  249.         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
  250.         int j = 0;
  251.         for (int i = 0; i < ranks.length; i++) {
  252.             if (Double.isNaN(ranks[i].getValue())) {
  253.                 // drop, but adjust original ranks of later elements
  254.                 for (int k = i + 1; k < ranks.length; k++) {
  255.                     ranks[k] = new IntDoublePair(
  256.                             ranks[k].getValue(), ranks[k].getPosition() - 1);
  257.                 }
  258.             } else {
  259.                 outRanks[j] = new IntDoublePair(
  260.                         ranks[i].getValue(), ranks[i].getPosition());
  261.                 j++;
  262.             }
  263.         }
  264.         IntDoublePair[] returnRanks = new IntDoublePair[j];
  265.         System.arraycopy(outRanks, 0, returnRanks, 0, j);
  266.         return returnRanks;
  267.     }

  268.     /**
  269.      * Recodes NaN values to the given value.
  270.      *
  271.      * @param ranks array to recode
  272.      * @param value the value to replace NaNs with
  273.      */
  274.     private void recodeNaNs(IntDoublePair[] ranks, double value) {
  275.         for (int i = 0; i < ranks.length; i++) {
  276.             if (Double.isNaN(ranks[i].getValue())) {
  277.                 ranks[i] = new IntDoublePair(
  278.                         value, ranks[i].getPosition());
  279.             }
  280.         }
  281.     }

  282.     /**
  283.      * Checks for presence of NaNs in <code>ranks.</code>
  284.      *
  285.      * @param ranks array to be searched for NaNs
  286.      * @return true iff ranks contains one or more NaNs
  287.      */
  288.     private boolean containsNaNs(IntDoublePair[] ranks) {
  289.         for (int i = 0; i < ranks.length; i++) {
  290.             if (Double.isNaN(ranks[i].getValue())) {
  291.                 return true;
  292.             }
  293.         }
  294.         return false;
  295.     }

  296.     /**
  297.      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
  298.      * The input <code>ranks</code> array is expected to take the same value
  299.      * for all indices in <code>tiesTrace</code>.  The common value is recoded
  300.      * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
  301.      * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
  302.      * The same array and trace with tiesStrategy AVERAGE will come out
  303.      * <5,8,3,6,3,7,1,3>.
  304.      *
  305.      * @param ranks array of ranks
  306.      * @param tiesTrace list of indices where <code>ranks</code> is constant
  307.      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
  308.      * </code>
  309.      */
  310.     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {

  311.         // constant value of ranks over tiesTrace
  312.         final double c = ranks[tiesTrace.get(0)];

  313.         // length of sequence of tied ranks
  314.         final int length = tiesTrace.size();

  315.         switch (tiesStrategy) {
  316.             case  AVERAGE:  // Replace ranks with average
  317.                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
  318.                 break;
  319.             case MAXIMUM:   // Replace ranks with maximum values
  320.                 fill(ranks, tiesTrace, c + length - 1);
  321.                 break;
  322.             case MINIMUM:   // Replace ties with minimum
  323.                 fill(ranks, tiesTrace, c);
  324.                 break;
  325.             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
  326.                 Iterator<Integer> iterator = tiesTrace.iterator();
  327.                 long f = FastMath.round(c);
  328.                 while (iterator.hasNext()) {
  329.                     // No advertised exception because args are guaranteed valid
  330.                     ranks[iterator.next()] =
  331.                         randomData.nextLong(f, f + length - 1);
  332.                 }
  333.                 break;
  334.             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
  335.                 // walk and fill
  336.                 iterator = tiesTrace.iterator();
  337.                 f = FastMath.round(c);
  338.                 int i = 0;
  339.                 while (iterator.hasNext()) {
  340.                     ranks[iterator.next()] = f + i++;
  341.                 }
  342.                 break;
  343.             default: // this should not happen unless TiesStrategy enum is changed
  344.                 throw MathRuntimeException.createInternalError();
  345.         }
  346.     }

  347.     /**
  348.      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
  349.      *
  350.      * @param data array to modify
  351.      * @param tiesTrace list of index values to set
  352.      * @param value value to set
  353.      */
  354.     private void fill(double[] data, List<Integer> tiesTrace, double value) {
  355.         Iterator<Integer> iterator = tiesTrace.iterator();
  356.         while (iterator.hasNext()) {
  357.             data[iterator.next()] = value;
  358.         }
  359.     }

  360.     /**
  361.      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
  362.      *
  363.      * @param ranks array to modify
  364.      * @param nanPositions list of index values to set to <code>Double.NaN</code>
  365.      */
  366.     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
  367.         if (nanPositions.isEmpty()) {
  368.             return;
  369.         }
  370.         Iterator<Integer> iterator = nanPositions.iterator();
  371.         while (iterator.hasNext()) {
  372.             ranks[iterator.next().intValue()] = Double.NaN;
  373.         }

  374.     }

  375.     /**
  376.      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
  377.      *
  378.      * @param ranks array to search for <code>NaNs</code>
  379.      * @return list of indexes i such that <code>ranks[i] = NaN</code>
  380.      */
  381.     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
  382.         ArrayList<Integer> out = new ArrayList<>();
  383.         for (int i = 0; i < ranks.length; i++) {
  384.             if (Double.isNaN(ranks[i].getValue())) {
  385.                 out.add(Integer.valueOf(i));
  386.             }
  387.         }
  388.         return out;
  389.     }

  390.     /**
  391.      * Represents the position of a double value in an ordering.
  392.      */
  393.     private static class IntDoublePair {

  394.         /** Value of the pair */
  395.         private final double value;

  396.         /** Original position of the pair */
  397.         private final int position;

  398.         /**
  399.          * Construct an IntDoublePair with the given value and position.
  400.          * @param value the value of the pair
  401.          * @param position the original position
  402.          */
  403.         IntDoublePair(double value, int position) {
  404.             this.value = value;
  405.             this.position = position;
  406.         }

  407.         /**
  408.          * Returns the value of the pair.
  409.          * @return value
  410.          */
  411.         public double getValue() {
  412.             return value;
  413.         }

  414.         /**
  415.          * Returns the original position of the pair.
  416.          * @return position
  417.          */
  418.         public int getPosition() {
  419.             return position;
  420.         }
  421.     }
  422. }