1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22
23 package org.hipparchus.stat.ranking;
24
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Iterator;
28 import java.util.List;
29
30 import org.hipparchus.exception.LocalizedCoreFormats;
31 import org.hipparchus.exception.MathIllegalArgumentException;
32 import org.hipparchus.exception.MathRuntimeException;
33 import org.hipparchus.random.RandomDataGenerator;
34 import org.hipparchus.random.RandomGenerator;
35 import org.hipparchus.util.FastMath;
36
37
38 /**
39 * <p> Ranking based on the natural ordering on doubles.</p>
40 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
41 * are handled using the selected {@link TiesStrategy}.
42 * Configuration settings are supplied in optional constructor arguments.
43 * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
44 * respectively. When using {@link TiesStrategy#RANDOM}, a
45 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
46 * <table border="1">
47 * <caption>Examples</caption>
48 * <tr><th colspan="3">
49 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
50 * </th></tr>
51 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
52 * <th><code>rank(data)</code></th>
53 * <tr>
54 * <td>default (NaNs maximal)</td>
55 * <td>default (ties averaged)</td>
56 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
57 * <tr>
58 * <td>default (NaNs maximal)</td>
59 * <td>MINIMUM</td>
60 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
61 * <tr>
62 * <td>MINIMAL</td>
63 * <td>default (ties averaged)</td>
64 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
65 * <tr>
66 * <td>REMOVED</td>
67 * <td>SEQUENTIAL</td>
68 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
69 * <tr>
70 * <td>MINIMAL</td>
71 * <td>MAXIMUM</td>
72 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
73 *
74 */
75 public class NaturalRanking implements RankingAlgorithm {
76
77 /** default NaN strategy */
78 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
79
80 /** default ties strategy */
81 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
82
83 /** NaN strategy - defaults to NaNs maximal */
84 private final NaNStrategy nanStrategy;
85
86 /** Ties strategy - defaults to ties averaged */
87 private final TiesStrategy tiesStrategy;
88
89 /** Source of random data - used only when ties strategy is RANDOM */
90 private final RandomDataGenerator randomData;
91
92 /**
93 * Create a NaturalRanking with default strategies for handling ties and NaNs.
94 */
95 public NaturalRanking() {
96 super();
97 tiesStrategy = DEFAULT_TIES_STRATEGY;
98 nanStrategy = DEFAULT_NAN_STRATEGY;
99 randomData = null;
100 }
101
102 /**
103 * Create a NaturalRanking with the given TiesStrategy.
104 *
105 * @param tiesStrategy the TiesStrategy to use
106 */
107 public NaturalRanking(TiesStrategy tiesStrategy) {
108 super();
109 this.tiesStrategy = tiesStrategy;
110 nanStrategy = DEFAULT_NAN_STRATEGY;
111 randomData = new RandomDataGenerator();
112 }
113
114 /**
115 * Create a NaturalRanking with the given NaNStrategy.
116 *
117 * @param nanStrategy the NaNStrategy to use
118 */
119 public NaturalRanking(NaNStrategy nanStrategy) {
120 super();
121 this.nanStrategy = nanStrategy;
122 tiesStrategy = DEFAULT_TIES_STRATEGY;
123 randomData = null;
124 }
125
126 /**
127 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
128 *
129 * @param nanStrategy NaNStrategy to use
130 * @param tiesStrategy TiesStrategy to use
131 */
132 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
133 super();
134 this.nanStrategy = nanStrategy;
135 this.tiesStrategy = tiesStrategy;
136 randomData = new RandomDataGenerator();
137 }
138
139 /**
140 * Create a NaturalRanking with TiesStrategy.RANDOM and the given
141 * RandomGenerator as the source of random data.
142 *
143 * @param randomGenerator source of random data
144 */
145 public NaturalRanking(RandomGenerator randomGenerator) {
146 super();
147 this.tiesStrategy = TiesStrategy.RANDOM;
148 nanStrategy = DEFAULT_NAN_STRATEGY;
149 randomData = RandomDataGenerator.of(randomGenerator);
150 }
151
152
153 /**
154 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
155 * and the given source of random data.
156 *
157 * @param nanStrategy NaNStrategy to use
158 * @param randomGenerator source of random data
159 */
160 public NaturalRanking(NaNStrategy nanStrategy,
161 RandomGenerator randomGenerator) {
162 super();
163 this.nanStrategy = nanStrategy;
164 this.tiesStrategy = TiesStrategy.RANDOM;
165 randomData = RandomDataGenerator.of(randomGenerator);
166 }
167
168 /**
169 * Return the NaNStrategy
170 *
171 * @return returns the NaNStrategy
172 */
173 public NaNStrategy getNanStrategy() {
174 return nanStrategy;
175 }
176
177 /**
178 * Return the TiesStrategy
179 *
180 * @return the TiesStrategy
181 */
182 public TiesStrategy getTiesStrategy() {
183 return tiesStrategy;
184 }
185
186 /**
187 * Rank <code>data</code> using the natural ordering on Doubles, with
188 * NaN values handled according to <code>nanStrategy</code> and ties
189 * resolved using <code>tiesStrategy.</code>
190 *
191 * @param data array to be ranked
192 * @return array of ranks
193 * @throws MathIllegalArgumentException if the selected {@link NaNStrategy} is {@code FAILED}
194 * and a {@link Double#NaN} is encountered in the input data
195 */
196 @Override
197 public double[] rank(double[] data) {
198
199 // Array recording initial positions of data to be ranked
200 IntDoublePair[] ranks = new IntDoublePair[data.length];
201 for (int i = 0; i < data.length; i++) {
202 ranks[i] = new IntDoublePair(data[i], i);
203 }
204
205 // Recode, remove or record positions of NaNs
206 List<Integer> nanPositions = null;
207 switch (nanStrategy) {
208 case MAXIMAL: // Replace NaNs with +INFs
209 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
210 break;
211 case MINIMAL: // Replace NaNs with -INFs
212 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
213 break;
214 case REMOVED: // Drop NaNs from data
215 ranks = removeNaNs(ranks);
216 break;
217 case FIXED: // Record positions of NaNs
218 nanPositions = getNanPositions(ranks);
219 break;
220 case FAILED:
221 nanPositions = getNanPositions(ranks);
222 if (!nanPositions.isEmpty()) {
223 throw new MathIllegalArgumentException(LocalizedCoreFormats.NAN_NOT_ALLOWED);
224 }
225 break;
226 default: // this should not happen unless NaNStrategy enum is changed
227 throw MathRuntimeException.createInternalError();
228 }
229
230 // Sort the IntDoublePairs
231 Arrays.sort(ranks, (p1, p2) -> Double.compare(p1.value, p2.value));
232
233 // Walk the sorted array, filling output array using sorted positions,
234 // resolving ties as we go
235 double[] out = new double[ranks.length];
236 int pos = 1; // position in sorted array
237 out[ranks[0].getPosition()] = pos;
238 List<Integer> tiesTrace = new ArrayList<>();
239 tiesTrace.add(ranks[0].getPosition());
240 for (int i = 1; i < ranks.length; i++) {
241 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
242 // tie sequence has ended (or had length 1)
243 pos = i + 1;
244 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
245 resolveTie(out, tiesTrace);
246 }
247 tiesTrace = new ArrayList<>();
248 tiesTrace.add(ranks[i].getPosition());
249 } else {
250 // tie sequence continues
251 tiesTrace.add(ranks[i].getPosition());
252 }
253 out[ranks[i].getPosition()] = pos;
254 }
255 if (tiesTrace.size() > 1) { // handle tie sequence at end
256 resolveTie(out, tiesTrace);
257 }
258 if (nanStrategy == NaNStrategy.FIXED) {
259 restoreNaNs(out, nanPositions);
260 }
261 return out;
262 }
263
264 /**
265 * Returns an array that is a copy of the input array with IntDoublePairs
266 * having NaN values removed.
267 *
268 * @param ranks input array
269 * @return array with NaN-valued entries removed
270 */
271 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
272 if (!containsNaNs(ranks)) {
273 return ranks;
274 }
275 IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
276 int j = 0;
277 for (int i = 0; i < ranks.length; i++) {
278 if (Double.isNaN(ranks[i].getValue())) {
279 // drop, but adjust original ranks of later elements
280 for (int k = i + 1; k < ranks.length; k++) {
281 ranks[k] = new IntDoublePair(
282 ranks[k].getValue(), ranks[k].getPosition() - 1);
283 }
284 } else {
285 outRanks[j] = new IntDoublePair(
286 ranks[i].getValue(), ranks[i].getPosition());
287 j++;
288 }
289 }
290 IntDoublePair[] returnRanks = new IntDoublePair[j];
291 System.arraycopy(outRanks, 0, returnRanks, 0, j);
292 return returnRanks;
293 }
294
295 /**
296 * Recodes NaN values to the given value.
297 *
298 * @param ranks array to recode
299 * @param value the value to replace NaNs with
300 */
301 private void recodeNaNs(IntDoublePair[] ranks, double value) {
302 for (int i = 0; i < ranks.length; i++) {
303 if (Double.isNaN(ranks[i].getValue())) {
304 ranks[i] = new IntDoublePair(
305 value, ranks[i].getPosition());
306 }
307 }
308 }
309
310 /**
311 * Checks for presence of NaNs in <code>ranks.</code>
312 *
313 * @param ranks array to be searched for NaNs
314 * @return true iff ranks contains one or more NaNs
315 */
316 private boolean containsNaNs(IntDoublePair[] ranks) {
317 for (IntDoublePair rank : ranks) {
318 if (Double.isNaN(rank.getValue())) {
319 return true;
320 }
321 }
322 return false;
323 }
324
325 /**
326 * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
327 * The input <code>ranks</code> array is expected to take the same value
328 * for all indices in <code>tiesTrace</code>. The common value is recoded
329 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
330 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
331 * The same array and trace with tiesStrategy AVERAGE will come out
332 * <5,8,3,6,3,7,1,3>.
333 *
334 * @param ranks array of ranks
335 * @param tiesTrace list of indices where <code>ranks</code> is constant
336 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
337 * </code>
338 */
339 private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
340
341 // constant value of ranks over tiesTrace
342 final double c = ranks[tiesTrace.get(0)];
343
344 // length of sequence of tied ranks
345 final int length = tiesTrace.size();
346
347 switch (tiesStrategy) {
348 case AVERAGE: // Replace ranks with average
349 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
350 break;
351 case MAXIMUM: // Replace ranks with maximum values
352 fill(ranks, tiesTrace, c + length - 1);
353 break;
354 case MINIMUM: // Replace ties with minimum
355 fill(ranks, tiesTrace, c);
356 break;
357 case RANDOM: // Fill with random integral values in [c, c + length - 1]
358 Iterator<Integer> iterator = tiesTrace.iterator();
359 long f = FastMath.round(c);
360 while (iterator.hasNext()) {
361 // No advertised exception because args are guaranteed valid
362 ranks[iterator.next()] =
363 randomData.nextLong(f, f + length - 1);
364 }
365 break;
366 case SEQUENTIAL: // Fill sequentially from c to c + length - 1
367 // walk and fill
368 iterator = tiesTrace.iterator();
369 f = FastMath.round(c);
370 int i = 0;
371 while (iterator.hasNext()) {
372 ranks[iterator.next()] = f + i++;
373 }
374 break;
375 default: // this should not happen unless TiesStrategy enum is changed
376 throw MathRuntimeException.createInternalError();
377 }
378 }
379
380 /**
381 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
382 *
383 * @param data array to modify
384 * @param tiesTrace list of index values to set
385 * @param value value to set
386 */
387 private void fill(double[] data, List<Integer> tiesTrace, double value) {
388 for (Integer integer : tiesTrace) {
389 data[integer] = value;
390 }
391 }
392
393 /**
394 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
395 *
396 * @param ranks array to modify
397 * @param nanPositions list of index values to set to <code>Double.NaN</code>
398 */
399 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
400 if (nanPositions.isEmpty()) {
401 return;
402 }
403 for (Integer nanPosition : nanPositions) {
404 ranks[nanPosition] = Double.NaN;
405 }
406
407 }
408
409 /**
410 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
411 *
412 * @param ranks array to search for <code>NaNs</code>
413 * @return list of indexes i such that <code>ranks[i] = NaN</code>
414 */
415 private List<Integer> getNanPositions(IntDoublePair[] ranks) {
416 ArrayList<Integer> out = new ArrayList<>();
417 for (int i = 0; i < ranks.length; i++) {
418 if (Double.isNaN(ranks[i].getValue())) {
419 out.add(i);
420 }
421 }
422 return out;
423 }
424
425 /**
426 * Represents the position of a double value in an ordering.
427 */
428 private static class IntDoublePair {
429
430 /** Value of the pair */
431 private final double value;
432
433 /** Original position of the pair */
434 private final int position;
435
436 /**
437 * Construct an IntDoublePair with the given value and position.
438 * @param value the value of the pair
439 * @param position the original position
440 */
441 IntDoublePair(double value, int position) {
442 this.value = value;
443 this.position = position;
444 }
445
446 /**
447 * Returns the value of the pair.
448 * @return value
449 */
450 public double getValue() {
451 return value;
452 }
453
454 /**
455 * Returns the original position of the pair.
456 * @return position
457 */
458 public int getPosition() {
459 return position;
460 }
461 }
462 }