1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat;
23
24 import java.io.Serializable;
25 import java.text.NumberFormat;
26 import java.util.Collection;
27 import java.util.Comparator;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.NavigableMap;
32 import java.util.Objects;
33 import java.util.TreeMap;
34 import java.util.stream.Collectors;
35
36 import org.hipparchus.exception.NullArgumentException;
37 import org.hipparchus.util.MathUtils;
38
39 /**
40 * Maintains a frequency distribution of Comparable values.
41 * <p>
42 * The values are ordered using the default (natural order), unless a
43 * {@code Comparator} is supplied in the constructor.
44 *
45 * @see LongFrequency
46 * @param <T> the element type
47 */
48 public class Frequency<T extends Comparable<T>> implements Serializable {
49
50 /** Serializable version identifier */
51 private static final long serialVersionUID = 20160322L;
52
53 /** underlying collection */
54 private final NavigableMap<T, Long> freqTable;
55
56 /**
57 * Default constructor.
58 */
59 public Frequency() {
60 freqTable = new TreeMap<>();
61 }
62
63 /**
64 * Constructor allowing values Comparator to be specified.
65 *
66 * @param comparator Comparator used to order values
67 */
68 public Frequency(Comparator<? super T> comparator) {
69 freqTable = new TreeMap<>(comparator);
70 }
71
72 /**
73 * Adds 1 to the frequency count for v.
74 *
75 * @param v the value to add.
76 */
77 public void addValue(T v) {
78 incrementValue(v, 1);
79 }
80
81 /**
82 * Increments the frequency count for v.
83 *
84 * @param v the value to add.
85 * @param increment the amount by which the value should be incremented
86 */
87 public void incrementValue(T v, long increment) {
88 Long count = freqTable.getOrDefault(v, Long.valueOf(0));
89 freqTable.put(v, Long.valueOf(count.longValue() + increment));
90 }
91
92 /** Clears the frequency table */
93 public void clear() {
94 freqTable.clear();
95 }
96
97 /**
98 * Returns an Iterator over the set of values that have been added.
99 *
100 * @return values Iterator
101 */
102 public Iterator<T> valuesIterator() {
103 return freqTable.keySet().iterator();
104 }
105
106 /**
107 * Return an Iterator over the set of keys and values that have been added.
108 * Using the entry set to iterate is more efficient in the case where you
109 * need to access respective counts as well as values, since it doesn't
110 * require a "get" for every key...the value is provided in the Map.Entry.
111 *
112 * @return entry set Iterator
113 */
114 public Iterator<Map.Entry<T, Long>> entrySetIterator() {
115 return freqTable.entrySet().iterator();
116 }
117
118 //-------------------------------------------------------------------------
119
120 /**
121 * Returns the sum of all frequencies.
122 *
123 * @return the total frequency count.
124 */
125 public long getSumFreq() {
126 return freqTable.values()
127 .stream()
128 .mapToLong(Long::longValue)
129 .sum();
130 }
131
132 /**
133 * Returns the number of values equal to v.
134 * Returns 0 if the value is not comparable.
135 *
136 * @param v the value to lookup.
137 * @return the frequency of v.
138 */
139 public long getCount(T v) {
140 return freqTable.getOrDefault(v, 0L);
141 }
142
143 /**
144 * Returns the number of values in the frequency table.
145 *
146 * @return the number of unique values that have been added to the frequency table.
147 * @see #valuesIterator()
148 */
149 public int getUniqueCount(){
150 return freqTable.keySet().size();
151 }
152
153 /**
154 * Returns the percentage of values that are equal to v
155 * (as a proportion between 0 and 1).
156 * <p>
157 * Returns {@code Double.NaN} if no values have been added.
158 *
159 * @param v the value to lookup
160 * @return the proportion of values equal to v
161 */
162 public double getPct(T v) {
163 final long sumFreq = getSumFreq();
164 if (sumFreq == 0) {
165 return Double.NaN;
166 }
167 return (double) getCount(v) / (double) sumFreq;
168 }
169
170 //-----------------------------------------------------------------------------------------
171
172 /**
173 * Returns the cumulative frequency of values less than or equal to v.
174 *
175 * @param v the value to lookup.
176 * @return the proportion of values equal to v
177 */
178 public long getCumFreq(T v) {
179 if (getSumFreq() == 0) {
180 return 0;
181 }
182
183 NavigableMap<T, Long> headMap = freqTable.headMap(v, true);
184
185 if (headMap.isEmpty()) {
186 // v is less than first value
187 return 0;
188 } else if (headMap.size() == freqTable.size()) {
189 // v is greater than or equal to last value
190 return getSumFreq();
191 }
192
193 return headMap.values()
194 .stream()
195 .mapToLong(Long::longValue)
196 .sum();
197 }
198
199 //----------------------------------------------------------------------------------------------
200
201 /**
202 * Returns the cumulative percentage of values less than or equal to v
203 * (as a proportion between 0 and 1).
204 * <p>
205 * Returns {@code Double.NaN} if no values have been added.
206 *
207 * @param v the value to lookup
208 * @return the proportion of values less than or equal to v
209 */
210 public double getCumPct(T v) {
211 final long sumFreq = getSumFreq();
212 if (sumFreq == 0) {
213 return Double.NaN;
214 }
215 return (double) getCumFreq(v) / (double) sumFreq;
216 }
217
218 /**
219 * Returns the mode value(s) in comparator order.
220 *
221 * @return a list containing the value(s) which appear most often.
222 */
223 public List<T> getMode() {
224 // Get the max count first
225 final long mostPopular =
226 freqTable.values()
227 .stream()
228 .mapToLong(Long::longValue)
229 .max()
230 .orElse(0L);
231
232 return freqTable.entrySet()
233 .stream()
234 .filter(entry -> entry.getValue() == mostPopular)
235 .map(entry -> entry.getKey())
236 .collect(Collectors.toList());
237 }
238
239 //----------------------------------------------------------------------------------------------
240
241 /**
242 * Merge another Frequency object's counts into this instance.
243 * This Frequency's counts will be incremented (or set when not already set)
244 * by the counts represented by other.
245 *
246 * @param other the other {@link Frequency} object to be merged
247 * @throws NullArgumentException if {@code other} is null
248 */
249 public void merge(final Frequency<? extends T> other) throws NullArgumentException {
250 MathUtils.checkNotNull(other);
251
252 Iterator<? extends Map.Entry<? extends T, Long>> iter = other.entrySetIterator();
253 while (iter.hasNext()) {
254 final Map.Entry<? extends T, Long> entry = iter.next();
255 incrementValue(entry.getKey(), entry.getValue().longValue());
256 }
257 }
258
259 /**
260 * Merge a {@link Collection} of {@link Frequency} objects into this instance.
261 * This Frequency's counts will be incremented (or set when not already set)
262 * by the counts represented by each of the others.
263 *
264 * @param others the other {@link Frequency} objects to be merged
265 * @throws NullArgumentException if the collection is null
266 */
267 public void merge(final Collection<? extends Frequency<? extends T>> others)
268 throws NullArgumentException {
269 MathUtils.checkNotNull(others);
270
271 for (final Frequency<? extends T> freq : others) {
272 merge(freq);
273 }
274 }
275
276 //----------------------------------------------------------------------------------------------
277
278 /**
279 * Return a string representation of this frequency distribution.
280 *
281 * @return a string representation.
282 */
283 @Override
284 public String toString() {
285 NumberFormat nf = NumberFormat.getPercentInstance();
286 StringBuilder outBuffer = new StringBuilder(200); // this size is just a wild guess
287 outBuffer.append("Value \tFreq. \tPct. \tCum Pct. \n");
288 Iterator<T> iter = freqTable.keySet().iterator();
289 while (iter.hasNext()) {
290 T value = iter.next();
291 outBuffer.append(value).
292 append('\t').
293 append(getCount(value)).
294 append('\t').
295 append(nf.format(getPct(value))).
296 append('\t').
297 append(nf.format(getCumPct(value))).
298 append('\n');
299 }
300 return outBuffer.toString();
301 }
302
303 /** {@inheritDoc} */
304 @Override
305 public int hashCode() {
306 final int prime = 31;
307 int result = 1;
308 result = prime * result +
309 ((freqTable == null) ? 0 : freqTable.hashCode());
310 return result;
311 }
312
313 /** {@inheritDoc} */
314 @Override
315 public boolean equals(Object obj) {
316 if (this == obj) {
317 return true;
318 }
319 if (!(obj instanceof Frequency)) {
320 return false;
321 }
322 Frequency<?> other = (Frequency<?>) obj;
323 return Objects.equals(freqTable, other.freqTable);
324 }
325
326 }