Frequency.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat;

  22. import java.io.Serializable;
  23. import java.text.NumberFormat;
  24. import java.util.Collection;
  25. import java.util.Comparator;
  26. import java.util.Iterator;
  27. import java.util.List;
  28. import java.util.Map;
  29. import java.util.NavigableMap;
  30. import java.util.Objects;
  31. import java.util.TreeMap;
  32. import java.util.stream.Collectors;

  33. import org.hipparchus.exception.NullArgumentException;
  34. import org.hipparchus.util.MathUtils;

  35. /**
  36.  * Maintains a frequency distribution of Comparable values.
  37.  * <p>
  38.  * The values are ordered using the default (natural order), unless a
  39.  * {@code Comparator} is supplied in the constructor.
  40.  *
  41.  * @see LongFrequency
  42.  * @param <T> the element type
  43.  */
  44. public class Frequency<T extends Comparable<T>> implements Serializable {

  45.     /** Serializable version identifier */
  46.     private static final long serialVersionUID = 20160322L;

  47.     /** underlying collection */
  48.     private final NavigableMap<T, Long> freqTable;

  49.     /**
  50.      * Default constructor.
  51.      */
  52.     public Frequency() {
  53.         freqTable = new TreeMap<>();
  54.     }

  55.     /**
  56.      * Constructor allowing values Comparator to be specified.
  57.      *
  58.      * @param comparator Comparator used to order values
  59.      */
  60.     public Frequency(Comparator<? super T> comparator) {
  61.         freqTable = new TreeMap<>(comparator);
  62.     }

  63.     /**
  64.      * Adds 1 to the frequency count for v.
  65.      *
  66.      * @param v the value to add.
  67.      */
  68.     public void addValue(T v) {
  69.         incrementValue(v, 1);
  70.     }

  71.     /**
  72.      * Increments the frequency count for v.
  73.      *
  74.      * @param v the value to add.
  75.      * @param increment the amount by which the value should be incremented
  76.      */
  77.     public void incrementValue(T v, long increment) {
  78.         Long count = freqTable.getOrDefault(v, Long.valueOf(0));
  79.         freqTable.put(v, Long.valueOf(count.longValue() + increment));
  80.     }

  81.     /** Clears the frequency table */
  82.     public void clear() {
  83.         freqTable.clear();
  84.     }

  85.     /**
  86.      * Returns an Iterator over the set of values that have been added.
  87.      *
  88.      * @return values Iterator
  89.      */
  90.     public Iterator<T> valuesIterator() {
  91.         return freqTable.keySet().iterator();
  92.     }

  93.     /**
  94.      * Return an Iterator over the set of keys and values that have been added.
  95.      * Using the entry set to iterate is more efficient in the case where you
  96.      * need to access respective counts as well as values, since it doesn't
  97.      * require a "get" for every key...the value is provided in the Map.Entry.
  98.      *
  99.      * @return entry set Iterator
  100.      */
  101.     public Iterator<Map.Entry<T, Long>> entrySetIterator() {
  102.         return freqTable.entrySet().iterator();
  103.     }

  104.     //-------------------------------------------------------------------------

  105.     /**
  106.      * Returns the sum of all frequencies.
  107.      *
  108.      * @return the total frequency count.
  109.      */
  110.     public long getSumFreq() {
  111.         return freqTable.values()
  112.                         .stream()
  113.                         .mapToLong(Long::longValue)
  114.                         .sum();
  115.     }

  116.     /**
  117.      * Returns the number of values equal to v.
  118.      * Returns 0 if the value is not comparable.
  119.      *
  120.      * @param v the value to lookup.
  121.      * @return the frequency of v.
  122.      */
  123.     public long getCount(T v) {
  124.         return freqTable.getOrDefault(v, 0L);
  125.     }

  126.     /**
  127.      * Returns the number of values in the frequency table.
  128.      *
  129.      * @return the number of unique values that have been added to the frequency table.
  130.      * @see #valuesIterator()
  131.      */
  132.     public int getUniqueCount(){
  133.         return freqTable.keySet().size();
  134.     }

  135.     /**
  136.      * Returns the percentage of values that are equal to v
  137.      * (as a proportion between 0 and 1).
  138.      * <p>
  139.      * Returns {@code Double.NaN} if no values have been added.
  140.      *
  141.      * @param v the value to lookup
  142.      * @return the proportion of values equal to v
  143.      */
  144.     public double getPct(T v) {
  145.         final long sumFreq = getSumFreq();
  146.         if (sumFreq == 0) {
  147.             return Double.NaN;
  148.         }
  149.         return (double) getCount(v) / (double) sumFreq;
  150.     }

  151.     //-----------------------------------------------------------------------------------------

  152.     /**
  153.      * Returns the cumulative frequency of values less than or equal to v.
  154.      *
  155.      * @param v the value to lookup.
  156.      * @return the proportion of values equal to v
  157.      */
  158.     public long getCumFreq(T v) {
  159.         if (getSumFreq() == 0) {
  160.             return 0;
  161.         }

  162.         NavigableMap<T, Long> headMap = freqTable.headMap(v, true);

  163.         if (headMap.isEmpty()) {
  164.             // v is less than first value
  165.             return 0;
  166.         } else if (headMap.size() == freqTable.size()) {
  167.             // v is greater than or equal to last value
  168.             return getSumFreq();
  169.         }

  170.         return headMap.values()
  171.                       .stream()
  172.                       .mapToLong(Long::longValue)
  173.                       .sum();
  174.     }

  175.     //----------------------------------------------------------------------------------------------

  176.     /**
  177.      * Returns the cumulative percentage of values less than or equal to v
  178.      * (as a proportion between 0 and 1).
  179.      * <p>
  180.      * Returns {@code Double.NaN} if no values have been added.
  181.      *
  182.      * @param v the value to lookup
  183.      * @return the proportion of values less than or equal to v
  184.      */
  185.     public double getCumPct(T v) {
  186.         final long sumFreq = getSumFreq();
  187.         if (sumFreq == 0) {
  188.             return Double.NaN;
  189.         }
  190.         return (double) getCumFreq(v) / (double) sumFreq;
  191.     }

  192.     /**
  193.      * Returns the mode value(s) in comparator order.
  194.      *
  195.      * @return a list containing the value(s) which appear most often.
  196.      */
  197.     public List<T> getMode() {
  198.         // Get the max count first
  199.         final long mostPopular =
  200.                 freqTable.values()
  201.                          .stream()
  202.                          .mapToLong(Long::longValue)
  203.                          .max()
  204.                          .orElse(0L);

  205.         return freqTable.entrySet()
  206.                         .stream()
  207.                         .filter(entry -> entry.getValue() == mostPopular)
  208.                         .map(entry -> entry.getKey())
  209.                         .collect(Collectors.toList());
  210.     }

  211.     //----------------------------------------------------------------------------------------------

  212.     /**
  213.      * Merge another Frequency object's counts into this instance.
  214.      * This Frequency's counts will be incremented (or set when not already set)
  215.      * by the counts represented by other.
  216.      *
  217.      * @param other the other {@link Frequency} object to be merged
  218.      * @throws NullArgumentException if {@code other} is null
  219.      */
  220.     public void merge(final Frequency<? extends T> other) throws NullArgumentException {
  221.         MathUtils.checkNotNull(other);

  222.         Iterator<? extends Map.Entry<? extends T, Long>> iter = other.entrySetIterator();
  223.         while (iter.hasNext()) {
  224.             final Map.Entry<? extends T, Long> entry = iter.next();
  225.             incrementValue(entry.getKey(), entry.getValue().longValue());
  226.         }
  227.     }

  228.     /**
  229.      * Merge a {@link Collection} of {@link Frequency} objects into this instance.
  230.      * This Frequency's counts will be incremented (or set when not already set)
  231.      * by the counts represented by each of the others.
  232.      *
  233.      * @param others the other {@link Frequency} objects to be merged
  234.      * @throws NullArgumentException if the collection is null
  235.      */
  236.     public void merge(final Collection<? extends Frequency<? extends T>> others)
  237.         throws NullArgumentException {
  238.         MathUtils.checkNotNull(others);

  239.         for (final Frequency<? extends T> freq : others) {
  240.             merge(freq);
  241.         }
  242.     }

  243.     //----------------------------------------------------------------------------------------------

  244.     /**
  245.      * Return a string representation of this frequency distribution.
  246.      *
  247.      * @return a string representation.
  248.      */
  249.     @Override
  250.     public String toString() {
  251.         NumberFormat nf = NumberFormat.getPercentInstance();
  252.         StringBuilder outBuffer = new StringBuilder(200); // this size is just a wild guess
  253.         outBuffer.append("Value \tFreq. \tPct. \tCum Pct. \n");
  254.         Iterator<T> iter = freqTable.keySet().iterator();
  255.         while (iter.hasNext()) {
  256.             T value = iter.next();
  257.             outBuffer.append(value).
  258.                       append('\t').
  259.                       append(getCount(value)).
  260.                       append('\t').
  261.                       append(nf.format(getPct(value))).
  262.                       append('\t').
  263.                       append(nf.format(getCumPct(value))).
  264.                       append('\n');
  265.         }
  266.         return outBuffer.toString();
  267.     }

  268.     /** {@inheritDoc} */
  269.     @Override
  270.     public int hashCode() {
  271.         final int prime = 31;
  272.         int result = 1;
  273.         result = prime * result +
  274.                  ((freqTable == null) ? 0 : freqTable.hashCode());
  275.         return result;
  276.     }

  277.     /** {@inheritDoc} */
  278.     @Override
  279.     public boolean equals(Object obj) {
  280.         if (this == obj) {
  281.             return true;
  282.         }
  283.         if (!(obj instanceof Frequency)) {
  284.             return false;
  285.         }
  286.         Frequency<?> other = (Frequency<?>) obj;
  287.         return Objects.equals(freqTable, other.freqTable);
  288.     }

  289. }