EnumeratedIntegerDistribution.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.distribution.discrete;

  22. import java.util.ArrayList;
  23. import java.util.HashMap;
  24. import java.util.List;
  25. import java.util.Map;
  26. import java.util.Map.Entry;

  27. import org.hipparchus.distribution.EnumeratedDistribution;
  28. import org.hipparchus.exception.MathIllegalArgumentException;
  29. import org.hipparchus.util.MathUtils;
  30. import org.hipparchus.util.Pair;

  31. /**
  32.  * Implementation of an integer-valued {@link EnumeratedDistribution}.
  33.  * <p>
  34.  * Values with zero-probability are allowed but they do not extend the
  35.  * support.
  36.  * <p>
  37.  * Duplicate values are allowed. Probabilities of duplicate values are combined
  38.  * when computing cumulative probabilities and statistics.
  39.  */
  40. public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {

  41.     /** Serializable UID. */
  42.     private static final long serialVersionUID = 20130308L;

  43.     /**
  44.      * {@link EnumeratedDistribution} instance (using the {@link Integer} wrapper)
  45.      * used to generate the pmf.
  46.      */
  47.     private final EnumeratedDistribution<Integer> innerDistribution;

  48.     /**
  49.      * Create a discrete distribution using the given probability mass function
  50.      * definition.
  51.      *
  52.      * @param singletons array of random variable values.
  53.      * @param probabilities array of probabilities.
  54.      * @throws MathIllegalArgumentException if
  55.      * {@code singletons.length != probabilities.length}
  56.      * @throws MathIllegalArgumentException if probabilities contains negative, infinite or NaN values or only 0's
  57.      */
  58.     public EnumeratedIntegerDistribution(final int[] singletons, final double[] probabilities)
  59.         throws MathIllegalArgumentException {
  60.         innerDistribution =
  61.                 new EnumeratedDistribution<>(createDistribution(singletons, probabilities));
  62.     }

  63.     /**
  64.      * Create a discrete integer-valued distribution from the input data.  Values are assigned
  65.      * mass based on their frequency.  For example, [0,1,1,2] as input creates a distribution
  66.      * with values 0, 1 and 2 having probability masses 0.25, 0.5 and 0.25 respectively,
  67.      *
  68.      * @param data input dataset
  69.      */
  70.     public EnumeratedIntegerDistribution(final int[] data) {
  71.         final Map<Integer, Integer> dataMap = new HashMap<>();
  72.         for (int value : data) {
  73.             Integer count = dataMap.get(value);
  74.             if (count == null) {
  75.                 count = 0;
  76.             }
  77.             dataMap.put(value, ++count);
  78.         }
  79.         final int massPoints = dataMap.size();
  80.         final double denom = data.length;
  81.         final int[] values = new int[massPoints];
  82.         final double[] probabilities = new double[massPoints];
  83.         int index = 0;
  84.         for (Entry<Integer, Integer> entry : dataMap.entrySet()) {
  85.             values[index] = entry.getKey();
  86.             probabilities[index] = entry.getValue() / denom;
  87.             index++;
  88.         }
  89.         innerDistribution =
  90.                 new EnumeratedDistribution<>(createDistribution(values, probabilities));
  91.     }

  92.     /**
  93.      * Create the list of Pairs representing the distribution from singletons and probabilities.
  94.      *
  95.      * @param singletons values
  96.      * @param probabilities probabilities
  97.      * @return list of value/probability pairs
  98.      * @throws MathIllegalArgumentException if probabilities contains negative, infinite or NaN values or only 0's
  99.      */
  100.     private static List<Pair<Integer, Double>> createDistribution(int[] singletons,
  101.                                                                   double[] probabilities) {
  102.         MathUtils.checkDimension(singletons.length, probabilities.length);
  103.         final List<Pair<Integer, Double>> samples = new ArrayList<>(singletons.length);

  104.         final double[] normalizedProbabilities = EnumeratedDistribution.checkAndNormalize(probabilities);
  105.         for (int i = 0; i < singletons.length; i++) {
  106.             samples.add(new Pair<>(singletons[i], normalizedProbabilities[i]));
  107.         }
  108.         return samples;
  109.     }

  110.     /**
  111.      * {@inheritDoc}
  112.      */
  113.     @Override
  114.     public double probability(final int x) {
  115.         return innerDistribution.probability(x);
  116.     }

  117.     /**
  118.      * {@inheritDoc}
  119.      */
  120.     @Override
  121.     public double cumulativeProbability(final int x) {
  122.         double probability = 0;

  123.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  124.             if (sample.getKey() <= x) {
  125.                 probability += sample.getValue();
  126.             }
  127.         }

  128.         return probability;
  129.     }

  130.     /**
  131.      * {@inheritDoc}
  132.      *
  133.      * @return {@code sum(singletons[i] * probabilities[i])}
  134.      */
  135.     @Override
  136.     public double getNumericalMean() {
  137.         double mean = 0;

  138.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  139.             mean += sample.getValue() * sample.getKey();
  140.         }

  141.         return mean;
  142.     }

  143.     /**
  144.      * {@inheritDoc}
  145.      *
  146.      * @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])}
  147.      */
  148.     @Override
  149.     public double getNumericalVariance() {
  150.         double mean = 0;
  151.         double meanOfSquares = 0;

  152.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  153.             mean += sample.getValue() * sample.getKey();
  154.             meanOfSquares += sample.getValue() * sample.getKey() * sample.getKey();
  155.         }

  156.         return meanOfSquares - mean * mean;
  157.     }

  158.     /**
  159.      * {@inheritDoc}
  160.      *
  161.      * Returns the lowest value with non-zero probability.
  162.      *
  163.      * @return the lowest value with non-zero probability.
  164.      */
  165.     @Override
  166.     public int getSupportLowerBound() {
  167.         int min = Integer.MAX_VALUE;
  168.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  169.             if (sample.getKey() < min && sample.getValue() > 0) {
  170.                 min = sample.getKey();
  171.             }
  172.         }

  173.         return min;
  174.     }

  175.     /**
  176.      * {@inheritDoc}
  177.      *
  178.      * Returns the highest value with non-zero probability.
  179.      *
  180.      * @return the highest value with non-zero probability.
  181.      */
  182.     @Override
  183.     public int getSupportUpperBound() {
  184.         int max = Integer.MIN_VALUE;
  185.         for (final Pair<Integer, Double> sample : innerDistribution.getPmf()) {
  186.             if (sample.getKey() > max && sample.getValue() > 0) {
  187.                 max = sample.getKey();
  188.             }
  189.         }

  190.         return max;
  191.     }

  192.     /**
  193.      * {@inheritDoc}
  194.      *
  195.      * The support of this distribution is connected.
  196.      *
  197.      * @return {@code true}
  198.      */
  199.     @Override
  200.     public boolean isSupportConnected() {
  201.         return true;
  202.     }

  203.     /**
  204.      * Return the probability mass function as a list of (value, probability) pairs.
  205.      *
  206.      * @return the probability mass function.
  207.      */
  208.     public List<Pair<Integer, Double>> getPmf() {
  209.         return innerDistribution.getPmf();
  210.     }

  211. }