1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.distribution.continuous;
23
24 import java.util.ArrayList;
25 import java.util.HashMap;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Map.Entry;
29
30 import org.hipparchus.distribution.EnumeratedDistribution;
31 import org.hipparchus.exception.MathIllegalArgumentException;
32 import org.hipparchus.util.MathArrays;
33 import org.hipparchus.util.MathUtils;
34 import org.hipparchus.util.Pair;
35
36 /**
37 * Implementation of a real-valued {@link EnumeratedDistribution}.
38 * <p>
39 * Values with zero-probability are allowed but they do not extend the
40 * support.
41 * <p>
42 * Duplicate values are allowed. Probabilities of duplicate values are
43 * combined when computing cumulative probabilities and statistics.
44 */
45 public class EnumeratedRealDistribution extends AbstractRealDistribution {
46
47 /** Serializable UID. */
48 private static final long serialVersionUID = 20130308L;
49
50 /**
51 * {@link EnumeratedDistribution} (using the {@link Double} wrapper)
52 * used to generate the pmf.
53 */
54 private final EnumeratedDistribution<Double> innerDistribution;
55
56 /**
57 * Create a discrete real-valued distribution from the input data. Values are assigned
58 * mass based on their frequency. For example, [0,1,1,2] as input creates a distribution
59 * with values 0, 1 and 2 having probability masses 0.25, 0.5 and 0.25 respectively,
60 *
61 * @param data input dataset
62 */
63 public EnumeratedRealDistribution(final double[] data) {
64 super();
65 final Map<Double, Integer> dataMap = new HashMap<>();
66 for (double value : data) {
67 Integer count = dataMap.get(value);
68 if (count == null) {
69 count = 0;
70 }
71 dataMap.put(value, ++count);
72 }
73 final int massPoints = dataMap.size();
74 final double denom = data.length;
75 final double[] values = new double[massPoints];
76 final double[] probabilities = new double[massPoints];
77 int index = 0;
78 for (Entry<Double, Integer> entry : dataMap.entrySet()) {
79 values[index] = entry.getKey();
80 probabilities[index] = entry.getValue() / denom;
81 index++;
82 }
83 innerDistribution =
84 new EnumeratedDistribution<>(createDistribution(values, probabilities));
85 }
86
87 /**
88 * Create a discrete real-valued distribution using the given probability mass function
89 * enumeration.
90 *
91 * @param singletons array of random variable values.
92 * @param probabilities array of probabilities.
93 * @throws MathIllegalArgumentException if
94 * {@code singletons.length != probabilities.length}
95 * @throws MathIllegalArgumentException if any of the probabilities are negative.
96 * @throws MathIllegalArgumentException if any of the probabilities are NaN.
97 * @throws MathIllegalArgumentException if any of the probabilities are infinite.
98 */
99 public EnumeratedRealDistribution(final double[] singletons, final double[] probabilities)
100 throws MathIllegalArgumentException {
101 super();
102 innerDistribution =
103 new EnumeratedDistribution<>(createDistribution(singletons, probabilities));
104 }
105
106
107 /**
108 * Create the list of Pairs representing the distribution from singletons and probabilities.
109 *
110 * @param singletons values
111 * @param probabilities probabilities
112 * @return list of value/probability pairs
113 * @throws MathIllegalArgumentException if probabilities contains negative, infinite or NaN values or only 0's
114 */
115 private static List<Pair<Double, Double>> createDistribution(double[] singletons,
116 double[] probabilities) {
117 MathArrays.checkEqualLength(singletons, probabilities);
118 final List<Pair<Double, Double>> samples = new ArrayList<>(singletons.length);
119
120 final double[] normalizedProbabilities = EnumeratedDistribution.checkAndNormalize(probabilities);
121 for (int i = 0; i < singletons.length; i++) {
122 samples.add(new Pair<>(singletons[i], normalizedProbabilities[i]));
123 }
124 return samples;
125 }
126
127 /**
128 * For a random variable {@code X} whose values are distributed according to
129 * this distribution, this method returns {@code P(X = x)}. In other words,
130 * this method represents the probability mass function (PMF) for the
131 * distribution.
132 * <p>
133 * Note that if {@code x1} and {@code x2} satisfy {@code x1.equals(x2)},
134 * or both are null, then {@code probability(x1) = probability(x2)}.
135 *
136 * @param x the point at which the PMF is evaluated
137 * @return the value of the probability mass function at {@code x}
138 */
139 public double probability(final double x) {
140 return innerDistribution.probability(x);
141 }
142
143 /**
144 * For a random variable {@code X} whose values are distributed according to
145 * this distribution, this method returns {@code P(X = x)}. In other words,
146 * this method represents the probability mass function (PMF) for the
147 * distribution.
148 *
149 * @param x the point at which the PMF is evaluated
150 * @return the value of the probability mass function at point {@code x}
151 */
152 @Override
153 public double density(final double x) {
154 return probability(x);
155 }
156
157 /**
158 * {@inheritDoc}
159 */
160 @Override
161 public double cumulativeProbability(final double x) {
162 double probability = 0;
163
164 for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
165 if (sample.getKey() <= x) {
166 probability += sample.getValue();
167 }
168 }
169
170 return probability;
171 }
172
173 /**
174 * {@inheritDoc}
175 */
176 @Override
177 public double inverseCumulativeProbability(final double p) throws MathIllegalArgumentException {
178 MathUtils.checkRangeInclusive(p, 0, 1);
179
180 double probability = 0;
181 double x = getSupportLowerBound();
182 for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
183 if (sample.getValue() == 0.0) {
184 continue;
185 }
186
187 probability += sample.getValue();
188 x = sample.getKey();
189
190 if (probability >= p) {
191 break;
192 }
193 }
194
195 return x;
196 }
197
198 /**
199 * {@inheritDoc}
200 *
201 * @return {@code sum(singletons[i] * probabilities[i])}
202 */
203 @Override
204 public double getNumericalMean() {
205 double mean = 0;
206
207 for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
208 mean += sample.getValue() * sample.getKey();
209 }
210
211 return mean;
212 }
213
214 /**
215 * {@inheritDoc}
216 *
217 * @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])}
218 */
219 @Override
220 public double getNumericalVariance() {
221 double mean = 0;
222 double meanOfSquares = 0;
223
224 for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
225 mean += sample.getValue() * sample.getKey();
226 meanOfSquares += sample.getValue() * sample.getKey() * sample.getKey();
227 }
228
229 return meanOfSquares - mean * mean;
230 }
231
232 /**
233 * {@inheritDoc}
234 *
235 * Returns the lowest value with non-zero probability.
236 *
237 * @return the lowest value with non-zero probability.
238 */
239 @Override
240 public double getSupportLowerBound() {
241 double min = Double.POSITIVE_INFINITY;
242 for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
243 if (sample.getKey() < min && sample.getValue() > 0) {
244 min = sample.getKey();
245 }
246 }
247
248 return min;
249 }
250
251 /**
252 * {@inheritDoc}
253 *
254 * Returns the highest value with non-zero probability.
255 *
256 * @return the highest value with non-zero probability.
257 */
258 @Override
259 public double getSupportUpperBound() {
260 double max = Double.NEGATIVE_INFINITY;
261 for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
262 if (sample.getKey() > max && sample.getValue() > 0) {
263 max = sample.getKey();
264 }
265 }
266
267 return max;
268 }
269
270 /**
271 * {@inheritDoc}
272 *
273 * The support of this distribution is connected.
274 *
275 * @return {@code true}
276 */
277 @Override
278 public boolean isSupportConnected() {
279 return true;
280 }
281
282 /**
283 * Return the probability mass function as a list of (value, probability) pairs.
284 *
285 * @return the probability mass function.
286 */
287 public List<Pair<Double, Double>> getPmf() {
288 return innerDistribution.getPmf();
289 }
290 }