View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.descriptive;
23  
24  import java.io.Serializable;
25  import java.util.Arrays;
26  
27  import org.hipparchus.exception.LocalizedCoreFormats;
28  import org.hipparchus.exception.MathIllegalArgumentException;
29  import org.hipparchus.linear.RealMatrix;
30  import org.hipparchus.stat.descriptive.moment.GeometricMean;
31  import org.hipparchus.stat.descriptive.moment.Mean;
32  import org.hipparchus.stat.descriptive.rank.Max;
33  import org.hipparchus.stat.descriptive.rank.Min;
34  import org.hipparchus.stat.descriptive.summary.Sum;
35  import org.hipparchus.stat.descriptive.summary.SumOfLogs;
36  import org.hipparchus.stat.descriptive.summary.SumOfSquares;
37  import org.hipparchus.stat.descriptive.vector.VectorialCovariance;
38  import org.hipparchus.stat.descriptive.vector.VectorialStorelessStatistic;
39  import org.hipparchus.util.FastMath;
40  import org.hipparchus.util.MathArrays;
41  import org.hipparchus.util.MathUtils;
42  
43  /**
44   * Computes summary statistics for a stream of n-tuples added using the
45   * {@link #addValue(double[]) addValue} method. The data values are not stored
46   * in memory, so this class can be used to compute statistics for very large
47   * n-tuple streams.
48   * <p>
49   * To compute statistics for a stream of n-tuples, construct a
50   * {@link MultivariateSummaryStatistics} instance with dimension n and then use
51   * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
52   * methods where Xxx is a statistic return an array of <code>double</code>
53   * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element
54   * is the value of the given statistic for data range consisting of the i<sup>th</sup>
55   * element of each of the input n-tuples.  For example, if <code>addValue</code> is
56   * called with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
57   * <code>getSum</code> will return a three-element array with values {0+3+6, 1+4+7, 2+5+8}
58   * <p>
59   * Note: This class is not thread-safe.
60   */
61  public class MultivariateSummaryStatistics
62      implements StatisticalMultivariateSummary, Serializable {
63  
64      /** Serialization UID */
65      private static final long serialVersionUID = 20160424L;
66  
67      /** Dimension of the data. */
68      private final int k;
69  
70      /** Sum statistic implementation */
71      private final StorelessMultivariateStatistic sumImpl;
72      /** Sum of squares statistic implementation */
73      private final StorelessMultivariateStatistic sumSqImpl;
74      /** Minimum statistic implementation */
75      private final StorelessMultivariateStatistic minImpl;
76      /** Maximum statistic implementation */
77      private final StorelessMultivariateStatistic maxImpl;
78      /** Sum of log statistic implementation */
79      private final StorelessMultivariateStatistic sumLogImpl;
80      /** Geometric mean statistic implementation */
81      private final StorelessMultivariateStatistic geoMeanImpl;
82      /** Mean statistic implementation */
83      private final StorelessMultivariateStatistic meanImpl;
84      /** Covariance statistic implementation */
85      private final VectorialCovariance covarianceImpl;
86  
87      /** Count of values that have been added */
88      private long n;
89  
90      /**
91       * Construct a MultivariateSummaryStatistics instance for the given
92       * dimension. The returned instance will compute the unbiased sample
93       * covariance.
94       * <p>
95       * The returned instance is <b>not</b> thread-safe.
96       *
97       * @param dimension dimension of the data
98       */
99      public MultivariateSummaryStatistics(int dimension) {
100         this(dimension, true);
101     }
102 
103     /**
104      * Construct a MultivariateSummaryStatistics instance for the given
105      * dimension.
106      * <p>
107      * The returned instance is <b>not</b> thread-safe.
108      *
109      * @param dimension dimension of the data
110      * @param covarianceBiasCorrection if true, the returned instance will compute
111      * the unbiased sample covariance, otherwise the population covariance
112      */
113     public MultivariateSummaryStatistics(int dimension, boolean covarianceBiasCorrection) {
114         this.k = dimension;
115 
116         sumImpl     = new VectorialStorelessStatistic(k, new Sum());
117         sumSqImpl   = new VectorialStorelessStatistic(k, new SumOfSquares());
118         minImpl     = new VectorialStorelessStatistic(k, new Min());
119         maxImpl     = new VectorialStorelessStatistic(k, new Max());
120         sumLogImpl  = new VectorialStorelessStatistic(k, new SumOfLogs());
121         geoMeanImpl = new VectorialStorelessStatistic(k, new GeometricMean());
122         meanImpl    = new VectorialStorelessStatistic(k, new Mean());
123 
124         covarianceImpl = new VectorialCovariance(k, covarianceBiasCorrection);
125     }
126 
127     /**
128      * Add an n-tuple to the data
129      *
130      * @param value  the n-tuple to add
131      * @throws MathIllegalArgumentException if the array is null or the length
132      * of the array does not match the one used at construction
133      */
134     public void addValue(double[] value) throws MathIllegalArgumentException {
135         MathUtils.checkNotNull(value, LocalizedCoreFormats.INPUT_ARRAY);
136         MathUtils.checkDimension(value.length, k);
137         sumImpl.increment(value);
138         sumSqImpl.increment(value);
139         minImpl.increment(value);
140         maxImpl.increment(value);
141         sumLogImpl.increment(value);
142         geoMeanImpl.increment(value);
143         meanImpl.increment(value);
144         covarianceImpl.increment(value);
145         n++;
146     }
147 
148     /**
149      * Resets all statistics and storage.
150      */
151     public void clear() {
152         this.n = 0;
153         minImpl.clear();
154         maxImpl.clear();
155         sumImpl.clear();
156         sumLogImpl.clear();
157         sumSqImpl.clear();
158         geoMeanImpl.clear();
159         meanImpl.clear();
160         covarianceImpl.clear();
161     }
162 
163     /** {@inheritDoc} **/
164     @Override
165     public int getDimension() {
166         return k;
167     }
168 
169     /** {@inheritDoc} **/
170     @Override
171     public long getN() {
172         return n;
173     }
174 
175     /** {@inheritDoc} **/
176     @Override
177     public double[] getSum() {
178         return sumImpl.getResult();
179     }
180 
181     /** {@inheritDoc} **/
182     @Override
183     public double[] getSumSq() {
184         return sumSqImpl.getResult();
185     }
186 
187     /** {@inheritDoc} **/
188     @Override
189     public double[] getSumLog() {
190         return sumLogImpl.getResult();
191     }
192 
193     /** {@inheritDoc} **/
194     @Override
195     public double[] getMean() {
196         return meanImpl.getResult();
197     }
198 
199     /** {@inheritDoc} **/
200     @Override
201     public RealMatrix getCovariance() {
202         return covarianceImpl.getResult();
203     }
204 
205     /** {@inheritDoc} **/
206     @Override
207     public double[] getMax() {
208         return maxImpl.getResult();
209     }
210 
211     /** {@inheritDoc} **/
212     @Override
213     public double[] getMin() {
214         return minImpl.getResult();
215     }
216 
217     /** {@inheritDoc} **/
218     @Override
219     public double[] getGeometricMean() {
220         return geoMeanImpl.getResult();
221     }
222 
223     /**
224      * Returns an array whose i<sup>th</sup> entry is the standard deviation of the
225      * i<sup>th</sup> entries of the arrays that have been added using
226      * {@link #addValue(double[])}
227      *
228      * @return the array of component standard deviations
229      */
230     @Override
231     public double[] getStandardDeviation() {
232         double[] stdDev = new double[k];
233         if (getN() < 1) {
234             Arrays.fill(stdDev, Double.NaN);
235         } else if (getN() < 2) {
236             Arrays.fill(stdDev, 0.0);
237         } else {
238             RealMatrix matrix = getCovariance();
239             for (int i = 0; i < k; ++i) {
240                 stdDev[i] = FastMath.sqrt(matrix.getEntry(i, i));
241             }
242         }
243         return stdDev;
244     }
245 
246     /**
247      * Generates a text report displaying
248      * summary statistics from values that
249      * have been added.
250      * @return String with line feeds displaying statistics
251      */
252     @Override
253     public String toString() {
254         final String separator = ", ";
255         final String suffix = System.getProperty("line.separator");
256         StringBuilder outBuffer = new StringBuilder(200); // the size is just a wild guess
257         outBuffer.append("MultivariateSummaryStatistics:").append(suffix).
258                   append("n: ").append(getN()).append(suffix);
259         append(outBuffer, getMin(), "min: ", separator, suffix);
260         append(outBuffer, getMax(), "max: ", separator, suffix);
261         append(outBuffer, getMean(), "mean: ", separator, suffix);
262         append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
263         append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
264         append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
265         append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
266         outBuffer.append("covariance: ").append(getCovariance().toString()).append(suffix);
267         return outBuffer.toString();
268     }
269 
270     /**
271      * Append a text representation of an array to a buffer.
272      * @param buffer buffer to fill
273      * @param data data array
274      * @param prefix text prefix
275      * @param separator elements separator
276      * @param suffix text suffix
277      */
278     private void append(StringBuilder buffer, double[] data,
279                         String prefix, String separator, String suffix) {
280         buffer.append(prefix);
281         for (int i = 0; i < data.length; ++i) {
282             if (i > 0) {
283                 buffer.append(separator);
284             }
285             buffer.append(data[i]);
286         }
287         buffer.append(suffix);
288     }
289 
290     /**
291      * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
292      * instance and all statistics have the same values as this.
293      * @param object the object to test equality against.
294      * @return true if object equals this
295      */
296     @Override
297     public boolean equals(Object object) {
298         if (object == this) {
299             return true;
300         }
301         if (!(object instanceof MultivariateSummaryStatistics)) {
302             return false;
303         }
304         MultivariateSummaryStatistics other = (MultivariateSummaryStatistics) object;
305         return other.getN() == getN()                                                      &&
306                MathArrays.equalsIncludingNaN(other.getGeometricMean(), getGeometricMean()) &&
307                MathArrays.equalsIncludingNaN(other.getMax(),           getMax())           &&
308                MathArrays.equalsIncludingNaN(other.getMean(),          getMean())          &&
309                MathArrays.equalsIncludingNaN(other.getMin(),           getMin())           &&
310                MathArrays.equalsIncludingNaN(other.getSum(),           getSum())           &&
311                MathArrays.equalsIncludingNaN(other.getSumSq(),         getSumSq())         &&
312                MathArrays.equalsIncludingNaN(other.getSumLog(),        getSumLog())        &&
313                other.getCovariance().equals(getCovariance());
314     }
315 
316     /**
317      * Returns hash code based on values of statistics
318      *
319      * @return hash code
320      */
321     @Override
322     public int hashCode() {
323         int result = 31 + MathUtils.hash(getN());
324         result = result * 31 + MathUtils.hash(getGeometricMean());
325         result = result * 31 + MathUtils.hash(getMax());
326         result = result * 31 + MathUtils.hash(getMean());
327         result = result * 31 + MathUtils.hash(getMin());
328         result = result * 31 + MathUtils.hash(getSum());
329         result = result * 31 + MathUtils.hash(getSumSq());
330         result = result * 31 + MathUtils.hash(getSumLog());
331         result = result * 31 + getCovariance().hashCode();
332         return result;
333     }
334 
335 }