View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.correlation;
23  
24  import org.hipparchus.exception.LocalizedCoreFormats;
25  import org.hipparchus.exception.MathIllegalArgumentException;
26  import org.hipparchus.exception.MathRuntimeException;
27  import org.hipparchus.linear.MatrixUtils;
28  import org.hipparchus.linear.RealMatrix;
29  import org.hipparchus.util.MathUtils;
30  
31  /**
32   * Covariance implementation that does not require input data to be
33   * stored in memory. The size of the covariance matrix is specified in the
34   * constructor. Specific elements of the matrix are incrementally updated with
35   * calls to incrementRow() or increment Covariance().
36   * <p>
37   * This class is based on a paper written by Philippe P&eacute;bay:
38   * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
39   * Formulas for Robust, One-Pass Parallel Computation of Covariances and
40   * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
41   * Sandia National Laboratories.
42   * <p>
43   * Note: the underlying covariance matrix is symmetric, thus only the
44   * upper triangular part of the matrix is stored and updated each increment.
45   */
46  public class StorelessCovariance extends Covariance {
47  
48      /** the square covariance matrix (upper triangular part) */
49      private final StorelessBivariateCovariance[] covMatrix;
50  
51      /** dimension of the square covariance matrix */
52      private final int dimension;
53  
54      /**
55       * Create a bias corrected covariance matrix with a given dimension.
56       *
57       * @param dim the dimension of the square covariance matrix
58       */
59      public StorelessCovariance(final int dim) {
60          this(dim, true);
61      }
62  
63      /**
64       * Create a covariance matrix with a given number of rows and columns and the
65       * indicated bias correction.
66       *
67       * @param dim the dimension of the covariance matrix
68       * @param biasCorrected if <code>true</code> the covariance estimate is corrected
69       * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
70       * i.e. n in the denominator.
71       */
72      public StorelessCovariance(final int dim, final boolean biasCorrected) {
73          dimension = dim;
74          covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
75          initializeMatrix(biasCorrected);
76      }
77  
78      /**
79       * Initialize the internal two-dimensional array of
80       * {@link StorelessBivariateCovariance} instances.
81       *
82       * @param biasCorrected if the covariance estimate shall be corrected for bias
83       */
84      private void initializeMatrix(final boolean biasCorrected) {
85          for(int i = 0; i < dimension; i++){
86              for(int j = 0; j < dimension; j++){
87                  setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
88              }
89          }
90      }
91  
92      /**
93       * Returns the index (i, j) translated into the one-dimensional
94       * array used to store the upper triangular part of the symmetric
95       * covariance matrix.
96       *
97       * @param i the row index
98       * @param j the column index
99       * @return the corresponding index in the matrix array
100      */
101     private int indexOf(final int i, final int j) {
102         return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
103     }
104 
105     /**
106      * Gets the element at index (i, j) from the covariance matrix
107      * @param i the row index
108      * @param j the column index
109      * @return the {@link StorelessBivariateCovariance} element at the given index
110      */
111     private StorelessBivariateCovariance getElement(final int i, final int j) {
112         return covMatrix[indexOf(i, j)];
113     }
114 
115     /**
116      * Sets the covariance element at index (i, j) in the covariance matrix
117      * @param i the row index
118      * @param j the column index
119      * @param cov the {@link StorelessBivariateCovariance} element to be set
120      */
121     private void setElement(final int i, final int j,
122                             final StorelessBivariateCovariance cov) {
123         covMatrix[indexOf(i, j)] = cov;
124     }
125 
126     /**
127      * Get the covariance for an individual element of the covariance matrix.
128      *
129      * @param xIndex row index in the covariance matrix
130      * @param yIndex column index in the covariance matrix
131      * @return the covariance of the given element
132      * @throws MathIllegalArgumentException if the number of observations
133      * in the cell is &lt; 2
134      */
135     public double getCovariance(final int xIndex, final int yIndex)
136         throws MathIllegalArgumentException {
137 
138         return getElement(xIndex, yIndex).getResult();
139     }
140 
141     /**
142      * Increment the covariance matrix with one row of data.
143      *
144      * @param data array representing one row of data.
145      * @throws MathIllegalArgumentException if the length of <code>rowData</code>
146      * does not match with the covariance matrix
147      */
148     public void increment(final double[] data)
149         throws MathIllegalArgumentException {
150 
151         int length = data.length;
152         MathUtils.checkDimension(length, dimension);
153 
154         // only update the upper triangular part of the covariance matrix
155         // as only these parts are actually stored
156         for (int i = 0; i < length; i++){
157             for (int j = i; j < length; j++){
158                 getElement(i, j).increment(data[i], data[j]);
159             }
160         }
161 
162     }
163 
164     /**
165      * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
166      * with this.  After invoking this method, covariances returned should be close
167      * to what would have been obtained by performing all of the {@link #increment(double[])}
168      * operations in {@code sc} directly on this.
169      *
170      * @param sc externally computed StorelessCovariance to add to this
171      * @throws MathIllegalArgumentException if the dimension of sc does not match this
172      */
173     public void append(StorelessCovariance sc) throws MathIllegalArgumentException {
174         MathUtils.checkDimension(sc.dimension, dimension);
175 
176         // only update the upper triangular part of the covariance matrix
177         // as only these parts are actually stored
178         for (int i = 0; i < dimension; i++) {
179             for (int j = i; j < dimension; j++) {
180                 getElement(i, j).append(sc.getElement(i, j));
181             }
182         }
183     }
184 
185     /**
186      * {@inheritDoc}
187      * @throws MathIllegalArgumentException if the number of observations
188      * in a cell is &lt; 2
189      */
190     @Override
191     public RealMatrix getCovarianceMatrix() throws MathIllegalArgumentException {
192         return MatrixUtils.createRealMatrix(getData());
193     }
194 
195     /**
196      * Return the covariance matrix as two-dimensional array.
197      *
198      * @return a two-dimensional double array of covariance values
199      * @throws MathIllegalArgumentException if the number of observations
200      * for a cell is &lt; 2
201      */
202     public double[][] getData() throws MathIllegalArgumentException {
203         final double[][] data = new double[dimension][dimension];
204         for (int i = 0; i < dimension; i++) {
205             for (int j = 0; j < dimension; j++) {
206                 data[i][j] = getElement(i, j).getResult();
207             }
208         }
209         return data;
210     }
211 
212     /**
213      * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
214      * since the number of bivariate observations does not have to be the same for different
215      * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
216      *
217      * @return nothing as this implementation always throws a
218      * {@link MathRuntimeException}
219      * @throws MathRuntimeException in all cases
220      */
221     @Override
222     public int getN() throws MathRuntimeException {
223         throw new MathRuntimeException(LocalizedCoreFormats.UNSUPPORTED_OPERATION);
224     }
225 }