Covariance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.correlation;

  22. import org.hipparchus.exception.LocalizedCoreFormats;
  23. import org.hipparchus.exception.MathIllegalArgumentException;
  24. import org.hipparchus.linear.BlockRealMatrix;
  25. import org.hipparchus.linear.RealMatrix;
  26. import org.hipparchus.stat.descriptive.moment.Mean;
  27. import org.hipparchus.stat.descriptive.moment.Variance;

  28. /**
  29.  * Computes covariances for pairs of arrays or columns of a matrix.
  30.  * <p>
  31.  * The constructors that take {@code RealMatrix} or {@code double[][]}
  32.  * arguments generate covariance matrices. The columns of the input
  33.  * matrices are assumed to represent variable values.
  34.  * <p>
  35.  * The constructor argument {@code biasCorrected} determines whether or
  36.  * not computed covariances are bias-corrected.
  37.  * <p>
  38.  * Unbiased covariances are given by the formula:
  39.  * <p>
  40.  * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
  41.  * <p>
  42.  * where {@code E(X)} is the mean of {@code X} and {@code E(Y)}
  43.  * is the mean of the <code>Y</code> values.
  44.  * <p>
  45.  * Non-bias-corrected estimates use {@code n} in place of {@code n - 1}.
  46.  */
  47. public class Covariance {

  48.     /** The covariance matrix. */
  49.     private final RealMatrix covarianceMatrix;

  50.     /** Number of observations (length of covariate vectors). */
  51.     private final int n;

  52.     /**
  53.      * Create a Covariance with no data.
  54.      */
  55.     public Covariance() {
  56.         super();
  57.         covarianceMatrix = null;
  58.         n = 0;
  59.     }

  60.     /**
  61.      * Create a Covariance matrix from a rectangular array
  62.      * whose columns represent covariates.
  63.      * <p>
  64.      * The <code>biasCorrected</code> parameter determines whether or not
  65.      * covariance estimates are bias-corrected.
  66.      * <p>
  67.      * The input array must be rectangular with at least one column
  68.      * and two rows.
  69.      *
  70.      * @param data rectangular array with columns representing covariates
  71.      * @param biasCorrected true means covariances are bias-corrected
  72.      * @throws MathIllegalArgumentException if the input data array is not
  73.      * rectangular with at least two rows and one column.
  74.      * @throws MathIllegalArgumentException if the input data array is not
  75.      * rectangular with at least one row and one column.
  76.      */
  77.     public Covariance(double[][] data, boolean biasCorrected)
  78.         throws MathIllegalArgumentException {
  79.         this(new BlockRealMatrix(data), biasCorrected);
  80.     }

  81.     /**
  82.      * Create a Covariance matrix from a rectangular array
  83.      * whose columns represent covariates.
  84.      * <p>
  85.      * The input array must be rectangular with at least one column
  86.      * and two rows.
  87.      *
  88.      * @param data rectangular array with columns representing covariates
  89.      * @throws MathIllegalArgumentException if the input data array is not
  90.      * rectangular with at least two rows and one column.
  91.      * @throws MathIllegalArgumentException if the input data array is not
  92.      * rectangular with at least one row and one column.
  93.      */
  94.     public Covariance(double[][] data) throws MathIllegalArgumentException {
  95.         this(data, true);
  96.     }

  97.     /**
  98.      * Create a covariance matrix from a matrix whose columns
  99.      * represent covariates.
  100.      * <p>
  101.      * The <code>biasCorrected</code> parameter determines whether or not
  102.      * covariance estimates are bias-corrected.
  103.      * <p>
  104.      * The matrix must have at least one column and two rows.
  105.      *
  106.      * @param matrix matrix with columns representing covariates
  107.      * @param biasCorrected true means covariances are bias-corrected
  108.      * @throws MathIllegalArgumentException if the input matrix does not have
  109.      * at least two rows and one column
  110.      */
  111.     public Covariance(RealMatrix matrix, boolean biasCorrected)
  112.         throws MathIllegalArgumentException {
  113.         checkSufficientData(matrix);
  114.         n = matrix.getRowDimension();
  115.         covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
  116.     }

  117.     /**
  118.      * Create a covariance matrix from a matrix whose columns
  119.      * represent covariates.
  120.      * <p>
  121.      * The matrix must have at least one column and two rows.
  122.      *
  123.      * @param matrix matrix with columns representing covariates
  124.      * @throws MathIllegalArgumentException if the input matrix does not have
  125.      * at least two rows and one column
  126.      */
  127.     public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
  128.         this(matrix, true);
  129.     }

  130.     /**
  131.      * Returns the covariance matrix
  132.      *
  133.      * @return covariance matrix
  134.      */
  135.     public RealMatrix getCovarianceMatrix() {
  136.         return covarianceMatrix;
  137.     }

  138.     /**
  139.      * Returns the number of observations (length of covariate vectors)
  140.      *
  141.      * @return number of observations
  142.      */
  143.     public int getN() {
  144.         return n;
  145.     }

  146.     /**
  147.      * Compute a covariance matrix from a matrix whose columns represent covariates.
  148.      *
  149.      * @param matrix input matrix (must have at least one column and two rows)
  150.      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
  151.      * @return covariance matrix
  152.      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
  153.      */
  154.     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
  155.         throws MathIllegalArgumentException {

  156.         int dimension = matrix.getColumnDimension();
  157.         Variance variance = new Variance(biasCorrected);
  158.         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
  159.         for (int i = 0; i < dimension; i++) {
  160.             for (int j = 0; j < i; j++) {
  161.               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
  162.               outMatrix.setEntry(i, j, cov);
  163.               outMatrix.setEntry(j, i, cov);
  164.             }
  165.             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
  166.         }
  167.         return outMatrix;
  168.     }

  169.     /**
  170.      * Create a covariance matrix from a matrix whose columns represent
  171.      * covariates. Covariances are computed using the bias-corrected formula.
  172.      *
  173.      * @param matrix input matrix (must have at least one column and two rows)
  174.      * @return covariance matrix
  175.      * @throws MathIllegalArgumentException if matrix does not contain sufficient data
  176.      * @see #Covariance
  177.      */
  178.     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
  179.         throws MathIllegalArgumentException {
  180.         return computeCovarianceMatrix(matrix, true);
  181.     }

  182.     /**
  183.      * Compute a covariance matrix from a rectangular array whose columns represent covariates.
  184.      *
  185.      * @param data input array (must have at least one column and two rows)
  186.      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
  187.      * @return covariance matrix
  188.      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
  189.      * @throws MathIllegalArgumentException if the input data array is not
  190.      * rectangular with at least one row and one column.
  191.      */
  192.     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
  193.         throws MathIllegalArgumentException {
  194.         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
  195.     }

  196.     /**
  197.      * Create a covariance matrix from a rectangular array whose columns represent
  198.      * covariates. Covariances are computed using the bias-corrected formula.
  199.      *
  200.      * @param data input array (must have at least one column and two rows)
  201.      * @return covariance matrix
  202.      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
  203.      * @throws MathIllegalArgumentException if the input data array is not
  204.      * rectangular with at least one row and one column.
  205.      * @see #Covariance
  206.      */
  207.     protected RealMatrix computeCovarianceMatrix(double[][] data)
  208.         throws MathIllegalArgumentException {
  209.         return computeCovarianceMatrix(data, true);
  210.     }

  211.     /**
  212.      * Computes the covariance between the two arrays.
  213.      * <p>
  214.      * Array lengths must match and the common length must be at least 2.
  215.      *
  216.      * @param xArray first data array
  217.      * @param yArray second data array
  218.      * @param biasCorrected if true, returned value will be bias-corrected
  219.      * @return returns the covariance for the two arrays
  220.      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
  221.      * there is insufficient data
  222.      */
  223.     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
  224.         throws MathIllegalArgumentException {
  225.         Mean mean = new Mean();
  226.         double result = 0d;
  227.         int length = xArray.length;
  228.         if (length != yArray.length) {
  229.             throw new MathIllegalArgumentException(
  230.                   LocalizedCoreFormats.DIMENSIONS_MISMATCH, length, yArray.length);
  231.         } else if (length < 2) {
  232.             throw new MathIllegalArgumentException(
  233.                   LocalizedCoreFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
  234.         } else {
  235.             double xMean = mean.evaluate(xArray);
  236.             double yMean = mean.evaluate(yArray);
  237.             for (int i = 0; i < length; i++) {
  238.                 double xDev = xArray[i] - xMean;
  239.                 double yDev = yArray[i] - yMean;
  240.                 result += (xDev * yDev - result) / (i + 1);
  241.             }
  242.         }
  243.         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
  244.     }

  245.     /**
  246.      * Computes the covariance between the two arrays, using the bias-corrected
  247.      * formula.
  248.      * <p>
  249.      * Array lengths must match and the common length must be at least 2.
  250.      *
  251.      * @param xArray first data array
  252.      * @param yArray second data array
  253.      * @return returns the covariance for the two arrays
  254.      * @throws MathIllegalArgumentException if the arrays lengths do not match or
  255.      * there is insufficient data
  256.      */
  257.     public double covariance(final double[] xArray, final double[] yArray)
  258.         throws MathIllegalArgumentException {
  259.         return covariance(xArray, yArray, true);
  260.     }

  261.     /**
  262.      * Throws MathIllegalArgumentException if the matrix does not have at least
  263.      * one column and two rows.
  264.      *
  265.      * @param matrix matrix to check
  266.      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
  267.      * to compute covariance
  268.      */
  269.     private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
  270.         int nRows = matrix.getRowDimension();
  271.         int nCols = matrix.getColumnDimension();
  272.         if (nRows < 2 || nCols < 1) {
  273.             throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
  274.                                                    nRows, nCols);
  275.         }
  276.     }
  277. }