StorelessBivariateCovariance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. /*
  18.  * This is not the original file distributed by the Apache Software Foundation
  19.  * It has been modified by the Hipparchus project
  20.  */
  21. package org.hipparchus.stat.correlation;

  22. import org.hipparchus.exception.LocalizedCoreFormats;
  23. import org.hipparchus.exception.MathIllegalArgumentException;

  24. /**
  25.  * Bivariate Covariance implementation that does not require input data to be
  26.  * stored in memory.
  27.  * <p>
  28.  * This class is based on a paper written by Philippe P&eacute;bay:
  29.  * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
  30.  * Formulas for Robust, One-Pass Parallel Computation of Covariances and
  31.  * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
  32.  * Sandia National Laboratories. It computes the covariance for a pair of variables.
  33.  * Use {@link StorelessCovariance} to estimate an entire covariance matrix.
  34.  * <p>
  35.  * Note: This class is package private as it is only used internally in
  36.  * the {@link StorelessCovariance} class.
  37.  */
  38. class StorelessBivariateCovariance {

  39.     /** the mean of variable x */
  40.     private double meanX;

  41.     /** the mean of variable y */
  42.     private double meanY;

  43.     /** number of observations */
  44.     private double n;

  45.     /** the running covariance estimate */
  46.     private double covarianceNumerator;

  47.     /** flag for bias correction */
  48.     private boolean biasCorrected;

  49.     /**
  50.      * Create an empty {@link StorelessBivariateCovariance} instance with
  51.      * bias correction.
  52.      */
  53.     StorelessBivariateCovariance() {
  54.         this(true);
  55.     }

  56.     /**
  57.      * Create an empty {@link StorelessBivariateCovariance} instance.
  58.      *
  59.      * @param biasCorrection if <code>true</code> the covariance estimate is corrected
  60.      * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
  61.      * i.e. n in the denominator.
  62.      */
  63.     StorelessBivariateCovariance(final boolean biasCorrection) {
  64.         meanX = meanY = 0.0;
  65.         n = 0;
  66.         covarianceNumerator = 0.0;
  67.         biasCorrected = biasCorrection;
  68.     }

  69.     /**
  70.      * Update the covariance estimation with a pair of variables (x, y).
  71.      *
  72.      * @param x the x value
  73.      * @param y the y value
  74.      */
  75.     public void increment(final double x, final double y) {
  76.         n++;
  77.         final double deltaX = x - meanX;
  78.         final double deltaY = y - meanY;
  79.         meanX += deltaX / n;
  80.         meanY += deltaY / n;
  81.         covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
  82.     }

  83.     /**
  84.      * Appends another bivariate covariance calculation to this.
  85.      * After this operation, statistics returned should be close to what would
  86.      * have been obtained by by performing all of the {@link #increment(double, double)}
  87.      * operations in {@code cov} directly on this.
  88.      *
  89.      * @param cov StorelessBivariateCovariance instance to append.
  90.      */
  91.     public void append(StorelessBivariateCovariance cov) {
  92.         double oldN = n;
  93.         n += cov.n;
  94.         final double deltaX = cov.meanX - meanX;
  95.         final double deltaY = cov.meanY - meanY;
  96.         meanX += deltaX * cov.n / n;
  97.         meanY += deltaY * cov.n / n;
  98.         covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
  99.     }

  100.     /**
  101.      * Returns the number of observations.
  102.      *
  103.      * @return number of observations
  104.      */
  105.     public double getN() {
  106.         return n;
  107.     }

  108.     /**
  109.      * Return the current covariance estimate.
  110.      *
  111.      * @return the current covariance
  112.      * @throws MathIllegalArgumentException if the number of observations
  113.      * is &lt; 2
  114.      */
  115.     public double getResult() throws MathIllegalArgumentException {
  116.         if (n < 2) {
  117.             throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_DIMENSION,
  118.                                                    n, 2, true);
  119.         }
  120.         if (biasCorrected) {
  121.             return covarianceNumerator / (n - 1d);
  122.         } else {
  123.             return covarianceNumerator / n;
  124.         }
  125.     }
  126. }