From 53447007bb1c483d4ffd6c11b3f4be2c5d9691f8 Mon Sep 17 00:00:00 2001 From: Jonathan So Date: Sat, 5 Oct 2024 19:53:54 +0100 Subject: [PATCH 1/3] use biased autocorrelation estimate by default (resolves #1785) --- numpyro/diagnostics.py | 11 +++++++++-- test/test_diagnostics.py | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/numpyro/diagnostics.py b/numpyro/diagnostics.py index cf3d04a9e..902d31cb6 100644 --- a/numpyro/diagnostics.py +++ b/numpyro/diagnostics.py @@ -96,12 +96,13 @@ def _fft_next_fast_len(target): target += 1 -def autocorrelation(x, axis=0): +def autocorrelation(x, axis=0, unbiased=False): """ Computes the autocorrelation of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocorrelation. + :param unbiased: use an unbiased estimator of the autocorrelation. :return: autocorrelation of ``x``. :rtype: numpy.ndarray """ @@ -127,7 +128,13 @@ def autocorrelation(x, axis=0): # truncate and normalize the result, then transpose back to original shape autocorr = autocorr[..., :N] - autocorr = autocorr / np.arange(N, 0.0, -1) + + # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags. + # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly + # unbiased when the mean is known, whereas we estimate it from samples here. 
+ if unbiased: + autocorr = autocorr / np.arange(N, 0.0, -1) + with np.errstate(invalid="ignore", divide="ignore"): autocorr = autocorr / autocorr[..., :1] return np.swapaxes(autocorr, axis, -1) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index d0a32df87..a193f0bf5 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from numpy.testing import assert_allclose +from numpy.testing import assert_, assert_allclose import pytest from scipy.fftpack import next_fast_len @@ -60,10 +60,22 @@ def test_hpdi(): def test_autocorrelation(): x = np.arange(10.0) - actual = autocorrelation(x) + actual = autocorrelation(x, unbiased=True) expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45]) assert_allclose(actual, expected, atol=0.01) + actual = autocorrelation(x, unbiased=False) + expected = expected * np.arange(len(x), 0.0, -1) / len(x) + assert_allclose(actual, expected, atol=0.01) + + # the unbiased estimator has variance O(1) at large lags + x = np.random.normal(size=20000) + ac = autocorrelation(x, unbiased=True) + assert_(np.any(np.abs(ac[-100:]) > 0.01)) + + ac = autocorrelation(x, unbiased=False) + assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) + def test_autocovariance(): x = np.arange(10.0) From 66f6c81e04b20457ac82c24e3ed6ca93175cb211 Mon Sep 17 00:00:00 2001 From: Jonathan So Date: Sat, 5 Oct 2024 20:32:10 +0100 Subject: [PATCH 2/3] increase tolerance in unbiased autocorrelation test --- test/test_diagnostics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index a193f0bf5..058467dfd 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -71,7 +71,7 @@ def test_autocorrelation(): # the unbiased estimator has variance O(1) at large lags x = np.random.normal(size=20000) ac = autocorrelation(x, unbiased=True) - assert_(np.any(np.abs(ac[-100:]) 
> 0.01)) + assert_(np.any(np.abs(ac[-100:]) > .1)) ac = autocorrelation(x, unbiased=False) assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) From 2d32aa02d257e14092cdf22ee460e059c2805bd1 Mon Sep 17 00:00:00 2001 From: Jonathan So Date: Sat, 5 Oct 2024 22:36:11 +0100 Subject: [PATCH 3/3] review comments, fixed autocovariance test. --- numpyro/diagnostics.py | 16 +++++++++------- test/test_diagnostics.py | 14 +++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/numpyro/diagnostics.py b/numpyro/diagnostics.py index 902d31cb6..a3eae2af0 100644 --- a/numpyro/diagnostics.py +++ b/numpyro/diagnostics.py @@ -96,13 +96,13 @@ def _fft_next_fast_len(target): target += 1 -def autocorrelation(x, axis=0, unbiased=False): +def autocorrelation(x, axis=0, bias=True): """ Computes the autocorrelation of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocorrelation. - :param unbiased: use an unbiased estimator of the autocorrelation. + :param bias: whether to use a biased estimator. :return: autocorrelation of ``x``. :rtype: numpy.ndarray """ @@ -132,7 +132,7 @@ def autocorrelation(x, axis=0, unbiased=False): # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags. # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly # unbiased when the mean is known, whereas we estimate it from samples here. - if unbiased: + if not bias: autocorr = autocorr / np.arange(N, 0.0, -1) with np.errstate(invalid="ignore", divide="ignore"): @@ -140,19 +140,20 @@ def autocorrelation(x, axis=0, unbiased=False): return np.swapaxes(autocorr, axis, -1) -def autocovariance(x, axis=0): +def autocovariance(x, axis=0, bias=True): """ Computes the autocovariance of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocovariance. + :param bias: whether to use a biased estimator. 
:return: autocovariance of ``x``. :rtype: numpy.ndarray """ - return autocorrelation(x, axis) * x.var(axis=axis, keepdims=True) + return autocorrelation(x, axis, bias) * x.var(axis=axis, keepdims=True) -def effective_sample_size(x): +def effective_sample_size(x, bias=True): """ Computes effective sample size of input ``x``, where the first dimension of ``x`` is chain dimension and the second dimension of ``x`` is draw dimension. @@ -165,6 +166,7 @@ def effective_sample_size(x): Stan Development Team :param numpy.ndarray x: the input array. + :param bias: whether to use a biased estimator of the autocovariance. :return: effective sample size of ``x``. :rtype: numpy.ndarray """ @@ -173,7 +175,7 @@ def effective_sample_size(x): assert x.shape[1] >= 2 # find autocovariance for each chain at lag k - gamma_k_c = autocovariance(x, axis=1) + gamma_k_c = autocovariance(x, axis=1, bias=bias) # find autocorrelation at lag k (from Stan reference) var_within, var_estimator = _compute_chain_variance_stats(x) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index 058467dfd..e423deab4 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -60,26 +60,26 @@ def test_hpdi(): def test_autocorrelation(): x = np.arange(10.0) - actual = autocorrelation(x, unbiased=True) + actual = autocorrelation(x, bias=False) expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45]) assert_allclose(actual, expected, atol=0.01) - actual = autocorrelation(x, unbiased=False) + actual = autocorrelation(x, bias=True) expected = expected * np.arange(len(x), 0.0, -1) / len(x) assert_allclose(actual, expected, atol=0.01) # the unbiased estimator has variance O(1) at large lags x = np.random.normal(size=20000) - ac = autocorrelation(x, unbiased=True) - assert_(np.any(np.abs(ac[-100:]) > .1)) + ac = autocorrelation(x, bias=False) + assert_(np.any(np.abs(ac[-100:]) > 0.1)) - ac = autocorrelation(x, unbiased=False) + ac = autocorrelation(x, bias=True) 
assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) def test_autocovariance(): x = np.arange(10.0) - actual = autocovariance(x) + actual = autocovariance(x, bias=False) expected = np.array( [8.25, 6.42, 4.25, 1.75, -1.08, -4.25, -7.75, -11.58, -15.75, -20.25] ) @@ -105,4 +105,4 @@ def test_split_gelman_rubin_agree_with_gelman_rubin(): def test_effective_sample_size(): x = np.arange(1000.0).reshape(100, 10) - assert_allclose(effective_sample_size(x), 52.64, atol=0.01) + assert_allclose(effective_sample_size(x, bias=False), 52.64, atol=0.01)