From 53447007bb1c483d4ffd6c11b3f4be2c5d9691f8 Mon Sep 17 00:00:00 2001 From: Jonathan So Date: Sat, 5 Oct 2024 19:53:54 +0100 Subject: [PATCH 1/3] use biased autocorrelation estimate by default (resolves #1785) --- numpyro/diagnostics.py | 11 +++++++++-- test/test_diagnostics.py | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/numpyro/diagnostics.py b/numpyro/diagnostics.py index cf3d04a9e..902d31cb6 100644 --- a/numpyro/diagnostics.py +++ b/numpyro/diagnostics.py @@ -96,12 +96,13 @@ def _fft_next_fast_len(target): target += 1 -def autocorrelation(x, axis=0): +def autocorrelation(x, axis=0, unbiased=False): """ Computes the autocorrelation of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocorrelation. + :param unbiased: use an unbiased estimator of the autocorrelation. :return: autocorrelation of ``x``. :rtype: numpy.ndarray """ @@ -127,7 +128,13 @@ def autocorrelation(x, axis=0): # truncate and normalize the result, then transpose back to original shape autocorr = autocorr[..., :N] - autocorr = autocorr / np.arange(N, 0.0, -1) + + # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags. + # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly + # unbiased when the mean is known, whereas we estimate it from samples here. 
+ if unbiased: + autocorr = autocorr / np.arange(N, 0.0, -1) + with np.errstate(invalid="ignore", divide="ignore"): autocorr = autocorr / autocorr[..., :1] return np.swapaxes(autocorr, axis, -1) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index d0a32df87..a193f0bf5 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from numpy.testing import assert_allclose +from numpy.testing import assert_, assert_allclose import pytest from scipy.fftpack import next_fast_len @@ -60,10 +60,22 @@ def test_hpdi(): def test_autocorrelation(): x = np.arange(10.0) - actual = autocorrelation(x) + actual = autocorrelation(x, unbiased=True) expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45]) assert_allclose(actual, expected, atol=0.01) + actual = autocorrelation(x, unbiased=False) + expected = expected * np.arange(len(x), 0.0, -1) / len(x) + assert_allclose(actual, expected, atol=0.01) + + # the unbiased estimator has variance O(1) at large lags + x = np.random.normal(size=20000) + ac = autocorrelation(x, unbiased=True) + assert_(np.any(np.abs(ac[-100:]) > 0.01)) + + ac = autocorrelation(x, unbiased=False) + assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) + def test_autocovariance(): x = np.arange(10.0) From 66f6c81e04b20457ac82c24e3ed6ca93175cb211 Mon Sep 17 00:00:00 2001 From: Jonathan So Date: Sat, 5 Oct 2024 20:32:10 +0100 Subject: [PATCH 2/3] increase tolerance in unbiased autocorrelation test --- test/test_diagnostics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index a193f0bf5..058467dfd 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -71,7 +71,7 @@ def test_autocorrelation(): # the unbiased estimator has variance O(1) at large lags x = np.random.normal(size=20000) ac = autocorrelation(x, unbiased=True) - assert_(np.any(np.abs(ac[-100:]) 
> 0.01)) + assert_(np.any(np.abs(ac[-100:]) > .1)) ac = autocorrelation(x, unbiased=False) assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) From 2d32aa02d257e14092cdf22ee460e059c2805bd1 Mon Sep 17 00:00:00 2001 From: Jonathan So Date: Sat, 5 Oct 2024 22:36:11 +0100 Subject: [PATCH 3/3] review comments, fixed autocovariance test. --- numpyro/diagnostics.py | 16 +++++++++------- test/test_diagnostics.py | 14 +++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/numpyro/diagnostics.py b/numpyro/diagnostics.py index 902d31cb6..a3eae2af0 100644 --- a/numpyro/diagnostics.py +++ b/numpyro/diagnostics.py @@ -96,13 +96,13 @@ def _fft_next_fast_len(target): target += 1 -def autocorrelation(x, axis=0, unbiased=False): +def autocorrelation(x, axis=0, bias=True): """ Computes the autocorrelation of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocorrelation. - :param unbiased: use an unbiased estimator of the autocorrelation. + :param bias: whether to use a biased estimator. :return: autocorrelation of ``x``. :rtype: numpy.ndarray """ @@ -132,7 +132,7 @@ def autocorrelation(x, axis=0, unbiased=False): # the unbiased estimator is known to have "wild" tails, due to few samples at longer lags. # see Geyer (1992) and Priestley (1981) for a discussion. also note that it is only strictly # unbiased when the mean is known, whereas we estimate it from samples here. - if unbiased: + if not bias: autocorr = autocorr / np.arange(N, 0.0, -1) with np.errstate(invalid="ignore", divide="ignore"): @@ -140,19 +140,20 @@ def autocorrelation(x, axis=0, unbiased=False): return np.swapaxes(autocorr, axis, -1) -def autocovariance(x, axis=0): +def autocovariance(x, axis=0, bias=True): """ Computes the autocovariance of samples at dimension ``axis``. :param numpy.ndarray x: the input array. :param int axis: the dimension to calculate autocovariance. + :param bias: whether to use a biased estimator. 
:return: autocovariance of ``x``. :rtype: numpy.ndarray """ - return autocorrelation(x, axis) * x.var(axis=axis, keepdims=True) + return autocorrelation(x, axis, bias) * x.var(axis=axis, keepdims=True) -def effective_sample_size(x): +def effective_sample_size(x, bias=True): """ Computes effective sample size of input ``x``, where the first dimension of ``x`` is chain dimension and the second dimension of ``x`` is draw dimension. @@ -165,6 +166,7 @@ def effective_sample_size(x): Stan Development Team :param numpy.ndarray x: the input array. + :param bias: whether to use a biased estimator of the autocovariance. :return: effective sample size of ``x``. :rtype: numpy.ndarray """ @@ -173,7 +175,7 @@ def effective_sample_size(x): assert x.shape[1] >= 2 # find autocovariance for each chain at lag k - gamma_k_c = autocovariance(x, axis=1) + gamma_k_c = autocovariance(x, axis=1, bias=bias) # find autocorrelation at lag k (from Stan reference) var_within, var_estimator = _compute_chain_variance_stats(x) diff --git a/test/test_diagnostics.py b/test/test_diagnostics.py index 058467dfd..e423deab4 100644 --- a/test/test_diagnostics.py +++ b/test/test_diagnostics.py @@ -60,26 +60,26 @@ def test_hpdi(): def test_autocorrelation(): x = np.arange(10.0) - actual = autocorrelation(x, unbiased=True) + actual = autocorrelation(x, bias=False) expected = np.array([1, 0.78, 0.52, 0.21, -0.13, -0.52, -0.94, -1.4, -1.91, -2.45]) assert_allclose(actual, expected, atol=0.01) - actual = autocorrelation(x, unbiased=False) + actual = autocorrelation(x, bias=True) expected = expected * np.arange(len(x), 0.0, -1) / len(x) assert_allclose(actual, expected, atol=0.01) # the unbiased estimator has variance O(1) at large lags x = np.random.normal(size=20000) - ac = autocorrelation(x, unbiased=True) - assert_(np.any(np.abs(ac[-100:]) > .1)) + ac = autocorrelation(x, bias=False) + assert_(np.any(np.abs(ac[-100:]) > 0.1)) - ac = autocorrelation(x, unbiased=False) + ac = autocorrelation(x, bias=True) 
assert_allclose(np.abs(ac[-100:]), 0.0, atol=0.01) def test_autocovariance(): x = np.arange(10.0) - actual = autocovariance(x) + actual = autocovariance(x, bias=False) expected = np.array( [8.25, 6.42, 4.25, 1.75, -1.08, -4.25, -7.75, -11.58, -15.75, -20.25] ) @@ -105,4 +105,4 @@ def test_split_gelman_rubin_agree_with_gelman_rubin(): def test_effective_sample_size(): x = np.arange(1000.0).reshape(100, 10) - assert_allclose(effective_sample_size(x), 52.64, atol=0.01) + assert_allclose(effective_sample_size(x, bias=False), 52.64, atol=0.01)