From bd4c4000430b89159f3c1e90644cbb0dcde8664e Mon Sep 17 00:00:00 2001 From: kchern Date: Wed, 18 Feb 2026 23:21:40 -0800 Subject: [PATCH 1/9] Add ESS estimators --- dimod/sampleset.py | 121 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/dimod/sampleset.py b/dimod/sampleset.py index 313c52798..65f821bb3 100644 --- a/dimod/sampleset.py +++ b/dimod/sampleset.py @@ -1949,7 +1949,7 @@ def to_pandas_dataframe(self, sample_column=False): Note that sample sets can be constructed to contain data structures incompatible with the target - `Pandas format `_. + `Pandas format `_. """ import pandas as pd @@ -1984,3 +1984,122 @@ def _as_samples_sampleset(samples_like: SampleSet, return arr, labels else: return samples_like.record.sample.astype(dtype, copy=copy), labels + + +def effective_sample_size_estimator(x: np.ndarray, b: int = None) -> float: + """Estimates the effective sample size of `x`. + + The effective sample size (ESS) is the number of effectively independent samples drawn from a + Markov chain's statioanry distribution. The univariate estimator implemented here is the + (multivariate) estimator defined in the first equation at the top of page 14 in + ``_. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + + Returns: + float: Effective sample size estimate. + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " + f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + + s_squared = x.var(1, ddof=1).mean() + # = second equation at the top of page 7 + # = average of "sample variance within series" + + tau_squared = _replicated_lugsail_batch_means_estimator(x, b) + # = equation (5) + # = nVar(xbar_i.) = total variance of the mean-within-series + + sigma_squared = (n - 1) / n * s_squared + tau_squared / n + # = first equation at the top of page 10 + # = estimate of the distribution's variance + + ess = m * n * sigma_squared / tau_squared + # = estimate of the effective sample size + # = first equation at the top of page 14 + return ess.item() + + +def _replicated_batch_means_estimator(x: np.ndarray, b: int): + """Computes the replicated batch means estimate. + + This estimator (:math:`\hat\tau^2_b`) is defined in the equation above equation (5) of + ``_. + + The estimator batches each Markov chain into batches of size ``b``, estimates the mean of each + batch, and computes the sample variance of these batched means. Please read the paper for a more + rigorous definition. + + The first few columns of ``x`` may be dropped in the estimation process in order to satisfy the + requirement that the length of the Markov chain is divisible by the batch size. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + b (int): Batch size of the replicated batch means estimator. + + Returns: + float: Replicated batch means estimate. + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " + f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + + trimmed_length = b * (n // b) + n_batches = trimmed_length / b + + x = x[:, (n - trimmed_length):] + ybar = np.mean(np.split(x, n_batches, axis=1), axis=2).mT + muhat = x.mean() + + res = ((ybar - muhat)**2).sum()*b/(n_batches*m-1) + # cleaner approximation: res ~= bs * np.var(ybar, ddof=1) + # the approximation is due to muhat being estimated from full + # data instead of ybar. + return res + + +def _replicated_lugsail_batch_means_estimator(x: np.ndarray, b: int=None) -> float: + """Computes the replicated lugsail batch means estimate. + + This estimator :math:`\hat\tau^2_L` is defined in equation (5) of + ``_. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + b (int | None): Batch size of the lugsail estimator. + + Returns: + float: Replicated lugsail estimate of the correlated sample's variance + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " + f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + b = b or round(n**0.5) + return 2 * _replicated_batch_means_estimator(x, b) - _replicated_batch_means_estimator(x, b // 3) + + +def estimate_integrated_autocorrelation_time(x: np.ndarray, b: int = None) -> float: + """Computes the integrated autocorrelation time estimate. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + b (int | None): Batch size of the lugsail estimator. + + Returns: + float: Integrated autocorrelation time estimate. + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " + f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + ess = effective_sample_size_estimator(x, b) + return m * n / ess From 4a2d252d00de76652729ac20172f4b4809c13aad Mon Sep 17 00:00:00 2001 From: kchern Date: Wed, 18 Feb 2026 23:32:25 -0800 Subject: [PATCH 2/9] Rename functions to be consistent and active --- dimod/sampleset.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dimod/sampleset.py b/dimod/sampleset.py index 65f821bb3..428d5df9e 100644 --- a/dimod/sampleset.py +++ b/dimod/sampleset.py @@ -1986,7 +1986,7 @@ def _as_samples_sampleset(samples_like: SampleSet, return samples_like.record.sample.astype(dtype, copy=copy), labels -def effective_sample_size_estimator(x: np.ndarray, b: int = None) -> float: +def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: """Estimates the effective sample size of `x`. The effective sample size (ESS) is the number of effectively independent samples drawn from a @@ -2010,7 +2010,7 @@ def effective_sample_size_estimator(x: np.ndarray, b: int = None) -> float: # = second equation at the top of page 7 # = average of "sample variance within series" - tau_squared = _replicated_lugsail_batch_means_estimator(x, b) + tau_squared = _estimate_replicated_lugsail_batch_means(x, b) # = equation (5) # = nVar(xbar_i.) = total variance of the mean-within-series @@ -2024,10 +2024,10 @@ def effective_sample_size_estimator(x: np.ndarray, b: int = None) -> float: return ess.item() -def _replicated_batch_means_estimator(x: np.ndarray, b: int): +def _estimate_replicated_batch_means(x: np.ndarray, b: int): """Computes the replicated batch means estimate. - This estimator (:math:`\hat\tau^2_b`) is defined in the equation above equation (5) of + This estimator (:math:`\\hat\\tau^2_b`) is defined in the equation above equation (5) of ``_. The estimator batches each Markov chain into batches of size ``b``, estimates the mean of each @@ -2064,10 +2064,10 @@ def _replicated_batch_means_estimator(x: np.ndarray, b: int): return res -def _replicated_lugsail_batch_means_estimator(x: np.ndarray, b: int=None) -> float: +def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int=None) -> float: """Computes the replicated lugsail batch means estimate. - This estimator :math:`\hat\tau^2_L` is defined in equation (5) of + This estimator :math:`\\hat\\tau^2_L` is defined in equation (5) of ``_. Args: @@ -2083,7 +2083,7 @@ def _replicated_lugsail_batch_means_estimator(x: np.ndarray, b: int=None) -> flo f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") m, n = x.shape b = b or round(n**0.5) - return 2 * _replicated_batch_means_estimator(x, b) - _replicated_batch_means_estimator(x, b // 3) + return 2 * _estimate_replicated_batch_means(x, b) - _estimate_replicated_batch_means(x, b // 3) def estimate_integrated_autocorrelation_time(x: np.ndarray, b: int = None) -> float: @@ -2101,5 +2101,9 @@ def estimate_integrated_autocorrelation_time(x: np.ndarray, b: int = None) -> fl raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") m, n = x.shape - ess = effective_sample_size_estimator(x, b) + ess = estimate_effective_sample_size(x, b) return m * n / ess + +print("waaaaaat0") +print(estimate_effective_sample_size(np.random.uniform(0, 1, (100, 123)))) +print("waaaaaat") \ No newline at end of file From 8c102e4bfab3f109e1e29555ceb2bf535b1a1b48 Mon Sep 17 00:00:00 2001 From: kchern Date: Sun, 22 Feb 2026 14:54:05 -0800 Subject: [PATCH 3/9] Move ess out of sampleset.py --- dimod/ess.py | 123 ++++++++++++++++++++++++++++++++++++++++++++ dimod/sampleset.py | 125 +-------------------------------------------- 2 files changed, 124 insertions(+), 124 deletions(-) create mode 100755 dimod/ess.py diff --git a/dimod/ess.py b/dimod/ess.py new file mode 100755 index 000000000..e4147a2e3 --- /dev/null +++ b/dimod/ess.py @@ -0,0 +1,123 @@ +# Copyright 2026 D-Wave Quantum Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import numpy as np + +__all__ = ['estimate_effective_sample_size'] + + +def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: + """Estimates the effective sample size of `x`. + + The effective sample size (ESS) is the number of effectively independent samples drawn from + Markov chains' stationary distribution. The univariate estimator implemented here is the + (multivariate) estimator defined in the first equation at the top of page 14 in + ``_. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + + Returns: + float: An estimate of the effective sample size of ``x``. + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " + f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + + s_squared = x.var(1, ddof=1).mean() + # = second equation at the top of page 7 + # = average of "sample variance within series" + + tau_squared = _estimate_replicated_lugsail_batch_means(x, b) + # = equation (5) + # = nVar(xbar_i.) = total variance of the mean-within-series + + sigma_squared = (n - 1) / n * s_squared + tau_squared / n + # = first equation at the top of page 10 + # = estimate of the distribution's variance + + ess = m * n * sigma_squared / tau_squared + # = estimate of the effective sample size + # = first equation at the top of page 14 + return ess.item() + + +def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: + """Computes the replicated batch means estimate. + + This estimator (:math:`\\hat\\tau^2_b`) is defined in the equation above equation (5) of + ``_. + + The estimator batches each Markov chain into batches of size ``b``, estimates the mean of each + batch, and computes the sample variance of these batched means. + + The first few columns of ``x`` may be dropped in the estimation process in order to satisfy the + requirement that the length of the Markov chain is divisible by the batch size. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + b (int): Batch size of the replicated batch means estimator. + + Returns: + float: Replicated batch means estimate. + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " + f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + + trimmed_length = b * (n // b) + n_batches = trimmed_length / b + + x = x[:, (n - trimmed_length):] + ybar = np.mean(np.split(x, n_batches, axis=1), axis=2).mT + muhat = x.mean() + + res = ((ybar - muhat)**2).sum()*b/(n_batches*m-1) + # cleaner approximation: res ~= bs * np.var(ybar, ddof=1) + # the approximation is due to muhat being estimated from full + # data instead of ybar. + return res + + +def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int = None) -> float: + """Computes the replicated lugsail batch means estimate. + + This estimator :math:`\\hat\\tau^2_L` is defined in equation (5) of + ``_. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + b (int | None): Batch size of the lugsail estimator. + + Returns: + float: Replicated lugsail estimate of the correlated sample's variance + """ + if x.ndim != 2: + raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " + f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") + m, n = x.shape + b = b or round(n**0.5) + return 2 * _estimate_replicated_batch_means(x, b) - _estimate_replicated_batch_means(x, b // 3) + + +print("waaaaaat0") +print(estimate_effective_sample_size(np.random.uniform(0, 1, (100, 123)))) +print("waaaaaat") diff --git a/dimod/sampleset.py b/dimod/sampleset.py index 428d5df9e..313c52798 100644 --- a/dimod/sampleset.py +++ b/dimod/sampleset.py @@ -1949,7 +1949,7 @@ def to_pandas_dataframe(self, sample_column=False): Note that sample sets can be constructed to contain data structures incompatible with the target - `Pandas format `_. + `Pandas format `_. """ import pandas as pd @@ -1984,126 +1984,3 @@ def _as_samples_sampleset(samples_like: SampleSet, return arr, labels else: return samples_like.record.sample.astype(dtype, copy=copy), labels - - -def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: - """Estimates the effective sample size of `x`. - - The effective sample size (ESS) is the number of effectively independent samples drawn from a - Markov chain's statioanry distribution. The univariate estimator implemented here is the - (multivariate) estimator defined in the first equation at the top of page 14 in - ``_. - - Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - - Returns: - float: Effective sample size estimate. - """ - if x.ndim != 2: - raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " - f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") - m, n = x.shape - - s_squared = x.var(1, ddof=1).mean() - # = second equation at the top of page 7 - # = average of "sample variance within series" - - tau_squared = _estimate_replicated_lugsail_batch_means(x, b) - # = equation (5) - # = nVar(xbar_i.) = total variance of the mean-within-series - - sigma_squared = (n - 1) / n * s_squared + tau_squared / n - # = first equation at the top of page 10 - # = estimate of the distribution's variance - - ess = m * n * sigma_squared / tau_squared - # = estimate of the effective sample size - # = first equation at the top of page 14 - return ess.item() - - -def _estimate_replicated_batch_means(x: np.ndarray, b: int): - """Computes the replicated batch means estimate. - - This estimator (:math:`\\hat\\tau^2_b`) is defined in the equation above equation (5) of - ``_. - - The estimator batches each Markov chain into batches of size ``b``, estimates the mean of each - batch, and computes the sample variance of these batched means. Please read the paper for a more - rigorous definition. - - The first few columns of ``x`` may be dropped in the estimation process in order to satisfy the - requirement that the length of the Markov chain is divisible by the batch size. - - Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - b (int): Batch size of the replicated batch means estimator. - - Returns: - float: Replicated batch means estimate. - """ - if x.ndim != 2: - raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " - f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") - m, n = x.shape - - trimmed_length = b * (n // b) - n_batches = trimmed_length / b - - x = x[:, (n - trimmed_length):] - ybar = np.mean(np.split(x, n_batches, axis=1), axis=2).mT - muhat = x.mean() - - res = ((ybar - muhat)**2).sum()*b/(n_batches*m-1) - # cleaner approximation: res ~= bs * np.var(ybar, ddof=1) - # the approximation is due to muhat being estimated from full - # data instead of ybar. - return res - - -def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int=None) -> float: - """Computes the replicated lugsail batch means estimate. - - This estimator :math:`\\hat\\tau^2_L` is defined in equation (5) of - ``_. - - Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - b (int | None): Batch size of the lugsail estimator. - - Returns: - float: Replicated lugsail estimate of the correlated sample's variance - """ - if x.ndim != 2: - raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " - f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") - m, n = x.shape - b = b or round(n**0.5) - return 2 * _estimate_replicated_batch_means(x, b) - _estimate_replicated_batch_means(x, b // 3) - - -def estimate_integrated_autocorrelation_time(x: np.ndarray, b: int = None) -> float: - """Computes the integrated autocorrelation time estimate. - - Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - b (int | None): Batch size of the lugsail estimator. - - Returns: - float: Integrated autocorrelation time estimate. - """ - if x.ndim != 2: - raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes independent " - f"Markov chains, and n indexes time. ``x`` has shape {x.shape}") - m, n = x.shape - ess = estimate_effective_sample_size(x, b) - return m * n / ess - -print("waaaaaat0") -print(estimate_effective_sample_size(np.random.uniform(0, 1, (100, 123)))) -print("waaaaaat") \ No newline at end of file From 0919227b5736e3f15846a7301f90b584a87968f8 Mon Sep 17 00:00:00 2001 From: kchern Date: Sun, 22 Feb 2026 16:04:54 -0800 Subject: [PATCH 4/9] Add unit tests for estimating ESS --- dimod/ess.py | 68 +++++++++++++++++++++++------------------------ tests/test_ess.py | 58 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 34 deletions(-) create mode 100755 tests/test_ess.py diff --git a/dimod/ess.py b/dimod/ess.py index e4147a2e3..de94d5468 100755 --- a/dimod/ess.py +++ b/dimod/ess.py @@ -29,11 +29,17 @@ def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: Args: x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. + time steps. + b (int): Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the + square root of ``n``. Defaults to None. Returns: float: An estimate of the effective sample size of ``x``. """ + if isinstance(b, int) and b < 3: + raise ValueError( + f"Batch size should be at least three. Batch size is {b}." + ) if x.ndim != 2: raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") @@ -47,7 +53,7 @@ def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: # = equation (5) # = nVar(xbar_i.) = total variance of the mean-within-series - sigma_squared = (n - 1) / n * s_squared + tau_squared / n + sigma_squared = ((n - 1) / n) * s_squared + tau_squared / n # = first equation at the top of page 10 # = estimate of the distribution's variance @@ -57,6 +63,26 @@ def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: return ess.item() +def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int = None) -> float: + """Computes the replicated lugsail batch means estimate. + + This estimator :math:`\\hat\\tau^2_L` is defined in equation (5) of + ``_. + + Args: + x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + time steps. + b (int): Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the + square root of ``n``. Defaults to None. + + Returns: + float: Replicated lugsail estimate of the correlated sample's variance. + """ + m, n = x.shape + b = b or round(n**0.5) + return 2 * _estimate_replicated_batch_means(x, b) - _estimate_replicated_batch_means(x, b // 3) + + def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: """Computes the replicated batch means estimate. @@ -71,16 +97,17 @@ def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: Args: x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - b (int): Batch size of the replicated batch means estimator. + time steps. + b (int): Batch size of the estimator. Returns: float: Replicated batch means estimate. """ - if x.ndim != 2: - raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " - f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") m, n = x.shape + if b > n: + raise ValueError( + f"Batch size should be no more than ``n``. Batch size is {b} and ``n`` is {n}." + ) trimmed_length = b * (n // b) n_batches = trimmed_length / b @@ -94,30 +121,3 @@ def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: # the approximation is due to muhat being estimated from full # data instead of ybar. return res - - -def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int = None) -> float: - """Computes the replicated lugsail batch means estimate. - - This estimator :math:`\\hat\\tau^2_L` is defined in equation (5) of - ``_. - - Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - b (int | None): Batch size of the lugsail estimator. - - Returns: - float: Replicated lugsail estimate of the correlated sample's variance - """ - if x.ndim != 2: - raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " - f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") - m, n = x.shape - b = b or round(n**0.5) - return 2 * _estimate_replicated_batch_means(x, b) - _estimate_replicated_batch_means(x, b // 3) - - -print("waaaaaat0") -print(estimate_effective_sample_size(np.random.uniform(0, 1, (100, 123)))) -print("waaaaaat") diff --git a/tests/test_ess.py b/tests/test_ess.py new file mode 100755 index 000000000..b2a46818a --- /dev/null +++ b/tests/test_ess.py @@ -0,0 +1,58 @@ +# Copyright 2026 D-Wave Quantum Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test estimating effective sample size.""" +import unittest +from math import isnan + +import numpy as np +from parameterized import parameterized + +from dimod.ess import _estimate_replicated_lugsail_batch_means +from dimod.ess import estimate_effective_sample_size as estimate_ess + + +class TestEffectiveSampleSize(unittest.TestCase): + + def test_estimate_ess(self): + with self.subTest("ESS estimate should be undefined for constant input"): + self.assertTrue(isnan(estimate_ess(np.ones((100, 1000))))) + with self.subTest("Null batch size should raise an error."): + self.assertRaisesRegex(ValueError, "Batch size should be at least three", + estimate_ess, np.ones((100, 1000)), 0) + with self.subTest("Batch size larger than chain length should raise an error."): + self.assertRaisesRegex(ValueError, "Batch size should be no more than ``n``", + estimate_ess, np.ones((100, 1000)), 1001) + with self.subTest("Inputs that are not 2D should raise an error."): + self.assertRaisesRegex(ValueError, "The input matrix ``x`` should have shape", + estimate_ess, np.ones((123, 100, 1000)), 234) + + with self.subTest("Single-batch estimates are incorrect."): + x = np.array([[0, 1, 2], + [0, 2, 4]]) + s_squared = (np.var([0, 1, 2], ddof=1) + np.var([0, 2, 4], ddof=1))/2 + tau_squared = _estimate_replicated_lugsail_batch_means(x, 3) + sigma_squared = 2/3*s_squared + tau_squared/3 + answer = 2*3*sigma_squared/tau_squared + self.assertAlmostEqual(answer, estimate_ess(x, 3)) + + with self.subTest("Two-batch estimatse are incorrect."): + x = np.array([[999, 0, 1, 2, 3, 6, 7], + [999, 0, 2, 4, 4, 6, 7]]) + s_squared = (np.var([999, 0, 1, 2, 3, 6, 7], ddof=1) + + np.var([999, 0, 2, 4, 4, 6, 7], ddof=1))/2 + tau_squared = _estimate_replicated_lugsail_batch_means(x, 3) + sigma_squared = 6/7*s_squared + tau_squared/7 + answer = 2*7*sigma_squared/tau_squared + self.assertAlmostEqual(answer, estimate_ess(x, 3)) From 04f46ebd14d68084e0d0378a2ac377b261505256 Mon Sep 17 00:00:00 2001 From: kchern Date: Mon, 23 Feb 2026 10:06:52 -0800 Subject: [PATCH 5/9] Add release notes --- dimod/ess.py | 2 +- releasenotes/notes/ess-f32b7e8d5a4ad67a.yaml | 5 +++++ tests/test_ess.py | 3 +-- 3 files changed, 7 insertions(+), 3 deletions(-) create mode 100755 releasenotes/notes/ess-f32b7e8d5a4ad67a.yaml diff --git a/dimod/ess.py b/dimod/ess.py index de94d5468..ee1cbeff8 100755 --- a/dimod/ess.py +++ b/dimod/ess.py @@ -66,7 +66,7 @@ def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int = None) -> float: """Computes the replicated lugsail batch means estimate. - This estimator :math:`\\hat\\tau^2_L` is defined in equation (5) of + This estimator :math:`\hat{\tau}^2_L` is defined in equation (5) of ``_. Args: diff --git a/releasenotes/notes/ess-f32b7e8d5a4ad67a.yaml b/releasenotes/notes/ess-f32b7e8d5a4ad67a.yaml new file mode 100755 index 000000000..a9d23344c --- /dev/null +++ b/releasenotes/notes/ess-f32b7e8d5a4ad67a.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Add estimator for effective sample size based on + ``_. \ No newline at end of file diff --git a/tests/test_ess.py b/tests/test_ess.py index b2a46818a..d2dc8c4cb 100755 --- a/tests/test_ess.py +++ b/tests/test_ess.py @@ -12,12 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Test estimating effective sample size.""" +"""Test effective sample size estimation.""" import unittest from math import isnan import numpy as np -from parameterized import parameterized from dimod.ess import _estimate_replicated_lugsail_batch_means from dimod.ess import estimate_effective_sample_size as estimate_ess From 151ed60ebaefe7447b8c62b31404d73247bae7d8 Mon Sep 17 00:00:00 2001 From: kchern Date: Mon, 23 Feb 2026 19:19:53 -0800 Subject: [PATCH 6/9] Fix aesthetics and type-hints --- dimod/ess.py | 26 +++++++++++++------------- tests/test_ess.py | 3 +++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dimod/ess.py b/dimod/ess.py index ee1cbeff8..289438683 100755 --- a/dimod/ess.py +++ b/dimod/ess.py @@ -1,4 +1,4 @@ -# Copyright 2026 D-Wave Quantum Inc. +# Copyright 2026 D-Wave # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,8 +19,8 @@ __all__ = ['estimate_effective_sample_size'] -def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: - """Estimates the effective sample size of `x`. +def estimate_effective_sample_size(x: np.ndarray, b: int | None = None) -> float: + """Estimates the effective sample size of ``x``. The effective sample size (ESS) is the number of effectively independent samples drawn from Markov chains' stationary distribution. The univariate estimator implemented here is the @@ -28,9 +28,9 @@ def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: ``_. Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + x: An (m, n) matrix where rows index independent Markov chains and columns index time steps. - b (int): Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the + b: Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the square root of ``n``. Defaults to None. Returns: @@ -63,16 +63,16 @@ def estimate_effective_sample_size(x: np.ndarray, b: int = None) -> float: return ess.item() -def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int = None) -> float: +def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int | None = None) -> float: """Computes the replicated lugsail batch means estimate. This estimator :math:`\hat{\tau}^2_L` is defined in equation (5) of ``_. Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + x: An (m, n) matrix where rows index independent Markov chains and columns index time steps. - b (int): Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the + b: Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the square root of ``n``. Defaults to None. Returns: @@ -96,9 +96,9 @@ def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: requirement that the length of the Markov chain is divisible by the batch size. Args: - x (np.array): An (m, n) matrix where rows index independent Markov chains and columns index + x: An (m, n) matrix where rows index independent Markov chains and columns index time steps. - b (int): Batch size of the estimator. + b: Batch size of the estimator. Returns: float: Replicated batch means estimate. @@ -109,14 +109,14 @@ def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: f"Batch size should be no more than ``n``. Batch size is {b} and ``n`` is {n}." ) - trimmed_length = b * (n // b) - n_batches = trimmed_length / b + n_batches = n // b + trimmed_length = b * n_batches x = x[:, (n - trimmed_length):] ybar = np.mean(np.split(x, n_batches, axis=1), axis=2).mT muhat = x.mean() - res = ((ybar - muhat)**2).sum()*b/(n_batches*m-1) + res = ((ybar - muhat) ** 2).sum() * b / (n_batches * m - 1) # cleaner approximation: res ~= bs * np.var(ybar, ddof=1) # the approximation is due to muhat being estimated from full # data instead of ybar. diff --git a/tests/test_ess.py b/tests/test_ess.py index d2dc8c4cb..31ea22ae2 100755 --- a/tests/test_ess.py +++ b/tests/test_ess.py @@ -27,12 +27,15 @@ class TestEffectiveSampleSize(unittest.TestCase): def test_estimate_ess(self): with self.subTest("ESS estimate should be undefined for constant input"): self.assertTrue(isnan(estimate_ess(np.ones((100, 1000))))) + with self.subTest("Null batch size should raise an error."): self.assertRaisesRegex(ValueError, "Batch size should be at least three", estimate_ess, np.ones((100, 1000)), 0) + with self.subTest("Batch size larger than chain length should raise an error."): self.assertRaisesRegex(ValueError, "Batch size should be no more than ``n``", estimate_ess, np.ones((100, 1000)), 1001) + with self.subTest("Inputs that are not 2D should raise an error."): self.assertRaisesRegex(ValueError, "The input matrix ``x`` should have shape", estimate_ess, np.ones((123, 100, 1000)), 234) From 2cbdc9bb21bc7479cc1cabcdb578412e35ad3238 Mon Sep 17 00:00:00 2001 From: kchern Date: Tue, 24 Feb 2026 12:56:32 -0800 Subject: [PATCH 7/9] Validate parameter in public function --- dimod/ess.py | 55 +++++++++++++++++------------------------------ tests/test_ess.py | 14 ++++++++---- 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/dimod/ess.py b/dimod/ess.py index 289438683..d31948a14 100755 --- a/dimod/ess.py +++ b/dimod/ess.py @@ -14,6 +14,8 @@ from __future__ import annotations +from math import floor + import numpy as np __all__ = ['estimate_effective_sample_size'] @@ -36,20 +38,27 @@ def estimate_effective_sample_size(x: np.ndarray, b: int | None = None) -> float Returns: float: An estimate of the effective sample size of ``x``. """ - if isinstance(b, int) and b < 3: - raise ValueError( - f"Batch size should be at least three. Batch size is {b}." - ) if x.ndim != 2: raise ValueError("The input matrix ``x`` should have shape (m, n) where m indexes " f"independent Markov chains and n indexes time. ``x`` has shape {x.shape}") m, n = x.shape + if b is None: + b = int(floor(n**0.5)) + if b > n or b < 3: + raise ValueError( + f"Batch size should be at least three but no more than ``n``. Batch size is {b} and " + f"``n`` is {n}. If size was not given, it defaults to the floor of square-root of ``n`` " + f"(n={n})." + ) s_squared = x.var(1, ddof=1).mean() # = second equation at the top of page 7 # = average of "sample variance within series" - tau_squared = _estimate_replicated_lugsail_batch_means(x, b) + # This estimator $\hat{\tau}^2_L$ is defined in equation (5) of + # Revisiting the Gelman-Rubin Diagnostic (https://arxiv.org/abs/1812.09384) + tau_squared = (2 * _estimate_replicated_batch_means(x, b) + - _estimate_replicated_batch_means(x, b // 3)) # = equation (5) # = nVar(xbar_i.) = total variance of the mean-within-series @@ -63,26 +72,6 @@ def estimate_effective_sample_size(x: np.ndarray, b: int | None = None) -> float return ess.item() -def _estimate_replicated_lugsail_batch_means(x: np.ndarray, b: int | None = None) -> float: - """Computes the replicated lugsail batch means estimate. - - This estimator :math:`\hat{\tau}^2_L` is defined in equation (5) of - ``_. - - Args: - x: An (m, n) matrix where rows index independent Markov chains and columns index - time steps. - b: Batch size of the estimator. If ``None``, then ``b`` is set to the floor of the - square root of ``n``. Defaults to None. - - Returns: - float: Replicated lugsail estimate of the correlated sample's variance. - """ - m, n = x.shape - b = b or round(n**0.5) - return 2 * _estimate_replicated_batch_means(x, b) - _estimate_replicated_batch_means(x, b // 3) - - def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: """Computes the replicated batch means estimate. @@ -103,21 +92,17 @@ def _estimate_replicated_batch_means(x: np.ndarray, b: int) -> float: Returns: float: Replicated batch means estimate. """ - m, n = x.shape - if b > n: - raise ValueError( - f"Batch size should be no more than ``n``. Batch size is {b} and ``n`` is {n}." - ) + n = x.shape[1] n_batches = n // b trimmed_length = b * n_batches x = x[:, (n - trimmed_length):] ybar = np.mean(np.split(x, n_batches, axis=1), axis=2).mT - muhat = x.mean() - res = ((ybar - muhat) ** 2).sum() * b / (n_batches * m - 1) - # cleaner approximation: res ~= bs * np.var(ybar, ddof=1) - # the approximation is due to muhat being estimated from full - # data instead of ybar. + res = b*np.var(ybar, ddof=1) + # NOTE: this is equivalent to + # res = ((ybar - muhat) ** 2).sum() * b / (n_batches * m - 1) + # where + # muhat = x.mean() return res diff --git a/tests/test_ess.py b/tests/test_ess.py index 31ea22ae2..003347e51 100755 --- a/tests/test_ess.py +++ b/tests/test_ess.py @@ -18,7 +18,7 @@ import numpy as np -from dimod.ess import _estimate_replicated_lugsail_batch_means +from dimod.ess import _estimate_replicated_batch_means from dimod.ess import estimate_effective_sample_size as estimate_ess @@ -28,12 +28,16 @@ def test_estimate_ess(self): with self.subTest("ESS estimate should be undefined for constant input"): self.assertTrue(isnan(estimate_ess(np.ones((100, 1000))))) + with self.subTest("Null batch size should raise an error."): + self.assertRaisesRegex(ValueError, "Batch size should be at least three", + estimate_ess, np.ones((100, 8))) + with self.subTest("Null batch size should raise an error."): self.assertRaisesRegex(ValueError, "Batch size should be at least three", estimate_ess, np.ones((100, 1000)), 0) with self.subTest("Batch size larger than chain length should raise an error."): - self.assertRaisesRegex(ValueError, "Batch size should be no more than ``n``", + self.assertRaisesRegex(ValueError, "Batch size should be at least three", estimate_ess, np.ones((100, 1000)), 1001) with self.subTest("Inputs that are not 2D should raise an error."): @@ -44,7 +48,8 @@ def test_estimate_ess(self): x = np.array([[0, 1, 2], [0, 2, 4]]) s_squared = (np.var([0, 1, 2], ddof=1) + np.var([0, 2, 4], ddof=1))/2 - tau_squared = _estimate_replicated_lugsail_batch_means(x, 3) + tau_squared = (2 * _estimate_replicated_batch_means(x, 3) + - _estimate_replicated_batch_means(x, 1)) sigma_squared = 2/3*s_squared + tau_squared/3 answer = 2*3*sigma_squared/tau_squared self.assertAlmostEqual(answer, estimate_ess(x, 3)) @@ -54,7 +59,8 @@ def test_estimate_ess(self): [999, 0, 2, 4, 4, 6, 7]]) s_squared = (np.var([999, 0, 1, 2, 3, 6, 7], ddof=1) + np.var([999, 0, 2, 4, 4, 6, 7], ddof=1))/2 - tau_squared = _estimate_replicated_lugsail_batch_means(x, 3) + tau_squared = (2 * _estimate_replicated_batch_means(x, 3) + - _estimate_replicated_batch_means(x, 1)) sigma_squared = 6/7*s_squared + tau_squared/7 answer = 2*7*sigma_squared/tau_squared self.assertAlmostEqual(answer, estimate_ess(x, 3)) From 4f1ce5e7383ca53e5463ac5ef53ecb9f3a3354a3 Mon Sep 17 00:00:00 2001 From: kchern Date: Tue, 24 Feb 2026 12:59:54 -0800 Subject: [PATCH 8/9] Improve error message --- dimod/ess.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dimod/ess.py b/dimod/ess.py index d31948a14..92ab3d3ee 100755 --- a/dimod/ess.py +++ b/dimod/ess.py @@ -46,9 +46,9 @@ def estimate_effective_sample_size(x: np.ndarray, b: int | None = None) -> float b = int(floor(n**0.5)) if b > n or b < 3: raise ValueError( - f"Batch size should be at least three but no more than ``n``. Batch size is {b} and " - f"``n`` is {n}. If size was not given, it defaults to the floor of square-root of ``n`` " - f"(n={n})." + f"Batch size should be at least three but no more than the chain length of the Markov " + f"chain. Batch size is {b} and chain length is {n}. If size was not given, it defaults" + f"to the floor of square-root of the chain length." ) s_squared = x.var(1, ddof=1).mean() From fb04a9753f641f61da7cf48498d18c12940c2617 Mon Sep 17 00:00:00 2001 From: Kevin Chern <32395608+kevinchern@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:21:10 -0800 Subject: [PATCH 9/9] Fix copyright header Co-authored-by: Radomir Stevanovic --- tests/test_ess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_ess.py b/tests/test_ess.py index 003347e51..032754263 100755 --- a/tests/test_ess.py +++ b/tests/test_ess.py @@ -1,4 +1,4 @@ -# Copyright 2026 D-Wave Quantum Inc. +# Copyright 2026 D-Wave # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.