Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions skopt/acquisition.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,12 @@ def gaussian_pi(X, model, y_opt=0.0, xi=0.01, return_grad=False):
if not np.all(mask):
return values, np.zeros_like(std_grad)

# In extreme tails, norm.pdf(scaled) underflows to 0 while improve_grad
# can overflow, leading to NaNs via inf * 0. In this regime, the true
# gradient tends to 0, so we return a zero gradient.
if np.any(np.abs(scaled) > 35.0):
return values, np.zeros_like(std_grad)

# Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
# improve_grad is the gradient of t wrt x.
improve_grad = -mu_grad * std - std_grad * improve
Expand Down Expand Up @@ -321,6 +327,12 @@ def gaussian_ei(X, model, y_opt=0.0, xi=0.01, return_grad=False):
if not np.all(mask):
return values, np.zeros_like(std_grad)

# In extreme tails, norm.pdf(scaled) underflows to 0 while improve_grad
# can overflow, leading to NaNs via inf * 0. In this regime, the true
# gradient tends to 0, so we return a zero gradient.
if np.any(np.abs(scaled) > 35.0):
return values, np.zeros_like(std_grad)

# Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
# improve_grad is the gradient of t wrt x.
improve_grad = -mu_grad * std - std_grad * improve
Expand Down
33 changes: 33 additions & 0 deletions skopt/tests/test_acquisition.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@ def predict(self, X, return_std=True):
return np.zeros(X.shape[0]), np.ones(X.shape[0])


class TinyStdSurrogateWithGrad:
    """Minimal surrogate to exercise EI/PI numerical stability.

    For extremely small but positive std, the EI/PI gradient expressions can
    overflow/underflow and produce NaNs (inf * 0). We assert gradients remain
    finite.
    """

    def predict(self, X, return_std=True, return_mean_grad=False, return_std_grad=False):
        """Predict a constant mean of 0 and a tiny positive std for each row of X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Candidate points; only the shape is used.
        return_std : bool, default=True
            If True, return the std alongside the mean (sklearn-style flag).
        return_mean_grad, return_std_grad : bool, default=False
            If both are True, additionally return flat gradients of the mean
            (all zeros) and of the std (all ones) with respect to X.

        Returns
        -------
        mu : ndarray of shape (n_samples,)
        std : ndarray of shape (n_samples,), only if ``return_std`` is True
        mu_grad, std_grad : ndarray of shape (n_samples, n_features),
            only if both gradient flags are True
        """
        X = np.asarray(X)
        mu = np.zeros(X.shape[0])
        # Tiny positive std pushes (y_opt - mu) / std into the extreme normal
        # tail, where norm.pdf underflows to 0 while the gradient terms blow up.
        std = np.full(X.shape[0], 1e-160)

        if return_mean_grad and return_std_grad:
            # Zero mean-gradient with nonzero std-gradient is exactly the
            # combination that produces the problematic inf * 0 product.
            mu_grad = np.zeros((X.shape[0], X.shape[1]))
            std_grad = np.ones((X.shape[0], X.shape[1]))
            return mu, std, mu_grad, std_grad

        # Fix: previously the std was returned even when return_std=False;
        # honor the flag for consistency with the sklearn predict convention.
        if not return_std:
            return mu

        return mu, std


# This is used to test that given constant acquisition values at
# different points, acquisition functions "EIps" and "PIps"
# prefer candidate points that take lesser time.
Expand Down Expand Up @@ -94,6 +115,18 @@ def test_acquisition_api():
assert_raises(ValueError, method, rng.rand(10), gpr)


@pytest.mark.fast_test
@pytest.mark.parametrize("acq", [gaussian_pi, gaussian_ei])
def test_ei_pi_gradients_are_finite_for_tiny_positive_std(acq):
    """EI/PI values and gradients must stay finite for a tiny positive std."""
    surrogate = TinyStdSurrogateWithGrad()
    x_cand = np.zeros((1, 1))

    acq_values, acq_grad = acq(x_cand, surrogate, y_opt=1.0, xi=0.0, return_grad=True)

    assert np.all(np.isfinite(acq_values))
    assert np.all(np.isfinite(acq_grad))


def check_gradient_correctness(X_new, model, acq_func, y_opt):
def num_grad_func(x):
return gaussian_acquisition_1D(x, model, y_opt, acq_func=acq_func)[0]
Expand Down