diff --git a/skopt/acquisition.py b/skopt/acquisition.py
index 065dd7e..544591d 100644
--- a/skopt/acquisition.py
+++ b/skopt/acquisition.py
@@ -234,6 +234,12 @@ def gaussian_pi(X, model, y_opt=0.0, xi=0.01, return_grad=False):
         if not np.all(mask):
             return values, np.zeros_like(std_grad)
 
+        # In extreme tails, norm.pdf(scaled) underflows to 0 while improve_grad
+        # can overflow, leading to NaNs via inf * 0. In this regime, the true
+        # gradient tends to 0, so we return a zero gradient.
+        if np.any(np.abs(scaled) > 35.0):
+            return values, np.zeros_like(std_grad)
+
         # Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
         # improve_grad is the gradient of t wrt x.
         improve_grad = -mu_grad * std - std_grad * improve
@@ -321,6 +327,12 @@ def gaussian_ei(X, model, y_opt=0.0, xi=0.01, return_grad=False):
         if not np.all(mask):
             return values, np.zeros_like(std_grad)
 
+        # In extreme tails, norm.pdf(scaled) underflows to 0 while improve_grad
+        # can overflow, leading to NaNs via inf * 0. In this regime, the true
+        # gradient tends to 0, so we return a zero gradient.
+        if np.any(np.abs(scaled) > 35.0):
+            return values, np.zeros_like(std_grad)
+
         # Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
         # improve_grad is the gradient of t wrt x.
         improve_grad = -mu_grad * std - std_grad * improve
diff --git a/skopt/tests/test_acquisition.py b/skopt/tests/test_acquisition.py
index 4902d53..42ca727 100644
--- a/skopt/tests/test_acquisition.py
+++ b/skopt/tests/test_acquisition.py
@@ -23,6 +23,27 @@ def predict(self, X, return_std=True):
         return np.zeros(X.shape[0]), np.ones(X.shape[0])
 
 
+class TinyStdSurrogateWithGrad:
+    """Minimal surrogate to exercise EI/PI numerical stability.
+
+    For extremely small but positive std, the EI/PI gradient expressions can
+    overflow/underflow and produce NaNs (inf * 0). We assert gradients remain
+    finite.
+    """
+
+    def predict(self, X, return_std=True, return_mean_grad=False, return_std_grad=False):
+        X = np.asarray(X)
+        mu = np.zeros(X.shape[0])
+        std = np.full(X.shape[0], 1e-160)
+
+        if return_mean_grad and return_std_grad:
+            mu_grad = np.zeros((X.shape[0], X.shape[1]))
+            std_grad = np.ones((X.shape[0], X.shape[1]))
+            return mu, std, mu_grad, std_grad
+
+        return mu, std
+
+
 # This is used to test that given constant acquisition values at
 # different points, acquisition functions "EIps" and "PIps"
 # prefer candidate points that take lesser time.
@@ -94,6 +115,18 @@ def test_acquisition_api():
         assert_raises(ValueError, method, rng.rand(10), gpr)
 
 
+@pytest.mark.fast_test
+@pytest.mark.parametrize("acq", [gaussian_pi, gaussian_ei])
+def test_ei_pi_gradients_are_finite_for_tiny_positive_std(acq):
+    X = np.zeros((1, 1))
+    model = TinyStdSurrogateWithGrad()
+
+    values, grad = acq(X, model, y_opt=1.0, xi=0.0, return_grad=True)
+
+    assert np.isfinite(values).all()
+    assert np.isfinite(grad).all()
+
+
 def check_gradient_correctness(X_new, model, acq_func, y_opt):
     def num_grad_func(x):
         return gaussian_acquisition_1D(x, model, y_opt, acq_func=acq_func)[0]