Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions skopt/acquisition.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,12 @@ def gaussian_pi(X, model, y_opt=0.0, xi=0.01, return_grad=False):
if not np.all(mask):
return values, np.zeros_like(std_grad)

# In extreme tails, norm.pdf(scaled) underflows to 0 while improve_grad
# can overflow, leading to NaNs via inf * 0. In this regime, the true
# gradient tends to 0, so we return a zero gradient.
if np.any(np.abs(scaled) > 35.0):
return values, np.zeros_like(std_grad)

# Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
# improve_grad is the gradient of t wrt x.
improve_grad = -mu_grad * std - std_grad * improve
Expand Down Expand Up @@ -321,6 +327,12 @@ def gaussian_ei(X, model, y_opt=0.0, xi=0.01, return_grad=False):
if not np.all(mask):
return values, np.zeros_like(std_grad)

# In extreme tails, norm.pdf(scaled) underflows to 0 while improve_grad
# can overflow, leading to NaNs via inf * 0. In this regime, the true
# gradient tends to 0, so we return a zero gradient.
if np.any(np.abs(scaled) > 35.0):
return values, np.zeros_like(std_grad)

# Substitute (y_opt - xi - mu) / sigma = t and apply chain rule.
# improve_grad is the gradient of t wrt x.
improve_grad = -mu_grad * std - std_grad * improve
Expand Down
33 changes: 33 additions & 0 deletions skopt/tests/test_acquisition.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@ def predict(self, X, return_std=True):
return np.zeros(X.shape[0]), np.ones(X.shape[0])


class TinyStdSurrogateWithGrad:
    """Minimal surrogate to exercise EI/PI numerical stability.

    For extremely small but positive std, the EI/PI gradient expressions can
    overflow/underflow and produce NaNs (inf * 0). We assert gradients remain
    finite.
    """

    def predict(self, X, return_std=True, return_mean_grad=False, return_std_grad=False):
        """Predict a constant mean of 0 and a tiny positive std for each row of X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Candidate points; only the shape is used.
        return_std : bool, default=True
            If True, return the std alongside the mean (sklearn-style flag).
        return_mean_grad, return_std_grad : bool, default=False
            If both are True, additionally return flat gradients of the mean
            (all zeros) and of the std (all ones) with respect to X.

        Returns
        -------
        mu : ndarray of shape (n_samples,)
        std : ndarray of shape (n_samples,), only if ``return_std`` is True
        mu_grad, std_grad : ndarray of shape (n_samples, n_features),
            only if both gradient flags are True
        """
        X = np.asarray(X)
        mu = np.zeros(X.shape[0])
        # Tiny positive std pushes (y_opt - mu) / std into the extreme normal
        # tail, where norm.pdf underflows to 0 while the gradient terms blow up.
        std = np.full(X.shape[0], 1e-160)

        if return_mean_grad and return_std_grad:
            # Zero mean-gradient with nonzero std-gradient is exactly the
            # combination that produces the problematic inf * 0 product.
            mu_grad = np.zeros((X.shape[0], X.shape[1]))
            std_grad = np.ones((X.shape[0], X.shape[1]))
            return mu, std, mu_grad, std_grad

        # Fix: previously the std was returned even when return_std=False;
        # honor the flag for consistency with the sklearn predict convention.
        if not return_std:
            return mu

        return mu, std


# This is used to test that given constant acquisition values at
# different points, acquisition functions "EIps" and "PIps"
# prefer candidate points that take lesser time.
Expand Down Expand Up @@ -94,6 +115,18 @@ def test_acquisition_api():
assert_raises(ValueError, method, rng.rand(10), gpr)


@pytest.mark.fast_test
@pytest.mark.parametrize("acq", [gaussian_pi, gaussian_ei])
def test_ei_pi_gradients_are_finite_for_tiny_positive_std(acq):
    """EI/PI values and gradients must stay finite for a tiny positive std."""
    surrogate = TinyStdSurrogateWithGrad()
    x_cand = np.zeros((1, 1))

    acq_values, acq_grad = acq(x_cand, surrogate, y_opt=1.0, xi=0.0, return_grad=True)

    assert np.all(np.isfinite(acq_values))
    assert np.all(np.isfinite(acq_grad))


def check_gradient_correctness(X_new, model, acq_func, y_opt):
def num_grad_func(x):
return gaussian_acquisition_1D(x, model, y_opt, acq_func=acq_func)[0]
Expand Down