|
7 | 7 | Batch implementations of the LogEI family of improvements-based acquisition functions. |
8 | 8 | """ |
9 | 9 |
|
10 | | - |
11 | 10 | from __future__ import annotations |
12 | 11 |
|
| 12 | +from copy import deepcopy |
| 13 | + |
13 | 14 | from functools import partial |
14 | 15 |
|
15 | | -from typing import Callable, List, Optional, TypeVar, Union |
| 16 | +from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union |
16 | 17 |
|
17 | 18 | import torch |
| 19 | +from botorch.acquisition.cached_cholesky import CachedCholeskyMCAcquisitionFunction |
18 | 20 | from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction |
19 | 21 | from botorch.acquisition.objective import ( |
20 | 22 | ConstrainedMCObjective, |
21 | 23 | MCAcquisitionObjective, |
22 | 24 | PosteriorTransform, |
23 | 25 | ) |
| 26 | +from botorch.acquisition.utils import ( |
| 27 | + compute_best_feasible_objective, |
| 28 | + prune_inferior_points, |
| 29 | +) |
24 | 30 | from botorch.exceptions.errors import BotorchError |
25 | 31 | from botorch.models.model import Model |
26 | 32 | from botorch.sampling.base import MCSampler |
|
31 | 37 | logmeanexp, |
32 | 38 | smooth_amax, |
33 | 39 | ) |
| 40 | +from botorch.utils.transforms import match_batch_shape |
34 | 41 | from torch import Tensor |
35 | 42 |
|
36 | 43 | """ |
@@ -219,6 +226,259 @@ def _sample_forward(self, obj: Tensor) -> Tensor: |
219 | 226 | return li |
220 | 227 |
|
221 | 228 |
|
| 229 | +class qLogNoisyExpectedImprovement( |
| 230 | + LogImprovementMCAcquisitionFunction, CachedCholeskyMCAcquisitionFunction |
| 231 | +): |
| 232 | + r"""MC-based batch Log Noisy Expected Improvement. |
| 233 | +
|
| 234 | + This function does not assume a `best_f` is known (which would require |
| 235 | + noiseless observations). Instead, it uses samples from the joint posterior |
| 236 | + over the `q` test points and previously observed points. The improvement |
| 237 | + over previously observed points is computed for each sample and averaged. |
| 238 | +
|
| 239 | + `qLogNEI(X) ~ log(qNEI(X)) = log(E(max(max Y - max Y_baseline, 0)))`, where
| 240 | + `(Y, Y_baseline) ~ f((X, X_baseline)), X = (x_1,...,x_q)` |
| 241 | +
|
| 242 | + Example: |
| 243 | + >>> model = SingleTaskGP(train_X, train_Y) |
| 244 | + >>> sampler = SobolQMCNormalSampler(torch.Size([1024]))
| 245 | + >>> qLogNEI = qLogNoisyExpectedImprovement(model, train_X, sampler) |
| 246 | + >>> acqval = qLogNEI(test_X) |
| 247 | + """ |
| 248 | + |
| 249 | + def __init__( |
| 250 | + self, |
| 251 | + model: Model, |
| 252 | + X_baseline: Tensor, |
| 253 | + sampler: Optional[MCSampler] = None, |
| 254 | + objective: Optional[MCAcquisitionObjective] = None, |
| 255 | + posterior_transform: Optional[PosteriorTransform] = None, |
| 256 | + X_pending: Optional[Tensor] = None, |
| 257 | + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, |
| 258 | + eta: Union[Tensor, float] = 1e-3, |
| 259 | + fatten: bool = True, |
| 260 | + prune_baseline: bool = False, |
| 261 | + cache_root: bool = True, |
| 262 | + tau_max: float = TAU_MAX, |
| 263 | + tau_relu: float = TAU_RELU, |
| 264 | + **kwargs: Any, |
| 265 | + ) -> None: |
| 266 | + r"""q-Noisy Expected Improvement. |
| 267 | +
|
| 268 | + Args: |
| 269 | + model: A fitted model. |
| 270 | + X_baseline: A `batch_shape x r x d`-dim Tensor of `r` design points |
| 271 | + that have already been observed. These points are considered as |
| 272 | + the potential best design point. |
| 273 | + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` |
| 274 | + for more details.
| 275 | + objective: The MCAcquisitionObjective under which the samples are |
| 276 | + evaluated. Defaults to `IdentityMCObjective()`. |
| 277 | + posterior_transform: A PosteriorTransform (optional). |
| 278 | + X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points |
| 279 | + that have been submitted for function evaluation
| 280 | + but have not yet been evaluated. Concatenated into `X` upon |
| 281 | + forward call. Copied and set to have no gradient. |
| 282 | + constraints: A list of constraint callables which map a Tensor of posterior |
| 283 | + samples of dimension `sample_shape x batch-shape x q x m` to a
| 284 | + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints |
| 285 | + are satisfied if `constraint(samples) < 0`. |
| 286 | + eta: Temperature parameter(s) governing the smoothness of the sigmoid |
| 287 | + approximation to the constraint indicators. See the docs of |
| 288 | + `compute_(log_)smoothed_constraint_indicator` for details. |
| 289 | + fatten: Toggles the logarithmic / linear asymptotic behavior of the smooth |
| 290 | + approximation to the ReLU. |
| 291 | + prune_baseline: If True, remove points in `X_baseline` that are |
| 292 | + highly unlikely to be the best point. This can significantly |
| 293 | + improve performance and is generally recommended. In order to |
| 294 | + customize pruning parameters, instead manually call |
| 295 | + `botorch.acquisition.utils.prune_inferior_points` on `X_baseline` |
| 296 | + before instantiating the acquisition function. |
| 297 | + cache_root: A boolean indicating whether to cache the root |
| 298 | + decomposition over `X_baseline` and use low-rank updates. |
| 299 | + tau_max: Temperature parameter controlling the sharpness of the smooth |
| 300 | + approximations to max. |
| 301 | + tau_relu: Temperature parameter controlling the sharpness of the smooth |
| 302 | + approximations to ReLU. |
| 303 | + kwargs: Here for compatibility with qNEI.
| 304 | +
|
| 305 | + TODO: similar to qNEHVI, when we are using sequential greedy candidate |
| 306 | + selection, we could incorporate pending points into X_baseline and compute
| 307 | + the incremental q(Log)NEI from the new point. This would greatly increase |
| 308 | + efficiency for large batches. Prototype: D45668859. |
| 309 | + """ |
| 310 | + # TODO: separate out baseline variables initialization and other functions |
| 311 | + # in qNEI to avoid duplication of both code and work at runtime. |
| 312 | + super().__init__( |
| 313 | + model=model, |
| 314 | + sampler=sampler, |
| 315 | + objective=objective, |
| 316 | + posterior_transform=posterior_transform, |
| 317 | + X_pending=X_pending, |
| 318 | + constraints=constraints, |
| 319 | + eta=eta, |
| 320 | + fatten=fatten, |
| 321 | + tau_max=tau_max, |
| 322 | + ) |
| 323 | + self.tau_relu = tau_relu |
| 324 | + self._init_baseline( |
| 325 | + model=model, |
| 326 | + X_baseline=X_baseline, |
| 327 | + sampler=sampler, |
| 328 | + objective=objective, |
| 329 | + posterior_transform=posterior_transform, |
| 330 | + prune_baseline=prune_baseline, |
| 331 | + cache_root=cache_root, |
| 332 | + **kwargs, |
| 333 | + ) |
| 334 | + |
| 335 | + def _sample_forward(self, obj: Tensor) -> Tensor: |
| 336 | + r"""Evaluate qLogNoisyExpectedImprovement per sample on the candidate set `X`. |
| 337 | +
|
| 338 | + Args: |
| 339 | + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. |
| 340 | +
|
| 341 | + Returns: |
| 342 | + A `sample_shape x batch_shape x q`-dim Tensor of log noisy expected smoothed |
| 343 | + improvement values. |
| 344 | + """ |
| 345 | + return _log_improvement( |
| 346 | + Y=obj, |
| 347 | + best_f=self.compute_best_f(obj), |
| 348 | + tau=self.tau_relu, |
| 349 | + fatten=self._fatten, |
| 350 | + ) |
| 351 | + |
| 352 | + def _init_baseline( |
| 353 | + self, |
| 354 | + model: Model, |
| 355 | + X_baseline: Tensor, |
| 356 | + sampler: Optional[MCSampler] = None, |
| 357 | + objective: Optional[MCAcquisitionObjective] = None, |
| 358 | + posterior_transform: Optional[PosteriorTransform] = None, |
| 359 | + prune_baseline: bool = False, |
| 360 | + cache_root: bool = True, |
| 361 | + **kwargs: Any, |
| 362 | + ) -> None: |
| 363 | + # setup of CachedCholeskyMCAcquisitionFunction
| 364 | + self._setup(model=model, cache_root=cache_root) |
| 365 | + if prune_baseline: |
| 366 | + X_baseline = prune_inferior_points( |
| 367 | + model=model, |
| 368 | + X=X_baseline, |
| 369 | + objective=objective, |
| 370 | + posterior_transform=posterior_transform, |
| 371 | + marginalize_dim=kwargs.get("marginalize_dim"), |
| 372 | + ) |
| 373 | + self.register_buffer("X_baseline", X_baseline) |
| 374 | + # registering buffers for _get_samples_and_objectives in the next `if` block |
| 375 | + self.register_buffer("baseline_samples", None) |
| 376 | + self.register_buffer("baseline_obj", None) |
| 377 | + if self._cache_root: |
| 378 | + self.q_in = -1 |
| 379 | + # set baseline samples |
| 380 | + with torch.no_grad(): # this is _get_samples_and_objectives(X_baseline) |
| 381 | + posterior = self.model.posterior( |
| 382 | + X_baseline, posterior_transform=self.posterior_transform |
| 383 | + ) |
| 384 | + # Note: The root decomposition is cached in two different places. It |
| 385 | + # may be confusing to have two different caches, but this is not |
| 386 | + # trivial to change since each is needed for a different reason: |
| 387 | + # - LinearOperator caching to `posterior.mvn` allows for reuse within |
| 388 | + # this function, which may be helpful if the same root decomposition |
| 389 | + # is produced by the calls to `self.base_sampler` and |
| 390 | + # `self._cache_root_decomposition`. |
| 391 | + # - self._baseline_L allows a root decomposition to be persisted outside |
| 392 | + # this method. |
| 393 | + self.baseline_samples = self.get_posterior_samples(posterior) |
| 394 | + self.baseline_obj = self.objective(self.baseline_samples, X=X_baseline) |
| 395 | + |
| 396 | + # We make a copy here because we will write an attribute `base_samples` |
| 397 | + # to `self.base_sampler.base_samples`, and we don't want to mutate |
| 398 | + # `self.sampler`. |
| 399 | + self.base_sampler = deepcopy(self.sampler) |
| 400 | + self.register_buffer( |
| 401 | + "_baseline_best_f", |
| 402 | + self._compute_best_feasible_objective( |
| 403 | + samples=self.baseline_samples, obj=self.baseline_obj |
| 404 | + ), |
| 405 | + ) |
| 406 | + self._baseline_L = self._compute_root_decomposition(posterior=posterior) |
| 407 | + |
| 408 | + def compute_best_f(self, obj: Tensor) -> Tensor: |
| 409 | + """Computes the best (feasible) noisy objective value. |
| 410 | +
|
| 411 | + Args: |
| 412 | + obj: `sample_shape x batch_shape x q`-dim Tensor of objectives in forward. |
| 413 | +
|
| 414 | + Returns: |
| 415 | + A `sample_shape x batch_shape x 1`-dim Tensor of best feasible objectives. |
| 416 | + """ |
| 417 | + if self._cache_root: |
| 418 | + val = self._baseline_best_f |
| 419 | + else: |
| 420 | + val = self._compute_best_feasible_objective( |
| 421 | + samples=self.baseline_samples, obj=self.baseline_obj |
| 422 | + ) |
| 423 | + # ensuring shape, dtype, device compatibility with obj |
| 424 | + n_sample_dims = len(self.sample_shape) |
| 425 | + view_shape = torch.Size( |
| 426 | + [ |
| 427 | + *val.shape[:n_sample_dims], # sample dimensions |
| 428 | + *(1,) * (obj.ndim - val.ndim), # pad to match obj |
| 429 | + *val.shape[n_sample_dims:], # the rest |
| 430 | + ] |
| 431 | + ) |
| 432 | + return val.view(view_shape).to(obj) |
| 433 | + |
| 434 | + def _get_samples_and_objectives(self, X: Tensor) -> Tuple[Tensor, Tensor]: |
| 435 | + r"""Compute samples at new points, using the cached root decomposition. |
| 436 | +
|
| 437 | + Args: |
| 438 | + X: A `batch_shape x q x d`-dim tensor of inputs. |
| 439 | +
|
| 440 | + Returns: |
| 441 | + A two-tuple `(samples, obj)`, where `samples` is a tensor of posterior |
| 442 | + samples with shape `sample_shape x batch_shape x q x m`, and `obj` is a |
| 443 | + tensor of MC objective values with shape `sample_shape x batch_shape x q`. |
| 444 | + """ |
| 445 | + q = X.shape[-2] |
| 446 | + X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2) |
| 447 | + # TODO: Implement a more efficient way to compute posterior over both training
| 448 | + # and test points in GPyTorch (https://github.com/cornellius-gp/gpytorch/issues/567)
| 449 | + posterior = self.model.posterior( |
| 450 | + X_full, posterior_transform=self.posterior_transform |
| 451 | + ) |
| 452 | + if not self._cache_root: |
| 453 | + samples_full = super().get_posterior_samples(posterior) |
| 454 | + samples = samples_full[..., -q:, :] |
| 455 | + obj_full = self.objective(samples_full, X=X_full) |
| 456 | + # assigning baseline buffers so `best_f` can be computed in _sample_forward |
| 457 | + self.baseline_obj, obj = obj_full[..., :-q], obj_full[..., -q:] |
| 458 | + self.baseline_samples = samples_full[..., :-q, :] |
| 459 | + return samples, obj |
| 460 | + |
| 461 | + # handle one-to-many input transforms |
| 462 | + n_plus_q = X_full.shape[-2] |
| 463 | + n_w = posterior._extended_shape()[-2] // n_plus_q |
| 464 | + q_in = q * n_w |
| 465 | + self._set_sampler(q_in=q_in, posterior=posterior) |
| 466 | + samples = self._get_f_X_samples(posterior=posterior, q_in=q_in) |
| 467 | + obj = self.objective(samples, X=X_full[..., -q:, :]) |
| 468 | + return samples, obj |
| 469 | + |
| 470 | + def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tensor: |
| 471 | + return compute_best_feasible_objective( |
| 472 | + samples=samples, |
| 473 | + obj=obj, |
| 474 | + constraints=self._constraints, |
| 475 | + model=self.model, |
| 476 | + objective=self.objective, |
| 477 | + posterior_transform=self.posterior_transform, |
| 478 | + X_baseline=self.X_baseline, |
| 479 | + ) |
| 480 | + |
| 481 | + |
222 | 482 | """ |
223 | 483 | ###################################### utils ########################################## |
224 | 484 | """ |
|