
Commit d548d6b

sdaulton authored and facebook-github-bot committed
Fix fantasization with FixedNoiseGP and outcome transforms and use FantasizeMixin
Summary: This fixes fantasization with `FixedNoiseGP` and outcome transforms, where `noise` that was already in the outcome-transformed space was outcome-transformed a second time. It also improves fantasization for batched and batched multi-output models by using the average noise for each batch and output, and removes repeated code by reusing the logic in `FantasizeMixin.fantasize` for handling `X` with size 0 on the -2 dimension.

Differential Revision: D49200325
Parent: fa51038 · Commit: d548d6b

File tree

5 files changed (+75, -23 lines)

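For context, a minimal end-to-end sketch of the code path this commit touches (illustrative only, not part of the diff; shapes and hyperparameters are arbitrary):

```python
# A FixedNoiseGP with an outcome transform, fantasized at new points.
# Before this fix, the fantasy model's noise was outcome-transformed twice.
import torch
from botorch.models import FixedNoiseGP
from botorch.models.transforms.outcome import Standardize
from botorch.sampling import SobolQMCNormalSampler

train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = torch.sin(train_X[..., :1] * 6.28)
train_Yvar = torch.full_like(train_Y, 0.01)  # fixed observation noise (variance)
model = FixedNoiseGP(train_X, train_Y, train_Yvar, outcome_transform=Standardize(m=1))

sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))
X_new = torch.rand(4, 2, dtype=torch.double)
fm = model.fantasize(X=X_new, sampler=sampler)  # fantasy model, also a FixedNoiseGP
```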

botorch/models/gp_regression.py

Lines changed: 21 additions & 22 deletions
```diff
@@ -30,15 +30,14 @@

 from __future__ import annotations

-from typing import Any, List, NoReturn, Optional, Union
+from typing import Any, List, NoReturn, Optional

 import torch
-from botorch import settings
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.model import FantasizeMixin
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import Log, OutcomeTransform
-from botorch.models.utils import fantasize as fantasize_flag, validate_input_scaling
+from botorch.models.utils import validate_input_scaling
 from botorch.models.utils.gpytorch_modules import (
     get_gaussian_likelihood_with_gamma_prior,
     get_matern_kernel_with_gamma_prior,
@@ -164,7 +163,7 @@ def forward(self, x: Tensor) -> MultivariateNormal:
         return MultivariateNormal(mean_x, covar_x)


-class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP):
+class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP, FantasizeMixin):
     r"""A single-task exact GP model using fixed noise levels.

     A single-task exact GP that uses fixed observation noise levels, differing from
@@ -270,7 +269,7 @@ def fantasize(
         self,
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: Union[bool, Tensor] = True,
+        observation_noise: bool = True,
         **kwargs: Any,
     ) -> FixedNoiseGP:
         r"""Construct a fantasy model.
@@ -292,27 +291,27 @@
             sampler: The sampler used for sampling from the posterior at `X`.
             observation_noise: If True, include the mean across the observation
                 noise in the training data as observation noise in the posterior
-                from which the samples are drawn. If a Tensor, use it directly
-                as the specified measurement noise.
+                from which the samples are drawn.

         Returns:
             The constructed fantasy model.
         """
-        propagate_grads = kwargs.pop("propagate_grads", False)
-        with fantasize_flag():
-            with settings.propagate_grads(propagate_grads):
-                post_X = self.posterior(
-                    X, observation_noise=observation_noise, **kwargs
-                )
-            Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
-            # Use the mean of the previous noise values (TODO: be smarter here).
-            # noise should be batch_shape x q x m when X is batch_shape x q x d, and
-            # Y_fantasized is num_fantasies x batch_shape x q x m.
-            noise_shape = Y_fantasized.shape[1:]
-            noise = self.likelihood.noise.mean().expand(noise_shape)
-            return self.condition_on_observations(
-                X=self.transform_inputs(X), Y=Y_fantasized, noise=noise
-            )
+        # self.likelihood.noise is a `batch_shape x (m) x n`-dim tensor
+        if self.num_outputs > 1:
+            # make noise ... x n x m
+            noise = self.likelihood.noise.transpose(-1, -2)
+        else:
+            noise = self.likelihood.noise.unsqueeze(-1)
+        mean_noise = noise.mean(dim=-2, keepdim=True)
+        if not observation_noise:
+            mean_noise = mean_noise.clamp_max(MIN_INFERRED_NOISE_LEVEL)
+        return super().fantasize(
+            X=X,
+            sampler=sampler,
+            observation_noise=observation_noise,
+            noise=mean_noise,
+            **kwargs,
+        )

     def forward(self, x: Tensor) -> MultivariateNormal:
         # TODO: reduce redundancy with the 'forward' method of
```
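To make the shape handling above concrete, a small sketch (assumed shapes; for a batched multi-output model, `likelihood.noise` carries a trailing `m x n` block):

```python
# Mirrors the averaging in the new fantasize(): one mean-noise value per
# batch and per output, shaped for condition_on_observations.
import torch

noise = torch.rand(2, 3, 10)            # batch_shape=2, m=3 outputs, n=10 points
noise_nm = noise.transpose(-1, -2)      # ... x n x m
mean_noise = noise_nm.mean(dim=-2, keepdim=True)
print(mean_noise.shape)                 # torch.Size([2, 1, 3]): batch x 1 x m
```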

botorch/models/gpytorch.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -444,7 +444,7 @@ def condition_on_observations(
         noise = kwargs.get("noise")
         if hasattr(self, "outcome_transform"):
             # we need to apply transforms before shifting batch indices around
-            Y, noise = self.outcome_transform(Y, noise)
+            Y, _ = self.outcome_transform(Y)
         self._validate_tensor_args(X=X, Y=Y, Yvar=noise, strict=False)
         inputs = X
         if self._num_outputs > 1:
```
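Why dropping `noise` from the transform call matters: the `noise` passed in by `fantasize` is already in the outcome-transformed space, so transforming it along with `Y` rescaled it a second time. A sketch with `Standardize` (illustrative values, assuming the transform has been fitted):

```python
import torch
from botorch.models.transforms.outcome import Standardize

octf = Standardize(m=1)
Y = torch.randn(20, 1) * 5.0               # outcomes with std ~5
Yvar = torch.full_like(Y, 0.04)            # observation noise (variance)
Y_tf, Yvar_tf = octf(Y, Yvar)              # Yvar correctly divided by stdvs**2 once
octf.eval()                                # freeze the fitted statistics
_, Yvar_twice = octf(Y_tf, Yvar_tf)        # the old bug: noise rescaled again
print(Yvar[0], Yvar_tf[0], Yvar_twice[0])  # Yvar_twice ~stdvs**2 smaller than Yvar_tf
```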

botorch/models/model.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -311,6 +311,7 @@ def fantasize(
         X: Tensor,
         sampler: MCSampler,
         observation_noise: bool = True,
+        noise: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> TFantasizeMixin:
         r"""Construct a fantasy model.
@@ -329,6 +330,9 @@
                 batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`.
             observation_noise: If True, include observation noise.
+            noise: A `model_batch_shape x 1 x m`-dim tensor containing the average noise
+                for each batch and output. `noise` must be in the outcome-transformed
+                space if `self.outcome_transform` is not None.
             kwargs: Will be passed to `model.condition_on_observations`

         Returns:
@@ -352,6 +356,8 @@
         with settings.propagate_grads(propagate_grads):
             post_X = self.posterior(X, observation_noise=observation_noise)
         Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
+        if noise is not None:
+            kwargs["noise"] = noise.expand(Y_fantasized.shape[1:])
         return self.condition_on_observations(
             X=self.transform_inputs(X), Y=Y_fantasized, **kwargs
         )
```
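Shape arithmetic for the new `noise` kwarg (assumed sizes, per the docstring above): the `model_batch_shape x 1 x m` mean-noise tensor broadcasts over the `n'` fantasy points via `expand`:

```python
import torch

mean_noise = torch.full((2, 1, 3), 0.05)  # model_batch_shape=2, one value per output (m=3)
Y_fantasized = torch.randn(4, 2, 5, 3)    # num_fantasies x batch_shape x n' x m
noise = mean_noise.expand(Y_fantasized.shape[1:])
print(noise.shape)                        # torch.Size([2, 5, 3])
```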

botorch/utils/testing.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -375,6 +375,7 @@ def _get_random_data(
         [torch.linspace(0, 0.95, n, **tkwargs) for _ in range(d)], dim=-1
     )
     train_x = train_x + 0.05 * torch.rand_like(train_x).repeat(rep_shape)
+    train_x[0] += 0.02  # modify the first batch
     train_y = torch.sin(train_x[..., :1] * (2 * math.pi))
     train_y = train_y + 0.2 * torch.randn(n, m, **tkwargs).repeat(rep_shape)
     return train_x, train_y
```
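This one-line tweak matters for the batched tests: without it, `repeat(rep_shape)` produces identical batches, so per-batch quantities (like the per-batch average noise checked in the new test below) would coincide. A minimal illustration:

```python
import torch

base = torch.rand(5, 2)                       # n x d inputs
batched = base.repeat(2, 1, 1)                # 2 x n x d: two identical batches
assert torch.equal(batched[0], batched[1])    # batches coincide before the tweak
batched[0] += 0.02                            # perturb the first batch only
assert not torch.equal(batched[0], batched[1])
```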

test/models/test_gp_regression.py

Lines changed: 46 additions & 0 deletions
```diff
@@ -18,6 +18,7 @@
 from botorch.models.transforms import Normalize, Standardize
 from botorch.models.transforms.input import InputStandardize
 from botorch.models.utils import add_output_dim
+from botorch.models.utils.gpytorch_modules import MIN_INFERRED_NOISE_LEVEL
 from botorch.posteriors import GPyTorchPosterior
 from botorch.sampling import SobolQMCNormalSampler
 from botorch.utils.datasets import SupervisedDataset
@@ -456,6 +457,51 @@ def test_construct_inputs(self):
         self.assertTrue(Y.equal(data_dict["train_Y"]))
         self.assertTrue(Yvar.equal(data_dict["train_Yvar"]))

+    def test_fantasized_noise(self):
+        for batch_shape, m, dtype, use_octf in itertools.product(
+            (torch.Size(), torch.Size([2])),
+            (1, 2),
+            (torch.float, torch.double),
+            (False, True),
+        ):
+            tkwargs = {"device": self.device, "dtype": dtype}
+            octf = Standardize(m=m, batch_shape=batch_shape) if use_octf else None
+            model, _ = self._get_model_and_data(
+                batch_shape=batch_shape, m=m, outcome_transform=octf, **tkwargs
+            )
+            # fantasize
+            X_f = torch.rand(torch.Size(batch_shape + torch.Size([4, 1])), **tkwargs)
+            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))
+            fm = model.fantasize(X=X_f, sampler=sampler)
+            self.assertIsInstance(fm, model.__class__)
+            noise = (
+                model.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else model.likelihood.noise.transpose(-1, -2)
+            )
+            avg_noise = noise.mean(dim=-2, keepdim=True)
+            fm_noise = (
+                fm.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else fm.likelihood.noise.transpose(-1, -2)
+            )
+
+            self.assertTrue((fm_noise[..., -4:, :] == avg_noise).all())
+            # self.assertFalse(True)
+            fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=False)
+            fm_noise = (
+                fm.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else fm.likelihood.noise.transpose(-1, -2)
+            )
+            self.assertIsInstance(fm, model.__class__)
+            self.assertTrue(
+                (
+                    fm_noise[..., -4:, :]
+                    == avg_noise.clamp_max(MIN_INFERRED_NOISE_LEVEL)
+                ).all()
+            )
+

 class TestHeteroskedasticSingleTaskGP(TestSingleTaskGP):
     def _get_model_and_data(
```
