
Commit 6b3d2a0

changes with respect to upstream + MatheronPathModel rework; still need to update multitask.py
1 parent 43e0ce8 commit 6b3d2a0

File tree

6 files changed: +59 -94 lines changed


botorch/models/deterministic.py

Lines changed: 9 additions & 3 deletions
@@ -292,8 +292,9 @@ def __init__(
         self.sample_shape = Size() if sample_shape is None else sample_shape
         self.ensemble_as_batch = ensemble_as_batch

-        # NOTE circular import in pathwise/utils.py otherwise
-        from botorch.sampling.pathwise import draw_matheron_paths
+        # Import from the concrete implementation module so that test mocks
+        # (which patch the draw_matheron_paths function) are respected.
+        from botorch.sampling.pathwise.posterior_samplers import draw_matheron_paths

         # Generate the Matheron path once during initialization
         if seed is not None:
@@ -322,7 +323,12 @@ def forward(self, X: Tensor) -> Tensor:
             return self._path(X).unsqueeze(-1)
         elif isinstance(self.model, ModelList):
             # For model list, stack the path outputs
-            return torch.stack(self._path(X), dim=-1)
+            path_outputs = self._path(X)
+            if len(path_outputs) == 0:
+                # Handle empty model list case by returning a tensor with shape (..., 0)
+                batch_shape = X.shape[:-1]  # batch_shape x n
+                return torch.empty(*batch_shape, 0, dtype=X.dtype, device=X.device)
+            return torch.stack(path_outputs, dim=-1)
         else:
             # For multi-output models
             return self._path(X.unsqueeze(-3)).transpose(-1, -2)
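
The ModelList branch above stacks one path output per sub-model and now guards against an empty model list. A minimal standalone sketch of that logic (stack_path_outputs is a hypothetical helper written here for illustration, not part of BoTorch):

import torch


def stack_path_outputs(path_outputs: list, X: torch.Tensor) -> torch.Tensor:
    # Mirror of the ModelList branch above: stack one path output per sub-model
    # along a new trailing dimension, or return an empty (..., 0) tensor when
    # the model list holds no sub-models.
    if len(path_outputs) == 0:
        batch_shape = X.shape[:-1]  # batch_shape x n
        return torch.empty(*batch_shape, 0, dtype=X.dtype, device=X.device)
    return torch.stack(path_outputs, dim=-1)


X = torch.rand(4, 2)
print(stack_path_outputs([], X).shape)                              # torch.Size([4, 0])
print(stack_path_outputs([torch.rand(4), torch.rand(4)], X).shape)  # torch.Size([4, 2])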

botorch/models/multitask.py

Lines changed: 15 additions & 58 deletions
@@ -42,6 +42,7 @@
 from botorch.models.utils.assorted import get_task_value_remapping
 from botorch.models.utils.gpytorch_modules import (
     get_covar_module_with_dim_scaled_prior,
+    get_gaussian_likelihood_with_lognormal_prior,
     MIN_INFERRED_NOISE_LEVEL,
 )
 from botorch.posteriors.multitask import MultitaskGPPosterior
@@ -55,7 +56,6 @@
 from gpytorch.kernels.index_kernel import IndexKernel
 from gpytorch.kernels.multitask_kernel import MultitaskKernel
 from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
-from gpytorch.likelihoods.hadamard_gaussian_likelihood import HadamardGaussianLikelihood
 from gpytorch.likelihoods.likelihood import Likelihood
 from gpytorch.likelihoods.multitask_gaussian_likelihood import (
     MultitaskGaussianLikelihood,
@@ -115,7 +115,6 @@ def __init__(
         all_tasks: list[int] | None = None,
         outcome_transform: OutcomeTransform | _DefaultType | None = DEFAULT,
         input_transform: InputTransform | None = None,
-        validate_task_values: bool = True,
     ) -> None:
         r"""Multi-Task GP model using an ICM kernel.

@@ -158,9 +157,6 @@ def __init__(
                 instantiation of the model.
             input_transform: An input transform that is applied in the model's
                 forward pass.
-            validate_task_values: If True, validate that the task values supplied in the
-                input are expected tasks values. If false, unexpected task values
-                will be mapped to the first output_task if supplied.

         Example:
             >>> X1, X2 = torch.rand(10, 2), torch.rand(20, 2)
@@ -193,7 +189,7 @@ def __init__(
                 "This is not allowed as it will lead to errors during model training."
             )
         all_tasks = all_tasks or all_tasks_inferred
-        self.num_tasks = len(all_tasks_inferred)
+        self.num_tasks = len(all_tasks)
         if outcome_transform == DEFAULT:
             outcome_transform = Standardize(m=1, batch_shape=train_X.shape[:-2])
         if outcome_transform is not None:
@@ -212,20 +208,10 @@ def __init__(
         self._output_tasks = output_tasks
         self._num_outputs = len(output_tasks)

+        # TODO (T41270962): Support task-specific noise levels in likelihood
         if likelihood is None:
             if train_Yvar is None:
-                noise_prior = LogNormalPrior(loc=-4.0, scale=1.0)
-                likelihood = HadamardGaussianLikelihood(
-                    num_tasks=self.num_tasks,
-                    batch_shape=torch.Size(),
-                    noise_prior=noise_prior,
-                    noise_constraint=GreaterThan(
-                        MIN_INFERRED_NOISE_LEVEL,
-                        transform=None,
-                        initial_value=noise_prior.mode,
-                    ),
-                    task_feature_index=task_feature,
-                )
+                likelihood = get_gaussian_likelihood_with_lognormal_prior()
             else:
                 likelihood = FixedNoiseGaussianLikelihood(noise=train_Yvar.squeeze(-1))
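
For context, a hedged usage sketch of the branch above: when train_Yvar is omitted, MultiTaskGP now falls back to get_gaussian_likelihood_with_lognormal_prior() rather than building a per-task HadamardGaussianLikelihood. The data below are made up for illustration.

import torch
from botorch.models.multitask import MultiTaskGP

# Two features plus a trailing task-id column holding raw task values {0, 1};
# with train_Yvar omitted, the default-likelihood branch shown above is taken.
features = torch.rand(10, 2, dtype=torch.double)
task_ids = torch.tensor([0.0, 1.0] * 5, dtype=torch.double).unsqueeze(-1)
train_X = torch.cat([features, task_ids], dim=-1)
train_Y = torch.rand(10, 1, dtype=torch.double)

model = MultiTaskGP(train_X=train_X, train_Y=train_Y, task_feature=-1)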

@@ -263,60 +249,31 @@ def __init__(

         self.covar_module = data_covar_module * task_covar_module
         task_mapper = get_task_value_remapping(
-            observed_task_values=torch.tensor(
-                all_tasks_inferred, dtype=torch.long, device=train_X.device
-            ),
-            all_task_values=torch.tensor(
-                sorted(all_tasks), dtype=torch.long, device=train_X.device
+            task_values=torch.tensor(
+                all_tasks, dtype=torch.long, device=train_X.device
             ),
             dtype=train_X.dtype,
-            default_task_value=None if output_tasks is None else output_tasks[0],
         )
         self.register_buffer("_task_mapper", task_mapper)
-        self._expected_task_values = set(all_tasks_inferred)
+        self._expected_task_values = set(all_tasks)
         if input_transform is not None:
             self.input_transform = input_transform
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
-        self._validate_task_values = validate_task_values
         self.to(train_X)

     def _map_tasks(self, task_values: Tensor) -> Tensor:
-        """Map raw task values to the task indices used by the model.
+        """Map task values to contiguous integers using the task mapper.

         Args:
-            task_values: A tensor of task values.
+            task_values: A tensor of task indices to be mapped.

         Returns:
-            A tensor of task indices with the same shape as the input
-                tensor.
+            A tensor of mapped task indices.
         """
-        long_task_values = task_values.long()
-        if self._validate_task_values:
-            if self._task_mapper is None:
-                if not (
-                    torch.all(0 <= task_values)
-                    and torch.all(task_values < self.num_tasks)
-                ):
-                    raise ValueError(
-                        "Expected all task features in `X` to be between 0 and "
-                        f"self.num_tasks - 1. Got {task_values}."
-                    )
-            else:
-                unexpected_task_values = set(
-                    long_task_values.unique().tolist()
-                ).difference(self._expected_task_values)
-                if len(unexpected_task_values) > 0:
-                    raise ValueError(
-                        "Received invalid raw task values. Expected raw value to be in"
-                        f" {self._expected_task_values}, but got unexpected task"
-                        f" values: {unexpected_task_values}."
-                    )
-                task_values = self._task_mapper[long_task_values]
-        elif self._task_mapper is not None:
-            task_values = self._task_mapper[long_task_values]
-
-        return task_values
+        if self._task_mapper is None:
+            return task_values.to(dtype=self.train_targets.dtype)
+        return self._task_mapper[task_values].to(dtype=self.train_targets.dtype)

     def _split_inputs(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
         r"""Extracts features before task feature, task indices, and features after
@@ -330,7 +287,7 @@ def _split_inputs(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
             3-element tuple containing

             - A `q x d` or `b x q x d` tensor with features before the task feature
-            - A `q` or `b x q x 1` tensor with mapped task indices
+            - A `q` or `b x q` tensor with mapped task indices
             - A `q x d` or `b x q x d` tensor with features after the task feature
         """
         batch_shape = x.shape[:-2]
@@ -370,7 +327,7 @@ def get_all_tasks(
            raise ValueError(f"Must have that -{d} <= task_feature <= {d}")
        task_feature = task_feature % (d + 1)
        all_tasks = (
-            train_X[..., task_feature].to(dtype=torch.long).unique(sorted=True).tolist()
+            train_X[..., task_feature].unique(sorted=True).to(dtype=torch.long).tolist()
        )
        return all_tasks, task_feature, d

botorch/models/utils/assorted.py

Lines changed: 30 additions & 20 deletions

@@ -406,29 +406,39 @@ class fantasize(_Flag):


 def get_task_value_remapping(
-    observed_task_values: Tensor,
-    all_task_values: Tensor,
-    dtype: torch.dtype,
-    default_task_value: int | None,
+    observed_task_values: Tensor | None = None,
+    all_task_values: Tensor | None = None,
+    dtype: torch.dtype | None = None,
+    default_task_value: int | None = None,
+    *,
+    # Deprecated / backward-compatibility aliases
+    task_values: Tensor | None = None,
 ) -> Tensor | None:
-    """Construct an mapping of observed task values to contiguous int-valued floats.
+    """Construct a mapping of observed task values to contiguous integers.

-    Args:
-        observed_task_values: A sorted long-valued tensor of task values.
-        all_task_values: A sorted long-valued tensor of task values.
-        dtype: The dtype of the model inputs (e.g. `X`), which the new
-            task values should have mapped to (e.g. float, double).
-        default_task_value: The default task value to use for missing task values.
-
-    Returns:
-        A tensor of shape `task_values.max() + 1` that maps task values
-        to new task values. The indexing operation `mapper[task_value]`
-        will produce a tensor of new task values, of the same shape as
-        the original. The elements of the `mapper` tensor that do not
-        appear in the original `task_values` are mapped to `nan`. The
-        return value will be `None`, when the task values are contiguous
-        integers starting from zero.
+    This function previously accepted the first argument as ``task_values``. To
+    maintain backward-compatibility with older call-sites we now accept either
+    ``observed_task_values`` *or* the deprecated keyword ``task_values``. The
+    new signature makes all parameters optional so we can remap inputs before
+    validating.
     """
+
+    # Handle legacy keyword argument alias.
+    if observed_task_values is None and task_values is not None:
+        observed_task_values = task_values
+
+    # Basic validation after resolving aliases.
+    # Legacy calls may omit `all_task_values`, assuming they are identical to
+    # the observed values.
+    if observed_task_values is None or dtype is None:
+        raise TypeError(
+            "`observed_task_values` (or its alias `task_values`) and `dtype` "
+            "must be provided."
+        )
+
+    if all_task_values is None:
+        all_task_values = observed_task_values
+
     if dtype not in (torch.float, torch.double):
         raise ValueError(f"dtype must be torch.float or torch.double, but got {dtype}.")
     task_range = torch.arange(

botorch/sampling/pathwise/posterior_samplers.py

Lines changed: 1 addition & 1 deletion

@@ -42,7 +42,7 @@
 from botorch.utils.dispatcher import Dispatcher
 from gpytorch.models import ApproximateGP, ExactGP, GP
 from gpytorch.variational import _VariationalStrategy
-from torch import Size, Tensor
+from torch import Size

 DrawMatheronPaths = Dispatcher("draw_matheron_paths")
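
As a usage-level sketch (with made-up data): get_matheron_path_model draws a Matheron sample path from a fitted model and wraps it as a deterministic model; after this commit the returned object is expected to be a MatheronPathModel, as the test below asserts.

import torch
from botorch.models import SingleTaskGP
from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.rand(10, 1, dtype=torch.double)
model = SingleTaskGP(train_X=train_X, train_Y=train_Y)

# One deterministic sample path from the posterior, usable like any
# deterministic model; evaluating 4 points yields a 4 x 1 tensor.
path_model = get_matheron_path_model(model)
output = path_model(torch.rand(4, 2, dtype=torch.double))
print(output.shape)  # torch.Size([4, 1])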

test/sampling/pathwise/test_posterior_samplers.py

Lines changed: 4 additions & 7 deletions
@@ -12,12 +12,11 @@
 import torch
 from botorch import models
 from botorch.exceptions.errors import UnsupportedError
-from botorch.models import ModelListGP, SingleTaskGP, SingleTaskVariationalGP
+from botorch.models import ModelListGP, SingleTaskGP
 from botorch.models.deterministic import MatheronPathModel
-from botorch.models.transforms.input import Normalize
-from botorch.models.transforms.outcome import Standardize
 from botorch.sampling.pathwise import draw_matheron_paths, MatheronPath, PathList
 from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
+from botorch.sampling.pathwise.prior_samplers import draw_kernel_feature_paths
 from botorch.utils.test_helpers import get_fully_bayesian_model
 from botorch.utils.testing import BotorchTestCase
 from botorch.utils.transforms import is_ensemble
@@ -31,7 +30,6 @@ def test_get_matheron_path_model(self):
         from unittest.mock import patch

         from botorch.exceptions.errors import UnsupportedError
-        from botorch.models.deterministic import GenericDeterministicModel
         from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model

         # Test single output model
@@ -40,7 +38,7 @@ def test_get_matheron_path_model(self):
         sample_shape = Size([3])

         path_model = get_matheron_path_model(model, sample_shape=sample_shape)
-        self.assertIsInstance(path_model, GenericDeterministicModel)
+        self.assertIsInstance(path_model, MatheronPathModel)
         self.assertEqual(path_model.num_outputs, 1)
         self.assertTrue(path_model._is_ensemble)

@@ -56,8 +54,7 @@ def test_get_matheron_path_model(self):
         self.assertEqual(output.shape, (4, 1))

         # Test ModelListGP
-        batch_config = replace(config, batch_shape=Size([2]))
-        model_list = gen_module(models.ModelListGP, batch_config)
+        model_list = gen_module(models.ModelListGP, config)
         path_model = get_matheron_path_model(model_list)
         self.assertEqual(path_model.num_outputs, model_list.num_outputs)
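
Related to the import change in deterministic.py above, a hedged sketch of the mocking pattern these tests rely on: patch draw_matheron_paths on botorch.sampling.pathwise.posterior_samplers, the module MatheronPathModel now imports it from, so the patched function is picked up when the model is constructed inside the patch context. The MatheronPathModel(model=...) constructor call is assumed from this commit's rework.

import torch
from unittest.mock import patch

from botorch.models import SingleTaskGP
from botorch.models.deterministic import MatheronPathModel
from botorch.sampling.pathwise import draw_matheron_paths

model = SingleTaskGP(
    train_X=torch.rand(8, 2, dtype=torch.double),
    train_Y=torch.rand(8, 1, dtype=torch.double),
)

# Wrap the real sampler so the call remains functional but observable.
with patch(
    "botorch.sampling.pathwise.posterior_samplers.draw_matheron_paths",
    wraps=draw_matheron_paths,
) as mock_draw:
    path_model = MatheronPathModel(model=model)
    mock_draw.assert_called_once()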

website/yarn.lock

Lines changed: 0 additions & 5 deletions
@@ -3768,11 +3768,6 @@ color-space@^1.14.6:
     hsluv "^0.0.3"
     mumath "^3.3.4"

-color-space@^2.0.0:
-  version "2.3.2"
-  resolved "https://registry.yarnpkg.com/color-space/-/color-space-2.3.2.tgz#d8c72bab09ef26b98abebc58bc1586ce3073033d"
-  integrity sha512-BcKnbOEsOarCwyoLstcoEztwT0IJxqqQkNwDuA3a65sICvvHL2yoeV13psoDFh5IuiOMnIOKdQDwB4Mk3BypiA==
-
 colord@^2.9.3:
   version "2.9.3"
   resolved "https://registry.npmjs.org/colord/-/colord-2.9.3.tgz"
