MixedSingleTaskGP (#772)

Balandat · facebook-github-bot · commit 5a5173b6ad89 · 2021-04-19T19:25:11.000-07:00
Summary: Pull Request resolved: #772 A `MixedSingleTaskGP` that uses a combination of the categorical kernel and a kernel on the continuous inputs. Reviewed By: dme65 Differential Revision: D27419521 fbshipit-source-id: bb22623154b3b06d876d71605f1428db9d0f58cb
diff --git a/botorch/models/__init__.py b/botorch/models/__init__.py
@@ -15,6 +15,7 @@
     SingleTaskGP,
 )
 from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP
+from botorch.models.gp_regression_mixed import MixedSingleTaskGP
 from botorch.models.higher_order_gp import HigherOrderGP
 from botorch.models.model_list_gp_regression import ModelListGP
 from botorch.models.multitask import FixedNoiseMultiTaskGP, MultiTaskGP
@@ -28,6 +29,7 @@
     "GenericDeterministicModel",
     "HeteroskedasticSingleTaskGP",
     "HigherOrderGP",
+    "MixedSingleTaskGP",
     "ModelListGP",
     "MultiTaskGP",
     "PairwiseGP",
diff --git a/botorch/models/converter.py b/botorch/models/converter.py
@@ -17,6 +17,7 @@
 from botorch.exceptions import UnsupportedError
 from botorch.models.gp_regression import FixedNoiseGP, HeteroskedasticSingleTaskGP
 from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP
+from botorch.models.gp_regression_mixed import MixedSingleTaskGP
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.model_list_gp_regression import ModelListGP
 from botorch.models.transforms.input import InputTransform
@@ -207,6 +208,10 @@ def batched_to_model_list(batch_model: BatchedMultiOutputGPyTorchModel) -> Model
         raise NotImplementedError(
             "Conversion of HeteroskedasticSingleTaskGP currently not supported."
         )
+    if isinstance(batch_model, MixedSingleTaskGP):
+        raise NotImplementedError(
+            "Conversion of MixedSingleTaskGP currently not supported."
+        )
     input_transform = getattr(batch_model, "input_transform", None)
     batch_sd = batch_model.state_dict()
 
diff --git a/botorch/models/gp_regression_mixed.py b/botorch/models/gp_regression_mixed.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from typing import Callable
+from typing import Dict, List, Optional, Any
+
+import torch
+from botorch.exceptions.errors import UnsupportedError
+from botorch.models.gp_regression import SingleTaskGP
+from botorch.models.kernels.categorical import CategoricalKernel
+from botorch.models.transforms.input import InputTransform
+from botorch.models.transforms.outcome import OutcomeTransform
+from botorch.utils.containers import TrainingData
+from botorch.utils.transforms import normalize_indices
+from gpytorch.constraints import GreaterThan
+from gpytorch.kernels.kernel import Kernel
+from gpytorch.kernels.matern_kernel import MaternKernel
+from gpytorch.kernels.scale_kernel import ScaleKernel
+from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood
+from gpytorch.likelihoods.likelihood import Likelihood
+from gpytorch.priors import GammaPrior
+from torch import Tensor
+
+
+class MixedSingleTaskGP(SingleTaskGP):
+    r"""A single-task exact GP model for mixed search spaces.
+
+    This model uses a kernel that combines a CategoricalKernel (based on
+    Hamming distances) and a regular kernel into a kernel of the form
+
+        K((x1, c1), (x2, c2)) =
+            K_cont_1(x1, x2) + K_cat_1(c1, c2) +
+            K_cont_2(x1, x2) * K_cat_2(c1, c2)
+
+    where `xi` and `ci` are the continuous and categorical features of the
+    input, respectively. The suffix `_i` indicates that we fit different
+    lengthscales for the kernels in the sum and product terms.
+
+    Since this model does not provide gradients for the categorical features,
+    optimization of the acquisition function will need to be performed in
+    a mixed fashion, i.e., treating the categorical features properly as
+    discrete optimization variables.
+    """
+
+    def __init__(
+        self,
+        train_X: Tensor,
+        train_Y: Tensor,
+        cat_dims: List[int],
+        cont_kernel_factory: Optional[
+            Callable[[torch.Size, int, List[int]], Kernel]
+        ] = None,
+        likelihood: Optional[Likelihood] = None,
+        outcome_transform: Optional[OutcomeTransform] = None,  # TODO
+        input_transform: Optional[InputTransform] = None,  # TODO
+    ) -> None:
+        r"""A single-task exact GP model supporting categorical parameters.
+
+        Args:
+            train_X: A `batch_shape x n x d` tensor of training features.
+            train_Y: A `batch_shape x n x m` tensor of training observations.
+            cat_dims: A non-empty list of indices corresponding to the columns
+                of the input `X` that should be considered categorical features.
+            cont_kernel_factory: A method that accepts `batch_shape`,
+                `ard_num_dims`, and `active_dims` arguments and returns an
+                instantiated GPyTorch `Kernel` object to be used as the base
+                kernel for the continuous dimensions. If omitted, this model
+                uses a Matern-2.5 kernel as the kernel for the ordinal parameters.
+            likelihood: A likelihood. If omitted, use a standard
+                GaussianLikelihood with inferred noise level.
+            outcome_transform: Not yet supported; an `UnsupportedError` is
+                raised unless this is `None`.
+            input_transform: Not yet supported; an `UnsupportedError` is
+                raised unless this is `None`.
+
+        Example:
+            >>> train_X = torch.cat(
+                    [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
+                )
+            >>> train_Y = (
+                    torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
+                    + train_X[..., -1:]
+                )
+            >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
+        """
+        if outcome_transform is not None:
+            raise UnsupportedError("outcome transforms not yet supported")
+        if input_transform is not None:
+            raise UnsupportedError("input transforms not yet supported")
+        if len(cat_dims) == 0:
+            raise ValueError(
+                "Must specify categorical dimensions for MixedSingleTaskGP"
+            )
+        # Only the augmented batch shape is needed to size the kernels/likelihood.
+        _, aug_batch_shape = self.get_batch_dimensions(train_X=train_X, train_Y=train_Y)
+
+        if cont_kernel_factory is None:
+
+            def cont_kernel_factory(
+                batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
+            ) -> MaternKernel:
+                return MaternKernel(
+                    nu=2.5,
+                    batch_shape=batch_shape,
+                    ard_num_dims=ard_num_dims,
+                    active_dims=active_dims,
+                )
+
+        if likelihood is None:
+            # This Gamma prior is quite close to the Horseshoe prior
+            min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
+            likelihood = GaussianLikelihood(
+                batch_shape=aug_batch_shape,
+                noise_constraint=GreaterThan(
+                    min_noise, transform=None, initial_value=1e-3
+                ),
+                noise_prior=GammaPrior(0.9, 10.0),
+            )
+
+        # Partition the input columns into categorical and ordinal dimensions.
+        d = train_X.shape[-1]
+        cat_dims = normalize_indices(indices=cat_dims, d=d)  # presumably -1 -> d - 1
+        ord_dims = sorted(set(range(d)) - set(cat_dims))  # non-categorical columns
+        if len(ord_dims) == 0:  # all inputs are categorical
+            covar_module = ScaleKernel(
+                CategoricalKernel(
+                    batch_shape=aug_batch_shape,
+                    ard_num_dims=len(cat_dims),
+                )
+            )
+        else:
+            sum_kernel = ScaleKernel(  # K_cont_1 + K_cat_1 term
+                cont_kernel_factory(
+                    batch_shape=aug_batch_shape,
+                    ard_num_dims=len(ord_dims),
+                    active_dims=ord_dims,
+                )
+                + ScaleKernel(
+                    CategoricalKernel(
+                        batch_shape=aug_batch_shape,
+                        ard_num_dims=len(cat_dims),
+                        active_dims=cat_dims,
+                    )
+                )
+            )
+            prod_kernel = ScaleKernel(  # K_cont_2 * K_cat_2 term
+                cont_kernel_factory(
+                    batch_shape=aug_batch_shape,
+                    ard_num_dims=len(ord_dims),
+                    active_dims=ord_dims,
+                )
+                * CategoricalKernel(
+                    batch_shape=aug_batch_shape,
+                    ard_num_dims=len(cat_dims),
+                    active_dims=cat_dims,
+                )
+            )
+            covar_module = sum_kernel + prod_kernel
+        super().__init__(
+            train_X=train_X,
+            train_Y=train_Y,
+            likelihood=likelihood,
+            covar_module=covar_module,
+            outcome_transform=outcome_transform,
+            input_transform=input_transform,
+        )
+
+    @classmethod
+    def construct_inputs(
+        cls, training_data: TrainingData, **kwargs: Any
+    ) -> Dict[str, Any]:
+        r"""Construct kwargs for the `Model` from `TrainingData` and other options.
+
+        Args:
+            training_data: `TrainingData` container with data for single outcome
+                or for multiple outcomes for batched multi-output case.
+            **kwargs: Must contain `categorical_features`; may contain `likelihood`.
+        """
+        return {
+            "train_X": training_data.X,
+            "train_Y": training_data.Y,
+            "cat_dims": kwargs["categorical_features"],
+            "likelihood": kwargs.get("likelihood"),
+        }
diff --git a/botorch/utils/testing.py b/botorch/utils/testing.py
@@ -198,24 +198,27 @@ def set_X_pending(self, X_pending: Optional[Tensor] = None):
 
 
 def _get_random_data(
-    batch_shape: torch.Size, num_outputs: int, n: int = 10, **tkwargs
+    batch_shape: torch.Size, m: int, d: int = 1, n: int = 10, **tkwargs
 ) -> Tuple[Tensor, Tensor]:
     r"""Generate random data for testing pursposes.
 
     Args:
         batch_shape: The batch shape of the data.
-        num_outputs: The number of outputs.
+        m: The number of outputs.
+        d: The dimension of the input.
         n: The number of data points.
         tkwargs: `device` and `dtype` tensor constructor kwargs.
 
     Returns:
         A tuple `(train_X, train_Y)` with randomly generated training data.
     """
     rep_shape = batch_shape + torch.Size([1, 1])
-    train_x = torch.linspace(0, 0.95, n, **tkwargs).unsqueeze(-1)
-    train_x = train_x + 0.05 * torch.rand(n, 1, **tkwargs).repeat(rep_shape)
-    train_y = torch.sin(train_x * (2 * math.pi))
-    train_y = train_y + 0.2 * torch.randn(n, num_outputs, **tkwargs).repeat(rep_shape)
+    train_x = torch.stack(  # n x d grid; all d columns start identical
+        [torch.linspace(0, 0.95, n, **tkwargs) for _ in range(d)], dim=-1
+    )
+    train_x = train_x + 0.05 * torch.rand_like(train_x).repeat(rep_shape)  # jitter
+    train_y = torch.sin(train_x[..., :1] * (2 * math.pi))  # first column only
+    train_y = train_y + 0.2 * torch.randn(n, m, **tkwargs).repeat(rep_shape)
     return train_x, train_y
 
 
diff --git a/sphinx/source/models.rst b/sphinx/source/models.rst
@@ -44,6 +44,11 @@ Multi-Fidelity GP Regression Models
 .. automodule:: botorch.models.gp_regression_fidelity
     :members:
 
+GP Regression Models for Mixed Parameter Spaces
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.models.gp_regression_mixed
+    :members:
+
 Model List GP Regression Models
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.models.model_list_gp_regression
diff --git a/test/models/test_gp_regression.py b/test/models/test_gp_regression.py
@@ -39,9 +39,7 @@ class TestSingleTaskGP(BotorchTestCase):
     def _get_model_and_data(
         self, batch_shape, m, outcome_transform=None, input_transform=None, **tkwargs
     ):
-        train_X, train_Y = _get_random_data(
-            batch_shape=batch_shape, num_outputs=m, **tkwargs
-        )
+        train_X, train_Y = _get_random_data(batch_shape=batch_shape, m=m, **tkwargs)
         model_kwargs = {
             "train_X": train_X,
             "train_Y": train_Y,
@@ -174,7 +172,7 @@ def test_condition_on_observations(self):
             fant_shape = torch.Size([2])
             # fantasize at different input points
             X_fant, Y_fant = _get_random_data(
-                fant_shape + batch_shape, m, n=3, **tkwargs
+                batch_shape=fant_shape + batch_shape, m=m, n=3, **tkwargs
             )
             c_kwargs = (
                 {"noise": torch.full_like(Y_fant, 0.01)}
@@ -319,9 +317,7 @@ class TestFixedNoiseGP(TestSingleTaskGP):
     def _get_model_and_data(
         self, batch_shape, m, outcome_transform=None, input_transform=None, **tkwargs
     ):
-        train_X, train_Y = _get_random_data(
-            batch_shape=batch_shape, num_outputs=m, **tkwargs
-        )
+        train_X, train_Y = _get_random_data(batch_shape=batch_shape, m=m, **tkwargs)
         model_kwargs = {
             "train_X": train_X,
             "train_Y": train_Y,
@@ -381,9 +377,7 @@ def _get_model_and_data(
         self, batch_shape, m, outcome_transform=None, input_transform=None, **tkwargs
     ):
         with manual_seed(0):
-            train_X, train_Y = _get_random_data(
-                batch_shape=batch_shape, num_outputs=m, **tkwargs
-            )
+            train_X, train_Y = _get_random_data(batch_shape=batch_shape, m=m, **tkwargs)
         train_Yvar = (0.1 + 0.1 * torch.rand_like(train_Y)) ** 2
         model_kwargs = {
             "train_X": train_X,
diff --git a/test/models/test_gp_regression_fidelity.py b/test/models/test_gp_regression_fidelity.py
@@ -6,6 +6,7 @@
 
 import itertools
 import warnings
+from typing import Tuple
 
 import torch
 from botorch import fit_gpytorch_model
@@ -25,13 +26,18 @@
 from gpytorch.likelihoods import FixedNoiseGaussianLikelihood
 from gpytorch.means import ConstantMean
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
+from torch import Tensor
 
 
-def _get_random_data_with_fidelity(batch_shape, m, n_fidelity, n=10, **tkwargs):
+def _get_random_data_with_fidelity(
+    batch_shape: torch.Size, m: int, n_fidelity: int, d: int = 1, n: int = 10, **tkwargs
+) -> Tuple[Tensor, Tensor]:
     r"""Construct test data.
     For this test, by convention the trailing dimesions are the fidelity dimensions
     """
-    train_x, train_y = _get_random_data(batch_shape, m, n, **tkwargs)
+    train_x, train_y = _get_random_data(
+        batch_shape=batch_shape, m=m, d=d, n=n, **tkwargs
+    )
     s = torch.rand(n, n_fidelity, **tkwargs).repeat(batch_shape + torch.Size([1, 1]))
     train_x = torch.cat((train_x, s), dim=-1)
     train_y = train_y + (1 - s).pow(2).sum(dim=-1).unsqueeze(-1)
diff --git a/test/models/test_gp_regression_mixed.py b/test/models/test_gp_regression_mixed.py
diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py
diff --git a/test/test_cross_validation.py b/test/test_cross_validation.py