From 48841de77bb545c8dc1f4b4dd40a1267275f5790 Mon Sep 17 00:00:00 2001
From: Sait Cakmak
Date: Tue, 14 Oct 2025 15:27:02 -0700
Subject: [PATCH] Clean up some leftover utils from legacy generators

Summary:
Removes a number of model fitting & acqf construction utils that were used by
the legacy generators. These are no longer used anywhere, since the legacy
generators have been removed.

Also renames `botorch_moo_defaults` to `botorch_moo_utils` to better
communicate its purpose.

Reviewed By: dme65

Differential Revision: D84653561
---
 ax/adapter/adapter_utils.py                   |   2 +-
 ax/adapter/tests/test_torch_moo_adapter.py    |   2 +-
 ax/adapter/torch.py                           |   2 +-
 ax/generators/tests/test_botorch_defaults.py  | 564 -------------
 ..._defaults.py => test_botorch_moo_utils.py} |  97 +--
 ax/generators/tests/test_torch_model_utils.py |  29 +-
 ax/generators/torch/botorch_defaults.py       | 746 -----------------
 .../torch/botorch_modular/acquisition.py      |   2 +-
 ax/generators/torch/botorch_moo_defaults.py   | 775 ------------------
 ax/generators/torch/botorch_moo_utils.py      | 306 +++++++
 ax/generators/torch/tests/test_acquisition.py |   2 +-
 ax/generators/torch/utils.py                  |  74 +-
 12 files changed, 321 insertions(+), 2280 deletions(-)
 delete mode 100644 ax/generators/tests/test_botorch_defaults.py
 rename ax/generators/tests/{test_botorch_moo_defaults.py => test_botorch_moo_utils.py} (78%)
 delete mode 100644 ax/generators/torch/botorch_defaults.py
 delete mode 100644 ax/generators/torch/botorch_moo_defaults.py
 create mode 100644 ax/generators/torch/botorch_moo_utils.py

diff --git a/ax/adapter/adapter_utils.py b/ax/adapter/adapter_utils.py
index 276ab8c9fa3..258737d06d2 100644
--- a/ax/adapter/adapter_utils.py
+++ b/ax/adapter/adapter_utils.py
@@ -48,7 +48,7 @@
 )
 from ax.core.types import TBounds, TCandidateMetadata
 from ax.exceptions.core import DataRequiredError, UserInputError
-from ax.generators.torch.botorch_moo_defaults import (
+from ax.generators.torch.botorch_moo_utils import (
     get_weighted_mc_objective_and_objective_thresholds,
     pareto_frontier_evaluator,
 )
diff --git a/ax/adapter/tests/test_torch_moo_adapter.py b/ax/adapter/tests/test_torch_moo_adapter.py
index 34d92ec7050..0cb72a03766 100644
--- a/ax/adapter/tests/test_torch_moo_adapter.py
+++ b/ax/adapter/tests/test_torch_moo_adapter.py
@@ -32,7 +32,7 @@
 )
 from ax.core.parameter_constraint import ParameterConstraint
 from ax.generators.torch.botorch_modular.generator import BoTorchGenerator
-from ax.generators.torch.botorch_moo_defaults import (
+from ax.generators.torch.botorch_moo_utils import (
     infer_objective_thresholds,
     pareto_frontier_evaluator,
 )
diff --git a/ax/adapter/torch.py b/ax/adapter/torch.py
index 86731a99bdf..3e280fc6398 100644
--- a/ax/adapter/torch.py
+++ b/ax/adapter/torch.py
@@ -64,7 +64,7 @@
 from ax.exceptions.core import DataRequiredError, UnsupportedError, UserInputError
 from ax.exceptions.generation_strategy import OptimizationConfigRequired
 from ax.generators.torch.botorch_modular.generator import BoTorchGenerator
-from ax.generators.torch.botorch_moo_defaults import infer_objective_thresholds
+from ax.generators.torch.botorch_moo_utils import infer_objective_thresholds
 from ax.generators.torch.utils import _get_X_pending_and_observed
 from ax.generators.torch_base import TorchGenerator, TorchOptConfig
 from ax.generators.types import TConfig
diff --git a/ax/generators/tests/test_botorch_defaults.py b/ax/generators/tests/test_botorch_defaults.py
deleted file mode 100644
index 7c0ae294256..00000000000
--- a/ax/generators/tests/test_botorch_defaults.py
+++ /dev/null
@@
-1,564 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -import math -from copy import deepcopy -from unittest import mock -from unittest.mock import Mock - -import torch -from ax.generators.torch.botorch_defaults import ( - _get_acquisition_func, - _get_customized_covar_module, - _get_model, - get_and_fit_model, - get_warping_transform, - NO_OBSERVED_POINTS_MESSAGE, -) -from ax.utils.common.testutils import TestCase -from ax.utils.testing.mock import mock_botorch_optimize -from botorch.acquisition.logei import ( - qLogExpectedImprovement, - qLogNoisyExpectedImprovement, -) -from botorch.acquisition.monte_carlo import ( - qExpectedImprovement, - qNoisyExpectedImprovement, - qProbabilityOfImprovement, - qSimpleRegret, -) -from botorch.acquisition.objective import ConstrainedMCObjective -from botorch.acquisition.penalized import L1PenaltyObjective, PenalizedMCObjective -from botorch.exceptions.errors import UnsupportedError -from botorch.models.gp_regression import SingleTaskGP -from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP -from botorch.models.multitask import MultiTaskGP -from botorch.models.transforms.input import Warp -from botorch.utils.constraints import get_outcome_constraint_transforms -from gpytorch.kernels import MaternKernel, ScaleKernel -from gpytorch.likelihoods.gaussian_likelihood import ( - FixedNoiseGaussianLikelihood, - GaussianLikelihood, -) -from gpytorch.likelihoods.hadamard_gaussian_likelihood import HadamardGaussianLikelihood -from gpytorch.module import Module -from gpytorch.priors import GammaPrior -from gpytorch.priors.lkj_prior import LKJCovariancePrior -from gpytorch.priors.prior import Prior -from pyre_extensions import assert_is_instance, none_throws - - -class BotorchDefaultsTest(TestCase): - def test_get_model(self) -> None: - x = torch.rand(2, 2) - y = torch.rand(2, 1) - var = torch.rand(2, 1) - partial_var = torch.tensor([0, float("nan")]).unsqueeze(-1) - unknown_var = torch.tensor([float("nan"), float("nan")]).unsqueeze(-1) - model = _get_model(x, y, unknown_var, None) - self.assertIsInstance(model, SingleTaskGP) - self.assertIsInstance(model.likelihood, GaussianLikelihood) - - model = _get_model(X=x, Y=y, Yvar=var) - self.assertIsInstance(model, SingleTaskGP) - self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood) - self.assertEqual( - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `lengthscale_prior`. - model.covar_module.lengthscale_prior.loc, - math.log(2.0) / 2 + 2**0.5, - ) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `lengthscale_prior`. - self.assertEqual(model.covar_module.lengthscale_prior.scale, 3**0.5) - model = _get_model(X=x, Y=y, Yvar=unknown_var, task_feature=1) - self.assertIs(type(model), MultiTaskGP) # Don't accept subclasses. 
- self.assertIsInstance(model.likelihood, HadamardGaussianLikelihood) - model = _get_model(X=x, Y=y, Yvar=var, task_feature=1) - self.assertIsInstance(model, MultiTaskGP) - self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood) - model = _get_model(X=x, Y=y, Yvar=partial_var.clone(), task_feature=1) - self.assertIsInstance(model, MultiTaskGP) - model = _get_model(X=x, Y=y, Yvar=partial_var.clone(), task_feature=1, rank=1) - self.assertEqual(model._rank, 1) - with self.assertRaises(ValueError): - model = _get_model(X=x, Y=y, Yvar=partial_var, task_feature=None) - model = _get_model(X=x, Y=y, Yvar=var, fidelity_features=[-1]) - self.assertTrue(isinstance(model, SingleTaskMultiFidelityGP)) - with self.assertRaises(NotImplementedError): - _get_model(X=x, Y=y, Yvar=var, task_feature=1, fidelity_features=[-1]) - # test fixed prior - prior = { - "type": LKJCovariancePrior, - "sd_prior": GammaPrior(2.0, 0.44), - "eta": 0.6, - } - x[0, 1] = 0 - x[1, 1] = 1 - model = _get_model( - X=x, Y=y, Yvar=partial_var.clone(), task_feature=1, prior=prior - ) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `kernels`. - task_covar_module = model.covar_module.kernels[1] - self.assertIsInstance( - task_covar_module.IndexKernelPrior, - LKJCovariancePrior, - ) - self.assertEqual( - task_covar_module.IndexKernelPrior.sd_prior.concentration, - 2.0, - ) - self.assertEqual(task_covar_module.IndexKernelPrior.sd_prior.rate, 0.44) - self.assertEqual( - task_covar_module.IndexKernelPrior.correlation_prior.eta, - 0.6, - ) - model = _get_model( - X=x, - Y=y, - Yvar=partial_var.clone(), - task_feature=1, - prior={"type": LKJCovariancePrior}, - ) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `kernels`. - task_covar_module = model.covar_module.kernels[1] - self.assertIsInstance( - task_covar_module.IndexKernelPrior, - LKJCovariancePrior, - ) - self.assertEqual( - task_covar_module.IndexKernelPrior.sd_prior.concentration, - 1.0, - ) - self.assertEqual(task_covar_module.IndexKernelPrior.sd_prior.rate, 0.15) - self.assertEqual( - task_covar_module.IndexKernelPrior.correlation_prior.eta, - 0.5, - ) - prior = { - "type": LKJCovariancePrior, - "sd_prior": GammaPrior(2.0, 0.44), - "eta": "hi", - } - with self.assertRaises(ValueError): - _get_model(X=x, Y=y, Yvar=partial_var.clone(), task_feature=1, prior=prior) - - prior = {"type": Prior, "sd_prior": GammaPrior(2.0, 0.44), "eta": 0.5} - with self.assertRaises(NotImplementedError): - _get_model(X=x, Y=y, Yvar=partial_var.clone(), task_feature=1, prior=prior) - # test passing customized prior - prior = { - "covar_module_prior": {"lengthscale_prior": GammaPrior(12.0, 2.0)}, - "type": LKJCovariancePrior, - } - model = _get_model(X=x, Y=y, Yvar=var, prior=deepcopy(prior)) - self.assertIsInstance(model, SingleTaskGP) - self.assertEqual( - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `base_kernel`. - model.covar_module.base_kernel.lengthscale_prior.concentration, - 12.0, - ) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `base_kernel`. - self.assertEqual(model.covar_module.base_kernel.lengthscale_prior.rate, 2.0) - model = _get_model( - X=x, - Y=y, - Yvar=unknown_var, - task_feature=1, - prior=deepcopy(prior), - ) - self.assertIs(type(model), MultiTaskGP) - self.assertIsInstance(model.likelihood, HadamardGaussianLikelihood) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `kernels`. 
- data_covar_module, task_covar_module = model.covar_module.kernels - self.assertEqual( - data_covar_module.base_kernel.lengthscale_prior.concentration, - 12.0, - ) - self.assertEqual(data_covar_module.base_kernel.lengthscale_prior.rate, 2.0) - self.assertIsInstance( - task_covar_module.IndexKernelPrior, - LKJCovariancePrior, - ) - model = _get_model( - X=x, - Y=y, - Yvar=var, - task_feature=1, - prior=deepcopy(prior), - ) - self.assertIsInstance(model, MultiTaskGP) - self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood) - self.assertEqual( - data_covar_module.base_kernel.lengthscale_prior.concentration, - 12.0, - ) - self.assertEqual(data_covar_module.base_kernel.lengthscale_prior.rate, 2.0) - self.assertIsInstance( - task_covar_module.IndexKernelPrior, - LKJCovariancePrior, - ) - # test passing customized prior - prior = { - "covar_module_prior": {"lengthscale_prior": GammaPrior(12.0, 2.0)}, - } - covar_module = MaternKernel( - nu=2.5, - ard_num_dims=2, - lengthscale_prior=GammaPrior(6.0, 6.0), - ) - model = _get_model( - X=x, - Y=y, - Yvar=var, - covar_module=covar_module, - prior=prior, - ) - self.assertIsInstance(model, SingleTaskGP) - self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood) - self.assertEqual(covar_module, model.covar_module) - - # test input warping dimension checks. - with self.assertRaisesRegex(UnsupportedError, "batched multi output models"): - _get_model( - X=torch.ones(4, 3, 2), - Y=torch.ones(4, 3, 2), - Yvar=torch.zeros(4, 3, 2), - use_input_warping=True, - ) - - @mock.patch("ax.generators.torch.botorch_defaults._get_model", wraps=_get_model) - @mock_botorch_optimize - def test_task_feature(self, get_model_mock: Mock) -> None: - x = [torch.zeros(2, 2)] - y = [torch.zeros(2, 1)] - yvars = [torch.ones(2, 1)] - get_and_fit_model( - Xs=x, - Ys=y, - Yvars=yvars, - task_features=[1], - fidelity_features=[], - metric_signatures=["L2NormMetric"], - state_dict=None, - refit_model=False, - ) - # Check that task feature was correctly passed to _get_model - self.assertEqual(get_model_mock.mock_calls[0][2]["task_feature"], 1) - - # check error on multiple task features - with self.assertRaises(NotImplementedError): - get_and_fit_model( - Xs=x, - Ys=y, - Yvars=yvars, - task_features=[0, 1], - fidelity_features=[], - metric_signatures=["L2NormMetric"], - state_dict=None, - refit_model=False, - ) - - # check error on multiple fidelity features - with self.assertRaises(NotImplementedError): - get_and_fit_model( - Xs=x, - Ys=y, - Yvars=yvars, - task_features=[], - fidelity_features=[-1, -2], - metric_signatures=["L2NormMetric"], - state_dict=None, - refit_model=False, - ) - - # check error on botch task and fidelity feature - with self.assertRaises(NotImplementedError): - get_and_fit_model( - Xs=x, - Ys=y, - Yvars=yvars, - task_features=[1], - fidelity_features=[-1], - metric_signatures=["L2NormMetric"], - state_dict=None, - refit_model=False, - ) - - @mock_botorch_optimize - def test_pass_customized_prior(self) -> None: - x = [torch.zeros(2, 2)] - y = [torch.zeros(2, 1)] - yvars = [torch.ones(2, 1)] - prior = { - "covar_module_prior": { - "lengthscale_prior": GammaPrior(12.0, 2.0), - "outputscale_prior": GammaPrior(2.0, 12.0), - }, - } - model = get_and_fit_model( - Xs=x, - Ys=y, - Yvars=yvars, - task_features=[], - fidelity_features=[], - metric_signatures=["L2NormMetric"], - state_dict=None, - refit_model=False, - prior=prior, - ) - self.assertIsInstance(model, SingleTaskGP) - self.assertIsInstance(model.likelihood, FixedNoiseGaussianLikelihood) 
- - self.assertEqual( - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `base_kernel`. - model.covar_module.base_kernel.lengthscale_prior.concentration, - 12.0, - ) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `base_kernel`. - self.assertEqual(model.covar_module.base_kernel.lengthscale_prior.rate, 2.0) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `outputscale_prior`. - self.assertEqual(model.covar_module.outputscale_prior.concentration, 2.0) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `outputscale_prior`. - self.assertEqual(model.covar_module.outputscale_prior.rate, 12.0) - - model = get_and_fit_model( - Xs=x + x, - Ys=y + y, - Yvars=yvars + yvars, - task_features=[1], - fidelity_features=[], - metric_signatures=["L2NormMetric", "L2NormMetric2"], - state_dict=None, - refit_model=False, - prior=prior, - ) - # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Tensor, Module]` is not a - # function. - for m in model.models: - self.assertIs(type(m), MultiTaskGP) - data_covar_module, task_covar_module = m.covar_module.kernels - self.assertIsInstance(m.likelihood, FixedNoiseGaussianLikelihood) - self.assertEqual( - data_covar_module.base_kernel.lengthscale_prior.concentration, - 12.0, - ) - self.assertEqual(data_covar_module.base_kernel.lengthscale_prior.rate, 2.0) - self.assertEqual(data_covar_module.outputscale_prior.concentration, 2.0) - self.assertEqual(data_covar_module.outputscale_prior.rate, 12.0) - - def test_get_acquisition_func(self) -> None: - d, m = 3, 2 - n = 16 - x = torch.randn(n, d) - y = torch.randn(n, m) - unknown_var = torch.tensor([float("nan"), float("nan")]).unsqueeze(-1) - model = _get_model(x, y, unknown_var, None) - objective_weights = torch.tensor([1.0, 0.0]) # first output is objective - outcome_constraints = ( - torch.tensor([[0.0, 1.0], [0.0, -1.0], [1.0, 1.0]]), # k x m - torch.tensor([[1.0], [-1.0], [0.0]]), # k x 1 - ) - X_observed = torch.zeros(2, d) - expected_constraints = none_throws( - get_outcome_constraint_transforms(outcome_constraints) - ) - samples = torch.zeros(n, m) # to test constraints - - for acqf_name, acqf_class in zip( - ["qEI", "qLogEI", "qPI", "qNEI", "qLogNEI"], - [ - qExpectedImprovement, - qLogExpectedImprovement, - qProbabilityOfImprovement, - qNoisyExpectedImprovement, - qLogNoisyExpectedImprovement, - ], - ): - acqf = _get_acquisition_func( - model=model, - acquisition_function_name=acqf_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - # SampleReducingMCAcquisitionFunctions don't need this objective - constrained_mc_objective=None, - ) - self.assertIsInstance(acqf, acqf_class) - acqf_constraints = acqf._constraints - self.assertIsNotNone(acqf_constraints) - - # while the function pointer is different, return value has to be the same - # pyre-fixme[6]: For 1st argument expected `Iterable[_T1]` but got - # `Union[Tensor, Module]`. 
- for acqf_con, exp_con in zip(acqf_constraints, expected_constraints): - self.assertAllClose(acqf_con(samples), exp_con(samples)) - - with self.assertRaisesRegex(ValueError, NO_OBSERVED_POINTS_MESSAGE): - _get_acquisition_func( - model=model, - acquisition_function_name=acqf_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=None, # errors because of no observations - ) - - # test support for PenalizedMCObjective - penalty_objective = L1PenaltyObjective(init_point=torch.zeros(1, d)) - for acqf_name, acqf_class in zip( - ["qEI", "qLogEI", "qNEI", "qLogNEI"], - [ - qExpectedImprovement, - qLogExpectedImprovement, - qNoisyExpectedImprovement, - qLogNoisyExpectedImprovement, - ], - ): - acqf = _get_acquisition_func( - model=model, - acquisition_function_name=acqf_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - mc_objective=PenalizedMCObjective, - constrained_mc_objective=None, - mc_objective_kwargs={ - "penalty_objective": penalty_objective, - "regularization_parameter": 0.1, - }, - ) - self.assertIsInstance(acqf, acqf_class) - acqf_constraints = acqf._constraints - self.assertIsNotNone(acqf_constraints) - self.assertIsInstance(acqf.objective, PenalizedMCObjective) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `penalty_objective`. - self.assertIsInstance(acqf.objective.penalty_objective, L1PenaltyObjective) - # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute - # `regularization_parameter`. - self.assertEqual(acqf.objective.regularization_parameter, 0.1) - - acqf_name = "qSR" - acqf_class = qSimpleRegret - acqf = _get_acquisition_func( - model=model, - acquisition_function_name=acqf_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - # these two need the legacy constrained objective - constrained_mc_objective=ConstrainedMCObjective, - ) - self.assertIsInstance(acqf, acqf_class) - acqf_constraints = acqf._constraints - self.assertIsNone(acqf_constraints) # because this uses the legacy path - self.assertIsInstance(acqf.objective, ConstrainedMCObjective) - - # the following two errors are only thrown when the acquisition function is - # not a SampleReducingMCAcquisitionFunction. - with self.assertRaisesRegex( - ValueError, - "constrained_mc_objective cannot be set to None " - "when applying outcome constraints.", - ): - _get_acquisition_func( - model=model, - acquisition_function_name=acqf_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - constrained_mc_objective=None, - ) - - # these are not yet supported, will require passing additional arguments to - # the botorch constructor (i.e. beta for UCB, ref_point and Yfor EHVI.) 
- for acqf_name in ["qUCB", "qEHVI", "qNEHVI"]: - with self.assertRaisesRegex(NotImplementedError, "not implemented yet"): - _get_acquisition_func( - model=model, - acquisition_function_name=acqf_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - ) - - def test_get_customized_covar_module(self) -> None: - ard_num_dims = 3 - batch_shape = torch.Size([2]) - covar_module = _get_customized_covar_module( - covar_module_prior_dict={}, - ard_num_dims=ard_num_dims, - aug_batch_shape=batch_shape, - task_feature=None, - ) - self.assertIsInstance(covar_module, Module) - self.assertIsInstance(covar_module, ScaleKernel) - self.assertIsInstance(covar_module.outputscale_prior, GammaPrior) - prior = assert_is_instance(covar_module.outputscale_prior, GammaPrior) - self.assertEqual(prior.concentration, 2.0) - self.assertEqual(prior.rate, 0.15) - self.assertIsInstance(covar_module.base_kernel, MaternKernel) - base_kernel = assert_is_instance(covar_module.base_kernel, MaternKernel) - self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior) - self.assertEqual( - assert_is_instance(base_kernel.lengthscale_prior, GammaPrior).concentration, - 3.0, - ) - self.assertEqual( - assert_is_instance(base_kernel.lengthscale_prior, GammaPrior).rate, 6.0 - ) - self.assertEqual(base_kernel.ard_num_dims, ard_num_dims) - self.assertEqual(base_kernel.batch_shape, batch_shape) - - covar_module = _get_customized_covar_module( - covar_module_prior_dict={ - "lengthscale_prior": GammaPrior(12.0, 2.0), - "outputscale_prior": GammaPrior(2.0, 12.0), - }, - ard_num_dims=ard_num_dims, - aug_batch_shape=batch_shape, - task_feature=3, - ) - self.assertIsInstance(covar_module, Module) - self.assertIsInstance(covar_module, ScaleKernel) - self.assertIsInstance(covar_module.outputscale_prior, GammaPrior) - prior = assert_is_instance(covar_module.outputscale_prior, GammaPrior) - self.assertEqual(prior.concentration, 2.0) - self.assertEqual(prior.rate, 12.0) - self.assertIsInstance(covar_module.base_kernel, MaternKernel) - base_kernel = assert_is_instance(covar_module.base_kernel, MaternKernel) - self.assertIsInstance(base_kernel.lengthscale_prior, GammaPrior) - self.assertEqual( - assert_is_instance(base_kernel.lengthscale_prior, GammaPrior).concentration, - 12.0, - ) - self.assertEqual( - assert_is_instance(base_kernel.lengthscale_prior, GammaPrior).rate, 2.0 - ) - self.assertEqual(base_kernel.ard_num_dims, ard_num_dims - 1) - self.assertEqual(base_kernel.batch_shape, batch_shape) - - def test_get_warping_transform(self) -> None: - warp_tf = get_warping_transform(d=4) - self.assertIsInstance(warp_tf, Warp) - self.assertEqual(warp_tf.indices.tolist(), list(range(4))) - warp_tf = get_warping_transform(d=4, task_feature=2) - self.assertEqual(warp_tf.indices.tolist(), [0, 1, 3]) - warp_tf = get_warping_transform(d=4, batch_shape=torch.Size([2])) - self.assertIsInstance(warp_tf, Warp) - self.assertEqual(warp_tf.indices.tolist(), list(range(4))) - self.assertEqual(warp_tf.batch_shape, torch.Size([2])) diff --git a/ax/generators/tests/test_botorch_moo_defaults.py b/ax/generators/tests/test_botorch_moo_utils.py similarity index 78% rename from ax/generators/tests/test_botorch_moo_defaults.py rename to ax/generators/tests/test_botorch_moo_utils.py index b94c9ebdee6..6baac4e14f5 100644 --- a/ax/generators/tests/test_botorch_moo_defaults.py +++ b/ax/generators/tests/test_botorch_moo_utils.py @@ -7,36 +7,30 @@ # pyre-strict from contextlib import ExitStack -from typing import Any, 
cast +from typing import Any from unittest import mock from warnings import catch_warnings, simplefilter import numpy as np import torch from ax.core.search_space import SearchSpaceDigest -from ax.generators.torch.botorch_defaults import NO_OBSERVED_POINTS_MESSAGE from ax.generators.torch.botorch_modular.generator import BoTorchGenerator -from ax.generators.torch.botorch_moo_defaults import ( - get_outcome_constraint_transforms, - get_qLogEHVI, - get_qLogNEHVI, +from ax.generators.torch.botorch_moo_utils import ( get_weighted_mc_objective_and_objective_thresholds, infer_objective_thresholds, pareto_frontier_evaluator, ) from ax.generators.torch_base import TorchGenerator -from ax.utils.common.random import with_rng_seed from ax.utils.common.testutils import TestCase from ax.utils.testing.mock import mock_botorch_optimize_context_manager from botorch.models.gp_regression import SingleTaskGP from botorch.utils.datasets import SupervisedDataset from botorch.utils.multi_objective.hypervolume import infer_reference_point -from botorch.utils.testing import MockModel, MockPosterior +from botorch.utils.testing import MockPosterior from gpytorch.utils.warnings import NumericalWarning -MOO_DEFAULTS_PATH: str = "ax.generators.torch.botorch_moo_defaults" -GET_ACQF_PATH: str = MOO_DEFAULTS_PATH + ".get_acquisition_function" +MOO_DEFAULTS_PATH: str = "ax.generators.torch.botorch_moo_utils" GET_CONSTRAINT_PATH: str = MOO_DEFAULTS_PATH + ".get_outcome_constraint_transforms" GET_OBJ_PATH: str = ( MOO_DEFAULTS_PATH + ".get_weighted_mc_objective_and_objective_thresholds" @@ -223,20 +217,7 @@ def test_pareto_frontier_evaluator_with_nan(self) -> None: self.assertEqual(idx.tolist(), [4]) -class BotorchMOODefaultsTest(TestCase): - def test_get_qLogEHVI_input_validation_errors(self) -> None: - weights = torch.ones(2) - objective_thresholds = torch.zeros(2) - # Note: this is a real BoTorch `Model` with a real `Posterior`, not a - # `unittest.mock.Mock` - mm = MockModel(posterior=MockPosterior()) - with self.assertRaisesRegex(ValueError, NO_OBSERVED_POINTS_MESSAGE): - get_qLogEHVI( - model=mm, - objective_weights=weights, - objective_thresholds=objective_thresholds, - ) - +class BotorchMOOUtilsTest(TestCase): def test_get_weighted_mc_objective_and_objective_thresholds(self) -> None: objective_weights = torch.tensor([0.0, 1.0, 0.0, 1.0]) objective_thresholds = torch.arange(4, dtype=torch.float) @@ -251,71 +232,9 @@ def test_get_weighted_mc_objective_and_objective_thresholds(self) -> None: self.assertEqual(weighted_obj.outcomes.tolist(), [1, 3]) self.assertTrue(torch.equal(new_obj_thresholds, objective_thresholds[[1, 3]])) - def test_get_qLogNEHVI_input_validation_errors(self) -> None: - weights = torch.ones(2) - objective_thresholds = torch.zeros(2) - with self.assertRaisesRegex(ValueError, NO_OBSERVED_POINTS_MESSAGE): - get_qLogNEHVI( - # pyre-fixme[6] In call `get_qLogNEHVI`, for argument `model`, - # expected `Model` but got `None`. 
- model=None, - objective_weights=weights, - objective_thresholds=objective_thresholds, - ) - - @mock.patch( # pyre-ignore - "ax.generators.torch.botorch_moo_defaults._check_posterior_type", - wraps=lambda y: y, - ) - def test_get_qLogEHVI(self, _) -> None: - weights = torch.tensor([0.0, 1.0, 1.0]) - X_observed = torch.rand(4, 3) - X_pending = torch.rand(1, 3) - constraints = (torch.tensor([1.0, 0.0, 0.0]), torch.tensor([[10.0]])) - Y = torch.rand(4, 3) - mm = MockModel(MockPosterior(mean=Y)) - objective_thresholds = torch.arange(3, dtype=torch.float) - obj_and_obj_t = get_weighted_mc_objective_and_objective_thresholds( - objective_weights=weights, - objective_thresholds=objective_thresholds, - ) - (weighted_obj, new_obj_thresholds) = obj_and_obj_t - cons_tfs = get_outcome_constraint_transforms(constraints) - with with_rng_seed(0): - seed = torch.randint(1, 10000, (1,)).item() - with ExitStack() as es: - mock_get_acqf = es.enter_context(mock.patch(GET_ACQF_PATH)) - es.enter_context( - mock.patch(MOO_DEFAULTS_PATH + ".assert_is_instance", wraps=cast) - ) - es.enter_context(mock.patch(GET_CONSTRAINT_PATH, return_value=cons_tfs)) - es.enter_context(mock.patch(GET_OBJ_PATH, return_value=obj_and_obj_t)) - es.enter_context(with_rng_seed(0)) - get_qLogEHVI( - model=mm, - objective_weights=weights, - outcome_constraints=constraints, - objective_thresholds=objective_thresholds, - X_observed=X_observed, - X_pending=X_pending, - ) - mock_get_acqf.assert_called_once_with( - acquisition_function_name="qLogEHVI", - model=mm, - objective=weighted_obj, - X_observed=X_observed, - X_pending=X_pending, - constraints=cons_tfs, - mc_samples=128, - alpha=0.0, - seed=seed, - ref_point=new_obj_thresholds.tolist(), - Y=Y, - ) - # test infer objective thresholds alone @mock.patch( # pyre-ignore - "ax.generators.torch.botorch_moo_defaults._check_posterior_type", + "ax.generators.torch.botorch_moo_utils._check_posterior_type", wraps=lambda y: y, ) def test_infer_objective_thresholds(self, _, cuda: bool = False) -> None: @@ -342,7 +261,7 @@ def test_infer_objective_thresholds(self, _, cuda: bool = False) -> None: with ExitStack() as es: _mock_infer_reference_point = es.enter_context( mock.patch( - "ax.generators.torch.botorch_moo_defaults" + "ax.generators.torch.botorch_moo_utils" ".infer_reference_point", wraps=infer_reference_point, ) @@ -410,7 +329,7 @@ def test_infer_objective_thresholds(self, _, cuda: bool = False) -> None: with ExitStack() as es: _mock_infer_reference_point = es.enter_context( mock.patch( - "ax.generators.torch.botorch_moo_defaults" + "ax.generators.torch.botorch_moo_utils" ".infer_reference_point", wraps=infer_reference_point, ) diff --git a/ax/generators/tests/test_torch_model_utils.py b/ax/generators/tests/test_torch_model_utils.py index 07cf8b1795a..da642d94ce5 100644 --- a/ax/generators/tests/test_torch_model_utils.py +++ b/ax/generators/tests/test_torch_model_utils.py @@ -10,12 +10,7 @@ import numpy as np import torch -from ax.core.search_space import SearchSpaceDigest -from ax.generators.torch.utils import ( - _generate_sobol_points, - subset_model, - tensor_callable_to_array_callable, -) +from ax.generators.torch.utils import subset_model, tensor_callable_to_array_callable from ax.utils.common.testutils import TestCase from botorch.models import SingleTaskGP from botorch.models.deterministic import GenericDeterministicModel @@ -27,28 +22,6 @@ class TorchUtilsTest(TestCase): - def test_GenerateSobolPoints(self) -> None: - bounds = [(0.0, 1.0) for _ in range(3)] - linear_constraints = ( - 
torch.tensor([[1, -1, 0]], dtype=torch.double), - torch.tensor([[0]], dtype=torch.double), - ) - - def test_rounding_func(x: Tensor) -> Tensor: - return x - - gen_sobol = _generate_sobol_points( - n_sobol=100, - search_space_digest=SearchSpaceDigest( - feature_names=["a", "b", "c"], bounds=bounds - ), - device=torch.device("cpu"), - linear_constraints=linear_constraints, - rounding_func=test_rounding_func, - ) - self.assertEqual(len(gen_sobol), 100) - self.assertIsInstance(gen_sobol, Tensor) - def test_TensorCallableToArrayCallable(self) -> None: def tensor_func(x: Tensor) -> Tensor: return torch.pow(x, 2) diff --git a/ax/generators/torch/botorch_defaults.py b/ax/generators/torch/botorch_defaults.py deleted file mode 100644 index c6aabc8c6a2..00000000000 --- a/ax/generators/torch/botorch_defaults.py +++ /dev/null @@ -1,746 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -import functools -from collections.abc import Callable -from copy import deepcopy -from random import randint -from typing import Any, Protocol - -import torch -from ax.generators.torch.botorch_modular.optimizer_defaults import ( - BATCH_LIMIT, - INIT_BATCH_LIMIT, - MAX_OPT_AGG_SIZE, -) -from botorch.acquisition import get_acquisition_function -from botorch.acquisition.acquisition import AcquisitionFunction -from botorch.acquisition.objective import ConstrainedMCObjective, GenericMCObjective -from botorch.acquisition.utils import get_infeasible_cost -from botorch.exceptions.errors import UnsupportedError -from botorch.fit import fit_gpytorch_mll -from botorch.models.gp_regression import SingleTaskGP -from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP -from botorch.models.gpytorch import GPyTorchModel -from botorch.models.model import Model -from botorch.models.model_list_gp_regression import ModelListGP -from botorch.models.multitask import MultiTaskGP -from botorch.models.transforms.input import Warp -from botorch.optim.optimize import optimize_acqf -from botorch.utils import ( - get_objective_weights_transform, - get_outcome_constraint_transforms, -) -from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization -from botorch.utils.transforms import is_ensemble -from gpytorch.kernels import MaternKernel, ScaleKernel -from gpytorch.kernels.kernel import Kernel -from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood -from gpytorch.mlls.leave_one_out_pseudo_likelihood import LeaveOneOutPseudoLikelihood -from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood -from gpytorch.priors import Prior -from gpytorch.priors.lkj_prior import LKJCovariancePrior -from gpytorch.priors.torch_priors import GammaPrior, LogNormalPrior -from torch import Tensor - - -MIN_OBSERVED_NOISE_LEVEL = 1e-6 -NO_OBSERVED_POINTS_MESSAGE = ( - "There are no observed points meeting all parameter " - "constraints or have all necessary metrics attached." -) - - -def _construct_model( - task_feature: int | None, - Xs: list[Tensor], - Ys: list[Tensor], - Yvars: list[Tensor], - fidelity_features: list[int], - metric_signatures: list[str], - use_input_warping: bool = False, - prior: dict[str, Any] | None = None, - *, - multitask_gp_ranks: dict[str, Prior | float] | None = None, - **kwargs: Any, -) -> GPyTorchModel: - """ - Figures out how to call `_get_model` depending on inputs. 
Used by - `get_and_fit_model`. - """ - if task_feature is None: - if len(Xs) == 1: - # Use single output, single task GP - return _get_model( - X=Xs[0], - Y=Ys[0], - Yvar=Yvars[0], - task_feature=task_feature, - fidelity_features=fidelity_features, - use_input_warping=use_input_warping, - prior=deepcopy(prior), - **kwargs, - ) - if all(torch.equal(Xs[0], X) for X in Xs[1:]) and not use_input_warping: - # Use batched multioutput, single task GP - # Require using a ModelListGP if using input warping - Y = torch.cat(Ys, dim=-1) - Yvar = torch.cat(Yvars, dim=-1) - return _get_model( - X=Xs[0], - Y=Y, - Yvar=Yvar, - task_feature=task_feature, - fidelity_features=fidelity_features, - prior=deepcopy(prior), - **kwargs, - ) - - if task_feature is None: - models = [ - _get_model( - X=X, - Y=Y, - Yvar=Yvar, - use_input_warping=use_input_warping, - prior=deepcopy(prior), - **kwargs, - ) - for X, Y, Yvar in zip(Xs, Ys, Yvars) - ] - else: - # use multi-task GP - mtgp_rank_dict = {} if multitask_gp_ranks is None else multitask_gp_ranks - # assembles list of ranks associated with each metric - if len({len(Xs), len(Ys), len(Yvars), len(metric_signatures)}) > 1: - raise ValueError( - "Lengths of Xs, Ys, Yvars, and metric_signatures must match. Your " - f"inputs have lengths {len(Xs)}, {len(Ys)}, {len(Yvars)}, and " - f"{len(metric_signatures)}, respectively." - ) - mtgp_rank_list = [ - mtgp_rank_dict.get(metric, None) for metric in metric_signatures - ] - models = [ - _get_model( - X=X, - Y=Y, - Yvar=Yvar, - task_feature=task_feature, - rank=mtgp_rank, - use_input_warping=use_input_warping, - prior=deepcopy(prior), - **kwargs, - ) - for X, Y, Yvar, mtgp_rank in zip(Xs, Ys, Yvars, mtgp_rank_list) - ] - return ModelListGP(*models) - - -def get_and_fit_model( - Xs: list[Tensor], - Ys: list[Tensor], - Yvars: list[Tensor], - task_features: list[int], - fidelity_features: list[int], - metric_signatures: list[str], - state_dict: dict[str, Tensor] | None = None, - refit_model: bool = True, - use_input_warping: bool = False, - use_loocv_pseudo_likelihood: bool = False, - prior: dict[str, Any] | None = None, - *, - multitask_gp_ranks: dict[str, Prior | float] | None = None, - **kwargs: Any, -) -> GPyTorchModel: - r"""Instantiates and fits a botorch GPyTorchModel using the given data. - N.B. Currently, the logic for choosing ModelListGP vs other models is handled - using if-else statements in lines 96-137. In the future, this logic should be - taken care of by modular botorch. - - Args: - Xs: List of X data, one tensor per outcome. - Ys: List of Y data, one tensor per outcome. - Yvars: List of observed variance of Ys. - task_features: List of columns of X that are tasks. - fidelity_features: List of columns of X that are fidelity parameters. - metric_signatures: Signature of each outcome Y in Ys. - state_dict: If provided, will set model parameters to this state - dictionary. Otherwise, will fit the model. - refit_model: Flag for refitting model. - prior: Optional[Dict]. A dictionary that contains the specification of - GP model prior. Currently, the keys include: - - covar_module_prior: prior on covariance matrix e.g. - {"lengthscale_prior": GammaPrior(3.0, 6.0)}. - - type: type of prior on task covariance matrix e.g.`LKJCovariancePrior`. - - sd_prior: A scalar prior over nonnegative numbers, which is used for the - default LKJCovariancePrior task_covar_prior. - - eta: The eta parameter on the default LKJ task_covar_prior. - kwargs: Passed to `_get_model`. - - Returns: - A fitted GPyTorchModel. 
- """ - - if len(fidelity_features) > 0 and len(task_features) > 0: - raise NotImplementedError( - "Currently do not support MF-GP models with task_features!" - ) - if len(fidelity_features) > 1: - raise NotImplementedError( - "Fidelity MF-GP models currently support only a single fidelity parameter!" - ) - if len(task_features) > 1: - raise NotImplementedError( - f"This model only supports 1 task feature (got {task_features})" - ) - elif len(task_features) == 1: - task_feature = task_features[0] - else: - task_feature = None - - model = _construct_model( - task_feature=task_feature, - Xs=Xs, - Ys=Ys, - Yvars=Yvars, - fidelity_features=fidelity_features, - metric_signatures=metric_signatures, - use_input_warping=use_input_warping, - prior=prior, - multitask_gp_ranks=multitask_gp_ranks, - **kwargs, - ) - - # TODO: Better logic for deciding when to use a ModelListGP. Currently the - # logic is unclear. The two cases in which ModelListGP is used are - # (i) the training inputs (Xs) are not the same for the different outcomes, and - # (ii) a multi-task model is used - - model.to(Xs[0]) - if state_dict is not None: - model.load_state_dict(state_dict) - if state_dict is None or refit_model: - # TODO: Add bounds for optimization stability - requires revamp upstream - bounds = {} - if use_loocv_pseudo_likelihood: - mll_cls = LeaveOneOutPseudoLikelihood - else: - mll_cls = ExactMarginalLogLikelihood - if isinstance(model, ModelListGP): - mll = SumMarginalLogLikelihood(model.likelihood, model, mll_cls=mll_cls) - else: - mll = mll_cls(model.likelihood, model) - mll = fit_gpytorch_mll(mll, optimizer_kwargs={"bounds": bounds}) - return model - - -class TAcqfConstructor(Protocol): - def __call__( - self, # making this a static method makes Pyre unhappy, better to keep `self` - model: Model, - objective_weights: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - **kwargs: Any, - ) -> AcquisitionFunction: ... # pragma: no cover - - -def get_acqf( - acquisition_function_name: str, -) -> Callable[[Callable[[], None]], TAcqfConstructor]: - """Returns a decorator whose wrapper function instantiates an acquisition function. - - NOTE: This is a decorator factory instead of a simple factory as serialization - of Botorch model kwargs requires callables to be have module-level paths, and - closures created by a simple factory do not have such paths. We solve this by - wrapping "empty" module-level functions with this decorator, we ensure that they - are serialized correctly, in addition to reducing code duplication. - - Example: - >>> @get_acqf("qEI") - ... def get_qEI() -> None: - ... pass - >>> acqf = get_qEI( - ... model=model, - ... objective_weights=objective_weights, - ... outcome_constraints=outcome_constraints, - ... X_observed=X_observed, - ... X_pending=X_pending, - ... **kwargs, - ... ) - >>> type(acqf) - ... botorch.acquisition.monte_carlo.qExpectedImprovement - - Args: - acquisition_function_name: The name of the acquisition function to be - instantiated by the returned function. - - Returns: - A decorator whose wrapper function is a TAcqfConstructor, i.e. it requires a - `model`, `objective_weights`, and optional `outcome_constraints`, `X_observed`, - and `X_pending` as inputs, as well as `kwargs`, and returns an - `AcquisitionFunction` instance that corresponds to `acquisition_function_name`. 
- """ - - def decorator(empty_acqf_getter: Callable[[], None]) -> TAcqfConstructor: - # `wraps` allows the function to keep its original, module-level name, enabling - # serialization via `callable_to_reference`. `empty_acqf_getter` is otherwise - # not used in the wrapper. - @functools.wraps(empty_acqf_getter) - def wrapper( - model: Model, - objective_weights: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - **kwargs: Any, - ) -> AcquisitionFunction: - kwargs.pop("objective_thresholds", None) - return _get_acquisition_func( - model=model, - acquisition_function_name=acquisition_function_name, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - **kwargs, - ) - - return wrapper - - return decorator - - -@get_acqf("qEI") -def get_qEI() -> None: - """A TAcqfConstructor to instantiate a qEI acquisition function. The function body - is filled in by the decorator function `get_acqf` to simultaneously reduce code - duplication and allow serialization in Ax. TODO: Deprecate with legacy Ax model. - """ - - -@get_acqf("qLogEI") -def get_qLogEI() -> None: - """TAcqfConstructor instantiating qLogEI. See docstring of get_qEI for details.""" - - -@get_acqf("qNEI") -def get_NEI() -> None: # no "q" in method name for backward compatibility - """TAcqfConstructor instantiating qNEI. See docstring of get_qEI for details.""" - - -@get_acqf("qLogNEI") -def get_qLogNEI() -> None: - """TAcqfConstructor instantiating qLogNEI. See docstring of get_qEI for details.""" - - -def _get_acquisition_func( - model: Model, - acquisition_function_name: str, - objective_weights: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - mc_objective: type[GenericMCObjective] = GenericMCObjective, - constrained_mc_objective: None - | (type[ConstrainedMCObjective]) = ConstrainedMCObjective, - # pyre-fixme[24]: Generic type `dict` expects 2 type parameters, use - # `typing.Dict` to avoid runtime subscripting errors. - mc_objective_kwargs: dict | None = None, - *, - chebyshev_scalarization: bool = False, - prune_baseline: bool = True, - mc_samples: int = 512, - marginalize_dim: int | None = None, -) -> AcquisitionFunction: - r"""Instantiates a acquisition function. - - Args: - model: The underlying model which the acqusition function uses - to estimate acquisition values of candidates. - acquisition_function_name: Name of the acquisition function. - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. (Not used by single task models) - X_observed: A tensor containing points observed for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - X_pending: A tensor containing points whose evaluation is pending (i.e. - that have been submitted for evaluation) present for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - mc_objective: GenericMCObjective class, used for constructing a - MC-objective. If constructing a penalized MC-objective, pass in - PenalizedMCObjective together with mc_objective_kwargs . 
- constrained_mc_objective: ConstrainedMCObjective class, used when - applying constraints on the outcomes. - mc_objective_kwargs: kwargs for constructing MC-objective. - For GenericMCObjective, leave it as None. For PenalizedMCObjective, - it needs to be specified in the format of kwargs. - mc_samples: The number of MC samples to use (default: 512). - prune_baseline: If True, prune the baseline points for NEI (default: True). - chebyshev_scalarization: Use augmented Chebyshev scalarization. - - Returns: - The instantiated acquisition function. - """ - if acquisition_function_name not in [ - "qSR", - "qEI", - "qLogEI", - "qPI", - "qNEI", - "qLogNEI", - ]: - raise NotImplementedError(f"{acquisition_function_name=} not implemented yet.") - - if X_observed is None: - raise ValueError(NO_OBSERVED_POINTS_MESSAGE) - # construct Objective module - if chebyshev_scalarization: - with torch.no_grad(): - Y = model.posterior(X_observed).mean # pyre-ignore [16] - if is_ensemble(model): - Y = torch.mean(Y, dim=0) - obj_tf = get_chebyshev_scalarization(weights=objective_weights, Y=Y) - else: - obj_tf = get_objective_weights_transform(objective_weights) - - # pyre-fixme[53]: Captured variable `obj_tf` is not annotated. - def objective(samples: Tensor, X: Tensor | None = None) -> Tensor: - return obj_tf(samples) - - mc_objective_kwargs = {} if mc_objective_kwargs is None else mc_objective_kwargs - objective = mc_objective(objective=objective, **mc_objective_kwargs) - - if outcome_constraints is None: - con_tfs = None - else: - con_tfs = get_outcome_constraint_transforms(outcome_constraints) - # All acquisition functions registered in BoTorch's `get_acquisition_function` - # except qSR and qUCB support a principled treatment of the constraints by - # directly passing them to the constructor. - if acquisition_function_name == "qSR": - if constrained_mc_objective is None: - raise ValueError( - "constrained_mc_objective cannot be set to None " - "when applying outcome constraints." - ) - - inf_cost = get_infeasible_cost( - X=X_observed, model=model, objective=objective - ) - objective = constrained_mc_objective( - objective=objective, constraints=con_tfs or [], infeasible_cost=inf_cost - ) - - return get_acquisition_function( - acquisition_function_name=acquisition_function_name, - model=model, - objective=objective, - X_observed=X_observed, - X_pending=X_pending, - prune_baseline=prune_baseline, - mc_samples=mc_samples, - seed=randint(1, 10000), - marginalize_dim=marginalize_dim, - constraints=con_tfs, - ) - - -def scipy_optimizer( - acq_function: AcquisitionFunction, - bounds: Tensor, - n: int, - inequality_constraints: list[tuple[Tensor, Tensor, float]] | None = None, - equality_constraints: list[tuple[Tensor, Tensor, float]] | None = None, - fixed_features: dict[int, float] | None = None, - rounding_func: Callable[[Tensor], Tensor] | None = None, - *, - num_restarts: int = 20, - raw_samples: int | None = None, - joint_optimization: bool = False, - options: dict[str, bool | float | int | str] | None = None, -) -> tuple[Tensor, Tensor]: - r"""Optimizer using scipy's minimize module on a numpy-adpator. - - Args: - acq_function: A botorch AcquisitionFunction. - bounds: A `2 x d`-dim tensor, where `bounds[0]` (`bounds[1]`) are the - lower (upper) bounds of the feasible hyperrectangle. - n: The number of candidates to generate. 
- inequality constraints: A list of tuples (indices, coefficients, rhs), - with each tuple encoding an inequality constraint of the form - `\sum_i (X[indices[i]] * coefficients[i]) >= rhs` - equality constraints: A list of tuples (indices, coefficients, rhs), - with each tuple encoding an equality constraint of the form - `\sum_i (X[indices[i]] * coefficients[i]) == rhs` - fixed_features: A map {feature_index: value} for features that should - be fixed to a particular value during generation. - rounding_func: A function that rounds an optimization result - appropriately (i.e., according to `round-trip` transformations). - - Returns: - 2-element tuple containing - - - A `n x d`-dim tensor of generated candidates. - - In the case of joint optimization, a scalar tensor containing - the joint acquisition value of the `n` points. In the case of - sequential optimization, a `n`-dim tensor of conditional acquisition - values, where `i`-th element is the expected acquisition value - conditional on having observed candidates `0,1,...,i-1`. - """ - - sequential = not joint_optimization - optimize_acqf_options: dict[str, bool | float | int | str] = { - "batch_limit": BATCH_LIMIT, - "init_batch_limit": INIT_BATCH_LIMIT, - "max_optimization_problem_aggregation_size": MAX_OPT_AGG_SIZE, - } - if options is not None: - optimize_acqf_options.update(options) - X, expected_acquisition_value = optimize_acqf( - acq_function=acq_function, - bounds=bounds, - q=n, - num_restarts=num_restarts, - raw_samples=50 * num_restarts if raw_samples is None else raw_samples, - options=optimize_acqf_options, - inequality_constraints=inequality_constraints, - equality_constraints=equality_constraints, - fixed_features=fixed_features, - sequential=sequential, - post_processing_func=rounding_func, - ) - return X, expected_acquisition_value - - -def _get_model( - X: Tensor, - Y: Tensor, - Yvar: Tensor, - task_feature: int | None = None, - fidelity_features: list[int] | None = None, - use_input_warping: bool = False, - covar_module: Kernel | None = None, - prior: dict[str, Any] | None = None, - **kwargs: Any, -) -> GPyTorchModel: - """Instantiate a model of type depending on the input data. - - Args: - X: A `n x d` tensor of input features. - Y: A `n x m` tensor of input observations. - Yvar: A `n x m` tensor of input variances (NaN if unobserved). - task_feature: The index of the column pertaining to the task feature - (if present). - fidelity_features: List of columns of X that are fidelity parameters. - covar_module: Optional. A data kernel of GP model. - prior: Optional[Dict]. A dictionary that contains the specification of - GP model prior. Currently, the keys include: - - covar_module_prior: prior on covariance matrix e.g. - {"lengthscale_prior": GammaPrior(3.0, 6.0)}. - - type: type of prior on task covariance matrix e.g.`LKJCovariancePrior`. - - sd_prior: A scalar prior over nonnegative numbers, which is used for the - default LKJCovariancePrior task_covar_prior. - - eta: The eta parameter on the default LKJ task_covar_prior. - - Returns: - A GPyTorchModel (unfitted). - """ - Yvar = Yvar.clamp_min(MIN_OBSERVED_NOISE_LEVEL) - is_nan = torch.isnan(Yvar) - any_nan_Yvar = torch.any(is_nan) - all_nan_Yvar = torch.all(is_nan) - if any_nan_Yvar and not all_nan_Yvar: - if task_feature: - # TODO (jej): Replace with inferred noise before making perf judgements. - Yvar[Yvar != Yvar] = MIN_OBSERVED_NOISE_LEVEL - else: - raise ValueError( - "Mix of known and unknown variances indicates valuation function " - "errors. 
Variances should all be specified, or none should be." - ) - if use_input_warping: - if Y.shape[-1] > 1 and X.ndim > 2: - raise UnsupportedError( - "Input warping is not supported for batched multi output models." - ) - warp_tf = get_warping_transform( - d=X.shape[-1], - task_feature=task_feature, - batch_shape=X.shape[:-2], - ) - else: - warp_tf = None - if fidelity_features is None: - fidelity_features = [] - if len(fidelity_features) == 0: - # only pass linear_truncated arg if there are fidelities - kwargs = {k: v for k, v in kwargs.items() if k != "linear_truncated"} - # construct kernel based on customized prior if covar_module is None - prior_dict = prior or {} - covar_module_prior_dict = prior_dict.pop("covar_module_prior", None) - if (covar_module_prior_dict is not None) and (covar_module is None): - covar_module = _get_customized_covar_module( - covar_module_prior_dict=covar_module_prior_dict, - ard_num_dims=X.shape[-1], - aug_batch_shape=_get_aug_batch_shape(X, Y), - task_feature=task_feature, - ) - - if len(fidelity_features) > 0: - if task_feature: - raise NotImplementedError( - "multi-task multi-fidelity models not yet available" - ) - # at this point we can assume that there is only a single fidelity parameter - gp = SingleTaskMultiFidelityGP( - train_X=X, - train_Y=Y, - data_fidelities=fidelity_features[:1], - input_transform=warp_tf, - **kwargs, - ) - elif task_feature is None: - gp = SingleTaskGP( - train_X=X, - train_Y=Y, - train_Yvar=None if all_nan_Yvar else Yvar, - covar_module=covar_module, - input_transform=warp_tf, - **{"outcome_transform": None, **kwargs}, - ) - else: - # instantiate multitask GP - all_tasks, _, _ = MultiTaskGP.get_all_tasks(X, task_feature) - num_tasks = len(all_tasks) - task_covar_prior = None - if len(prior_dict) > 0: - prior_type = prior_dict.get("type", None) - if issubclass(prior_type, LKJCovariancePrior): - sd_prior = prior_dict.get("sd_prior", GammaPrior(1.0, 0.15)) - sd_prior._event_shape = torch.Size([num_tasks]) - eta = prior_dict.get("eta", 0.5) - if not isinstance(eta, float) and not isinstance(eta, int): - raise ValueError(f"eta must be a real number, your eta was {eta}") - task_covar_prior = LKJCovariancePrior(num_tasks, eta, sd_prior) - - else: - raise NotImplementedError( - "Currently only LKJ prior is supported," - f"your prior type was {prior_type}." - ) - - gp = MultiTaskGP( - train_X=X, - train_Y=Y, - train_Yvar=None if all_nan_Yvar else Yvar, - task_feature=task_feature, - covar_module=covar_module, - rank=kwargs.get("rank"), - task_covar_prior=task_covar_prior, - input_transform=warp_tf, - # specify output_tasks so that model.num_outputs - # is 1, since the model is only modeling - # a since metric. - output_tasks=all_tasks[:1], - ) - return gp - - -def _get_customized_covar_module( - covar_module_prior_dict: dict[str, Prior], - ard_num_dims: int, - aug_batch_shape: torch.Size, - task_feature: int | None = None, -) -> Kernel: - """Construct a GP kernel based on customized prior dict. - - Args: - covar_module_prior_dict: Dict. The keys are the names of the prior and values - are the priors. e.g. {"lengthscale_prior": GammaPrior(3.0, 6.0)}. - ard_num_dims: The dimension of the inputs, including task features. - aug_batch_shape: The output dimension augmented batch shape of the model - (different from the batch shape for batched multi-output models). - task_feature: The index of the task feature. 
- """ - # TODO: add more checks of covar_module_prior_dict - if task_feature is not None: - ard_num_dims -= 1 - return ScaleKernel( - MaternKernel( - nu=2.5, - ard_num_dims=ard_num_dims, - batch_shape=aug_batch_shape, - lengthscale_prior=covar_module_prior_dict.get( - "lengthscale_prior", GammaPrior(3.0, 6.0) - ), - ), - batch_shape=aug_batch_shape, - outputscale_prior=covar_module_prior_dict.get( - "outputscale_prior", GammaPrior(2.0, 0.15) - ), - ) - - -def _get_aug_batch_shape(X: Tensor, Y: Tensor) -> torch.Size: - """Obtain the output-augmented batch shape of GP model. - - Args: - X: A `(input_batch_shape) x n x d` tensor of input features. - Y: A `n x m` tensor of input observations. - - Returns: - The output-augmented batch shape: `input_batch_shape x (m)` - """ - batch_shape = X.shape[:-2] - num_outputs = Y.shape[-1] - if num_outputs > 1: - batch_shape += torch.Size([num_outputs]) # pyre-ignore - return batch_shape - - -def get_warping_transform( - d: int, - batch_shape: torch.Size | None = None, - task_feature: int | None = None, -) -> Warp: - """Construct input warping transform. - - Args: - d: The dimension of the input, including task features - batch_shape: The batch_shape of the model - task_feature: The index of the task feature - - Returns: - The input warping transform. - """ - indices = list(range(d)) - # apply warping to all non-task features, including fidelity features - if task_feature is not None: - del indices[task_feature] - # Legacy Ax models operate in the unit cube - bounds = torch.zeros(2, d, dtype=torch.double) - bounds[1] = 1 - # Note: this currently uses the same warping functions for all tasks - tf = Warp( - d=d, - indices=indices, - # prior with a median of 1 - concentration1_prior=LogNormalPrior(0.0, 0.75**0.5), - concentration0_prior=LogNormalPrior(0.0, 0.75**0.5), - batch_shape=batch_shape, - # Legacy Ax models operate in the unit cube - bounds=bounds, - ) - return tf diff --git a/ax/generators/torch/botorch_modular/acquisition.py b/ax/generators/torch/botorch_modular/acquisition.py index d57528104ad..629a8311c7a 100644 --- a/ax/generators/torch/botorch_modular/acquisition.py +++ b/ax/generators/torch/botorch_modular/acquisition.py @@ -25,7 +25,7 @@ ) from ax.generators.torch.botorch_modular.optimizer_argparse import optimizer_argparse from ax.generators.torch.botorch_modular.surrogate import Surrogate -from ax.generators.torch.botorch_moo_defaults import infer_objective_thresholds +from ax.generators.torch.botorch_moo_utils import infer_objective_thresholds from ax.generators.torch.utils import ( _get_X_pending_and_observed, get_botorch_objective_and_transform, diff --git a/ax/generators/torch/botorch_moo_defaults.py b/ax/generators/torch/botorch_moo_defaults.py deleted file mode 100644 index 6dac5340275..00000000000 --- a/ax/generators/torch/botorch_moo_defaults.py +++ /dev/null @@ -1,775 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -""" -References - -.. [Daulton2020qehvi] - S. Daulton, M. Balandat, and E. Bakshy. Differentiable Expected Hypervolume - Improvement for Parallel Multi-Objective Bayesian Optimization. Advances in Neural - Information Processing Systems 33, 2020. - -.. [Daulton2021nehvi] - S. Daulton, M. Balandat, and E. Bakshy. Parallel Bayesian Optimization of - Multiple Noisy Objectives with Expected Hypervolume Improvement. 
Advances - in Neural Information Processing Systems 34, 2021. - -.. [Ament2023logei] - S. Ament, S. Daulton, D. Eriksson, M. Balandat, and E. Bakshy. - Unexpected Improvements to Expected Improvement for Bayesian Optimization. Advances - in Neural Information Processing Systems 36, 2023. -""" - -from __future__ import annotations - -from collections.abc import Callable -from typing import cast, Optional, Union - -import torch -from ax.exceptions.core import AxError -from ax.generators.torch.botorch_defaults import NO_OBSERVED_POINTS_MESSAGE -from ax.generators.torch.botorch_modular.optimizer_defaults import ( - BATCH_LIMIT, - INIT_BATCH_LIMIT, -) -from ax.generators.torch.utils import get_outcome_constraint_transforms, subset_model -from ax.generators.torch_base import TorchGenerator -from botorch.acquisition import get_acquisition_function -from botorch.acquisition.acquisition import AcquisitionFunction -from botorch.acquisition.multi_objective.logei import ( - qLogExpectedHypervolumeImprovement, - qLogNoisyExpectedHypervolumeImprovement, -) -from botorch.acquisition.multi_objective.monte_carlo import ( - qExpectedHypervolumeImprovement, - qNoisyExpectedHypervolumeImprovement, -) -from botorch.acquisition.multi_objective.objective import WeightedMCMultiOutputObjective -from botorch.acquisition.multi_objective.utils import get_default_partitioning_alpha -from botorch.models.model import Model -from botorch.optim.optimize import optimize_acqf_list -from botorch.posteriors.gpytorch import GPyTorchPosterior -from botorch.posteriors.posterior import Posterior -from botorch.posteriors.posterior_list import PosteriorList -from botorch.utils.multi_objective.hypervolume import infer_reference_point -from botorch.utils.multi_objective.pareto import is_non_dominated -from pyre_extensions import assert_is_instance, none_throws -from torch import Tensor - -DEFAULT_EHVI_MC_SAMPLES = 128 - - -# Callable that takes tensors of observations and model parameters, -# then returns means of observations that make up a pareto frontier, -# along with their covariances and their index in the input observations. -TFrontierEvaluator = Callable[ - [ - TorchGenerator, - Tensor, - Optional[Tensor], - Optional[Tensor], - Optional[Tensor], - Optional[Tensor], - Optional[tuple[Tensor, Tensor]], - ], - tuple[Tensor, Tensor, Tensor], -] - -NO_FEASIBLE_POINTS_MESSAGE = ( - " Cannot infer objective thresholds due to no observed feasible points. " - " This likely means that one or more outcome constraints is set too strictly. " - " Consider adding thresholds to your objectives to bypass this error." -) - - -def get_weighted_mc_objective_and_objective_thresholds( - objective_weights: Tensor, objective_thresholds: Tensor -) -> tuple[WeightedMCMultiOutputObjective, Tensor]: - r"""Construct weighted objective and apply the weights to objective thresholds. - - Args: - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. - objective_thresholds: A tensor containing thresholds forming a reference point - from which to calculate pareto frontier hypervolume. Points that do not - dominate the objective_thresholds contribute nothing to hypervolume. 
- - Returns: - A two-element tuple with the objective and objective thresholds: - - - The objective - - The objective thresholds - - """ - nonzero_idcs = objective_weights.nonzero(as_tuple=False).view(-1) - objective_weights = objective_weights[nonzero_idcs] - objective_thresholds = objective_thresholds[nonzero_idcs] - objective = WeightedMCMultiOutputObjective( - weights=objective_weights, outcomes=nonzero_idcs.tolist() - ) - objective_thresholds = torch.mul(objective_thresholds, objective_weights) - return objective, objective_thresholds - - -def get_NEHVI( - model: Model, - objective_weights: Tensor, - objective_thresholds: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - *, - prune_baseline: bool = True, - mc_samples: int = DEFAULT_EHVI_MC_SAMPLES, - alpha: float | None = None, - marginalize_dim: int | None = None, - cache_root: bool = True, - seed: int | None = None, -) -> qNoisyExpectedHypervolumeImprovement: - r"""Instantiates a qNoisyExpectedHyperVolumeImprovement acquisition function. - - Args: - model: The underlying model which the acqusition function uses - to estimate acquisition values of candidates. - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. (Not used by single task models) - X_observed: A tensor containing points observed for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - X_pending: A tensor containing points whose evaluation is pending (i.e. - that have been submitted for evaluation) present for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - prune_baseline: If True, prune the baseline points for NEI (default: True). - mc_samples: The number of MC samples to use (default: 512). - alpha: The hyperparameter controlling the approximate non-dominated - partitioning. The default value of 0.0 means an exact partitioning - is used. As the number of objectives `m` increases, consider increasing - this parameter in order to limit computational complexity (default: None). - marginalize_dim: The dimension along which to marginalize over, used for fully - Bayesian models (default: None). - cache_root: If True, cache the root of the covariance matrix (default: True). - seed: The random seed for generating random starting points for optimization ( - default: None). - - Returns: - qNoisyExpectedHyperVolumeImprovement: The instantiated acquisition function. 
- """ - return assert_is_instance( - _get_NEHVI( - acqf_name="qNEHVI", - model=model, - objective_weights=objective_weights, - objective_thresholds=objective_thresholds, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - prune_baseline=prune_baseline, - mc_samples=mc_samples, - alpha=alpha, - marginalize_dim=marginalize_dim, - cache_root=cache_root, - seed=seed, - ), - qNoisyExpectedHypervolumeImprovement, - ) - - -def get_qLogNEHVI( - model: Model, - objective_weights: Tensor, - objective_thresholds: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - *, - prune_baseline: bool = True, - mc_samples: int = DEFAULT_EHVI_MC_SAMPLES, - alpha: float | None = None, - marginalize_dim: int | None = None, - cache_root: bool = True, - seed: int | None = None, -) -> qLogNoisyExpectedHypervolumeImprovement: - r"""Instantiates a qLogNoisyExpectedHyperVolumeImprovement acquisition function. - - Args: - model: The underlying model which the acqusition function uses - to estimate acquisition values of candidates. - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. (Not used by single task models) - X_observed: A tensor containing points observed for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - X_pending: A tensor containing points whose evaluation is pending (i.e. - that have been submitted for evaluation) present for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - prune_baseline: If True, prune the baseline points for NEI (default: True). - mc_samples: The number of MC samples to use (default: 512). - alpha: The hyperparameter controlling the approximate non-dominated - partitioning. The default value of 0.0 means an exact partitioning - is used. As the number of objectives `m` increases, consider increasing - this parameter in order to limit computational complexity (default: None). - marginalize_dim: The dimension along which to marginalize over, used for fully - Bayesian models (default: None). - cache_root: If True, cache the root of the covariance matrix (default: True). - seed: The random seed for generating random starting points for optimization ( - default: None). - - Returns: - qLogNoisyExpectedHyperVolumeImprovement: The instantiated acquisition function. 
- """ - return assert_is_instance( - _get_NEHVI( - acqf_name="qLogNEHVI", - model=model, - objective_weights=objective_weights, - objective_thresholds=objective_thresholds, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - prune_baseline=prune_baseline, - mc_samples=mc_samples, - alpha=alpha, - marginalize_dim=marginalize_dim, - cache_root=cache_root, - seed=seed, - ), - qLogNoisyExpectedHypervolumeImprovement, - ) - - -def _get_NEHVI( - acqf_name: str, - model: Model, - objective_weights: Tensor, - objective_thresholds: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - *, - prune_baseline: bool = True, - mc_samples: int = DEFAULT_EHVI_MC_SAMPLES, - alpha: float | None = None, - marginalize_dim: int | None = None, - cache_root: bool = True, - seed: int | None = None, -) -> qNoisyExpectedHypervolumeImprovement | qLogNoisyExpectedHypervolumeImprovement: - if X_observed is None: - raise ValueError(NO_OBSERVED_POINTS_MESSAGE) - # construct Objective module - ( - objective, - objective_thresholds, - ) = get_weighted_mc_objective_and_objective_thresholds( - objective_weights=objective_weights, objective_thresholds=objective_thresholds - ) - # For EHVI acquisition functions we pass the constraint transform directly. - if outcome_constraints is None: - cons_tfs = None - else: - cons_tfs = get_outcome_constraint_transforms(outcome_constraints) - num_objectives = objective_thresholds.shape[0] - if alpha is None: - alpha = get_default_partitioning_alpha(num_objectives=num_objectives) - # NOTE: Not using checked_cast here because for Python 3.9, isinstance fails with - # `TypeError: Subscripted generics cannot be used with class and instance checks`. - return cast( - Union[ - qNoisyExpectedHypervolumeImprovement, - qLogNoisyExpectedHypervolumeImprovement, - ], - get_acquisition_function( - acquisition_function_name=acqf_name, - model=model, - objective=objective, - X_observed=X_observed, - X_pending=X_pending, - constraints=cons_tfs, - prune_baseline=prune_baseline, - mc_samples=mc_samples, - alpha=alpha, - seed=( - seed - if seed is not None - else cast(int, torch.randint(1, 10000, (1,)).item()) - ), - ref_point=objective_thresholds.tolist(), - marginalize_dim=marginalize_dim, - cache_root=cache_root, - ), - ) - - -def get_EHVI( - model: Model, - objective_weights: Tensor, - objective_thresholds: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - *, - mc_samples: int = DEFAULT_EHVI_MC_SAMPLES, - alpha: float | None = None, - seed: int | None = None, -) -> qExpectedHypervolumeImprovement: - r"""Instantiates a qExpectedHyperVolumeImprovement acquisition function. - - Args: - model: The underlying model which the acqusition function uses - to estimate acquisition values of candidates. - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. - objective_thresholds: A tensor containing thresholds forming a reference point - from which to calculate pareto frontier hypervolume. Points that do not - dominate the objective_thresholds contribute nothing to hypervolume. - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. 
(Not used by single task models) - X_observed: A tensor containing points observed for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - X_pending: A tensor containing points whose evaluation is pending (i.e. - that have been submitted for evaluation) present for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - mc_samples: The number of MC samples to use (default: 512). - alpha: The hyperparameter controlling the approximate non-dominated - partitioning. The default value of 0.0 means an exact partitioning - is used. As the number of objectives `m` increases, consider increasing - this parameter in order to limit computational complexity. - seed: The random seed for generating random starting points for optimization. - - Returns: - qExpectedHypervolumeImprovement: The instantiated acquisition function. - """ - return assert_is_instance( - _get_EHVI( - acqf_name="qEHVI", - model=model, - objective_weights=objective_weights, - objective_thresholds=objective_thresholds, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - mc_samples=mc_samples, - alpha=alpha, - seed=seed, - ), - qExpectedHypervolumeImprovement, - ) - - -def get_qLogEHVI( - model: Model, - objective_weights: Tensor, - objective_thresholds: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - *, - mc_samples: int = DEFAULT_EHVI_MC_SAMPLES, - alpha: float | None = None, - seed: int | None = None, -) -> qLogExpectedHypervolumeImprovement: - r"""Instantiates a qLogExpectedHyperVolumeImprovement acquisition function. - - Args: - model: The underlying model which the acqusition function uses - to estimate acquisition values of candidates. - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. - objective_thresholds: A tensor containing thresholds forming a reference point - from which to calculate pareto frontier hypervolume. Points that do not - dominate the objective_thresholds contribute nothing to hypervolume. - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. (Not used by single task models) - X_observed: A tensor containing points observed for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - X_pending: A tensor containing points whose evaluation is pending (i.e. - that have been submitted for evaluation) present for all objective - outcomes and outcomes that appear in the outcome constraints (if - there are any). - mc_samples: The number of MC samples to use (default: 512). - alpha: The hyperparameter controlling the approximate non-dominated - partitioning. The default value of 0.0 means an exact partitioning - is used. As the number of objectives `m` increases, consider increasing - this parameter in order to limit computational complexity. - seed: The random seed for generating random starting points for optimization. - - Returns: - qLogExpectedHypervolumeImprovement: The instantiated acquisition function. 
- """ - return assert_is_instance( - _get_EHVI( - acqf_name="qLogEHVI", - model=model, - objective_weights=objective_weights, - objective_thresholds=objective_thresholds, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - mc_samples=mc_samples, - alpha=alpha, - seed=seed, - ), - qLogExpectedHypervolumeImprovement, - ) - - -def _get_EHVI( - acqf_name: str, - model: Model, - objective_weights: Tensor, - objective_thresholds: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - X_observed: Tensor | None = None, - X_pending: Tensor | None = None, - *, - mc_samples: int = DEFAULT_EHVI_MC_SAMPLES, - alpha: float | None = None, - seed: int | None = None, -) -> qExpectedHypervolumeImprovement | qLogExpectedHypervolumeImprovement: - if X_observed is None: - raise ValueError(NO_OBSERVED_POINTS_MESSAGE) - # construct Objective module - ( - objective, - objective_thresholds, - ) = get_weighted_mc_objective_and_objective_thresholds( - objective_weights=objective_weights, objective_thresholds=objective_thresholds - ) - with torch.no_grad(): - Y = _check_posterior_type(model.posterior(X_observed)).mean - # For EHVI acquisition functions we pass the constraint transform directly. - if outcome_constraints is None: - cons_tfs = None - else: - cons_tfs = get_outcome_constraint_transforms( - outcome_constraints=outcome_constraints - ) - num_objectives = objective_thresholds.shape[0] - # NOTE: Not using checked_cast here because for Python 3.9, isinstance fails with - # `TypeError: Subscripted generics cannot be used with class and instance checks`. - return cast( - Union[qExpectedHypervolumeImprovement, qLogExpectedHypervolumeImprovement], - get_acquisition_function( - acquisition_function_name=acqf_name, - model=model, - objective=objective, - X_observed=X_observed, - X_pending=X_pending, - constraints=cons_tfs, - mc_samples=mc_samples, - alpha=( - get_default_partitioning_alpha(num_objectives=num_objectives) - if alpha is None - else alpha - ), - seed=( - seed - if seed is not None - else cast(int, torch.randint(1, 10000, (1,)).item()) - ), - ref_point=objective_thresholds.tolist(), - Y=Y, - ), - ) - - -# TODO (jej): rewrite optimize_acqf wrappers to avoid duplicate code. -def scipy_optimizer_list( - acq_function_list: list[AcquisitionFunction], - bounds: Tensor, - inequality_constraints: list[tuple[Tensor, Tensor, float]] | None = None, - fixed_features: dict[int, float] | None = None, - rounding_func: Callable[[Tensor], Tensor] | None = None, - num_restarts: int = 20, - raw_samples: int | None = None, - options: dict[str, bool | float | int | str] | None = None, -) -> tuple[Tensor, Tensor]: - r"""Sequential optimizer using scipy's minimize module on a numpy-adapter. - - The ith acquisition in the sequence uses the ith given acquisition_function. - - Args: - acq_function_list: A list of botorch AcquisitionFunctions, - optimized sequentially. - bounds: A `2 x d`-dim tensor, where `bounds[0]` (`bounds[1]`) are the - lower (upper) bounds of the feasible hyperrectangle. - n: The number of candidates to generate. - inequality constraints: A list of tuples (indices, coefficients, rhs), - with each tuple encoding an inequality constraint of the form - `\sum_i (X[indices[i]] * coefficients[i]) >= rhs` - fixed_features: A map {feature_index: value} for features that should - be fixed to a particular value during generation. - rounding_func: A function that rounds an optimization result - appropriately (i.e., according to `round-trip` transformations). 
- - Returns: - 2-element tuple containing - - - A `n x d`-dim tensor of generated candidates. - - A `n`-dim tensor of conditional acquisition - values, where `i`-th element is the expected acquisition value - conditional on having observed candidates `0,1,...,i-1`. - """ - # Use SLSQP by default for small problems since it yields faster wall times. - optimize_options: dict[str, bool | float | int | str] = { - "batch_limit": BATCH_LIMIT, - "init_batch_limit": INIT_BATCH_LIMIT, - "method": "SLSQP", - } - if options is not None: - optimize_options.update(options) - X, expected_acquisition_value = optimize_acqf_list( - acq_function_list=acq_function_list, - bounds=bounds, - num_restarts=num_restarts, - raw_samples=50 * num_restarts if raw_samples is None else raw_samples, - options=optimize_options, - inequality_constraints=inequality_constraints, - fixed_features=fixed_features, - post_processing_func=rounding_func, - ) - return X, expected_acquisition_value - - -def pareto_frontier_evaluator( - model: TorchGenerator | None, - objective_weights: Tensor, - objective_thresholds: Tensor | None = None, - X: Tensor | None = None, - Y: Tensor | None = None, - Yvar: Tensor | None = None, - outcome_constraints: tuple[Tensor, Tensor] | None = None, -) -> tuple[Tensor, Tensor, Tensor]: - """Return outcomes predicted to lie on a pareto frontier. - - Given a model and points to evaluate, use the model to predict which points - lie on the Pareto frontier. - - Args: - model: Model used to predict outcomes. - objective_weights: A `m` tensor of values indicating the weight to put - on different outcomes. For pareto frontiers only the sign matters. - objective_thresholds: A tensor containing thresholds forming a reference point - from which to calculate pareto frontier hypervolume. Points that do not - dominate the objective_thresholds contribute nothing to hypervolume. - X: A `n x d` tensor of features to evaluate. - Y: A `n x m` tensor of outcomes to use instead of predictions. - Yvar: A `n x m x m` tensor of input covariances (NaN if unobserved). - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. - - Returns: - 3-element tuple containing - - - A `j x m` tensor of outcome on the pareto frontier. j is the number - of frontier points. - - A `j x m x m` tensor of predictive covariances. - cov[j, m1, m2] is Cov[m1@j, m2@j]. - - A `j` tensor of the index of each frontier point in the input Y. - """ - # TODO: better input validation, making more explicit whether we are using - # model predictions or not - if X is not None: - Y, Yvar = none_throws(model).predict(X) - # model.predict returns cpu tensors - Y = Y.to(X.device) - Yvar = Yvar.to(X.device) - elif Y is None or Yvar is None: - raise ValueError( - "Requires `X` to predict or both `Y` and `Yvar` to select a subset of " - "points on the pareto frontier." - ) - - # Apply objective_weights to outcomes and objective_thresholds. - # If objective_thresholds is not None use a dummy tensor of zeros. 
- ( - obj, - weighted_objective_thresholds, - ) = get_weighted_mc_objective_and_objective_thresholds( - objective_weights=objective_weights, - objective_thresholds=( - objective_thresholds - if objective_thresholds is not None - else torch.zeros( - objective_weights.shape, - dtype=objective_weights.dtype, - device=objective_weights.device, - ) - ), - ) - Y_obj = obj(Y) - indx_frontier = torch.arange(Y.shape[0], dtype=torch.long, device=Y.device) - - # Filter Y, Yvar, Y_obj to items that dominate all objective thresholds - if objective_thresholds is not None: - objective_thresholds_mask = torch.all( - Y_obj >= weighted_objective_thresholds, dim=1 - ) - Y = Y[objective_thresholds_mask] - Yvar = Yvar[objective_thresholds_mask] - Y_obj = Y_obj[objective_thresholds_mask] - indx_frontier = indx_frontier[objective_thresholds_mask] - - # Get feasible points that do not violate outcome_constraints - if outcome_constraints is not None: - cons_tfs = get_outcome_constraint_transforms(outcome_constraints) - # Handle NaNs in Y, if those elements are not part of the constraints. - # By setting the unused elements to 0, we prevent them from marking - # the whole constraint value as NaN and evaluating to infeasible. - Y_cons = Y.clone() - Y_cons[..., (outcome_constraints[0] == 0).all(dim=0)] = 0 - # pyre-ignore [16] - feas = torch.stack([c(Y_cons) <= 0 for c in cons_tfs], dim=-1).all(dim=-1) - Y = Y[feas] - Yvar = Yvar[feas] - Y_obj = Y_obj[feas] - indx_frontier = indx_frontier[feas] - - if Y.shape[0] == 0: - # if there are no feasible points that are better than the reference point - # return empty tensors - return Y.cpu(), Yvar.cpu(), indx_frontier.cpu() - - # calculate pareto front with only objective outcomes: - frontier_mask = is_non_dominated(Y_obj) - - # Apply masks - Y_frontier = Y[frontier_mask] - Yvar_frontier = Yvar[frontier_mask] - indx_frontier = indx_frontier[frontier_mask] - return Y_frontier.cpu(), Yvar_frontier.cpu(), indx_frontier.cpu() - - -def infer_objective_thresholds( - model: Model, - objective_weights: Tensor, # objective_directions - X_observed: Tensor, - outcome_constraints: tuple[Tensor, Tensor] | None = None, - subset_idcs: Tensor | None = None, - objective_thresholds: Tensor | None = None, -) -> Tensor: - """Infer objective thresholds. - - This method uses the model-estimated Pareto frontier over the in-sample points - to infer absolute (not relativized) objective thresholds. - - This uses a heuristic that sets the objective threshold to be a scaled nadir - point, where the nadir point is scaled back based on the range of each - objective across the current in-sample Pareto frontier. - - See `botorch.utils.multi_objective.hypervolume.infer_reference_point` for - details on the heuristic. - - Args: - model: A fitted botorch Model. - objective_weights: The objective is to maximize a weighted sum of - the columns of f(x). These are the weights. These should not - be subsetted. - X_observed: A `n x d`-dim tensor of in-sample points to use for - determining the current in-sample Pareto frontier. - outcome_constraints: A tuple of (A, b). For k outcome constraints - and m outputs at f(x), A is (k x m) and b is (k x 1) such that - A f(x) <= b. These should not be subsetted. - subset_idcs: The indices of the outcomes that are modeled by the - provided model. If subset_idcs not None, this method infers - whether the model is subsetted. - objective_thresholds: Any known objective thresholds to pass to - `infer_reference_point` heuristic. This should not be subsetted. 
- If only a subset of the objectives have known thresholds, the - remaining objectives should be NaN. If no objective threshold - was provided, this can be `None`. - - Returns: - A `m`-dim tensor of objective thresholds, where the objective - threshold is `nan` if the outcome is not an objective. - """ - num_outcomes = objective_weights.shape[0] - if subset_idcs is None: - # Subset the model so that we only compute the posterior - # over the relevant outcomes. - # This is a no-op if the model is already only modeling - # the relevant outcomes. - subset_model_results = subset_model( - model=model, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - ) - model = subset_model_results.model - objective_weights = subset_model_results.objective_weights - outcome_constraints = subset_model_results.outcome_constraints - subset_idcs = subset_model_results.indices - else: - objective_weights = objective_weights[subset_idcs] - if outcome_constraints is not None: - outcome_constraints = ( - outcome_constraints[0][:, subset_idcs], - outcome_constraints[1], - ) - with torch.no_grad(): - pred = _check_posterior_type( - none_throws(model).posterior(none_throws(X_observed)) - ).mean - - if outcome_constraints is not None: - cons_tfs = get_outcome_constraint_transforms(outcome_constraints) - # pyre-ignore [16] - feas = torch.stack([c(pred) <= 0 for c in cons_tfs], dim=-1).all(dim=-1) - pred = pred[feas] - if pred.shape[0] == 0: - raise AxError(NO_FEASIBLE_POINTS_MESSAGE) - obj_mask = objective_weights.nonzero().view(-1) - obj_weights_subset = objective_weights[obj_mask] - obj = pred[..., obj_mask] * obj_weights_subset - pareto_obj = obj[is_non_dominated(obj)] - # If objective thresholds are provided, set max_ref_point accordingly. - if objective_thresholds is not None: - max_ref_point = objective_thresholds[obj_mask] * obj_weights_subset - else: - max_ref_point = None - objective_thresholds = infer_reference_point( - pareto_Y=pareto_obj, - max_ref_point=max_ref_point, - scale=0.1, - ) - # multiply by objective weights to return objective thresholds in the - # unweighted space - objective_thresholds = objective_thresholds * obj_weights_subset - full_objective_thresholds = torch.full( - (num_outcomes,), - float("nan"), - dtype=objective_weights.dtype, - device=objective_weights.device, - ) - obj_idcs = subset_idcs[obj_mask] - full_objective_thresholds[obj_idcs] = objective_thresholds.clone() - return full_objective_thresholds - - -def _check_posterior_type( - posterior: Posterior, -) -> GPyTorchPosterior | PosteriorList: - """Check whether the posterior type is `GPyTorchPosterior` or `PosteriorList`.""" - if isinstance(posterior, GPyTorchPosterior) or isinstance(posterior, PosteriorList): - return posterior - else: - raise ValueError( - f"Value was not of type GPyTorchPosterior or PosteriorList:\n{posterior}" - ) diff --git a/ax/generators/torch/botorch_moo_utils.py b/ax/generators/torch/botorch_moo_utils.py new file mode 100644 index 00000000000..86a102e3602 --- /dev/null +++ b/ax/generators/torch/botorch_moo_utils.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +""" +References + +.. [Daulton2020qehvi] + S. Daulton, M. Balandat, and E. Bakshy. Differentiable Expected Hypervolume + Improvement for Parallel Multi-Objective Bayesian Optimization. 
Advances in Neural + Information Processing Systems 33, 2020. + +.. [Daulton2021nehvi] + S. Daulton, M. Balandat, and E. Bakshy. Parallel Bayesian Optimization of + Multiple Noisy Objectives with Expected Hypervolume Improvement. Advances + in Neural Information Processing Systems 34, 2021. + +.. [Ament2023logei] + S. Ament, S. Daulton, D. Eriksson, M. Balandat, and E. Bakshy. + Unexpected Improvements to Expected Improvement for Bayesian Optimization. Advances + in Neural Information Processing Systems 36, 2023. +""" + +from __future__ import annotations + +import torch +from ax.exceptions.core import AxError +from ax.generators.torch.utils import subset_model +from ax.generators.torch_base import TorchGenerator +from botorch.acquisition.multi_objective.objective import WeightedMCMultiOutputObjective +from botorch.models.model import Model +from botorch.posteriors.gpytorch import GPyTorchPosterior +from botorch.posteriors.posterior import Posterior +from botorch.posteriors.posterior_list import PosteriorList +from botorch.utils.constraints import get_outcome_constraint_transforms +from botorch.utils.multi_objective.hypervolume import infer_reference_point +from botorch.utils.multi_objective.pareto import is_non_dominated +from pyre_extensions import none_throws +from torch import Tensor + + +NO_FEASIBLE_POINTS_MESSAGE = ( + " Cannot infer objective thresholds due to no observed feasible points. " + " This likely means that one or more outcome constraints is set too strictly. " + " Consider adding thresholds to your objectives to bypass this error." +) + + +def get_weighted_mc_objective_and_objective_thresholds( + objective_weights: Tensor, objective_thresholds: Tensor +) -> tuple[WeightedMCMultiOutputObjective, Tensor]: + r"""Construct weighted objective and apply the weights to objective thresholds. + + Args: + objective_weights: The objective is to maximize a weighted sum of + the columns of f(x). These are the weights. + objective_thresholds: A tensor containing thresholds forming a reference point + from which to calculate pareto frontier hypervolume. Points that do not + dominate the objective_thresholds contribute nothing to hypervolume. + + Returns: + A two-element tuple with the objective and objective thresholds: + + - The objective + - The objective thresholds + + """ + nonzero_idcs = objective_weights.nonzero(as_tuple=False).view(-1) + objective_weights = objective_weights[nonzero_idcs] + objective_thresholds = objective_thresholds[nonzero_idcs] + objective = WeightedMCMultiOutputObjective( + weights=objective_weights, outcomes=nonzero_idcs.tolist() + ) + objective_thresholds = torch.mul(objective_thresholds, objective_weights) + return objective, objective_thresholds + + +def pareto_frontier_evaluator( + model: TorchGenerator | None, + objective_weights: Tensor, + objective_thresholds: Tensor | None = None, + X: Tensor | None = None, + Y: Tensor | None = None, + Yvar: Tensor | None = None, + outcome_constraints: tuple[Tensor, Tensor] | None = None, +) -> tuple[Tensor, Tensor, Tensor]: + """Return outcomes predicted to lie on a pareto frontier. + + Given a model and points to evaluate, use the model to predict which points + lie on the Pareto frontier. + + Args: + model: Model used to predict outcomes. + objective_weights: A `m` tensor of values indicating the weight to put + on different outcomes. For pareto frontiers only the sign matters. + objective_thresholds: A tensor containing thresholds forming a reference point + from which to calculate pareto frontier hypervolume. 
Points that do not + dominate the objective_thresholds contribute nothing to hypervolume. + X: A `n x d` tensor of features to evaluate. + Y: A `n x m` tensor of outcomes to use instead of predictions. + Yvar: A `n x m x m` tensor of input covariances (NaN if unobserved). + outcome_constraints: A tuple of (A, b). For k outcome constraints + and m outputs at f(x), A is (k x m) and b is (k x 1) such that + A f(x) <= b. + + Returns: + 3-element tuple containing + + - A `j x m` tensor of outcomes on the pareto frontier. j is the number + of frontier points. + - A `j x m x m` tensor of predictive covariances. + cov[j, m1, m2] is Cov[m1@j, m2@j]. + - A `j` tensor of the index of each frontier point in the input Y. + """ + # TODO: better input validation, making more explicit whether we are using + # model predictions or not + if X is not None: + Y, Yvar = none_throws(model).predict(X) + # model.predict returns cpu tensors + Y = Y.to(X.device) + Yvar = Yvar.to(X.device) + elif Y is None or Yvar is None: + raise ValueError( + "Requires `X` to predict or both `Y` and `Yvar` to select a subset of " + "points on the pareto frontier." + ) + + # Apply objective_weights to outcomes and objective_thresholds. + # If objective_thresholds is None, use a dummy tensor of zeros. + ( + obj, + weighted_objective_thresholds, + ) = get_weighted_mc_objective_and_objective_thresholds( + objective_weights=objective_weights, + objective_thresholds=( + objective_thresholds + if objective_thresholds is not None + else torch.zeros( + objective_weights.shape, + dtype=objective_weights.dtype, + device=objective_weights.device, + ) + ), + ) + Y_obj = obj(Y) + indx_frontier = torch.arange(Y.shape[0], dtype=torch.long, device=Y.device) + + # Filter Y, Yvar, Y_obj to items that dominate all objective thresholds + if objective_thresholds is not None: + objective_thresholds_mask = torch.all( + Y_obj >= weighted_objective_thresholds, dim=1 + ) + Y = Y[objective_thresholds_mask] + Yvar = Yvar[objective_thresholds_mask] + Y_obj = Y_obj[objective_thresholds_mask] + indx_frontier = indx_frontier[objective_thresholds_mask] + + # Get feasible points that do not violate outcome_constraints + if outcome_constraints is not None: + cons_tfs = get_outcome_constraint_transforms(outcome_constraints) + # Handle NaNs in Y, if those elements are not part of the constraints. + # By setting the unused elements to 0, we prevent them from marking + # the whole constraint value as NaN and evaluating to infeasible.
+ Y_cons = Y.clone() + Y_cons[..., (outcome_constraints[0] == 0).all(dim=0)] = 0 + # pyre-ignore [16] + feas = torch.stack([c(Y_cons) <= 0 for c in cons_tfs], dim=-1).all(dim=-1) + Y = Y[feas] + Yvar = Yvar[feas] + Y_obj = Y_obj[feas] + indx_frontier = indx_frontier[feas] + + if Y.shape[0] == 0: + # if there are no feasible points that are better than the reference point + # return empty tensors + return Y.cpu(), Yvar.cpu(), indx_frontier.cpu() + + # calculate pareto front with only objective outcomes: + frontier_mask = is_non_dominated(Y_obj) + + # Apply masks + Y_frontier = Y[frontier_mask] + Yvar_frontier = Yvar[frontier_mask] + indx_frontier = indx_frontier[frontier_mask] + return Y_frontier.cpu(), Yvar_frontier.cpu(), indx_frontier.cpu() + + + def infer_objective_thresholds( + model: Model, + objective_weights: Tensor, # objective_directions + X_observed: Tensor, + outcome_constraints: tuple[Tensor, Tensor] | None = None, + subset_idcs: Tensor | None = None, + objective_thresholds: Tensor | None = None, + ) -> Tensor: + """Infer objective thresholds. + + This method uses the model-estimated Pareto frontier over the in-sample points + to infer absolute (not relativized) objective thresholds. + + This uses a heuristic that sets the objective threshold to be a scaled nadir + point, where the nadir point is scaled back based on the range of each + objective across the current in-sample Pareto frontier. + + See `botorch.utils.multi_objective.hypervolume.infer_reference_point` for + details on the heuristic. + + Args: + model: A fitted botorch Model. + objective_weights: The objective is to maximize a weighted sum of + the columns of f(x). These are the weights. These should not + be subsetted. + X_observed: A `n x d`-dim tensor of in-sample points to use for + determining the current in-sample Pareto frontier. + outcome_constraints: A tuple of (A, b). For k outcome constraints + and m outputs at f(x), A is (k x m) and b is (k x 1) such that + A f(x) <= b. These should not be subsetted. + subset_idcs: The indices of the outcomes that are modeled by the + provided model. If subset_idcs is not None, the model is assumed + to already be subsetted to these outcomes. + objective_thresholds: Any known objective thresholds to pass to + `infer_reference_point` heuristic. This should not be subsetted. + If only a subset of the objectives have known thresholds, the + remaining objectives should be NaN. If no objective threshold + was provided, this can be `None`. + + Returns: + A `m`-dim tensor of objective thresholds, where the objective + threshold is `nan` if the outcome is not an objective. + """ + num_outcomes = objective_weights.shape[0] + if subset_idcs is None: + # Subset the model so that we only compute the posterior + # over the relevant outcomes. + # This is a no-op if the model is already only modeling + # the relevant outcomes.
+ subset_model_results = subset_model( + model=model, + objective_weights=objective_weights, + outcome_constraints=outcome_constraints, + ) + model = subset_model_results.model + objective_weights = subset_model_results.objective_weights + outcome_constraints = subset_model_results.outcome_constraints + subset_idcs = subset_model_results.indices + else: + objective_weights = objective_weights[subset_idcs] + if outcome_constraints is not None: + outcome_constraints = ( + outcome_constraints[0][:, subset_idcs], + outcome_constraints[1], + ) + with torch.no_grad(): + pred = _check_posterior_type( + none_throws(model).posterior(none_throws(X_observed)) + ).mean + + if outcome_constraints is not None: + cons_tfs = get_outcome_constraint_transforms(outcome_constraints) + # pyre-ignore [16] + feas = torch.stack([c(pred) <= 0 for c in cons_tfs], dim=-1).all(dim=-1) + pred = pred[feas] + if pred.shape[0] == 0: + raise AxError(NO_FEASIBLE_POINTS_MESSAGE) + obj_mask = objective_weights.nonzero().view(-1) + obj_weights_subset = objective_weights[obj_mask] + obj = pred[..., obj_mask] * obj_weights_subset + pareto_obj = obj[is_non_dominated(obj)] + # If objective thresholds are provided, set max_ref_point accordingly. + if objective_thresholds is not None: + max_ref_point = objective_thresholds[obj_mask] * obj_weights_subset + else: + max_ref_point = None + objective_thresholds = infer_reference_point( + pareto_Y=pareto_obj, + max_ref_point=max_ref_point, + scale=0.1, + ) + # multiply by objective weights to return objective thresholds in the + # unweighted space + objective_thresholds = objective_thresholds * obj_weights_subset + full_objective_thresholds = torch.full( + (num_outcomes,), + float("nan"), + dtype=objective_weights.dtype, + device=objective_weights.device, + ) + obj_idcs = subset_idcs[obj_mask] + full_objective_thresholds[obj_idcs] = objective_thresholds.clone() + return full_objective_thresholds + + +def _check_posterior_type( + posterior: Posterior, +) -> GPyTorchPosterior | PosteriorList: + """Check whether the posterior type is `GPyTorchPosterior` or `PosteriorList`.""" + if isinstance(posterior, GPyTorchPosterior) or isinstance(posterior, PosteriorList): + return posterior + else: + raise ValueError( + f"Value was not of type GPyTorchPosterior or PosteriorList:\n{posterior}" + ) diff --git a/ax/generators/torch/tests/test_acquisition.py b/ax/generators/torch/tests/test_acquisition.py index 8fa5a44178a..733443d2766 100644 --- a/ax/generators/torch/tests/test_acquisition.py +++ b/ax/generators/torch/tests/test_acquisition.py @@ -916,7 +916,7 @@ def test_evaluate(self, mock_call: Mock, mock_evaluate: Mock) -> None: @mock_botorch_optimize @mock.patch( # pyre-ignore - "ax.generators.torch.botorch_moo_defaults._check_posterior_type", + "ax.generators.torch.botorch_moo_utils._check_posterior_type", wraps=lambda y: y, ) @mock.patch(f"{ACQUISITION_PATH}._get_X_pending_and_observed") diff --git a/ax/generators/torch/utils.py b/ax/generators/torch/utils.py index b8333ea07af..5103c356227 100644 --- a/ax/generators/torch/utils.py +++ b/ax/generators/torch/utils.py @@ -10,17 +10,13 @@ from dataclasses import dataclass from typing import Any, cast -import numpy as np import numpy.typing as npt import torch -from ax.core.search_space import SearchSpaceDigest from ax.exceptions.core import UnsupportedError from ax.generators.model_utils import ( filter_constraints_and_fixed_features, get_observed, ) -from ax.generators.random.sobol import SobolGenerator -from ax.generators.types import TConfig from 
ax.utils.common.constants import Keys from botorch.acquisition.acquisition import AcquisitionFunction from botorch.acquisition.analytic import PosteriorMean @@ -60,7 +56,7 @@ from botorch.utils.constraints import get_outcome_constraint_transforms from botorch.utils.datasets import SupervisedDataset from botorch.utils.objective import get_objective_weights_transform -from botorch.utils.sampling import sample_hypersphere, sample_simplex +from botorch.utils.sampling import sample_simplex from botorch.utils.transforms import is_ensemble from torch import Tensor from torch.nn import ModuleList # @manual @@ -202,41 +198,6 @@ def _get_X_pending_and_observed( return X_pending, unfiltered_X_observed -def _generate_sobol_points( - n_sobol: int, - search_space_digest: SearchSpaceDigest, - device: torch.device, - linear_constraints: tuple[Tensor, Tensor] | None = None, - fixed_features: dict[int, float] | None = None, - rounding_func: Callable[[Tensor], Tensor] | None = None, - model_gen_options: TConfig | None = None, -) -> Tensor: - linear_constraints_array = None - - if linear_constraints is not None: - linear_constraints_array = ( - linear_constraints[0].detach().cpu().numpy(), - linear_constraints[1].detach().cpu().numpy(), - ) - - array_rounding_func = None - if rounding_func is not None: - array_rounding_func = tensor_callable_to_array_callable( - tensor_func=rounding_func, device=device - ) - - sobol = SobolGenerator(deduplicate=False, seed=np.random.randint(10000)) - array_X, _ = sobol.gen( - n=n_sobol, - search_space_digest=search_space_digest, - linear_constraints=linear_constraints_array, - fixed_features=fixed_features, - rounding_func=array_rounding_func, - model_gen_options=model_gen_options, - ) - return torch.from_numpy(array_X).to(device) - - def subset_model( model: Model, objective_weights: Tensor, @@ -560,39 +521,6 @@ def predict_from_model( return mean, cov -# TODO(jej): Possibly refactor to use "objective_directions". -def randomize_objective_weights( - objective_weights: Tensor, - random_scalarization_distribution: str = SIMPLEX, -) -> Tensor: - """Generate a random weighting based on acquisition function settings. - - Args: - objective_weights: Base weights to multiply by random values. - random_scalarization_distribution: "simplex" or "hypersphere". - - Returns: - A normalized list of indices such that each index is between `0` and `d-1`. - """ - # Set distribution and sample weights. - distribution = random_scalarization_distribution - dtype = objective_weights.dtype - device = objective_weights.device - if distribution == SIMPLEX: - random_weights = sample_simplex( - len(objective_weights), dtype=dtype, device=device - ).squeeze() - elif distribution == HYPERSPHERE: - random_weights = torch.abs( - sample_hypersphere( - len(objective_weights), dtype=dtype, device=device - ).squeeze() - ) - # pyre-fixme[61]: `random_weights` may not be initialized here. - objective_weights = torch.mul(objective_weights, random_weights) - return objective_weights - - def _datasets_to_legacy_inputs( datasets: Sequence[SupervisedDataset], ) -> tuple[list[Tensor], list[Tensor], list[Tensor]]: