From 272b6dd8ae213157cca5551fe6e58f3559836d61 Mon Sep 17 00:00:00 2001 From: "Jacob E. Jensen" Date: Tue, 25 Aug 2020 13:57:05 -0700 Subject: [PATCH] MultiObjective model and modelbridge Summary: Large refactor to begin adding richer support for the MultiObjective API and to support qNEHVI. * Creates MultiObjectiveBotorchModel and MultiObjectiveTorchModelBridge and adds "ref_point" to those APIs, along with moving logic for handling random and Chebyshev scalarization methods. * Declutters botorch_defaults by creating botorch_moo_defaults for multi-objective-specific methods. * Updates other APIs. Pending (not necessarily this diff) * Smarter "ref_point" inference. * Move and add unit tests. (Scalarization partially tested in "test_botorch_models"; EHVI not tested) * qNEHVI * Pareto frontier and hypervolume methods in ModelBridge Reviewed By: sdaulton Differential Revision: D22412876 fbshipit-source-id: 3d256dd3908408c214c00ca4be104d1d35552ecc --- ax/modelbridge/__init__.py | 2 + ax/modelbridge/base.py | 1 + ax/modelbridge/factory.py | 57 ++- ax/modelbridge/multi_objective_torch.py | 245 +++++++++++ ax/modelbridge/registry.py | 9 + ax/modelbridge/tests/test_factory.py | 39 +- .../test_multi_objective_torch_modelbridge.py | 194 +++++++++ ax/models/tests/test_botorch_model.py | 170 -------- ax/models/tests/test_botorch_moo_defaults.py | 46 ++ ax/models/tests/test_botorch_moo_model.py | 405 ++++++++++++++++++ ax/models/torch/botorch.py | 114 ++--- ax/models/torch/botorch_defaults.py | 57 +-- ax/models/torch/botorch_moo.py | 340 +++++++++++++++ ax/models/torch/botorch_moo_defaults.py | 148 +++++++ ax/service/utils/best_point.py | 23 +- ax/utils/testing/core_stubs.py | 23 +- sphinx/source/modelbridge.rst | 8 + 17 files changed, 1550 insertions(+), 331 deletions(-) create mode 100644 ax/modelbridge/multi_objective_torch.py create mode 100644 ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py create mode 100644 ax/models/tests/test_botorch_moo_defaults.py create mode 100644 ax/models/tests/test_botorch_moo_model.py create mode 100644 ax/models/torch/botorch_moo.py create mode 100644 ax/models/torch/botorch_moo_defaults.py diff --git a/ax/modelbridge/__init__.py b/ax/modelbridge/__init__.py index 546b3a11939..51d0f933b58 100644 --- a/ax/modelbridge/__init__.py +++ b/ax/modelbridge/__init__.py @@ -15,6 +15,7 @@ get_thompson, get_uniform, ) +from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge from ax.modelbridge.numpy import NumpyModelBridge from ax.modelbridge.torch import TorchModelBridge @@ -24,6 +25,7 @@ "Models", "NumpyModelBridge", "TorchModelBridge", + "MultiObjectiveTorchModelBridge", "get_factorial", "get_GPEI", "get_GPKG", diff --git a/ax/modelbridge/base.py b/ax/modelbridge/base.py index cfabe3c4b42..cb579829273 100644 --- a/ax/modelbridge/base.py +++ b/ax/modelbridge/base.py @@ -653,6 +653,7 @@ def gen( arms_by_signature=self._arms_by_signature, ) best_arm = best_arms[0] + arms, candidate_metadata = gen_arms( observation_features=observation_features, arms_by_signature=self._arms_by_signature, diff --git a/ax/modelbridge/factory.py b/ax/modelbridge/factory.py index 8123c3e827d..6027f3a291a 100644 --- a/ax/modelbridge/factory.py +++ b/ax/modelbridge/factory.py @@ -17,6 +17,7 @@ from ax.core.search_space import SearchSpace from ax.core.types import TConfig from ax.modelbridge.discrete import DiscreteModelBridge +from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge from ax.modelbridge.random import RandomModelBridge from
ax.modelbridge.registry import ( Cont_X_trans, @@ -363,6 +364,43 @@ def get_GPMES( return checked_cast(TorchModelBridge, Models.GPMES(**inputs)) # pyre-ignore: [16] +def get_MOO_EHVI( + experiment: Experiment, + data: Data, + ref_point: List[float], + search_space: Optional[SearchSpace] = None, + dtype: torch.dtype = torch.double, + device: torch.device = ( + torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + ), +) -> MultiObjectiveTorchModelBridge: + """Instantiates a multi-objective model that generates points with EHVI. + + Requires a `ref_point`, a list of baseline values, one for each objective being + optimized. An arm only improves hypervolume if it is strictly better than this + point in all metrics. + """ + # pyre-ignore: [16] `Optional` has no attribute `objective`. + if not isinstance(experiment.optimization_config.objective, MultiObjective): + raise ValueError("Multi-objective optimization requires multiple objectives.") + if data.df.empty: # pragma: no cover + raise ValueError("MultiObjectiveOptimization requires non-empty data.") + return checked_cast( + MultiObjectiveTorchModelBridge, + Models.MOO( + experiment=experiment, + data=data, + ref_point=ref_point, + search_space=search_space or experiment.search_space, + torch_dtype=dtype, + torch_device=device, + default_model_gen_options={ + "acquisition_function_kwargs": {"sequential": True} + }, + ), + ) + + def get_MOO_PAREGO( experiment: Experiment, data: Data, @@ -370,7 +408,7 @@ search_space: Optional[SearchSpace] = None, dtype: torch.dtype = torch.double, device: torch.device = DEFAULT_TORCH_DEVICE, -) -> TorchModelBridge: +) -> MultiObjectiveTorchModelBridge: """Instantiates a multi-objective model that generates points with ParEGO. qParEGO optimizes random augmented Chebyshev scalarizations of the multiple """ # pyre-ignore: [16] `Optional` has no attribute `objective`. if not isinstance(experiment.optimization_config.objective, MultiObjective): raise ValueError("Multi-objective optimization requires multiple objectives.") if data.df.empty: raise ValueError("MultiObjectiveOptimization requires non-empty data.") return checked_cast( - TorchModelBridge, - Models.BOTORCH( + MultiObjectiveTorchModelBridge, + Models.MOO( experiment=experiment, data=data, + ref_point=ref_point, search_space=search_space or experiment.search_space, torch_dtype=dtype, torch_device=device, - ref_point=None, + acqf_constructor=get_NEI, default_model_gen_options={ "acquisition_function_kwargs": { "chebyshev_scalarization": True, @@ -407,8 +446,8 @@ def get_MOO_RS( search_space: Optional[SearchSpace] = None, dtype: torch.dtype = torch.double, device: torch.device = DEFAULT_TORCH_DEVICE, -) -> TorchModelBridge: - """Instantiates a Linear Random Scalarization multi-objective model. +) -> MultiObjectiveTorchModelBridge: + """Instantiates a Random Scalarization multi-objective model. Chooses a different random linear scalarization of the objectives for generating each new candidate arm.
This will only explore the @@ -420,13 +459,15 @@ if data.df.empty: raise ValueError("MultiObjectiveOptimization requires non-empty data.") return checked_cast( - TorchModelBridge, - Models.BOTORCH( + MultiObjectiveTorchModelBridge, + Models.MOO( experiment=experiment, data=data, + ref_point=ref_point, search_space=search_space or experiment.search_space, torch_dtype=dtype, torch_device=device, + acqf_constructor=get_NEI, default_model_gen_options={ "acquisition_function_kwargs": { "random_scalarization": True, diff --git a/ax/modelbridge/multi_objective_torch.py b/ax/modelbridge/multi_objective_torch.py new file mode 100644 index 00000000000..537dec010e1 --- /dev/null +++ b/ax/modelbridge/multi_objective_torch.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Callable, Dict, List, Optional, Tuple, Type + +import numpy as np +import torch +from ax.core.data import Data +from ax.core.experiment import Experiment +from ax.core.generator_run import GeneratorRun +from ax.core.multi_type_experiment import MultiTypeExperiment +from ax.core.observation import ObservationData, ObservationFeatures +from ax.core.optimization_config import OptimizationConfig +from ax.core.search_space import SearchSpace +from ax.core.types import TCandidateMetadata, TConfig, TGenMetadata +from ax.modelbridge.array import FIT_MODEL_ERROR +from ax.modelbridge.torch import TorchModelBridge +from ax.modelbridge.transforms.base import Transform +from ax.models.torch_base import TorchModel +from ax.utils.common.logger import get_logger +from torch import Tensor + + +logger = get_logger("MultiObjectiveTorchModelBridge") + + +class MultiObjectiveTorchModelBridge(TorchModelBridge): +    """A model bridge for using multi-objective torch-based models. + + Specifies an interface that is implemented by MultiObjectiveBotorchModel. In + particular, the model should have methods fit, predict, and gen. See + MultiObjectiveBotorchModel for the API for each of these methods. + + Requires that all parameters have been transformed to RangeParameters + or FixedParameters with float type and no log scale. + + This class converts Ax parameter types to torch tensors before passing + them to the model. + """ + + _transformed_ref_point: Optional[List[float]] + _objective_metric_names: Optional[List[str]] + + def __init__( + self, + experiment: Experiment, + search_space: SearchSpace, + data: Data, + model: TorchModel, + transforms: List[Type[Transform]], + transform_configs: Optional[Dict[str, TConfig]] = None, + torch_dtype: Optional[torch.dtype] = None, # noqa T484 + torch_device: Optional[torch.device] = None, + status_quo_name: Optional[str] = None, + status_quo_features: Optional[ObservationFeatures] = None, + optimization_config: Optional[OptimizationConfig] = None, + fit_out_of_design: bool = False, + ref_point: Optional[List[float]] = None, + default_model_gen_options: Optional[TConfig] = None, + ) -> None: + if isinstance(experiment, MultiTypeExperiment) and ref_point is not None: + raise NotImplementedError( + "Ref-point dependent multi-objective optimization algorithms " + "like EHVI are not yet supported for MultiTypeExperiments. " + "Remove the reference point arg and use a compatible algorithm " + "like ParEGO."
+ ) + self.ref_point = ref_point + self._transformed_ref_point = None + self._objective_metric_names = None + oc = optimization_config or experiment.optimization_config + if oc: + self._objective_metric_names = [m.name for m in oc.objective.metrics] + super().__init__( + experiment=experiment, + search_space=search_space, + data=data, + model=model, + transforms=transforms, + transform_configs=transform_configs, + torch_dtype=torch_dtype, + torch_device=torch_device, + status_quo_name=status_quo_name, + status_quo_features=status_quo_features, + optimization_config=optimization_config, + fit_out_of_design=fit_out_of_design, + default_model_gen_options=default_model_gen_options, + ) + + def _model_gen( + self, + n: int, + bounds: List[Tuple[float, float]], + objective_weights: np.ndarray, + outcome_constraints: Optional[Tuple[np.ndarray, np.ndarray]], + linear_constraints: Optional[Tuple[np.ndarray, np.ndarray]], + fixed_features: Optional[Dict[int, float]], + pending_observations: Optional[List[np.ndarray]], + model_gen_options: Optional[TConfig], + rounding_func: Callable[[np.ndarray], np.ndarray], + target_fidelities: Optional[Dict[int, float]], + ) -> Tuple[np.ndarray, np.ndarray, TGenMetadata, List[TCandidateMetadata]]: + if not self.model: # pragma: no cover + raise ValueError(FIT_MODEL_ERROR.format(action="_model_gen")) + obj_w, oc_c, l_c, pend_obs = self._validate_and_convert_to_tensors( + objective_weights=objective_weights, + outcome_constraints=outcome_constraints, + linear_constraints=linear_constraints, + pending_observations=pending_observations, + ) + ref_point = None + if self._transformed_ref_point: + ref_point = self._transformed_ref_point + elif self.ref_point: + # Assign ref point if available. + logger.warning( + "No attribute _transformed_ref_point. Using untransformed ref_point." + ) + ref_point = self.ref_point + tensor_rounding_func = self._array_callable_to_tensor_callable(rounding_func) + augmented_model_gen_options = { + **self._default_model_gen_options, + **(model_gen_options or {}), + } + # pyre-fixme[16]: `Optional` has no attribute `gen`. 
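+        # Note: unlike `TorchModelBridge._model_gen`, this bridge also forwards
+        # `ref_point` to the model, so the wrapped model's `gen` is expected to
+        # accept a `ref_point` kwarg (see `MultiObjectiveBotorchModel.gen` below).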
+ X, w, gen_metadata, candidate_metadata = self.model.gen( + n=n, + bounds=bounds, + objective_weights=obj_w, + outcome_constraints=oc_c, + linear_constraints=l_c, + fixed_features=fixed_features, + pending_observations=pend_obs, + model_gen_options=augmented_model_gen_options, + rounding_func=tensor_rounding_func, + target_fidelities=target_fidelities, + ref_point=ref_point, + ) + return ( + X.detach().cpu().clone().numpy(), + w.detach().cpu().clone().numpy(), + gen_metadata, + candidate_metadata, + ) + + def _transform_data( + self, + obs_feats: List[ObservationFeatures], + obs_data: List[ObservationData], + search_space: SearchSpace, + transforms: Optional[List[Type[Transform]]], + transform_configs: Optional[Dict[str, TConfig]], + ) -> Tuple[List[ObservationFeatures], List[ObservationData], SearchSpace]: + """Initialize transforms and apply them to provided data.""" + # Run superclass version to fit transforms to observations + obs_feats, obs_data, search_space = super()._transform_data( + obs_feats=obs_feats, + obs_data=obs_data, + search_space=search_space, + transforms=transforms, + transform_configs=transform_configs, + ) + + ref_point = self.ref_point + metric_names = list(self._metric_names or []) + objective_metric_names = list(self._objective_metric_names or []) + if ref_point and metric_names and objective_metric_names: + num_metrics = len(metric_names) + ref_dict = dict(zip(objective_metric_names, ref_point)) + if obs_data: + # Create synthetic ObservationData representing the reference point. + # Pad with non-objective outcomes from existing data. + # Should always have existing data with BO. + sample_obs_data = obs_data[0] + padded_ref_dict: Dict[str, float] = dict( + zip(sample_obs_data.metric_names, sample_obs_data.means) + ) + padded_ref_dict.update(ref_dict) + ref_obs_data = [ + ObservationData( + metric_names=list(padded_ref_dict.keys()), + means=np.array(list(padded_ref_dict.values())), + covariance=np.zeros((num_metrics, num_metrics)), + ) + ] + ref_obs_feats = [] + + # Apply initialized transforms to reference point. + for t in self.transforms.values(): + ref_obs_data = t.transform_observation_data( + ref_obs_data, ref_obs_feats + ) + transformed_ref_obsd = ref_obs_data.pop() + transformed_ref_dict = dict( + zip(transformed_ref_obsd.metric_names, transformed_ref_obsd.means) + ) + self._transformed_ref_point = [] + for objective_metric_name in objective_metric_names: + # pyre-fixme[16] + self._transformed_ref_point.append( + transformed_ref_dict[objective_metric_name] + ) + else: + # No previous data means transform can't have been fit. + pass + return obs_feats, obs_data, search_space + + def gen( + self, + n: int, + search_space: Optional[SearchSpace] = None, + optimization_config: Optional[OptimizationConfig] = None, + pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None, + fixed_features: Optional[ObservationFeatures] = None, + model_gen_options: Optional[TConfig] = None, + ) -> GeneratorRun: + if optimization_config: + # Update objective metric names if new optimization config is present. 
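+            # A new optimization config may add or drop objectives; refreshing
+            # the names here keeps the reference point zipped against the correct
+            # objective metrics when `_transform_data` next runs.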
+ self._objective_metric_names = [ + m.name for m in optimization_config.objective.metrics + ] + return super().gen( + n=n, + search_space=search_space, + optimization_config=optimization_config, + pending_observations=pending_observations, + fixed_features=fixed_features, + model_gen_options=model_gen_options, + ) + + # TODO: Complete these stubs based on https://fb.quip.com/fUMRATIeahCy + def pareto_frontier(self, X: Tensor) -> Tensor: + raise NotImplementedError() + + def observed_pareto_frontier(self) -> Tensor: + raise NotImplementedError() + + def hypervolume(self, X: Tensor) -> Tensor: + raise NotImplementedError() + + def observed_hypervolume(self) -> Tensor: + raise NotImplementedError() diff --git a/ax/modelbridge/registry.py b/ax/modelbridge/registry.py index 35aea4e9de7..c55fd390fef 100644 --- a/ax/modelbridge/registry.py +++ b/ax/modelbridge/registry.py @@ -17,6 +17,7 @@ from ax.core.search_space import SearchSpace from ax.modelbridge.base import ModelBridge from ax.modelbridge.discrete import DiscreteModelBridge +from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge from ax.modelbridge.random import RandomModelBridge from ax.modelbridge.torch import TorchModelBridge from ax.modelbridge.transforms.base import Transform @@ -44,6 +45,7 @@ from ax.models.torch.botorch import BotorchModel from ax.models.torch.botorch_kg import KnowledgeGradient from ax.models.torch.botorch_mes import MaxValueEntropySearch +from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel from ax.utils.common.kwargs import ( consolidate_kwargs, get_function_argument_names, @@ -181,6 +183,12 @@ class ModelSetup(NamedTuple): model_class=UniformGenerator, transforms=Cont_X_trans, ), + "MOO": ModelSetup( + bridge_class=MultiObjectiveTorchModelBridge, + model_class=MultiObjectiveBotorchModel, + transforms=Cont_X_trans + Y_trans, + standard_bridge_kwargs=STANDARD_TORCH_BRIDGE_KWARGS, + ), } @@ -210,6 +218,7 @@ class Models(Enum): BOTORCH = "BO" EMPIRICAL_BAYES_THOMPSON = "EB" UNIFORM = "Uniform" + MOO = "MOO" @property def model_class(self) -> Type[Model]: diff --git a/ax/modelbridge/tests/test_factory.py b/ax/modelbridge/tests/test_factory.py index 2181ac3599f..078220c00d9 100644 --- a/ax/modelbridge/tests/test_factory.py +++ b/ax/modelbridge/tests/test_factory.py @@ -13,6 +13,7 @@ get_GPEI, get_GPKG, get_GPMES, + get_MOO_EHVI, get_MOO_PAREGO, get_MOO_RS, get_MTGP, @@ -20,6 +21,7 @@ get_thompson, get_uniform, ) +from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge from ax.modelbridge.random import RandomModelBridge from ax.modelbridge.torch import TorchModelBridge from ax.models.discrete.eb_thompson import EmpiricalBayesThompsonSampler @@ -216,7 +218,7 @@ def test_MOO_RS(self): multi_obj_exp.trials[0].run() moo_rs = get_MOO_RS(experiment=multi_obj_exp, data=multi_obj_exp.fetch_data()) - self.assertIsInstance(moo_rs, TorchModelBridge) + self.assertIsInstance(moo_rs, MultiObjectiveTorchModelBridge) self.assertEqual( { "acquisition_function_kwargs": { @@ -226,8 +228,8 @@ def test_MOO_RS(self): }, moo_rs._default_model_gen_options, ) - moo_rs_run = moo_rs.gen(n=5) - self.assertEqual(len(moo_rs_run.arms), 5) + moo_rs_run = moo_rs.gen(n=2) + self.assertEqual(len(moo_rs_run.arms), 2) def test_MOO_PAREGO(self): single_obj_exp = get_branin_experiment(with_batch=True) @@ -242,7 +244,7 @@ def test_MOO_PAREGO(self): moo_parego = get_MOO_PAREGO( experiment=multi_obj_exp, data=multi_obj_exp.fetch_data() ) - self.assertIsInstance(moo_parego, TorchModelBridge) + 
self.assertIsInstance(moo_parego, MultiObjectiveTorchModelBridge) self.assertEqual( { "acquisition_function_kwargs": { @@ -252,5 +254,30 @@ def test_MOO_PAREGO(self): }, moo_parego._default_model_gen_options, ) - moo_parego_run = moo_parego.gen(n=5) - self.assertEqual(len(moo_parego_run.arms), 5) + moo_parego_run = moo_parego.gen(n=2) + self.assertEqual(len(moo_parego_run.arms), 2) + + def test_MOO_EHVI(self): + single_obj_exp = get_branin_experiment(with_batch=True) + with self.assertRaises(ValueError): + get_MOO_EHVI( + experiment=single_obj_exp, + data=single_obj_exp.fetch_data(), + ref_point=[0, 0], + ) + + multi_obj_exp = get_branin_experiment_with_multi_objective(with_batch=True) + with self.assertRaises(ValueError): + get_MOO_EHVI( + experiment=multi_obj_exp, + data=multi_obj_exp.fetch_data(), + ref_point=[0, 0], + ) + + multi_obj_exp.trials[0].run() + moo_ehvi = get_MOO_EHVI( + experiment=multi_obj_exp, data=multi_obj_exp.fetch_data(), ref_point=[0, 0] + ) + self.assertIsInstance(moo_ehvi, MultiObjectiveTorchModelBridge) + moo_ehvi_run = moo_ehvi.gen(n=1) + self.assertEqual(len(moo_ehvi_run.arms), 1) diff --git a/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py b/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py new file mode 100644 index 00000000000..843b68c88dd --- /dev/null +++ b/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from unittest.mock import patch + +import numpy as np +from ax.core.metric import Metric +from ax.core.objective import MultiObjective +from ax.core.optimization_config import OptimizationConfig +from ax.core.outcome_constraint import ComparisonOp, OutcomeConstraint +from ax.modelbridge.base import ModelBridge +from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge +from ax.modelbridge.transforms.base import Transform +from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel +from ax.utils.common.testutils import TestCase +from ax.utils.testing.core_stubs import ( + get_branin_data_multi_objective, + get_branin_experiment_with_multi_objective, + get_branin_metric, + get_multi_type_experiment, +) + + +# Prepare mock transforms +class t1(Transform): + def transform_search_space(self, ss): + new_ss = ss.clone() + for param_name in new_ss.parameters: + new_ss.parameters[param_name]._lower += 1.0 + new_ss.parameters[param_name]._upper += 1.0 + return new_ss + + def transform_optimization_config( + self, optimization_config, modelbridge, fixed_features + ): + return ( + optimization_config + 1 + if isinstance(optimization_config, int) + else optimization_config + ) + + def transform_observation_features(self, x): + for obsf in x: + for param_name in obsf.parameters: + obsf.parameters[param_name] += 1 + return x + + def transform_observation_data(self, x, y): + for obsd in x: + obsd.means += 1 + return x + + def untransform_observation_features(self, x): + for obsf in x: + for param_name in obsf.parameters: + obsf.parameters[param_name] -= 1 + return x + + def untransform_observation_data(self, x, y): + for obsd in x: + obsd.means -= 1 + return x + + +class t2(Transform): + def transform_search_space(self, ss): + new_ss = ss.clone() + for param_name in new_ss.parameters: + new_ss.parameters[param_name]._lower = ( + new_ss.parameters[param_name]._lower ** 2 + ) + 
new_ss.parameters[param_name]._upper = ( + new_ss.parameters[param_name]._upper ** 2 + ) + return new_ss + + def transform_optimization_config( + self, optimization_config, modelbridge, fixed_features + ): + return ( + optimization_config ** 2 + if isinstance(optimization_config, int) + else optimization_config + ) + + def transform_observation_features(self, x): + for obsf in x: + for param_name in obsf.parameters: + obsf.parameters[param_name] = obsf.parameters[param_name] ** 2 + return x + + def transform_observation_data(self, x, y): + for obsd in x: + obsd.means = obsd.means ** 2 + return x + + def untransform_observation_features(self, x): + for obsf in x: + for param_name in obsf.parameters: + obsf.parameters[param_name] = np.sqrt(obsf.parameters[param_name]) + return x + + def untransform_observation_data(self, x, y): + for obsd in x: + obsd.means = np.sqrt(obsd.means) + return x + + +class MultiObjectiveTorchModelBridgeTest(TestCase): + @patch( + f"{ModelBridge.__module__}.unwrap_observation_data", + autospec=True, + return_value=(2, 2), + ) + @patch( + f"{ModelBridge.__module__}.ModelBridge.predict", + autospec=True, + return_value=({"m": [1.0]}, {"m": {"m": [2.0]}}), + ) + @patch( + ( + f"{MultiObjectiveTorchModelBridge.__module__}." + "MultiObjectiveTorchModelBridge._fit" + ), + autospec=True, + ) + def test_transform_ref_point(self, _mock_fit, _mock_predict, _mock_unwrap): + exp = get_branin_experiment_with_multi_objective( + has_optimization_config=True, with_batch=False + ) + modelbridge = MultiObjectiveTorchModelBridge( + search_space=exp.search_space, + model=MultiObjectiveBotorchModel(), + optimization_config=exp.optimization_config, + transforms=[t1, t2], + experiment=exp, + data=exp.fetch_data(), + ref_point=[0.0, 0.0], + ) + self.assertIsNone(modelbridge._transformed_ref_point) + exp = get_branin_experiment_with_multi_objective( + has_optimization_config=True, with_batch=True + ) + exp.attach_data(get_branin_data_multi_objective(trial_indices=exp.trials)) + modelbridge = MultiObjectiveTorchModelBridge( + search_space=exp.search_space, + model=MultiObjectiveBotorchModel(), + optimization_config=exp.optimization_config, + transforms=[t1, t2], + experiment=exp, + data=exp.fetch_data(), + ref_point=[0.0, 0.0], + ) + self.assertIsNotNone(modelbridge._transformed_ref_point) + self.assertEqual(2, len(modelbridge._transformed_ref_point)) + + mixed_objective_constraints_optimization_config = OptimizationConfig( + objective=MultiObjective( + metrics=[get_branin_metric(name="branin_b")], minimize=False + ), + outcome_constraints=[ + OutcomeConstraint( + metric=Metric(name="branin_a"), op=ComparisonOp.LEQ, bound=1 + ) + ], + ) + modelbridge = MultiObjectiveTorchModelBridge( + search_space=exp.search_space, + model=MultiObjectiveBotorchModel(), + optimization_config=mixed_objective_constraints_optimization_config, + transforms=[t1, t2], + experiment=exp, + data=exp.fetch_data(), + ref_point=[0.0], + ) + self.assertEqual({"branin_a", "branin_b"}, modelbridge._metric_names) + self.assertEqual(["branin_b"], modelbridge._objective_metric_names) + self.assertIsNotNone(modelbridge._transformed_ref_point) + self.assertEqual(1, len(modelbridge._transformed_ref_point)) + + def test_multi_type_experiment(self): + exp = get_multi_type_experiment() + with self.assertRaises(NotImplementedError): + MultiObjectiveTorchModelBridge( + experiment=exp, + search_space=exp.search_space, + model=MultiObjectiveBotorchModel(), + transforms=[], + data=exp.fetch_data(), + ref_point=[0.0], + ) diff --git 
a/ax/models/tests/test_botorch_model.py b/ax/models/tests/test_botorch_model.py index 9610dd4f886..7fe9f37de39 100644 --- a/ax/models/tests/test_botorch_model.py +++ b/ax/models/tests/test_botorch_model.py @@ -14,12 +14,10 @@ get_and_fit_model, recommend_best_out_of_sample_point, ) -from ax.models.torch.utils import HYPERSPHERE from ax.utils.common.testutils import TestCase from botorch.acquisition.utils import get_infeasible_cost from botorch.models import FixedNoiseGP, ModelListGP from botorch.utils import get_objective_weights_transform -from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization from gpytorch.likelihoods import _GaussianLikelihoodBase from gpytorch.priors import GammaPrior from gpytorch.priors.lkj_prior import LKJCovariancePrior @@ -510,171 +508,3 @@ def test_BotorchModelConstraints(self): # because there are no feasible points: with self.assertRaises(ValueError): model.gen(n, bounds, objective_weights) - - def test_BotorchModel_with_scalarization(self, dtype=torch.float, cuda=False): - tkwargs = { - "device": torch.device("cuda") if cuda else torch.device("cpu"), - "dtype": torch.float, - } - Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( - dtype=dtype, cuda=cuda, constant_noise=True - ) - Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( - dtype=dtype, cuda=cuda, constant_noise=True - ) - n = 3 - objective_weights = torch.tensor([1.0, 1.0], **tkwargs) - model = BotorchModel() - with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: - model.fit( - Xs=Xs1 + Xs2, - Ys=Ys1 + Ys2, - Yvars=Yvars1 + Yvars2, - bounds=bounds, - task_features=tfs, - feature_names=fns, - metric_names=mns, - fidelity_features=[], - ) - _mock_fit_model.assert_called_once() - - X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) - acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) - - with mock.patch( - SAMPLE_SIMPLEX_UTIL_PATH, - autospec=True, - return_value=torch.tensor([0.7, 0.3], **tkwargs), - ) as _mock_sample_simplex, mock.patch( - "ax.models.torch.botorch_defaults.optimize_acqf", - return_value=(X_dummy, acqfv_dummy), - ) as _: - model.gen( - n, - bounds, - objective_weights, - model_gen_options={ - "acquisition_function_kwargs": {"random_scalarization": True}, - "optimizer_kwargs": _get_optimizer_kwargs(), - }, - ) - # Sample_simplex should be called once for generated candidate. - self.assertEqual(n, _mock_sample_simplex.call_count) - - with mock.patch( - SAMPLE_HYPERSPHERE_UTIL_PATH, - autospec=True, - return_value=torch.tensor([0.6, 0.8], **tkwargs), - ) as _mock_sample_hypersphere, mock.patch( - "ax.models.torch.botorch_defaults.optimize_acqf", - return_value=(X_dummy, acqfv_dummy), - ) as _: - model.gen( - n, - bounds, - objective_weights, - model_gen_options={ - "acquisition_function_kwargs": { - "random_scalarization": True, - "random_scalarization_distribution": HYPERSPHERE, - }, - "optimizer_kwargs": _get_optimizer_kwargs(), - }, - ) - # Sample_simplex should be called once per generated candidate. 
- self.assertEqual(n, _mock_sample_hypersphere.call_count) - - with mock.patch( - CHEBYSHEV_SCALARIZATION_PATH, wraps=get_chebyshev_scalarization - ) as _mock_chebyshev_scalarization, mock.patch( - "ax.models.torch.botorch_defaults.optimize_acqf", - return_value=(X_dummy, acqfv_dummy), - ) as _: - model.gen( - n, - bounds, - objective_weights, - model_gen_options={ - "acquisition_function_kwargs": {"chebyshev_scalarization": True}, - "optimizer_kwargs": _get_optimizer_kwargs(), - }, - ) - # get_chebyshev_scalarization should be called once for generated candidate. - self.assertEqual(n, _mock_chebyshev_scalarization.call_count) - - def test_BotorchModel_with_scalarization_and_outcome_constraints( - self, dtype=torch.float, cuda=False - ): - tkwargs = { - "device": torch.device("cuda") if cuda else torch.device("cpu"), - "dtype": torch.float, - } - Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( - dtype=dtype, cuda=cuda, constant_noise=True - ) - Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( - dtype=dtype, cuda=cuda, constant_noise=True - ) - n = 2 - objective_weights = torch.tensor([1.0, 1.0], **tkwargs) - model = BotorchModel() - with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: - model.fit( - Xs=Xs1 + Xs2, - Ys=Ys1 + Ys2, - Yvars=Yvars1 + Yvars2, - bounds=bounds, - task_features=tfs, - feature_names=fns, - metric_names=mns, - fidelity_features=[], - ) - _mock_fit_model.assert_called_once() - - X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) - acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) - - with mock.patch( - SAMPLE_SIMPLEX_UTIL_PATH, - autospec=True, - return_value=torch.tensor([0.7, 0.3], **tkwargs), - ) as _mock_sample_simplex, mock.patch( - "ax.models.torch.botorch_defaults.optimize_acqf", - return_value=(X_dummy, acqfv_dummy), - ) as _: - model.gen( - n, - bounds, - objective_weights, - outcome_constraints=( - torch.tensor([[1.0, 1.0]], **tkwargs), - torch.tensor([[10.0]], **tkwargs), - ), - model_gen_options={ - "acquisition_function_kwargs": {"random_scalarization": True}, - "optimizer_kwargs": _get_optimizer_kwargs(), - }, - ) - self.assertEqual(n, _mock_sample_simplex.call_count) - - with mock.patch( - CHEBYSHEV_SCALARIZATION_PATH, wraps=get_chebyshev_scalarization - ) as _mock_chebyshev_scalarization, mock.patch( - "ax.models.torch.botorch_defaults.optimize_acqf", - return_value=(X_dummy, acqfv_dummy), - ) as _: - model.gen( - n, - bounds, - objective_weights, - outcome_constraints=( - torch.tensor([[1.0, 1.0]], **tkwargs), - torch.tensor([[10.0]], **tkwargs), - ), - model_gen_options={ - "acquisition_function_kwargs": {"chebyshev_scalarization": True}, - "optimizer_kwargs": _get_optimizer_kwargs(), - }, - ) - # get_chebyshev_scalarization should be called once for generated candidate. - self.assertEqual(n, _mock_chebyshev_scalarization.call_count) diff --git a/ax/models/tests/test_botorch_moo_defaults.py b/ax/models/tests/test_botorch_moo_defaults.py new file mode 100644 index 00000000000..0c19bb4cc9c --- /dev/null +++ b/ax/models/tests/test_botorch_moo_defaults.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
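+
+# Tests for the new multi-objective defaults: the Chebyshev-scalarization path
+# of `get_NEI` and input validation in `get_EHVI`.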
+ +import torch +from ax.models.torch.botorch_defaults import get_NEI +from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel +from ax.models.torch.botorch_moo_defaults import get_EHVI +from ax.utils.common.testutils import TestCase + + +class BotorchMOODefaultsTest(TestCase): + def test_get_NEI_with_chebyshev_and_missing_Ys_error(self): + model = MultiObjectiveBotorchModel() + x = torch.zeros(2, 2) + weights = torch.ones(2) + with self.assertRaisesRegex( + ValueError, "Chebyshev Scalarization requires Ys argument" + ): + get_NEI( + model=model, + X_observed=x, + objective_weights=weights, + chebyshev_scalarization=True, + ) + + def test_get_EHVI_input_validation_errors(self): + model = MultiObjectiveBotorchModel() + x = torch.zeros(2, 2) + weights = torch.ones(2) + ref_point = torch.zeros(2) + with self.assertRaisesRegex( + ValueError, "There are no feasible observed points." + ): + get_EHVI(model=model, objective_weights=weights, ref_point=ref_point) + with self.assertRaisesRegex( + ValueError, "Expected Hypervolume Improvement requires Ys argument" + ): + get_EHVI( + model=model, + X_observed=x, + objective_weights=weights, + ref_point=ref_point, + ) diff --git a/ax/models/tests/test_botorch_moo_model.py b/ax/models/tests/test_botorch_moo_model.py new file mode 100644 index 00000000000..6a4625ed94e --- /dev/null +++ b/ax/models/tests/test_botorch_moo_model.py @@ -0,0 +1,405 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict +from unittest import mock + +import torch +from ax.models.torch.botorch_defaults import get_NEI +from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel +from ax.models.torch.botorch_moo_defaults import get_EHVI +from ax.models.torch.utils import HYPERSPHERE +from ax.utils.common.testutils import TestCase +from botorch.acquisition.multi_objective import monte_carlo as moo_monte_carlo +from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization + + +FIT_MODEL_MO_PATH = "ax.models.torch.botorch_defaults.fit_gpytorch_model" +SAMPLE_SIMPLEX_UTIL_PATH = "ax.models.torch.utils.sample_simplex" +SAMPLE_HYPERSPHERE_UTIL_PATH = "ax.models.torch.utils.sample_hypersphere" +CHEBYSHEV_SCALARIZATION_PATH = ( + "ax.models.torch.botorch_defaults.get_chebyshev_scalarization" +) +EHVI_ACQF_PATH = ( + "botorch.acquisition.utils.moo_monte_carlo.qExpectedHypervolumeImprovement" +) + + +def dummy_func(X: torch.Tensor) -> torch.Tensor: + return X + + +def _get_optimizer_kwargs() -> Dict[str, int]: + return {"num_restarts": 2, "raw_samples": 2, "maxiter": 2, "batch_limit": 1} + + +def _get_torch_test_data( + dtype=torch.float, cuda=False, constant_noise=True, task_features=None +): + device = torch.device("cuda") if cuda else torch.device("cpu") + Xs = [torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], dtype=dtype, device=device)] + Ys = [torch.tensor([[3.0], [4.0]], dtype=dtype, device=device)] + Yvars = [torch.tensor([[0.0], [2.0]], dtype=dtype, device=device)] + if constant_noise: + Yvars[0].fill_(1.0) + bounds = [(0.0, 1.0), (1.0, 4.0), (2.0, 5.0)] + feature_names = ["x1", "x2", "x3"] + task_features = [] if task_features is None else task_features + metric_names = ["y", "r"] + return Xs, Ys, Yvars, bounds, task_features, feature_names, metric_names + + +class BotorchMOOModelTest(TestCase): + def test_BotorchMOOModel_cuda(self): + if torch.cuda.is_available(): 
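+            # Same test body as the CPU float32 case; only the target device differs.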
+ self.test_BotorchMOOModel_with_random_scalarization(cuda=True) + + def test_BotorchMOOModel_double(self): + self.test_BotorchMOOModel_with_random_scalarization(dtype=torch.double) + + def test_BotorchMOOModel_double_cuda(self): + if torch.cuda.is_available(): + self.test_BotorchMOOModel_with_random_scalarization( + dtype=torch.double, cuda=True + ) + + def test_BotorchMOOModel_with_random_scalarization( + self, dtype=torch.float, cuda=False + ): + tkwargs = { + "device": torch.device("cuda") if cuda else torch.device("cpu"), + "dtype": dtype, + } + Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + n = 3 + objective_weights = torch.tensor([1.0, 1.0], **tkwargs) + + X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + + model = MultiObjectiveBotorchModel(acqf_constructor=get_NEI) + with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: + model.fit( + Xs=Xs1 + Xs2, + Ys=Ys1 + Ys2, + Yvars=Yvars1 + Yvars2, + bounds=bounds, + task_features=tfs, + feature_names=fns, + metric_names=mns, + fidelity_features=[], + ) + _mock_fit_model.assert_called_once() + + with mock.patch( + SAMPLE_SIMPLEX_UTIL_PATH, + autospec=True, + return_value=torch.tensor([0.7, 0.3], **tkwargs), + ) as _mock_sample_simplex, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + model_gen_options={ + "acquisition_function_kwargs": {"random_scalarization": True}, + "optimizer_kwargs": _get_optimizer_kwargs(), + }, + ) + # sample_simplex should be called once per generated candidate. + self.assertEqual(n, _mock_sample_simplex.call_count) + + with mock.patch( + SAMPLE_HYPERSPHERE_UTIL_PATH, + autospec=True, + return_value=torch.tensor([0.6, 0.8], **tkwargs), + ) as _mock_sample_hypersphere, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + model_gen_options={ + "acquisition_function_kwargs": { + "random_scalarization": True, + "random_scalarization_distribution": HYPERSPHERE, + }, + "optimizer_kwargs": _get_optimizer_kwargs(), + }, + ) + # sample_hypersphere should be called once per generated candidate.
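+            # One draw per candidate: each of the `n` candidates is generated
+            # from its own randomized objective-weight vector.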
+ self.assertEqual(n, _mock_sample_hypersphere.call_count) + + def test_BotorchMOOModel_with_chebyshev_scalarization( + self, dtype=torch.float, cuda=False + ): + tkwargs = { + "device": torch.device("cuda") if cuda else torch.device("cpu"), + "dtype": dtype, + } + Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + n = 3 + objective_weights = torch.tensor([1.0, 1.0], **tkwargs) + + X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + + model = MultiObjectiveBotorchModel(acqf_constructor=get_NEI) + with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: + model.fit( + Xs=Xs1 + Xs2, + Ys=Ys1 + Ys2, + Yvars=Yvars1 + Yvars2, + bounds=bounds, + task_features=tfs, + feature_names=fns, + metric_names=mns, + fidelity_features=[], + ) + _mock_fit_model.assert_called_once() + + with mock.patch( + CHEBYSHEV_SCALARIZATION_PATH, wraps=get_chebyshev_scalarization + ) as _mock_chebyshev_scalarization, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + model_gen_options={ + "acquisition_function_kwargs": {"chebyshev_scalarization": True}, + "optimizer_kwargs": _get_optimizer_kwargs(), + }, + ) + # get_chebyshev_scalarization should be called once per generated candidate. + self.assertEqual(n, _mock_chebyshev_scalarization.call_count) + + def test_BotorchMOOModel_with_ehvi(self, dtype=torch.float, cuda=False): + tkwargs = { + "device": torch.device("cuda") if cuda else torch.device("cpu"), + "dtype": dtype, + } + Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + n = 3 + objective_weights = torch.tensor([1.0, 1.0], **tkwargs) + model = MultiObjectiveBotorchModel(acqf_constructor=get_EHVI) + + X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + + with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: + model.fit( + Xs=Xs1 + Xs2, + Ys=Ys1 + Ys2, + Yvars=Yvars1 + Yvars2, + bounds=bounds, + task_features=tfs, + feature_names=fns, + metric_names=mns, + fidelity_features=[], + ) + _mock_fit_model.assert_called_once() + + with mock.patch( + EHVI_ACQF_PATH, wraps=moo_monte_carlo.qExpectedHypervolumeImprovement + ) as _mock_ehvi_acqf, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + model_gen_options={"optimizer_kwargs": _get_optimizer_kwargs()}, + ref_point=[1.0, 1.0], + ) + # the EHVI acquisition function should be created only once.
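+            # Unlike the scalarization paths, which build one acquisition
+            # function per candidate, qEHVI generates all `n` candidates from a
+            # single acquisition function.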
+ self.assertEqual(1, _mock_ehvi_acqf.call_count) + + def test_BotorchMOOModel_with_random_scalarization_and_outcome_constraints( + self, dtype=torch.float, cuda=False + ): + tkwargs = { + "device": torch.device("cuda") if cuda else torch.device("cpu"), + "dtype": dtype, + } + Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + n = 2 + objective_weights = torch.tensor([1.0, 1.0], **tkwargs) + model = MultiObjectiveBotorchModel(acqf_constructor=get_NEI) + + X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + + with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: + model.fit( + Xs=Xs1 + Xs2, + Ys=Ys1 + Ys2, + Yvars=Yvars1 + Yvars2, + bounds=bounds, + task_features=tfs, + feature_names=fns, + metric_names=mns, + fidelity_features=[], + ) + _mock_fit_model.assert_called_once() + + with mock.patch( + SAMPLE_SIMPLEX_UTIL_PATH, + autospec=True, + return_value=torch.tensor([0.7, 0.3], **tkwargs), + ) as _mock_sample_simplex, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + outcome_constraints=( + torch.tensor([[1.0, 1.0]], **tkwargs), + torch.tensor([[10.0]], **tkwargs), + ), + model_gen_options={ + "acquisition_function_kwargs": {"random_scalarization": True}, + "optimizer_kwargs": _get_optimizer_kwargs(), + }, + ) + self.assertEqual(n, _mock_sample_simplex.call_count) + + def test_BotorchMOOModel_with_chebyshev_scalarization_and_outcome_constraints( + self, dtype=torch.float, cuda=False + ): + tkwargs = { + "device": torch.device("cuda") if cuda else torch.device("cpu"), + "dtype": dtype, + } + Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + n = 2 + objective_weights = torch.tensor([1.0, 1.0], **tkwargs) + model = MultiObjectiveBotorchModel(acqf_constructor=get_NEI) + + X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + + with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: + model.fit( + Xs=Xs1 + Xs2, + Ys=Ys1 + Ys2, + Yvars=Yvars1 + Yvars2, + bounds=bounds, + task_features=tfs, + feature_names=fns, + metric_names=mns, + fidelity_features=[], + ) + _mock_fit_model.assert_called_once() + + with mock.patch( + CHEBYSHEV_SCALARIZATION_PATH, wraps=get_chebyshev_scalarization + ) as _mock_chebyshev_scalarization, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + outcome_constraints=( + torch.tensor([[1.0, 1.0]], **tkwargs), + torch.tensor([[10.0]], **tkwargs), + ), + model_gen_options={ + "acquisition_function_kwargs": {"chebyshev_scalarization": True}, + "optimizer_kwargs": _get_optimizer_kwargs(), + }, + ) + # get_chebyshev_scalarization should be called once per generated candidate.
+ self.assertEqual(n, _mock_chebyshev_scalarization.call_count) + + def test_BotorchMOOModel_with_ehvi_and_outcome_constraints( + self, dtype=torch.float, cuda=False + ): + tkwargs = { + "device": torch.device("cuda") if cuda else torch.device("cpu"), + "dtype": dtype, + } + Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data( + dtype=dtype, cuda=cuda, constant_noise=True + ) + n = 3 + objective_weights = torch.tensor([1.0, 1.0], **tkwargs) + model = MultiObjectiveBotorchModel(acqf_constructor=get_EHVI) + + X_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + acqfv_dummy = torch.tensor([[[1.0, 2.0, 3.0]]], **tkwargs) + + with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model: + model.fit( + Xs=Xs1 + Xs2, + Ys=Ys1 + Ys2, + Yvars=Yvars1 + Yvars2, + bounds=bounds, + task_features=tfs, + feature_names=fns, + metric_names=mns, + fidelity_features=[], + ) + _mock_fit_model.assert_called_once() + + with mock.patch( + EHVI_ACQF_PATH, wraps=moo_monte_carlo.qExpectedHypervolumeImprovement + ) as _mock_ehvi_acqf, mock.patch( + "ax.models.torch.botorch_defaults.optimize_acqf", + return_value=(X_dummy, acqfv_dummy), + ) as _: + model.gen( + n, + bounds, + objective_weights, + outcome_constraints=( + torch.tensor([[1.0, 1.0]], **tkwargs), + torch.tensor([[10.0]], **tkwargs), + ), + model_gen_options={"optimizer_kwargs": _get_optimizer_kwargs()}, + ref_point=[1.0, 1.0], + ) + # the EHVI acquisition function should be created only once. + self.assertEqual(1, _mock_ehvi_acqf.call_count) diff --git a/ax/models/torch/botorch.py b/ax/models/torch/botorch.py index 30decf708db..6f029d9ef82 100644 --- a/ax/models/torch/botorch.py +++ b/ax/models/torch/botorch.py @@ -5,7 +5,7 @@ # LICENSE file in the root directory of this source tree. from copy import deepcopy -from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast +from typing import Any, Callable, Dict, List, Optional, Tuple, cast import numpy as np import torch @@ -15,14 +15,12 @@ get_NEI, recommend_best_observed_point, scipy_optimizer, - scipy_optimizer_list, ) from ax.models.torch.utils import ( _get_X_pending_and_observed, _to_inequality_constraints, normalize_indices, predict_from_model, - randomize_objective_weights, subset_model, ) from ax.models.torch_base import TorchModel @@ -75,17 +73,6 @@ ], Tuple[Tensor, Tensor], ] -TOptimizerList = Callable[ - [ - List[AcquisitionFunction], - Tensor, - Optional[List[Tuple[Tensor, Tensor, float]]], - Optional[Dict[int, float]], - Optional[Callable[[Tensor], Tensor]], - Any, - ], - Tuple[Tensor, Tensor], -] TBestPointRecommender = Callable[ [ TorchModel, @@ -246,12 +233,8 @@ def __init__( # Optional[Tuple[Tensor, Tensor]], Optional[Tensor], Optional[Tensor], # **(Any)], AcquisitionFunction]`. acqf_constructor: TAcqfConstructor = get_NEI, - # pyre-fixme[9]: acqf_optimizer has type `Callable[[AcquisitionFunction, - # Tensor, int, Optional[Dict[int, float]], Optional[Callable[[Tensor], - # Tensor]], Any], Tensor]`; used as `Callable[[AcquisitionFunction, Tensor, - # int, Optional[Dict[int, float]], Optional[Callable[[Tensor], Tensor]], - # **(Any)], Tensor]`. 
- acqf_optimizer: Union[TOptimizer, TOptimizerList] = scipy_optimizer, + # pyre-fixme[9]: acqf_optimizer declared/used type mismatch + acqf_optimizer: TOptimizer = scipy_optimizer, best_point_recommender: TBestPointRecommender = recommend_best_observed_point, refit_on_cv: bool = False, refit_on_update: bool = True, @@ -354,84 +337,39 @@ def gen( model = self.model - # subset model only to the outcomes we need for the optimization + # subset model only to the outcomes we need for the optimization if options.get(Keys.SUBSET_MODEL, True): - model, objective_weights, outcome_constraints, Ys = subset_model( + model, objective_weights, outcome_constraints, _ = subset_model( model=model, # pyre-ignore [6] objective_weights=objective_weights, outcome_constraints=outcome_constraints, - Ys=self.Ys, ) - else: - Ys = self.Ys bounds_ = torch.tensor(bounds, dtype=self.dtype, device=self.device) bounds_ = bounds_.transpose(0, 1) botorch_rounding_func = get_rounding_func(rounding_func) - if acf_options.get("random_scalarization", False) or acf_options.get( - "chebyshev_scalarization", False - ): - # TODO (jej): Move into MultiObjectiveBotorch - # If using a list of acquisition functions, the algorithm to generate - # that list is configured by acquisition_function_kwargs. - objective_weights_list = [ - randomize_objective_weights(objective_weights, **acf_options) - for _ in range(n) - ] - acquisition_function_list = [ - self.acqf_constructor( # pyre-ignore: [28] - model=model, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - Ys=Ys, # Required for chebyshev scalarization calculations. - **acf_options, - ) - for objective_weights in objective_weights_list - ] - acquisition_function_list = [ - checked_cast(AcquisitionFunction, acq_function) - for acq_function in acquisition_function_list - ] - # Multiple acquisition functions require a sequential optimizer - # always use scipy_optimizer_list. - # TODO(jej): Allow any optimizer.
- candidates, expected_acquisition_value = scipy_optimizer_list( - acq_function_list=acquisition_function_list, - bounds=bounds_, - inequality_constraints=_to_inequality_constraints( - linear_constraints=linear_constraints - ), - fixed_features=fixed_features, - rounding_func=botorch_rounding_func, - **optimizer_options, - ) - else: - acquisition_function = self.acqf_constructor( # pyre-ignore: [28] - model=model, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - **acf_options, - ) - acquisition_function = checked_cast( - AcquisitionFunction, acquisition_function - ) - # pyre-ignore: [28] - candidates, expected_acquisition_value = self.acqf_optimizer( - acq_function=checked_cast(AcquisitionFunction, acquisition_function), - bounds=bounds_, - n=n, - inequality_constraints=_to_inequality_constraints( - linear_constraints=linear_constraints - ), - fixed_features=fixed_features, - rounding_func=botorch_rounding_func, - **optimizer_options, - ) + acquisition_function = self.acqf_constructor( # pyre-ignore: [28] + model=model, + objective_weights=objective_weights, + outcome_constraints=outcome_constraints, + X_observed=X_observed, + X_pending=X_pending, + **acf_options, + ) + acquisition_function = checked_cast(AcquisitionFunction, acquisition_function) + # pyre-ignore: [28] + candidates, expected_acquisition_value = self.acqf_optimizer( + acq_function=checked_cast(AcquisitionFunction, acquisition_function), + bounds=bounds_, + n=n, + inequality_constraints=_to_inequality_constraints( + linear_constraints=linear_constraints + ), + fixed_features=fixed_features, + rounding_func=botorch_rounding_func, + **optimizer_options, + ) return ( candidates.detach().cpu(), torch.ones(n, dtype=self.dtype), diff --git a/ax/models/torch/botorch_defaults.py b/ax/models/torch/botorch_defaults.py index 6573bbf4dac..97d7260142e 100644 --- a/ax/models/torch/botorch_defaults.py +++ b/ax/models/torch/botorch_defaults.py @@ -26,7 +26,7 @@ from botorch.models.model import Model from botorch.models.model_list_gp_regression import ModelListGP from botorch.models.multitask import FixedNoiseMultiTaskGP, MultiTaskGP -from botorch.optim.optimize import optimize_acqf, optimize_acqf_list +from botorch.optim.optimize import optimize_acqf from botorch.utils import ( get_objective_weights_transform, get_outcome_constraint_transforms, @@ -267,7 +267,6 @@ def scipy_optimizer( values, where `i`-th element is the expected acquisition value conditional on having observed candidates `0,1,...,i-1`. """ - num_restarts: int = kwargs.get("num_restarts", 20) raw_samples: int = kwargs.get("num_raw_samples", 50 * num_restarts) @@ -293,60 +292,6 @@ def scipy_optimizer( return X, expected_acquisition_value -# TODO (jej): rewrite optimize_acqf wrappers to avoid duplicate code. -def scipy_optimizer_list( - acq_function_list: List[AcquisitionFunction], - bounds: Tensor, - inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None, - fixed_features: Optional[Dict[int, float]] = None, - rounding_func: Optional[Callable[[Tensor], Tensor]] = None, - **kwargs: Any, -) -> Tuple[Tensor, Tensor]: - r"""Sequential optimizer using scipy's minimize module on a numpy-adaptor. - - The ith acquisition in the sequence uses the ith given acquisition_function. - - Args: - acq_function_list: A list of botorch AcquisitionFunctions, - optimized sequentially. 
- bounds: A `2 x d`-dim tensor, where `bounds[0]` (`bounds[1]`) are the - lower (upper) bounds of the feasible hyperrectangle. - n: The number of candidates to generate. - inequality constraints: A list of tuples (indices, coefficients, rhs), - with each tuple encoding an inequality constraint of the form - `\sum_i (X[indices[i]] * coefficients[i]) >= rhs` - fixed_features: A map {feature_index: value} for features that should - be fixed to a particular value during generation. - rounding_func: A function that rounds an optimization result - appropriately (i.e., according to `round-trip` transformations). - - Returns: - 2-element tuple containing - - - A `n x d`-dim tensor of generated candidates. - - A `n`-dim tensor of conditional acquisition - values, where `i`-th element is the expected acquisition value - conditional on having observed candidates `0,1,...,i-1`. - """ - num_restarts: int = kwargs.get("num_restarts", 20) - raw_samples: int = kwargs.get("num_raw_samples", 50 * num_restarts) - - # use SLSQP by default for small problems since it yields faster wall times - if "method" not in kwargs: - kwargs["method"] = "SLSQP" - X, expected_acquisition_value = optimize_acqf_list( - acq_function_list=acq_function_list, - bounds=bounds, - num_restarts=num_restarts, - raw_samples=raw_samples, - options=kwargs, - inequality_constraints=inequality_constraints, - fixed_features=fixed_features, - post_processing_func=rounding_func, - ) - return X, expected_acquisition_value - - def recommend_best_observed_point( model: TorchModel, bounds: List[Tuple[float, float]], diff --git a/ax/models/torch/botorch_moo.py b/ax/models/torch/botorch_moo.py new file mode 100644 index 00000000000..bad67ec9610 --- /dev/null +++ b/ax/models/torch/botorch_moo.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
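+
+# Multi-objective counterpart to `botorch.py`: EHVI-based generation by default,
+# with random/Chebyshev scalarizations handled as a list of single-objective
+# acquisition functions optimized sequentially.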
+ +from typing import Any, Callable, Dict, List, Optional, Tuple + +import torch +from ax.core.types import TCandidateMetadata, TConfig, TGenMetadata +from ax.models.torch.botorch import ( + BotorchModel, + TAcqfConstructor, + TBestPointRecommender, + TModelConstructor, + TModelPredictor, + TOptimizer, + get_rounding_func, +) +from ax.models.torch.botorch_defaults import ( + get_and_fit_model, + predict_from_model, + recommend_best_observed_point, + scipy_optimizer, +) +from ax.models.torch.botorch_moo_defaults import get_EHVI, scipy_optimizer_list +from ax.models.torch.utils import ( + _get_X_pending_and_observed, + _to_inequality_constraints, + randomize_objective_weights, + subset_model, +) +from ax.models.torch_base import TorchModel +from ax.utils.common.constants import Keys +from ax.utils.common.docutils import copy_doc +from ax.utils.common.logger import get_logger +from ax.utils.common.typeutils import checked_cast +from botorch.acquisition.acquisition import AcquisitionFunction +from botorch.models.model import Model +from torch import Tensor + + +logger = get_logger(__name__) + +TOptimizerList = Callable[ + [ + List[AcquisitionFunction], + Tensor, + Optional[List[Tuple[Tensor, Tensor, float]]], + Optional[Dict[int, float]], + Optional[Callable[[Tensor], Tensor]], + Any, + ], + Tuple[Tensor, Tensor], +] +TFrontierEvaluator = Callable[ + [ + TorchModel, # TODO: MultiObjective + List[Tuple[float, float]], + Tensor, + Optional[Tuple[Tensor, Tensor]], + Optional[Tuple[Tensor, Tensor]], + Optional[Dict[int, float]], + Optional[TConfig], + ], + Optional[List[Tensor]], +] + + +class MultiObjectiveBotorchModel(BotorchModel): + r""" + Customizable multi-objective model. + + By default, this uses an Expected Hypervolume Improvement function to find the + Pareto frontier of a function with multiple outcomes. This behavior + can be modified by providing custom implementations of the following + components: + + - a `model_constructor` that instantiates and fits a model on data + - a `model_predictor` that predicts outcomes using the fitted model + - an `acqf_constructor` that creates an acquisition function from a fitted model + - an `acqf_optimizer` that optimizes the acquisition function + + Args: + model_constructor: A callable that instantiates and fits a model on data, + with signature as described below. + model_predictor: A callable that predicts using the fitted model, with + signature as described below. + acqf_constructor: A callable that creates an acquisition function from a + fitted model, with signature as described below. + acqf_optimizer: A callable that optimizes an acquisition + function, with signature as described below. + + + + Call signatures: + + :: + + model_constructor( + Xs, + Ys, + Yvars, + task_features, + fidelity_features, + metric_names, + state_dict, + **kwargs, + ) -> model + + Here `Xs`, `Ys`, `Yvars` are lists of tensors (one element per outcome), + `task_features` identifies columns of Xs that should be modeled as a task, + `fidelity_features` is a list of ints that specify the positions of fidelity + parameters in `Xs`, `metric_names` provides the names of each `Y` in `Ys`, + `state_dict` is a pytorch module state dict, and `model` is a BoTorch `Model`. + Optional kwargs are passed through from the `BotorchModel` constructor. + This callable is assumed to return a fitted BoTorch model that has the same + dtype and lives on the same device as the input tensors.
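+
+        As an illustrative sketch only (not one of the shipped defaults), a
+        custom constructor obeying this contract could simply delegate to
+        `get_and_fit_model`:
+
+        ::
+
+            # hypothetical example, not part of Ax
+            def my_model_constructor(
+                Xs, Ys, Yvars, task_features, fidelity_features,
+                metric_names, state_dict=None, **kwargs,
+            ):
+                return get_and_fit_model(
+                    Xs=Xs, Ys=Ys, Yvars=Yvars, task_features=task_features,
+                    fidelity_features=fidelity_features,
+                    metric_names=metric_names, state_dict=state_dict, **kwargs,
+                )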
+
+    ::
+
+        model_predictor(model, X) -> [mean, cov]
+
+    Here `model` is a fitted botorch model, `X` is a tensor of candidate points,
+    and `mean` and `cov` are the posterior mean and covariance, respectively.
+
+    ::
+
+        acqf_constructor(
+            model,
+            objective_weights,
+            outcome_constraints,
+            X_observed,
+            X_pending,
+            **kwargs,
+        ) -> acq_function
+
+    Here `model` is a botorch `Model`, `objective_weights` is a tensor of weights
+    for the model outputs, `outcome_constraints` is a tuple of tensors describing
+    the (linear) outcome constraints, `X_observed` are previously observed points,
+    and `X_pending` are points whose evaluation is pending. `acq_function` is a
+    BoTorch acquisition function crafted from these inputs. For additional
+    details on the arguments, see `get_EHVI`.
+
+    ::
+
+        acqf_optimizer(
+            acq_function,
+            bounds,
+            n,
+            inequality_constraints,
+            fixed_features,
+            rounding_func,
+            **kwargs,
+        ) -> candidates
+
+    Here `acq_function` is a BoTorch `AcquisitionFunction`, `bounds` is a tensor
+    containing bounds on the parameters, `n` is the number of candidates to be
+    generated, `inequality_constraints` are inequality constraints on parameter
+    values, `fixed_features` specifies features that should be fixed during
+    generation, and `rounding_func` is a callback that rounds an optimization
+    result appropriately. `candidates` is a tensor of generated candidates.
+    For additional details on the arguments, see `scipy_optimizer`.
+    """
+
+    dtype: Optional[torch.dtype]
+    device: Optional[torch.device]
+    Xs: List[Tensor]
+    Ys: List[Tensor]
+    Yvars: List[Tensor]
+
+    def __init__(
+        self,
+        model_constructor: TModelConstructor = get_and_fit_model,
+        model_predictor: TModelPredictor = predict_from_model,
+        # pyre-fixme[9]: acqf_constructor has type `Callable[[Model, Tensor,
+        # Optional[Tuple[Tensor, Tensor]], Optional[Tensor], Optional[Tensor], Any],
+        # AcquisitionFunction]`; used as `Callable[[Model, Tensor,
+        # Optional[Tuple[Tensor, Tensor]], Optional[Tensor], Optional[Tensor],
+        # **(Any)], AcquisitionFunction]`.
+        acqf_constructor: TAcqfConstructor = get_EHVI,
+        # pyre-fixme[9]: acqf_optimizer has type `Callable[[AcquisitionFunction,
+        # Tensor, int, Optional[Dict[int, float]], Optional[Callable[[Tensor],
+        # Tensor]], Any], Tensor]`; used as `Callable[[AcquisitionFunction, Tensor,
+        # int, Optional[Dict[int, float]], Optional[Callable[[Tensor], Tensor]],
+        # **(Any)], Tensor]`.
+        acqf_optimizer: TOptimizer = scipy_optimizer,
+        # TODO: Remove best_point_recommender for botorch_moo. Used in modelbridge._gen.
+        best_point_recommender: TBestPointRecommender = recommend_best_observed_point,
+        refit_on_cv: bool = False,
+        refit_on_update: bool = True,
+        warm_start_refitting: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        self.model_constructor = model_constructor
+        self.model_predictor = model_predictor
+        self.acqf_constructor = acqf_constructor
+        self.acqf_optimizer = acqf_optimizer
+        self.best_point_recommender = best_point_recommender
+        self._kwargs = kwargs
+        self.refit_on_cv = refit_on_cv
+        self.refit_on_update = refit_on_update
+        self.warm_start_refitting = warm_start_refitting
+        self.model: Optional[Model] = None
+        self.Xs = []
+        self.Ys = []
+        self.Yvars = []
+        self.dtype = None
+        self.device = None
+        self.task_features: List[int] = []
+        self.fidelity_features: List[int] = []
+        self.metric_names: List[str] = []
+
+    # pyre-fixme[56]: While applying decorator
+    # `ax.utils.common.docutils.copy_doc(...)`: Argument `bounds` expected.
+    @copy_doc(TorchModel.gen)
+    def gen(
+        self,
+        n: int,
+        bounds: List[Tuple[float, float]],
+        objective_weights: Tensor,  # objective_directions
+        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        fixed_features: Optional[Dict[int, float]] = None,
+        pending_observations: Optional[List[Tensor]] = None,
+        model_gen_options: Optional[TConfig] = None,
+        rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
+        target_fidelities: Optional[Dict[int, float]] = None,
+        ref_point: Optional[List[float]] = None,
+    ) -> Tuple[Tensor, Tensor, TGenMetadata, Optional[List[TCandidateMetadata]]]:
+        options = model_gen_options or {}
+        acf_options = options.get("acquisition_function_kwargs", {})
+        optimizer_options = options.get("optimizer_kwargs", {})
+
+        if target_fidelities:
+            raise NotImplementedError(
+                "target_fidelities not implemented for MultiObjectiveBotorchModel"
+            )
+
+        X_pending, X_observed = _get_X_pending_and_observed(
+            Xs=self.Xs,
+            pending_observations=pending_observations,
+            objective_weights=objective_weights,
+            outcome_constraints=outcome_constraints,
+            bounds=bounds,
+            linear_constraints=linear_constraints,
+            fixed_features=fixed_features,
+        )
+
+        model = self.model
+
+        # subset model only to the outcomes we need for the optimization
+        if options.get(Keys.SUBSET_MODEL, True):
+            model, objective_weights, outcome_constraints, Ys = subset_model(
+                model=model,  # pyre-ignore [6]
+                objective_weights=objective_weights,
+                outcome_constraints=outcome_constraints,
+                Ys=self.Ys,
+            )
+        else:
+            Ys = self.Ys
+
+        bounds_ = torch.tensor(bounds, dtype=self.dtype, device=self.device)
+        bounds_ = bounds_.transpose(0, 1)
+        botorch_rounding_func = get_rounding_func(rounding_func)
+        if acf_options.get("random_scalarization", False) or acf_options.get(
+            "chebyshev_scalarization", False
+        ):
+            # If using a list of acquisition functions, the algorithm to generate
+            # that list is configured by acquisition_function_kwargs.
+            objective_weights_list = [
+                randomize_objective_weights(objective_weights, **acf_options)
+                for _ in range(n)
+            ]
+            acquisition_function_list = [
+                self.acqf_constructor(  # pyre-ignore: [28]
+                    model=model,
+                    objective_weights=objective_weights,
+                    outcome_constraints=outcome_constraints,
+                    X_observed=X_observed,
+                    X_pending=X_pending,
+                    Ys=Ys,  # Required for chebyshev scalarization calculations.
+                    **acf_options,
+                )
+                for objective_weights in objective_weights_list
+            ]
+            acquisition_function_list = [
+                checked_cast(AcquisitionFunction, acq_function)
+                for acq_function in acquisition_function_list
+            ]
+            # Multiple acquisition functions require a sequential optimizer;
+            # always use scipy_optimizer_list.
+            # TODO(jej): Allow any optimizer.
+            candidates, expected_acquisition_value = scipy_optimizer_list(
+                acq_function_list=acquisition_function_list,
+                bounds=bounds_,
+                inequality_constraints=_to_inequality_constraints(
+                    linear_constraints=linear_constraints
+                ),
+                fixed_features=fixed_features,
+                rounding_func=botorch_rounding_func,
+                **optimizer_options,
+            )
+        else:
+            if ref_point:
+                ref_point = torch.tensor(
+                    ref_point, dtype=self.dtype, device=self.device
+                )
+            acquisition_function = self.acqf_constructor(  # pyre-ignore: [28]
+                model=model,
+                objective_weights=objective_weights,
+                ref_point=ref_point,
+                outcome_constraints=outcome_constraints,
+                X_observed=X_observed,
+                X_pending=X_pending,
+                Ys=self.Ys,  # Required for qEHVI calculations.
+                **acf_options,
+            )
+            acquisition_function = checked_cast(
+                AcquisitionFunction, acquisition_function
+            )
+            # pyre-ignore: [28]
+            candidates, expected_acquisition_value = self.acqf_optimizer(
+                acq_function=checked_cast(AcquisitionFunction, acquisition_function),
+                bounds=bounds_,
+                n=n,
+                inequality_constraints=_to_inequality_constraints(
+                    linear_constraints=linear_constraints
+                ),
+                fixed_features=fixed_features,
+                rounding_func=botorch_rounding_func,
+                **optimizer_options,
+            )
+        return (
+            candidates.detach().cpu(),
+            torch.ones(n, dtype=self.dtype),
+            {"expected_acquisition_value": expected_acquisition_value.tolist()},
+            None,
+        )
diff --git a/ax/models/torch/botorch_moo_defaults.py b/ax/models/torch/botorch_moo_defaults.py
new file mode 100644
index 00000000000..f50c08a0fb6
--- /dev/null
+++ b/ax/models/torch/botorch_moo_defaults.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+import torch
+from ax.models.torch.utils import (  # noqa F40
+    _to_inequality_constraints,
+    get_outcome_constraint_transforms,
+    predict_from_model,
+)
+from botorch.acquisition.acquisition import AcquisitionFunction
+from botorch.acquisition.multi_objective.objective import WeightedMCMultiOutputObjective
+from botorch.acquisition.utils import get_acquisition_function
+from botorch.models.model import Model
+from botorch.optim.optimize import optimize_acqf_list
+from botorch.utils.transforms import squeeze_last_dim
+from torch import Tensor
+
+
+def get_EHVI(
+    model: Model,
+    objective_weights: Tensor,
+    ref_point: Tensor,
+    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+    X_observed: Optional[Tensor] = None,
+    X_pending: Optional[Tensor] = None,
+    **kwargs: Any,
+) -> AcquisitionFunction:
+    r"""Instantiates a qExpectedHypervolumeImprovement acquisition function.
+
+    Args:
+        model: The underlying model which the acquisition function uses
+            to estimate acquisition values of candidates.
+        objective_weights: The objective is to maximize a weighted sum of
+            the columns of f(x). These are the weights.
+        ref_point: A reference point from which to calculate Pareto frontier
+            hypervolume. Points that do not dominate the ref_point contribute
+            nothing to hypervolume.
+        outcome_constraints: A tuple of (A, b). For k outcome constraints
+            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
+            A f(x) <= b. (Not used by single task models)
+        X_observed: A tensor containing points observed for all objective
+            outcomes and outcomes that appear in the outcome constraints (if
+            there are any).
+        X_pending: A tensor containing points whose evaluation is pending (i.e.
+            that have been submitted for evaluation) present for all objective
+            outcomes and outcomes that appear in the outcome constraints (if
+            there are any).
+        mc_samples: The number of MC samples to use (default: 512).
+        qmc: If True, use qMC instead of MC (default: True).
+        prune_baseline: If True, prune the baseline points (default: True).
+
+    Returns:
+        qExpectedHypervolumeImprovement: The instantiated acquisition function.
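+
+    Example (a sketch only; assumes `model` is a fitted two-outcome botorch
+    `Model`, and `train_X`, `train_Y` are `n x d` and `n x 2` tensors of
+    observed inputs and outcomes)::
+
+        acqf = get_EHVI(
+            model=model,
+            objective_weights=torch.ones(2),
+            ref_point=torch.zeros(2),
+            X_observed=train_X,
+            # One `n x 1` tensor per outcome, as expected by the Ys kwarg.
+            Ys=[Y.unsqueeze(-1) for Y in train_Y.unbind(-1)],
+        )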
+    """
+    if X_observed is None:
+        raise ValueError("There are no feasible observed points.")
+    # construct Objective module
+    objective = WeightedMCMultiOutputObjective(weights=objective_weights)
+    ref_point = torch.mul(ref_point, objective_weights[: len(ref_point)]).tolist()
+    if "Ys" not in kwargs:
+        raise ValueError("Expected Hypervolume Improvement requires Ys argument")
+    Y_tensor = squeeze_last_dim(torch.stack(kwargs.get("Ys")).transpose(0, 1))
+    Y_tensor = torch.mul(Y_tensor, objective_weights)
+
+    # For EHVI acquisition functions we pass the constraint transform directly.
+    if outcome_constraints is None:
+        cons_tfs = None
+    else:
+        cons_tfs = get_outcome_constraint_transforms(outcome_constraints)
+
+    return get_acquisition_function(
+        acquisition_function_name="qEHVI",
+        model=model,
+        # TODO (jej): Fix pyre error below by restructuring class hierarchy.
+        # pyre-fixme[6]: Expected `botorch.acquisition.objective.
+        # MCAcquisitionObjective` for 3rd parameter `objective` to call
+        # `get_acquisition_function` but got `IdentityMCMultiOutputObjective`.
+        objective=objective,
+        X_observed=X_observed,
+        X_pending=X_pending,
+        constraints=cons_tfs,
+        prune_baseline=kwargs.get("prune_baseline", True),
+        mc_samples=kwargs.get("mc_samples", 512),
+        qmc=kwargs.get("qmc", True),
+        seed=torch.randint(1, 10000, (1,)).item(),
+        ref_point=ref_point,
+        Y=Y_tensor,
+    )
+
+
+# TODO (jej): rewrite optimize_acqf wrappers to avoid duplicate code.
+def scipy_optimizer_list(
+    acq_function_list: List[AcquisitionFunction],
+    bounds: Tensor,
+    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
+    fixed_features: Optional[Dict[int, float]] = None,
+    rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
+    **kwargs: Any,
+) -> Tuple[Tensor, Tensor]:
+    r"""Sequential optimizer using scipy's minimize module on a numpy-adaptor.
+
+    Generates one candidate per acquisition function: the `i`-th candidate is
+    produced by optimizing the `i`-th function in `acq_function_list`.
+
+    Args:
+        acq_function_list: A list of botorch AcquisitionFunctions,
+            optimized sequentially.
+        bounds: A `2 x d`-dim tensor, where `bounds[0]` (`bounds[1]`) are the
+            lower (upper) bounds of the feasible hyperrectangle.
+        inequality_constraints: A list of tuples (indices, coefficients, rhs),
+            with each tuple encoding an inequality constraint of the form
+            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`
+        fixed_features: A map {feature_index: value} for features that should
+            be fixed to a particular value during generation.
+        rounding_func: A function that rounds an optimization result
+            appropriately (i.e., according to `round-trip` transformations).
+
+    Returns:
+        2-element tuple containing
+
+        - An `n x d`-dim tensor of generated candidates, where
+          `n = len(acq_function_list)`.
+        - An `n`-dim tensor of conditional acquisition
+          values, where the `i`-th element is the expected acquisition value
+          conditional on having observed candidates `0,1,...,i-1`.
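+
+    Example (a sketch only; assumes `acq_fn_list` is a list of fitted botorch
+    acquisition functions over a 3-dim unit cube)::
+
+        bounds = torch.stack([torch.zeros(3), torch.ones(3)])
+        X, vals = scipy_optimizer_list(
+            acq_function_list=acq_fn_list,
+            bounds=bounds,
+            num_restarts=10,
+        )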
+    """
+    num_restarts: int = kwargs.get("num_restarts", 20)
+    raw_samples: int = kwargs.get("num_raw_samples", 50 * num_restarts)
+
+    # use SLSQP by default for small problems since it yields faster wall times
+    if "method" not in kwargs:
+        kwargs["method"] = "SLSQP"
+    X, expected_acquisition_value = optimize_acqf_list(
+        acq_function_list=acq_function_list,
+        bounds=bounds,
+        num_restarts=num_restarts,
+        raw_samples=raw_samples,
+        options=kwargs,
+        inequality_constraints=inequality_constraints,
+        fixed_features=fixed_features,
+        post_processing_func=rounding_func,
+    )
+    return X, expected_acquisition_value
diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py
index be118ead476..2117091c5af 100644
--- a/ax/service/utils/best_point.py
+++ b/ax/service/utils/best_point.py
@@ -9,7 +9,7 @@
 import pandas as pd
 from ax.core.batch_trial import BatchTrial
 from ax.core.experiment import Experiment
-from ax.core.objective import Objective, ScalarizedObjective
+from ax.core.objective import MultiObjective, Objective, ScalarizedObjective
 from ax.core.optimization_config import OptimizationConfig
 from ax.core.trial import Trial
 from ax.core.types import TModelPredictArm, TParameterization
@@ -35,6 +35,13 @@ def get_best_raw_objective_point(
         Tuple of parameterization and a mapping from metric name to a tuple of
             the corresponding objective mean and SEM.
     """
+    # pyre-ignore [16]
+    if isinstance(experiment.optimization_config.objective, MultiObjective):
+        logger.warning(
+            "get_best_raw_objective_point is deprecated for multi-objective "
+            "optimization. This method will return an arbitrary point on the "
+            "Pareto frontier."
+        )
     opt_config = optimization_config or experiment.optimization_config
     assert opt_config is not None, (
         "Cannot identify the best point without an optimization config, but no "
@@ -79,6 +86,13 @@ def get_best_from_model_predictions(
     Returns:
         Tuple of parameterization and model predictions for it.
     """
+    # pyre-ignore [16]
+    if isinstance(experiment.optimization_config.objective, MultiObjective):
+        logger.warning(
+            "get_best_from_model_predictions is deprecated for multi-objective "
+            "optimization configs. This method will return an arbitrary point on "
+            "the Pareto frontier."
+        )
     for _, trial in sorted(experiment.trials.items(), key=lambda x: x[0], reverse=True):
         gr = None
         if isinstance(trial, Trial):
@@ -112,7 +126,12 @@ def get_best_parameters(
     Returns:
         Tuple of parameterization and model predictions for it.
     """
-
+    # pyre-ignore [16]
+    if isinstance(experiment.optimization_config.objective, MultiObjective):
+        logger.warning(
+            "get_best_parameters is deprecated for multi-objective optimization. "
+            "This method will return an arbitrary point on the Pareto frontier."
+        )
     # Find latest trial which has a generator_run attached and get its predictions
     model_predictions = get_best_from_model_predictions(experiment=experiment)
     if model_predictions is not None:  # pragma: no cover
diff --git a/ax/utils/testing/core_stubs.py b/ax/utils/testing/core_stubs.py
index d00a9b35ec3..0552d938446 100644
--- a/ax/utils/testing/core_stubs.py
+++ b/ax/utils/testing/core_stubs.py
@@ -649,7 +649,11 @@ def get_branin_objective() -> Objective:
 def get_branin_multi_objective() -> Objective:
     return MultiObjective(
-        metrics=[get_branin_metric(), get_branin_metric()], minimize=False
+        metrics=[
+            get_branin_metric(name="branin_a"),
+            get_branin_metric(name="branin_b"),
+        ],
+        minimize=False,
     )
@@ -853,6 +857,23 @@ def get_branin_data(trial_indices: Optional[Iterable[int]] = None) -> Data:
     return Data(df=pd.DataFrame.from_records(df_dicts))
 
 
+def get_branin_data_multi_objective(
+    trial_indices: Optional[Iterable[int]] = None,
+) -> Data:
+    df_dicts = [
+        {
+            "trial_index": trial_index,
+            "metric_name": f"branin_{suffix}",
+            "arm_name": f"{trial_index}_0",
+            "mean": 5.0,
+            "sem": 0.0,
+        }
+        for trial_index in (trial_indices or [0])
+        for suffix in ["a", "b"]
+    ]
+    return Data(df=pd.DataFrame.from_records(df_dicts))
+
+
 ##############################
 # Instances of types from core/types.py
 ##############################
diff --git a/sphinx/source/modelbridge.rst b/sphinx/source/modelbridge.rst
index 3ac24d556d7..14732ce7d44 100644
--- a/sphinx/source/modelbridge.rst
+++ b/sphinx/source/modelbridge.rst
@@ -84,6 +84,14 @@ Torch Model Bridge
     :undoc-members:
     :show-inheritance:
 
+Multi-Objective Torch Model Bridge
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.modelbridge.multi_objective_torch
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Utilities
 ---------------
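
For reference, a rough end-to-end sketch of the model-level API added by this patch follows. Names and tensor shapes are illustrative; the exact `fit` argument list may differ across Ax versions, and in practice `MultiObjectiveTorchModelBridge` assembles these tensors and calls `fit`/`gen` for you::

    import torch
    from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel

    # Two outcomes observed at five points in a 3-dim unit cube.
    train_X = torch.rand(5, 3, dtype=torch.double)
    Xs = [train_X, train_X]
    Ys = [torch.rand(5, 1, dtype=torch.double) for _ in range(2)]
    Yvars = [torch.full((5, 1), 0.1, dtype=torch.double) for _ in range(2)]
    bounds = [(0.0, 1.0)] * 3

    model = MultiObjectiveBotorchModel()  # defaults to the qEHVI path
    model.fit(
        Xs=Xs,
        Ys=Ys,
        Yvars=Yvars,
        bounds=bounds,
        task_features=[],
        feature_names=["x1", "x2", "x3"],
        metric_names=["a", "b"],
        fidelity_features=[],
    )
    # EHVI path: requires a ref_point entry per objective.
    candidates, weights, gen_metadata, _ = model.gen(
        n=2,
        bounds=bounds,
        objective_weights=torch.ones(2, dtype=torch.double),
        ref_point=[0.0, 0.0],
    )

The scalarization paths are selected instead via `model_gen_options={"acquisition_function_kwargs": {"chebyshev_scalarization": True}}` (or `"random_scalarization"`), in which case `gen` builds a list of acquisition functions and optimizes them sequentially with `scipy_optimizer_list`.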