From 49daf0e4005a0c87aea26eec35cb936a71e86993 Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella <santorella@fb.com>
Date: Thu, 25 Jul 2024 13:07:44 -0700
Subject: [PATCH] Require all problems to have an `optimal_value`; simplify
 Problem inheritance

Summary:
Context:
* Problems that lack an `optimal_value` get a NaN score and cannot really be used for benchmarking except when aggregated. This is a nasty "gotcha." We don't always know the optimum, but IMO it is better to guess.
* The inheritance structure of benchmark problems is too complex and will make subsequent refactors harder.

This PR:
* Adds an `optimal_value` requirement to `BenchmarkProblem` and makes `BenchmarkProblem` the base class of `SurrogateBenchmarkProblemBase`, enabling `BenchmarkProblem` to be the only type annotation needed. Therefore, the type annotation `BenchmarkProblemProtocol` is no longer necessary. It will be removed in the next PR.
* No longer allows giving a NaN score to benchmarks where the problem lacks an optimal value, because this can no longer happen.
* Updates a lot of annotations.
* Raises an exception for constrained multi-objective problems.
* Uses dataclasses to cut down on code.

Differential Revision: D60145193

Reviewed By: saitcakmak
---
 ax/benchmark/benchmark_problem.py             | 276 ++++++++----------
 ax/benchmark/problems/surrogate.py            | 151 ++--------
 .../problems/test_mixed_integer_problems.py   |   3 -
 .../tests/problems/test_surrogate_problems.py |   5 +
 ax/benchmark/tests/test_benchmark.py          |   9 +-
 ax/benchmark/tests/test_benchmark_problem.py  |  32 +-
 ax/utils/testing/benchmark_stubs.py           |   5 +-
 7 files changed, 171 insertions(+), 310 deletions(-)

diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py
index b3f8e29531a..91dcb47d8f5 100644
--- a/ax/benchmark/benchmark_problem.py
+++ b/ax/benchmark/benchmark_problem.py
@@ -11,7 +11,18 @@
 # in the UI.
 
 import abc
-from typing import Any, Dict, List, Optional, Protocol, runtime_checkable, Type, Union
+from dataclasses import dataclass, field
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Protocol,
+    runtime_checkable,
+    Type,
+    TypeVar,
+    Union,
+)
 
 from ax.benchmark.metrics.base import BenchmarkMetricBase
 
@@ -30,10 +41,21 @@
 from ax.core.types import ComparisonOp
 from ax.utils.common.base import Base
 from ax.utils.common.typeutils import checked_cast
-from botorch.test_functions.base import BaseTestProblem, ConstrainedBaseTestProblem
-from botorch.test_functions.multi_objective import MultiObjectiveTestProblem
+from botorch.test_functions.base import (
+    BaseTestProblem,
+    ConstrainedBaseTestProblem,
+    MultiObjectiveTestProblem,
+)
 from botorch.test_functions.synthetic import SyntheticTestFunction
 
+TBenchmarkProblem = TypeVar("TBenchmarkProblem", bound="BenchmarkProblem")
+TSingleObjectiveBenchmarkProblem = TypeVar(
+    "TSingleObjectiveBenchmarkProblem", bound="SingleObjectiveBenchmarkProblem"
+)
+TMultiObjectiveBenchmarkProblem = TypeVar(
+    "TMultiObjectiveBenchmarkProblem", bound="MultiObjectiveBenchmarkProblem"
+)
+
 
 def _get_name(
     test_problem: BaseTestProblem,
@@ -81,46 +103,81 @@ class BenchmarkProblemWithKnownOptimum(Protocol):
     optimal_value: float
 
 
+@dataclass(kw_only=True, repr=True)
 class BenchmarkProblem(Base):
-    """Benchmark problem, represented in terms of Ax search space, optimization
-    config, and runner.
+    """
+    Problem against which different methods can be benchmarked.
+
+    Defines how data is generated, the objective (via the OptimizationConfig),
+    and the SearchSpace.
+
+    Args:
+        name: Can be generated programmatically with `_get_name`.
+        optimization_config: Defines the objective of optimization.
+        num_trials: Number of optimization iterations to run. BatchTrials count
+            as one trial.
+        observe_noise_stds: If boolean, whether the standard deviation of the
+            observation noise is observed for all metrics. If a dictionary,
+            whether noise levels are observed on a per-metric basis.
+        has_ground_truth: Whether the Runner produces underlying ground truth
+            values, which are not observed in real noisy problems but may be
+            known in benchmarks.
+        tracking_metrics: Tracking metrics are not optimized, and for the
+            purpose of benchmarking, they will not be fit. The ground truth may
+            be provided as `tracking_metrics`.
+        optimal_value: The best ground-truth objective value. Hypervolume for
+            multi-objective problems. If the best value is not known, it is
+            conventional to set it to a value that is almost certainly better
+            than the best value, so that a benchmark's score will not exceed 100%.
+        search_space: The search space.
     """
 
-    def __init__(
-        self,
-        name: str,
-        search_space: SearchSpace,
-        optimization_config: OptimizationConfig,
-        runner: Runner,
-        num_trials: int,
-        is_noiseless: bool = False,
-        observe_noise_stds: Union[bool, Dict[str, bool]] = False,
-        has_ground_truth: bool = False,
-        tracking_metrics: Optional[List[BenchmarkMetricBase]] = None,
-    ) -> None:
-        self.name = name
-        self.search_space = search_space
-        self.optimization_config = optimization_config
-        self._runner = runner
-        self.num_trials = num_trials
-        self.is_noiseless = is_noiseless
-        self.observe_noise_stds = observe_noise_stds
-        self.has_ground_truth = has_ground_truth
-        self.tracking_metrics: List[BenchmarkMetricBase] = tracking_metrics or []
-
-    @property
-    def runner(self) -> Runner:
-        return self._runner
+    name: str
+    optimization_config: OptimizationConfig
+    num_trials: int
+    observe_noise_stds: Union[bool, Dict[str, bool]]
+    has_ground_truth: bool
+    tracking_metrics: List[BenchmarkMetricBase] = field(default_factory=list)
+    optimal_value: float
+
+    # exclude from repr
+    search_space: SearchSpace = field(repr=False)
+
+
+@dataclass(kw_only=True, repr=True)
+class EagerRunnerMixin:
+    """
+    Args:
+        runner: The runner produces both the potentially noisy data that is
+            used for modeling and would be observable in realistic settings and
+            the ground truth data used for evaluating performance in a benchmark,
+            writing the latter to `tracking_metrics`.
+    """
+
+    runner: Runner = field(repr=False)
+
+
+@dataclass(kw_only=True, repr=True)
+class SingleObjectiveBenchmarkProblem(BenchmarkProblem, EagerRunnerMixin):
+    """
+    Benchmark problem with a single objective.
+
+    For argument descriptions, see `BenchmarkProblem`; it additionally takes a
+    `runner` and the following argument:
+        is_noiseless: Whether the problem is noiseless.
+    """
+
+    is_noiseless: bool
 
     @classmethod
-    def from_botorch(
-        cls,
-        test_problem_class: Type[BaseTestProblem],
+    def from_botorch_synthetic(
+        cls: Type[TSingleObjectiveBenchmarkProblem],
+        test_problem_class: Type[SyntheticTestFunction],
         test_problem_kwargs: Dict[str, Any],
         lower_is_better: bool,
         num_trials: int,
         observe_noise_sd: bool = False,
-    ) -> "BenchmarkProblem":
+    ) -> TSingleObjectiveBenchmarkProblem:
         """
         Create a BenchmarkProblem from a BoTorch BaseTestProblem using
         specialized Metrics and Runners. The test problem's result will be
@@ -199,7 +256,11 @@ def from_botorch(
             objective=objective,
             outcome_constraints=outcome_constraints,
         )
-
+        optimal_value = (
+            test_problem.max_hv
+            if isinstance(test_problem, MultiObjectiveTestProblem)
+            else test_problem.optimal_value
+        )
         return cls(
             name=name,
             search_space=search_space,
@@ -213,75 +274,10 @@ def from_botorch(
             observe_noise_stds=observe_noise_sd,
             is_noiseless=test_problem.noise_std in (None, 0.0),
             has_ground_truth=True,  # all synthetic problems have ground truth
+            optimal_value=optimal_value,
         )
 
-    def __repr__(self) -> str:
-        """
-        Return a string representation that includes only the attributes that
-        print nicely and contain information likely to be useful.
-        """
-        return (
-            f"{self.__class__.__name__}("
-            f"name={self.name}, "
-            f"optimization_config={self.optimization_config}, "
-            f"num_trials={self.num_trials}, "
-            f"is_noiseless={self.is_noiseless}, "
-            f"observe_noise_stds={self.observe_noise_stds}, "
-            f"has_ground_truth={self.has_ground_truth}, "
-            f"tracking_metrics={self.tracking_metrics})"
-        )
-
-
-class SingleObjectiveBenchmarkProblem(BenchmarkProblem):
-    """The most basic BenchmarkProblem, with a single objective and a known optimal
-    value.
-    """
-
-    def __init__(
-        self,
-        optimal_value: float,
-        *,
-        name: str,
-        search_space: SearchSpace,
-        optimization_config: OptimizationConfig,
-        runner: Runner,
-        num_trials: int,
-        is_noiseless: bool = False,
-        observe_noise_stds: Union[bool, Dict[str, bool]] = False,
-        has_ground_truth: bool = False,
-        tracking_metrics: Optional[List[BenchmarkMetricBase]] = None,
-    ) -> None:
-        super().__init__(
-            name=name,
-            search_space=search_space,
-            optimization_config=optimization_config,
-            runner=runner,
-            num_trials=num_trials,
-            is_noiseless=is_noiseless,
-            observe_noise_stds=observe_noise_stds,
-            has_ground_truth=has_ground_truth,
-            tracking_metrics=tracking_metrics,
-        )
-        self.optimal_value = optimal_value
-
-    @classmethod
-    def from_botorch_synthetic(
-        cls,
-        test_problem_class: Type[SyntheticTestFunction],
-        test_problem_kwargs: Dict[str, Any],
-        lower_is_better: bool,
-        num_trials: int,
-        observe_noise_sd: bool = False,
-    ) -> "SingleObjectiveBenchmarkProblem":
-        """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized
-        Metrics and Runners. The test problem's result will be computed on the Runner
-        and retrieved by the Metric.
-        """
-
-        # pyre-fixme [45]: Invalid class instantiation
-        test_problem = test_problem_class(**test_problem_kwargs)
-
-        problem = BenchmarkProblem.from_botorch(
+        return cls.from_botorch(
             test_problem_class=test_problem_class,
             test_problem_kwargs=test_problem_kwargs,
             lower_is_better=lower_is_better,
@@ -289,79 +285,48 @@ def from_botorch_synthetic(
             observe_noise_sd=observe_noise_sd,
         )
 
-        dim = test_problem_kwargs.get("dim", None)
-        name = _get_name(
-            test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim
-        )
 
-        return cls(
-            name=name,
-            search_space=problem.search_space,
-            optimization_config=problem.optimization_config,
-            runner=problem.runner,
-            num_trials=num_trials,
-            is_noiseless=problem.is_noiseless,
-            observe_noise_stds=problem.observe_noise_stds,
-            has_ground_truth=problem.has_ground_truth,
-            optimal_value=test_problem.optimal_value,
-        )
-
-
-class MultiObjectiveBenchmarkProblem(BenchmarkProblem):
+# TODO: Support `observe_noise_stds` as a metric-level argument.
+@dataclass(kw_only=True, repr=True)
+class MultiObjectiveBenchmarkProblem(BenchmarkProblem, EagerRunnerMixin):
     """
     A `BenchmarkProblem` that supports multiple objectives.
 
     For multi-objective problems, `optimal_value` indicates the maximum
     hypervolume attainable with the given `reference_point`.
+
+    For argument descriptions, see `BenchmarkProblem`; it additionally takes a `runner`
+    and a `reference_point`.
     """
 
-    def __init__(
-        self,
-        optimal_value: float,
-        reference_point: List[float],
-        *,
-        name: str,
-        search_space: SearchSpace,
-        optimization_config: OptimizationConfig,
-        runner: Runner,
-        num_trials: int,
-        is_noiseless: bool = False,
-        observe_noise_stds: Union[bool, Dict[str, bool]] = False,
-        has_ground_truth: bool = False,
-        tracking_metrics: Optional[List[BenchmarkMetricBase]] = None,
-    ) -> None:
-        self.optimal_value = optimal_value
-        self.reference_point = reference_point
-        super().__init__(
-            name=name,
-            search_space=search_space,
-            optimization_config=optimization_config,
-            runner=runner,
-            num_trials=num_trials,
-            is_noiseless=is_noiseless,
-            observe_noise_stds=observe_noise_stds,
-            has_ground_truth=has_ground_truth,
-            tracking_metrics=tracking_metrics,
-        )
+    is_noiseless: bool
+    reference_point: List[float]
+    optimization_config: MultiObjectiveOptimizationConfig
 
     @classmethod
     def from_botorch_multi_objective(
-        cls,
+        cls: Type[TMultiObjectiveBenchmarkProblem],
         test_problem_class: Type[MultiObjectiveTestProblem],
         test_problem_kwargs: Dict[str, Any],
         # TODO: Figure out whether we should use `lower_is_better` here.
         num_trials: int,
         observe_noise_sd: bool = False,
-    ) -> "MultiObjectiveBenchmarkProblem":
+    ) -> TMultiObjectiveBenchmarkProblem:
         """Create a BenchmarkProblem from a BoTorch BaseTestProblem using specialized
         Metrics and Runners. The test problem's result will be computed on the Runner
         once per trial and each Metric will retrieve its own result by index.
         """
+        if issubclass(test_problem_class, ConstrainedBaseTestProblem):
+            raise NotImplementedError(
+                "Constrained multi-objective problems are not supported."
+            )
 
         # pyre-fixme [45]: Invalid class instantiation
         test_problem = test_problem_class(**test_problem_kwargs)
 
-        problem = BenchmarkProblem.from_botorch(
+        problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic(
+            # pyre-fixme [6]: Passing a multi-objective problem where a
+            # single-objective problem is expected.
             test_problem_class=test_problem_class,
             test_problem_kwargs=test_problem_kwargs,
             lower_is_better=True,  # Seems like we always assume minimization for MOO?
@@ -369,10 +334,7 @@ def from_botorch_multi_objective(
             observe_noise_sd=observe_noise_sd,
         )
 
-        dim = test_problem_kwargs.get("dim", None)
-        name = _get_name(
-            test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim
-        )
+        name = problem.name
 
         n_obj = test_problem.num_objectives
         if not observe_noise_sd:
@@ -420,7 +382,3 @@ def from_botorch_multi_objective(
             optimal_value=test_problem.max_hv,
             reference_point=test_problem._ref_point,
         )
-
-    @property
-    def maximum_hypervolume(self) -> float:
-        return self.optimal_value
diff --git a/ax/benchmark/problems/surrogate.py b/ax/benchmark/problems/surrogate.py
index 4f08ecb84f8..efb85f75e3d 100644
--- a/ax/benchmark/problems/surrogate.py
+++ b/ax/benchmark/problems/surrogate.py
@@ -5,17 +5,14 @@
 
 # pyre-strict
 
+from dataclasses import dataclass
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
-from ax.benchmark.metrics.base import BenchmarkMetricBase
+from ax.benchmark.benchmark_problem import BenchmarkProblem
 
 from ax.benchmark.runners.surrogate import SurrogateRunner
-from ax.core.optimization_config import (
-    MultiObjectiveOptimizationConfig,
-    OptimizationConfig,
-)
+from ax.core.optimization_config import MultiObjectiveOptimizationConfig
 from ax.core.runner import Runner
-from ax.core.search_space import SearchSpace
 from ax.modelbridge.torch import TorchModelBridge
 from ax.utils.common.base import Base
 from ax.utils.common.equality import equality_typechecker
@@ -23,7 +20,8 @@
 from botorch.utils.datasets import SupervisedDataset
 
 
-class SurrogateBenchmarkProblemBase(Base):
+@dataclass(kw_only=True)
+class SurrogateBenchmarkProblemBase(BenchmarkProblem):
     """
     Base class for SOOSurrogateBenchmarkProblem and MOOSurrogateBenchmarkProblem.
 
@@ -31,62 +29,19 @@ class SurrogateBenchmarkProblemBase(Base):
     called, to defer construction of the surrogate and downloading of datasets.
     """
 
-    def __init__(
-        self,
-        *,
-        name: str,
-        search_space: SearchSpace,
-        optimization_config: OptimizationConfig,
-        num_trials: int,
-        outcome_names: List[str],
-        observe_noise_stds: Union[bool, Dict[str, bool]] = False,
-        noise_stds: Union[float, Dict[str, float]] = 0.0,
-        get_surrogate_and_datasets: Optional[
-            Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]]
-        ] = None,
-        tracking_metrics: Optional[List[BenchmarkMetricBase]] = None,
-        _runner: Optional[Runner] = None,
-    ) -> None:
-        """Construct a `SurrogateBenchmarkProblemBase` instance.
-
-        Args:
-            name: The name of the benchmark problem.
-            search_space: The search space to optimize over.
-            optimization_config: THe optimization config for the problem.
-            num_trials: The number of trials to run.
-            outcome_names: The names of the metrics the benchmark problem
-                produces outcome observations for.
-            observe_noise_stds: Whether or not to observe the observation noise
-                level for each metric. If True/False, observe the the noise standard
-                deviation for all/no metrics. If a dictionary, specify this for
-                individual metrics (metrics not appearing in the dictionary will
-                be assumed to not provide observation noise levels).
-            noise_stds: The standard deviation(s) of the observation noise(s).
-                If a single value is provided, it is used for all metrics. Providing
-                a dictionary allows specifying different noise levels for different
-                metrics (metrics not appearing in the dictionary will be assumed to
-                be noiseless - but not necessarily be known to the problem to be
-                noiseless).
-            get_surrogate_and_datasets: A factory function that retunrs the Surrogate
-                and a list of datasets to be used by the surrogate.
-            tracking_metrics: Additional tracking metrics to compute during the
-                optimization (not used to inform the optimization).
-        """
+    noise_stds: Union[float, Dict[str, float]] = 0.0
+    get_surrogate_and_datasets: Optional[
+        Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]]
+    ] = None
+    _runner: Optional[Runner] = None
+    outcome_names: List[str]
+    has_ground_truth: bool = True
 
-        if get_surrogate_and_datasets is None and _runner is None:
+    def __post_init__(self) -> None:
+        if self.get_surrogate_and_datasets is None and self._runner is None:
             raise ValueError(
                 "Either `get_surrogate_and_datasets` or `_runner` required."
             )
-        self.name = name
-        self.search_space = search_space
-        self.optimization_config = optimization_config
-        self.num_trials = num_trials
-        self.outcome_names = outcome_names
-        self.observe_noise_stds = observe_noise_stds
-        self.noise_stds = noise_stds
-        self.get_surrogate_and_datasets = get_surrogate_and_datasets
-        self.tracking_metrics: List[BenchmarkMetricBase] = tracking_metrics or []
-        self._runner = _runner
 
     @property
     def is_noiseless(self) -> bool:
@@ -96,11 +51,6 @@ def is_noiseless(self) -> bool:
             return self.noise_stds == 0.0
         return all(std == 0.0 for std in checked_cast(dict, self.noise_stds).values())
 
-    @property
-    def has_ground_truth(self) -> bool:
-        # All surrogate-based problems have a ground truth
-        return True
-
     @equality_typechecker
     def __eq__(self, other: Base) -> bool:
         if type(other) is not type(self):
@@ -146,45 +96,10 @@ def __repr__(self) -> str:
 
 
 class SOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
-    """
-    Has the same attributes/properties as a `MultiObjectiveBenchmarkProblem`,
-    but its runner is not constructed until needed, to allow for deferring
-    constructing the surrogate and downloading data. The surrogate is only
-    defined when `runner` is accessed or `set_runner` is called.
-    """
-
-    def __init__(
-        self,
-        optimal_value: float,
-        *,
-        name: str,
-        search_space: SearchSpace,
-        optimization_config: OptimizationConfig,
-        num_trials: int,
-        outcome_names: List[str],
-        observe_noise_stds: Union[bool, Dict[str, bool]] = False,
-        noise_stds: Union[float, Dict[str, float]] = 0.0,
-        get_surrogate_and_datasets: Optional[
-            Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]]
-        ] = None,
-        tracking_metrics: Optional[List[BenchmarkMetricBase]] = None,
-        _runner: Optional[Runner] = None,
-    ) -> None:
-        super().__init__(
-            name=name,
-            search_space=search_space,
-            optimization_config=optimization_config,
-            num_trials=num_trials,
-            outcome_names=outcome_names,
-            observe_noise_stds=observe_noise_stds,
-            noise_stds=noise_stds,
-            get_surrogate_and_datasets=get_surrogate_and_datasets,
-            tracking_metrics=tracking_metrics,
-            _runner=_runner,
-        )
-        self.optimal_value = optimal_value
+    pass
 
 
+@dataclass(kw_only=True)
 class MOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
     """
     Has the same attributes/properties as a `MultiObjectiveBenchmarkProblem`,
@@ -194,36 +109,4 @@ class MOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
     """
 
     optimization_config: MultiObjectiveOptimizationConfig
-
-    def __init__(
-        self,
-        optimal_value: float,
-        reference_point: List[float],
-        *,
-        name: str,
-        search_space: SearchSpace,
-        optimization_config: MultiObjectiveOptimizationConfig,
-        num_trials: int,
-        outcome_names: List[str],
-        observe_noise_stds: Union[bool, Dict[str, bool]] = False,
-        noise_stds: Union[float, Dict[str, float]] = 0.0,
-        get_surrogate_and_datasets: Optional[
-            Callable[[], Tuple[TorchModelBridge, List[SupervisedDataset]]]
-        ] = None,
-        tracking_metrics: Optional[List[BenchmarkMetricBase]] = None,
-        _runner: Optional[Runner] = None,
-    ) -> None:
-        super().__init__(
-            name=name,
-            search_space=search_space,
-            optimization_config=optimization_config,
-            num_trials=num_trials,
-            outcome_names=outcome_names,
-            observe_noise_stds=observe_noise_stds,
-            noise_stds=noise_stds,
-            get_surrogate_and_datasets=get_surrogate_and_datasets,
-            tracking_metrics=tracking_metrics,
-            _runner=_runner,
-        )
-        self.reference_point = reference_point
-        self.optimal_value = optimal_value
+    reference_point: List[float]
diff --git a/ax/benchmark/tests/problems/test_mixed_integer_problems.py b/ax/benchmark/tests/problems/test_mixed_integer_problems.py
index 717beb3aabd..fa6cb400515 100644
--- a/ax/benchmark/tests/problems/test_mixed_integer_problems.py
+++ b/ax/benchmark/tests/problems/test_mixed_integer_problems.py
@@ -58,9 +58,6 @@ def test_problems(self) -> None:
                 ).test_problem._bounds,
                 expected_bounds,
             )
-            print(f"{name=}")
-            print(f"{problem.optimal_value=}")
-            print(f"{problem_cls().optimal_value=}")
             self.assertGreaterEqual(problem.optimal_value, problem_cls().optimal_value)
 
         # Test that they match correctly to the original problems.
diff --git a/ax/benchmark/tests/problems/test_surrogate_problems.py b/ax/benchmark/tests/problems/test_surrogate_problems.py
index 6d54784e0e0..b178d5a2ed0 100644
--- a/ax/benchmark/tests/problems/test_surrogate_problems.py
+++ b/ax/benchmark/tests/problems/test_surrogate_problems.py
@@ -15,6 +15,11 @@
 
 
 class TestSurrogateProblems(TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        # print max output so errors in 'repr' can be fully shown
+        self.maxDiff = None
+
     def test_conforms_to_protocol(self) -> None:
         sbp = get_soo_surrogate()
         self.assertIsInstance(sbp, BenchmarkProblemProtocol)
diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py
index dc24816effb..47c47683126 100644
--- a/ax/benchmark/tests/test_benchmark.py
+++ b/ax/benchmark/tests/test_benchmark.py
@@ -27,6 +27,7 @@
 from ax.benchmark.metrics.base import GroundTruthMetricMixin
 from ax.benchmark.metrics.benchmark import BenchmarkMetric, GroundTruthBenchmarkMetric
 from ax.benchmark.problems.registry import get_problem
+from ax.core.optimization_config import MultiObjectiveOptimizationConfig
 from ax.modelbridge.generation_strategy import GenerationNode, GenerationStrategy
 from ax.modelbridge.model_spec import ModelSpec
 from ax.modelbridge.registry import Models
@@ -36,7 +37,6 @@
 from ax.utils.common.testutils import TestCase
 from ax.utils.common.typeutils import checked_cast, not_none
 from ax.utils.testing.benchmark_stubs import (
-    get_constrained_multi_objective_benchmark_problem,
     get_moo_surrogate,
     get_multi_objective_benchmark_problem,
     get_single_objective_benchmark_problem,
@@ -162,9 +162,10 @@ def test_make_ground_truth_optimization_config(self) -> None:
         gt_opt_cfg = make_ground_truth_optimization_config(experiment)
         self.assertIs(gt_opt_cfg.objective.metric, gt_metric)
 
-        # Test behavior with MOO problem and outcome constraints
-        problem = get_constrained_multi_objective_benchmark_problem(
-            observe_noise_sd=False
+        # Test behavior with MOO problem
+        problem = get_multi_objective_benchmark_problem(observe_noise_sd=False)
+        self.assertIsInstance(
+            problem.optimization_config, MultiObjectiveOptimizationConfig
         )
         experiment = _create_benchmark_experiment(
             problem=problem, method_name="test_method"
diff --git a/ax/benchmark/tests/test_benchmark_problem.py b/ax/benchmark/tests/test_benchmark_problem.py
index b6de743528c..4df3f267202 100644
--- a/ax/benchmark/tests/test_benchmark_problem.py
+++ b/ax/benchmark/tests/test_benchmark_problem.py
@@ -16,7 +16,7 @@
 from ax.core.types import ComparisonOp
 from ax.utils.common.testutils import TestCase
 from ax.utils.common.typeutils import checked_cast
-from botorch.test_functions.multi_objective import BraninCurrin
+from botorch.test_functions.multi_objective import BraninCurrin, ConstrainedBraninCurrin
 from botorch.test_functions.synthetic import (
     Ackley,
     ConstrainedGramacy,
@@ -27,6 +27,11 @@
 
 
 class TestBenchmarkProblem(TestCase):
+    def setUp(self) -> None:
+        # Print full output, so that any differences in 'repr' output are shown
+        self.maxDiff = None
+        super().setUp()
+
     def test_single_objective_from_botorch(self) -> None:
         for botorch_test_problem in [Ackley(), ConstrainedHartmann(dim=6)]:
             test_problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic(
@@ -77,15 +82,16 @@ def test_single_objective_from_botorch(self) -> None:
                     test_problem.optimization_config.outcome_constraints, []
                 )
                 expected_repr = (
-                    "SingleObjectiveBenchmarkProblem(name=Ackley, "
+                    "SingleObjectiveBenchmarkProblem(name='Ackley', "
                     "optimization_config=OptimizationConfig(objective=Objective("
                     'metric_name="Ackley", '
                     "minimize=True), outcome_constraints=[]), "
                     "num_trials=1, "
-                    "is_noiseless=True, "
                     "observe_noise_stds=False, "
                     "has_ground_truth=True, "
-                    "tracking_metrics=[])"
+                    "tracking_metrics=[], "
+                    "optimal_value=0.0, "
+                    "is_noiseless=True)"
                 )
             else:
                 outcome_constraint = (
@@ -96,16 +102,17 @@ def test_single_objective_from_botorch(self) -> None:
                 self.assertFalse(outcome_constraint.relative)
                 self.assertEqual(outcome_constraint.bound, 0.0)
                 expected_repr = (
-                    "SingleObjectiveBenchmarkProblem(name=ConstrainedHartmann, "
+                    "SingleObjectiveBenchmarkProblem(name='ConstrainedHartmann', "
                     "optimization_config=OptimizationConfig(objective=Objective("
                     'metric_name="ConstrainedHartmann", minimize=True), '
                     "outcome_constraints=[OutcomeConstraint(constraint_slack_0"
                     " >= 0.0)]), "
                     "num_trials=1, "
-                    "is_noiseless=True, "
                     "observe_noise_stds=False, "
                     "has_ground_truth=True, "
-                    "tracking_metrics=[])"
+                    "tracking_metrics=[], "
+                    "optimal_value=-3.32237, "
+                    "is_noiseless=True)"
                 )
 
             self.assertEqual(repr(test_problem), expected_repr)
@@ -197,6 +204,17 @@ def test_moo_from_botorch(self) -> None:
         self.assertEqual(branin_currin_problem.optimal_value, test_problem._max_hv)
         self.assertEqual(branin_currin_problem.reference_point, test_problem._ref_point)
 
+    def test_moo_from_botorch_constrained(self) -> None:
+        with self.assertRaisesRegex(
+            NotImplementedError,
+            "Constrained multi-objective problems are not supported.",
+        ):
+            MultiObjectiveBenchmarkProblem.from_botorch_multi_objective(
+                test_problem_class=ConstrainedBraninCurrin,
+                test_problem_kwargs={},
+                num_trials=1,
+            )
+
     def test_maximization_problem(self) -> None:
         test_problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic(
             test_problem_class=Cosine8,
diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py
index 170334b0258..7c648001f07 100644
--- a/ax/utils/testing/benchmark_stubs.py
+++ b/ax/utils/testing/benchmark_stubs.py
@@ -11,7 +11,6 @@
 import numpy as np
 from ax.benchmark.benchmark_method import BenchmarkMethod
 from ax.benchmark.benchmark_problem import (
-    BenchmarkProblem,
     MultiObjectiveBenchmarkProblem,
     SingleObjectiveBenchmarkProblem,
 )
@@ -43,8 +42,8 @@
 from botorch.test_functions.synthetic import Branin
 
 
-def get_benchmark_problem() -> BenchmarkProblem:
-    return BenchmarkProblem.from_botorch(
+def get_benchmark_problem() -> SingleObjectiveBenchmarkProblem:
+    return SingleObjectiveBenchmarkProblem.from_botorch_synthetic(
         test_problem_class=Branin,
         test_problem_kwargs={},
         lower_is_better=True,