Remove functionality for BenchmarkRunner without ground truth (facebook#2674)

esantorella · facebook-github-bot · commit c9c92333c5b9 · 2024-08-20T06:35:04.000-07:00
Summary: Pull Request resolved: facebook#2674 Context: This is an alternative to D61431979. Note: There are benchmarks that do not use `BenchmarkRunner`, but I plan to have them all use `BenchmarkRunner` in the future. `BenchmarkRunner` technically supports benchmarks without a ground truth, but that functionality is never used, and there aren't any Ax benchmarks that are noisy *and* don't have a ground truth. It is not conceptually clear how such a case should be benchmarked, so it is better to not over-engineer for that need, which may never arise. Instead, benchmarks that lack a ground truth but are deterministic can be treated as noiseless problems with a ground truth, and we can reap support for problems without a ground truth. Also, `BenchmarkRunner` has some methods that must either be defined or not defined depending on whether there is a ground truth. They can't be abstract because they will not always be defined. With this change, we can make the ground-truth methods abstract and get rid of the rest. This PR: - Rewrites docstrings - Removes method `get_Y_Ystd` - Makes `get_Y_true` and other methods abstract - Removes functionality for the case where `get_Y_true` raises a `NotImplementedError` Reviewed By: ItsMrLin Differential Revision: D61483962
diff --git a/ax/benchmark/runners/base.py b/ax/benchmark/runners/base.py
@@ -5,9 +5,9 @@
 
 # pyre-strict
 
-from abc import ABC, abstractmethod
+from abc import ABC, abstractmethod, abstractproperty
 from math import sqrt
-from typing import Any, Optional, Union
+from typing import Any, Union
 
 import torch
 from ax.core.arm import Arm
@@ -21,45 +21,44 @@
 
 
 class BenchmarkRunner(Runner, ABC):
-
-    @property
-    @abstractmethod
+    """
+    A Runner that produces both observed and ground-truth values.
+
+    Observed values equal ground-truth values plus noise, with the noise added
+    according to the standard deviations returned by `get_noise_stds()`.
+
+    This runner does require that every benchmark has a ground truth, which
+    won't necessarily be true for real-world problems. Such problems fall into
+    two categories:
+        - If they are deterministic, they can be used with this runner by
+          viewing them as noiseless problems where the observed values are the
+          ground truth. The observed values will be used for tracking the
+          progress of optimization.
+        - If they are not deterministic, they are not supported. It is not
+          conceptually clear how to benchmark such problems, so we decided to
+          not over-engineer for that before such a use case arrives.
+    """
+
+    @abstractproperty
     def outcome_names(self) -> list[str]:
         """The names of the outcomes of the problem (in the order of the outcomes)."""
         pass  # pragma: no cover
 
     def get_Y_true(self, arm: Arm) -> Tensor:
-        """Function returning the ground truth values for a given arm. The
-        synthetic noise is added as part of the Runner's `run()` method.
-        For problems that do not have a ground truth, the Runner must
-        implement the `get_Y_Ystd()` method instead."""
-        raise NotImplementedError(
-            "Must implement method `get_Y_true()` for Runner "
-            f"{self.__class__.__name__} as it does not implement a "
-            "`get_Y_Ystd()` method."
-        )
+        """
+        Return the ground truth values for a given arm.
+
+        Synthetic noise is added as part of the Runner's `run()` method.
+        """
+        ...
 
+    @abstractmethod
     def get_noise_stds(self) -> Union[None, float, dict[str, float]]:
-        """Function returning the standard errors for the synthetic noise
-        to be applied to the observed values. For problems that do not have
-        a ground truth, the Runner must implement the `get_Y_Ystd()` method
-        instead."""
-        raise NotImplementedError(
-            "Must implement method `get_Y_Ystd()` for Runner "
-            f"{self.__class__.__name__} as it does not implement a "
-            "`get_noise_stds()` method."
-        )
-
-    def get_Y_Ystd(self, arm: Arm) -> tuple[Tensor, Optional[Tensor]]:
-        """Function returning the observed values and their standard errors
-        for a given arm. This function is unused for problems that have a
-        ground truth (in this case `get_Y_true()` is used), and is required
-        for problems that do not have a ground truth."""
-        raise NotImplementedError(
-            "Must implement method `get_Y_Ystd()` for Runner "
-            f"{self.__class__.__name__} as it does not implement a "
-            "`get_Y_true()` method."
-        )
+        """
+        Return the standard errors for the synthetic noise to be applied to the
+        observed values.
+        """
+        ...
 
     def run(self, trial: BaseTrial) -> dict[str, Any]:
         """Run the trial by evaluating its parameterization(s).
@@ -110,33 +109,26 @@ def run(self, trial: BaseTrial) -> dict[str, Any]:
                 )
 
         for arm in trial.arms:
-            try:
-                # Case where we do have a ground truth
-                Y_true = self.get_Y_true(arm)
-                Ys_true[arm.name] = Y_true.tolist()
-                if noise_stds is None:
-                    # No noise, so just return the true outcome.
-                    Ystds[arm.name] = [0.0] * len(Y_true)
-                    Ys[arm.name] = Y_true.tolist()
-                else:
-                    # We can scale the noise std by the inverse of the relative sample
-                    # budget allocation to each arm. This works b/c (i) we assume that
-                    # observations per unit sample budget are i.i.d. and (ii) the
-                    # normalized weights sum to one.
-                    std = noise_stds_tsr.to(Y_true) / sqrt(nlzd_arm_weights[arm])
-                    Ystds[arm.name] = std.tolist()
-                    Ys[arm.name] = (Y_true + std * torch.randn_like(Y_true)).tolist()
-            except NotImplementedError:
-                # Case where we don't have a ground truth.
-                Y, Ystd = self.get_Y_Ystd(arm)
-                Ys[arm.name] = Y.tolist()
-                Ystds[arm.name] = Ystd.tolist() if Ystd is not None else None
+            # Ground truth is always available; evaluate it for this arm.
+            Y_true = self.get_Y_true(arm)
+            Ys_true[arm.name] = Y_true.tolist()
+            if noise_stds is None:
+                # No noise, so just return the true outcome.
+                Ystds[arm.name] = [0.0] * len(Y_true)
+                Ys[arm.name] = Y_true.tolist()
+            else:
+                # We can scale the noise std by the inverse of the relative sample
+                # budget allocation to each arm. This works b/c (i) we assume that
+                # observations per unit sample budget are i.i.d. and (ii) the
+                # normalized weights sum to one.
+                std = noise_stds_tsr.to(Y_true) / sqrt(nlzd_arm_weights[arm])
+                Ystds[arm.name] = std.tolist()
+                Ys[arm.name] = (Y_true + std * torch.randn_like(Y_true)).tolist()
 
         run_metadata = {
             "Ys": Ys,
             "Ystds": Ystds,
             "outcome_names": self.outcome_names,
+            "Ys_true": Ys_true,
         }
-        if Ys_true:  # only add key if we actually have a ground truth
-            run_metadata["Ys_true"] = Ys_true
         return run_metadata