diff --git a/README.rst b/README.rst index 1888eb37a68c..b4b48da3bbf3 100644 --- a/README.rst +++ b/README.rst @@ -120,7 +120,7 @@ This example runs a parallel grid search to optimize an example objective functi print("Best config: ", analysis.get_best_config(metric="mean_loss")) # Get a dataframe for analyzing trial results. - df = analysis.dataframe() + df = analysis.results_df If TensorBoard is installed, automatically visualize all trial results: diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index 83e0979e5db3..48a48b7c1ce0 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -28,5 +28,5 @@ sphinx_rtd_theme tabulate uvicorn werkzeug -tune-sklearn==0.0.5 +git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn scikit-optimize diff --git a/doc/source/tune/api_docs/analysis.rst b/doc/source/tune/api_docs/analysis.rst index 2ec32f686c17..c9468fbab694 100644 --- a/doc/source/tune/api_docs/analysis.rst +++ b/doc/source/tune/api_docs/analysis.rst @@ -18,7 +18,7 @@ Here are some example operations for obtaining a summary of your experiment: .. 
code-block:: python # Get a dataframe for the last reported results of all of the trials - df = analysis.dataframe() + df = analysis.results_df # Get a dataframe for the max accuracy seen for each trial df = analysis.dataframe(metric="mean_accuracy", mode="max") diff --git a/doc/source/tune/key-concepts.rst b/doc/source/tune/key-concepts.rst index 11247895bd50..213d680a01c0 100644 --- a/doc/source/tune/key-concepts.rst +++ b/doc/source/tune/key-concepts.rst @@ -219,16 +219,24 @@ Analysis analysis = tune.run(trainable, search_alg=algo, stop={"training_iteration": 20}) - # Get the best hyperparameters - best_hyperparameters = analysis.get_best_config() + best_trial = analysis.best_trial # Get best trial + best_config = analysis.best_config # Get best trial's hyperparameters + best_logdir = analysis.best_logdir # Get best trial's logdir + best_checkpoint = analysis.best_checkpoint # Get best trial's best checkpoint + best_result = analysis.best_result # Get best trial's last results + best_result_df = analysis.best_result_df # Get best result as pandas dataframe This object can also retrieve all training runs as dataframes, allowing you to do ad-hoc data analysis over your results. .. code-block:: python - # Get a dataframe for the max score seen for each trial + # Get a dataframe with the last results for each trial + df_results = analysis.results_df + + # Get a dataframe of results for a specific score or mode df = analysis.dataframe(metric="score", mode="max") + What's Next? 
------------- diff --git a/python/ray/dashboard/dashboard.py b/python/ray/dashboard/dashboard.py index 76a75d053168..ee82a5bad00f 100644 --- a/python/ray/dashboard/dashboard.py +++ b/python/ray/dashboard/dashboard.py @@ -806,7 +806,7 @@ def collect(self): # search through all the sub_directories in log directory analysis = Analysis(str(self._logdir)) - df = analysis.dataframe() + df = analysis.dataframe(metric="episode_reward_mean", mode="max") if len(df) == 0 or "trial_id" not in df.columns: return diff --git a/python/ray/tune/BUILD b/python/ray/tune/BUILD index d24cf0452663..f85583f7434d 100644 --- a/python/ray/tune/BUILD +++ b/python/ray/tune/BUILD @@ -149,7 +149,7 @@ py_test( py_test( name = "test_sample", - size = "medium", + size = "small", srcs = ["tests/test_sample.py"], deps = [":tune_lib"], tags = ["exclusive"], diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py index 2da4c33e8883..afa5de622ceb 100644 --- a/python/ray/tune/analysis/experiment_analysis.py +++ b/python/ray/tune/analysis/experiment_analysis.py @@ -1,11 +1,17 @@ import json import logging import os +from typing import Dict + +from ray.tune.checkpoint_manager import Checkpoint +from ray.tune.utils import flatten_dict try: import pandas as pd + from pandas import DataFrame except ImportError: pd = None + DataFrame = None from ray.tune.error import TuneError from ray.tune.result import EXPR_PROGRESS_FILE, EXPR_PARAM_FILE,\ @@ -80,6 +86,9 @@ def dataframe(self, metric=None, mode=None): Returns: pd.DataFrame: Constructed from a result dict of each trial. 
""" + metric = self._validate_metric(metric) + mode = self._validate_mode(mode) + rows = self._retrieve_rows(metric=metric, mode=mode) all_configs = self.get_all_configs(prefix=True) for path, config in all_configs.items(): @@ -227,6 +236,9 @@ def get_best_checkpoint(self, trial, metric=None, mode=None): mode = self._validate_mode(mode) checkpoint_paths = self.get_trial_checkpoints_paths(trial, metric) + if not checkpoint_paths: + logger.error(f"No checkpoints have been found for trial {trial}.") + return None if mode == "max": return max(checkpoint_paths, key=lambda x: x[1])[0] else: @@ -316,7 +328,150 @@ def __init__(self, os.path.dirname(experiment_checkpoint_path), default_metric, default_mode) - def get_best_trial(self, metric=None, mode=None, scope="all"): + @property + def best_trial(self) -> Trial: + """Get the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_trial`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_trial(metric, mode)` method to set the metric " + "and mode explicitly.") + return self.get_best_trial(self.default_metric, self.default_mode) + + @property + def best_config(self) -> Dict: + """Get the config of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_config(metric, mode, scope)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_config`, pass a `metric` and `mode` " + "parameter to `tune.run()`. 
Alternatively, use the " + "`get_best_config(metric, mode)` method to set the metric " + "and mode explicitly.") + return self.get_best_config(self.default_metric, self.default_mode) + + @property + def best_checkpoint(self) -> Checkpoint: + """Get the checkpoint of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_checkpoint(trial, metric, mode)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_checkpoint`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_checkpoint(trial, metric, mode)` method to set the " + "metric and mode explicitly.") + best_trial = self.best_trial + return self.get_best_checkpoint(best_trial, self.default_metric, + self.default_mode) + + @property + def best_logdir(self) -> str: + """Get the logdir of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_logdir(metric, mode)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_logdir`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_logdir(metric, mode, scope)` method to set the " + "metric and mode explicitly.") + return self.get_best_logdir(self.default_metric, self.default_mode) + + @property + def best_dataframe(self) -> DataFrame: + """Get the full result dataframe of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. 
+ + If you didn't pass these parameters, use + `get_best_logdir(metric, mode)` and use it to look for the dataframe + in the `self.trial_dataframes` dict. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_dataframe`, pass a `metric` and `mode` " + "parameter to `tune.run()`.") + best_logdir = self.best_logdir + return self.trial_dataframes[best_logdir] + + @property + def best_result(self) -> Dict: + """Get the last result of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope).last_result` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_result`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use " + "`get_best_trial(metric, mode).last_result` to set " + "the metric and mode explicitly and fetch the last result.") + return self.best_trial.last_result + + @property + def best_result_df(self) -> DataFrame: + """Get the best result of the experiment as a pandas dataframe. + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope).last_result` instead. + """ + if not pd: + raise ValueError("`best_result_df` requires pandas. 
Install with " + "`pip install pandas`.") + best_result = flatten_dict(self.best_result, delimiter=".") + return pd.DataFrame.from_records([best_result], index="trial_id") + + @property + def results(self) -> Dict[str, Dict]: + """Get the last result of all trials of the experiment""" + return {trial.trial_id: trial.last_result for trial in self.trials} + + @property + def results_df(self) -> DataFrame: + if not pd: + raise ValueError("`results_df` requires pandas. Install with " + "`pip install pandas`.") + return pd.DataFrame.from_records( + [ + flatten_dict(trial.last_result, delimiter=".") + for trial in self.trials + ], + index="trial_id") + + def get_best_trial(self, metric=None, mode=None, scope="last"): """Retrieve the best trial object. Compares all trials' scores on ``metric``. @@ -380,7 +535,7 @@ def get_best_trial(self, metric=None, mode=None, scope="all"): "parameter?") return best_trial - def get_best_config(self, metric=None, mode=None, scope="all"): + def get_best_config(self, metric=None, mode=None, scope="last"): """Retrieve the best config corresponding to the trial. Compares all trials' scores on `metric`. @@ -407,7 +562,7 @@ def get_best_config(self, metric=None, mode=None, scope="all"): best_trial = self.get_best_trial(metric, mode, scope) return best_trial.config if best_trial else None - def get_best_logdir(self, metric=None, mode=None, scope="all"): + def get_best_logdir(self, metric=None, mode=None, scope="last"): """Retrieve the logdir corresponding to the best trial. Compares all trials' scores on `metric`. 
diff --git a/python/ray/tune/commands.py b/python/ray/tune/commands.py index 2ab17e609906..7fbbe9776bde 100644 --- a/python/ray/tune/commands.py +++ b/python/ray/tune/commands.py @@ -116,7 +116,8 @@ def list_trials(experiment_path, _check_tabulate() try: - checkpoints_df = Analysis(experiment_path).dataframe() + checkpoints_df = Analysis(experiment_path).dataframe( + metric="episode_reward_mean", mode="max") except TuneError: raise click.ClickException("No trial data found!") diff --git a/python/ray/tune/examples/mnist_pytorch.py b/python/ray/tune/examples/mnist_pytorch.py index 5a2c3677079c..56681d401fdb 100644 --- a/python/ray/tune/examples/mnist_pytorch.py +++ b/python/ray/tune/examples/mnist_pytorch.py @@ -121,7 +121,7 @@ def train_mnist(config): else: ray.init(num_cpus=2 if args.smoke_test else None) sched = AsyncHyperBandScheduler( - time_attr="training_iteration", metric="mean_accuracy") + time_attr="training_iteration", metric="mean_accuracy", mode="max") analysis = tune.run( train_mnist, name="exp", diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py index c623111daf83..c31b81968375 100644 --- a/python/ray/tune/examples/mnist_pytorch_trainable.py +++ b/python/ray/tune/examples/mnist_pytorch_trainable.py @@ -65,9 +65,11 @@ def load_checkpoint(self, checkpoint_path): if __name__ == "__main__": args = parser.parse_args() ray.init(address=args.ray_address, num_cpus=6 if args.smoke_test else None) - sched = ASHAScheduler(metric="mean_accuracy") + sched = ASHAScheduler() analysis = tune.run( TrainMNIST, + metric="mean_accuracy", + mode="max", scheduler=sched, stop={ "mean_accuracy": 0.95, diff --git a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py index 1d6b3b7e3822..8dea4fbcdeeb 100644 --- a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py +++ 
b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py @@ -160,6 +160,6 @@ def _export_model(self, export_formats, export_dir): # demo of the trained Generators if not args.smoke_test: - logdirs = analysis.dataframe()["logdir"].tolist() + logdirs = analysis.results_df["logdir"].tolist() model_paths = [os.path.join(d, "exported_models") for d in logdirs] demo_gan(analysis, model_paths) diff --git a/python/ray/tune/schedulers/__init__.py b/python/ray/tune/schedulers/__init__.py index 54b88ca9ecb0..5e51bdab24b3 100644 --- a/python/ray/tune/schedulers/__init__.py +++ b/python/ray/tune/schedulers/__init__.py @@ -10,8 +10,8 @@ def create_scheduler( scheduler, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, **kwargs, ): """Instantiate a scheduler based on the given string. diff --git a/python/ray/tune/schedulers/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py index 29cf481eb784..7c0208f93cc1 100644 --- a/python/ray/tune/schedulers/async_hyperband.py +++ b/python/ray/tune/schedulers/async_hyperband.py @@ -38,8 +38,8 @@ class AsyncHyperBandScheduler(FIFOScheduler): def __init__(self, time_attr="training_iteration", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_t=100, grace_period=1, reduction_factor=4, @@ -49,7 +49,8 @@ def __init__(self, assert grace_period > 0, "grace_period must be positive!" assert reduction_factor > 1, "Reduction Factor not valid!" assert brackets > 0, "brackets must be positive!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" if reward_attr is not None: mode = "max" @@ -73,13 +74,41 @@ def __init__(self, self._counter = 0 # for self._num_stopped = 0 self._metric = metric - if mode == "max": + self._mode = mode + self._metric_op = None + if self._mode == "max": self._metric_op = 1. 
- elif mode == "min": + elif self._mode == "min": self._metric_op = -1. self._time_attr = time_attr + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = 1. + elif self._mode == "min": + self._metric_op = -1. + + return True + def on_trial_add(self, trial_runner, trial): + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) + sizes = np.array([len(b._rungs) for b in self._brackets]) probs = np.e**(sizes - sizes.max()) normalized = probs / probs.sum() diff --git a/python/ray/tune/schedulers/hb_bohb.py b/python/ray/tune/schedulers/hb_bohb.py index 7204e71e361a..c8c061034631 100644 --- a/python/ray/tune/schedulers/hb_bohb.py +++ b/python/ray/tune/schedulers/hb_bohb.py @@ -30,6 +30,13 @@ def on_trial_add(self, trial_runner, trial): to current bracket. Else, create new iteration, create new bracket, add to bracket. """ + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. 
Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) cur_bracket = self._state["bracket"] cur_band = self._hyperbands[self._state["band_idx"]] diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py index a2fe3ad91ed3..3066cf80b3dc 100644 --- a/python/ray/tune/schedulers/hyperband.py +++ b/python/ray/tune/schedulers/hyperband.py @@ -76,12 +76,13 @@ class HyperBandScheduler(FIFOScheduler): def __init__(self, time_attr="training_iteration", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_t=81, reduction_factor=3): assert max_t > 0, "Max (time_attr) not valid!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" if reward_attr is not None: mode = "max" @@ -108,12 +109,33 @@ def __init__(self, self._state = {"bracket": None, "band_idx": 0} self._num_stopped = 0 self._metric = metric - if mode == "max": + self._mode = mode + self._metric_op = None + + if self._mode == "max": self._metric_op = 1. - elif mode == "min": + elif self._mode == "min": self._metric_op = -1. self._time_attr = time_attr + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = 1. + elif self._mode == "min": + self._metric_op = -1. + + return True + def on_trial_add(self, trial_runner, trial): """Adds new trial. @@ -121,6 +143,13 @@ def on_trial_add(self, trial_runner, trial): add to current bracket. Else, if current band is not filled, create new bracket, add to current bracket. 
Else, create new iteration, create new bracket, add to bracket.""" + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) cur_bracket = self._state["bracket"] cur_band = self._hyperbands[self._state["band_idx"]] diff --git a/python/ray/tune/schedulers/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py index 2389f166e32e..497c62915ac6 100644 --- a/python/ray/tune/schedulers/median_stopping_rule.py +++ b/python/ray/tune/schedulers/median_stopping_rule.py @@ -40,13 +40,12 @@ class MedianStoppingRule(FIFOScheduler): def __init__(self, time_attr="time_total_s", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, grace_period=60.0, min_samples_required=3, min_time_slice=0, hard_stop=True): - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" if reward_attr is not None: mode = "max" metric = reward_attr @@ -60,15 +59,49 @@ def __init__(self, self._min_samples_required = min_samples_required self._min_time_slice = min_time_slice self._metric = metric - assert mode in {"min", "max"}, "`mode` must be 'min' or 'max'." - self._worst = float("-inf") if mode == "max" else float("inf") - self._compare_op = max if mode == "max" else min + self._worst = None + self._compare_op = None + + self._mode = mode + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
+ self._worst = float("-inf") if self._mode == "max" else float( + "inf") + self._compare_op = max if self._mode == "max" else min + self._time_attr = time_attr self._hard_stop = hard_stop self._trial_state = {} self._last_pause = collections.defaultdict(lambda: float("-inf")) self._results = collections.defaultdict(list) + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + self._worst = float("-inf") if self._mode == "max" else float("inf") + self._compare_op = max if self._mode == "max" else min + + return True + + def on_trial_add(self, trial_runner, trial): + if not self._metric or not self._worst or not self._compare_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) + + super(MedianStoppingRule, self).on_trial_add(trial_runner, trial) + def on_trial_result(self, trial_runner, trial, result): """Callback for early stopping. diff --git a/python/ray/tune/schedulers/pbt.py b/python/ray/tune/schedulers/pbt.py index 70137e8de3f6..6e6396097028 100644 --- a/python/ray/tune/schedulers/pbt.py +++ b/python/ray/tune/schedulers/pbt.py @@ -216,8 +216,8 @@ class PopulationBasedTraining(FIFOScheduler): def __init__(self, time_attr="time_total_s", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, perturbation_interval=60.0, hyperparam_mutations={}, quantile_fraction=0.25, @@ -253,7 +253,8 @@ def __init__(self, "perturbation_interval must be a positive number greater " "than 0. Current value: '{}'".format(perturbation_interval)) - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" 
+ if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." if reward_attr is not None: mode = "max" @@ -265,9 +266,11 @@ def __init__(self, FIFOScheduler.__init__(self) self._metric = metric - if mode == "max": + self._mode = mode + self._metric_op = None + if self._mode == "max": self._metric_op = 1. - elif mode == "min": + elif self._mode == "min": self._metric_op = -1. self._time_attr = time_attr self._perturbation_interval = perturbation_interval @@ -285,7 +288,33 @@ def __init__(self, self._num_checkpoints = 0 self._num_perturbations = 0 + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = 1. + elif self._mode == "min": + self._metric_op = -1. + + return True + def on_trial_add(self, trial_runner, trial): + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) + self._trial_state[trial] = PBTTrialState(trial) for attr in self._hyperparam_mutations.keys(): diff --git a/python/ray/tune/schedulers/trial_scheduler.py b/python/ray/tune/schedulers/trial_scheduler.py index 6fe7284cf655..66ba25904379 100644 --- a/python/ray/tune/schedulers/trial_scheduler.py +++ b/python/ray/tune/schedulers/trial_scheduler.py @@ -8,6 +8,18 @@ class TrialScheduler: PAUSE = "PAUSE" #: Status for pausing trial execution STOP = "STOP" #: Status for stopping trial execution + def set_search_properties(self, metric, mode): + """Pass search properties to scheduler. + + This method acts as an alternative to instantiating schedulers + that react to metrics with their own `metric` and `mode` parameters. 
+ + Args: + metric (str): Metric to optimize + mode (str): One of ["min", "max"]. Direction to optimize. + """ + return True + def on_trial_add(self, trial_runner, trial): """Called when a new trial is added to the trial runner.""" diff --git a/python/ray/tune/suggest/__init__.py b/python/ray/tune/suggest/__init__.py index a9b5582a9088..f3f332f1ff4e 100644 --- a/python/ray/tune/suggest/__init__.py +++ b/python/ray/tune/suggest/__init__.py @@ -8,8 +8,8 @@ def create_searcher( search_alg, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, **kwargs, ): """Instantiate a search algorithm based on the given string. diff --git a/python/ray/tune/suggest/ax.py b/python/ray/tune/suggest/ax.py index 9574f80ce398..28b52a9c6816 100644 --- a/python/ray/tune/suggest/ax.py +++ b/python/ray/tune/suggest/ax.py @@ -104,15 +104,16 @@ def easy_objective(config): def __init__(self, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, parameter_constraints=None, outcome_constraints=None, ax_client=None, use_early_stopped_trials=None, max_concurrent=None): assert ax is not None, "Ax must be installed!" - assert mode in ["min", "max"], "`mode` must be one of ['min', 'max']" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." super(AxSearch, self).__init__( metric=metric, diff --git a/python/ray/tune/suggest/bayesopt.py b/python/ray/tune/suggest/bayesopt.py index 340f200a5ecb..d5c7684c1ab4 100644 --- a/python/ray/tune/suggest/bayesopt.py +++ b/python/ray/tune/suggest/bayesopt.py @@ -101,8 +101,8 @@ class BayesOptSearch(Searcher): def __init__(self, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, utility_kwargs=None, random_state=42, random_search_steps=10, @@ -144,7 +144,8 @@ def __init__(self, assert byo is not None, ( "BayesOpt must be installed!. 
You can install BayesOpt with" " the command: `pip install bayesian-optimization`.") - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." self.max_concurrent = max_concurrent self._config_counter = defaultdict(int) self._patience = patience @@ -284,8 +285,10 @@ def register_analysis(self, analysis): analysis (ExperimentAnalysis): Optionally, the previous analysis to integrate. """ - for (_, report), params in zip(analysis.dataframe().iterrows(), - analysis.get_all_configs().values()): + for (_, report), params in zip( + analysis.dataframe(metric=self._metric, + mode=self._mode).iterrows(), + analysis.get_all_configs().values()): # We add the obtained results to the # gaussian process optimizer self._register_result(params, report) diff --git a/python/ray/tune/suggest/bohb.py b/python/ray/tune/suggest/bohb.py index b545656106e7..318e582e0717 100644 --- a/python/ray/tune/suggest/bohb.py +++ b/python/ray/tune/suggest/bohb.py @@ -95,11 +95,12 @@ def __init__(self, space=None, bohb_config=None, max_concurrent=10, - metric="neg_mean_loss", - mode="max"): + metric=None, + mode=None): from hpbandster.optimizers.config_generators.bohb import BOHB assert BOHB is not None, "HpBandSter must be installed!" - assert mode in ["min", "max"], "`mode` must be in [min, max]!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
self._max_concurrent = max_concurrent self.trial_to_params = {} self.running = set() diff --git a/python/ray/tune/suggest/dragonfly.py b/python/ray/tune/suggest/dragonfly.py index 051301b62135..b2da186b04b5 100644 --- a/python/ray/tune/suggest/dragonfly.py +++ b/python/ray/tune/suggest/dragonfly.py @@ -130,15 +130,16 @@ def __init__(self, optimizer=None, domain=None, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, points_to_evaluate=None, evaluated_rewards=None, **kwargs): assert dragonfly is not None, """dragonfly must be installed! You can install Dragonfly with the command: `pip install dragonfly-opt`.""" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." super(DragonflySearch, self).__init__( metric=metric, mode=mode, **kwargs) diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py index b05cc3cc2314..b097cc29f275 100644 --- a/python/ray/tune/suggest/hyperopt.py +++ b/python/ray/tune/suggest/hyperopt.py @@ -118,8 +118,8 @@ class HyperOptSearch(Searcher): def __init__( self, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, points_to_evaluate=None, n_initial_points=20, random_state_seed=None, @@ -129,6 +129,8 @@ def __init__( ): assert hpo is not None, ( "HyperOpt must be installed! Run `pip install hyperopt`.") + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
from hyperopt.fmin import generate_trials_to_calculate super(HyperOptSearch, self).__init__( metric=metric, diff --git a/python/ray/tune/suggest/nevergrad.py b/python/ray/tune/suggest/nevergrad.py index e46935907387..bee20c814564 100644 --- a/python/ray/tune/suggest/nevergrad.py +++ b/python/ray/tune/suggest/nevergrad.py @@ -87,12 +87,13 @@ class NevergradSearch(Searcher): def __init__(self, optimizer=None, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_concurrent=None, **kwargs): assert ng is not None, "Nevergrad must be installed!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." super(NevergradSearch, self).__init__( metric=metric, mode=mode, max_concurrent=max_concurrent, **kwargs) diff --git a/python/ray/tune/suggest/optuna.py b/python/ray/tune/suggest/optuna.py index 792df0fc3b53..ae3f1aadbe48 100644 --- a/python/ray/tune/suggest/optuna.py +++ b/python/ray/tune/suggest/optuna.py @@ -100,11 +100,7 @@ class OptunaSearch(Searcher): """ - def __init__(self, - space=None, - metric="episode_reward_mean", - mode="max", - sampler=None): + def __init__(self, space=None, metric=None, mode=None, sampler=None): assert ot is not None, ( "Optuna must be installed! Run `pip install optuna`.") super(OptunaSearch, self).__init__( diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index ff26ed24f598..67dec2bde9e7 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -127,8 +127,8 @@ class SkOptSearch(Searcher): def __init__(self, optimizer=None, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, points_to_evaluate=None, evaluated_rewards=None, max_concurrent=None, @@ -137,7 +137,8 @@ def __init__(self, You can install Skopt with the command: `pip install scikit-optimize`.""" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" 
+ if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." self.max_concurrent = max_concurrent super(SkOptSearch, self).__init__( metric=metric, diff --git a/python/ray/tune/suggest/suggestion.py b/python/ray/tune/suggest/suggestion.py index 633fe33718e1..205285329760 100644 --- a/python/ray/tune/suggest/suggestion.py +++ b/python/ray/tune/suggest/suggestion.py @@ -56,8 +56,8 @@ def on_trial_complete(self, trial_id, result, **kwargs): CKPT_FILE_TMPL = "searcher-state-{}.pkl" def __init__(self, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_concurrent=None, use_early_stopped_trials=None): if use_early_stopped_trials is False: @@ -70,6 +70,13 @@ def __init__(self, "search algorithm. Use tune.suggest.ConcurrencyLimiter() " "instead. This will raise an error in future versions of Ray.") + self._metric = metric + self._mode = mode + + if not mode or not metric: + # Early return to avoid assertions + return + assert isinstance( metric, type(mode)), "metric and mode must be of the same type" if isinstance(mode, str): @@ -83,9 +90,6 @@ def __init__(self, else: raise ValueError("Mode most either be a list or string") - self._metric = metric - self._mode = mode - def set_search_properties(self, metric, mode, config): """Pass search properties to searcher. diff --git a/python/ray/tune/suggest/zoopt.py b/python/ray/tune/suggest/zoopt.py index 950a8a68745e..8f3b2453181d 100644 --- a/python/ray/tune/suggest/zoopt.py +++ b/python/ray/tune/suggest/zoopt.py @@ -109,12 +109,13 @@ def __init__(self, algo="asracos", budget=None, dim_dict=None, - metric="episode_reward_mean", - mode="min", + metric=None, + mode=None, **kwargs): assert zoopt is not None, "Zoopt not found - please install zoopt." assert budget is not None, "`budget` should not be None!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
_algo = algo.lower() assert _algo in ["asracos", "sracos" ], "`algo` must be in ['asracos', 'sracos'] currently" diff --git a/python/ray/tune/tests/example.py b/python/ray/tune/tests/example.py index 69d1f854b577..383dd5ecb72a 100644 --- a/python/ray/tune/tests/example.py +++ b/python/ray/tune/tests/example.py @@ -39,5 +39,5 @@ def training_function(config): metric="mean_loss", mode="min")) # Get a dataframe for analyzing trial results. -df = analysis.dataframe() +df = analysis.results_df # __quick_start_end__ diff --git a/python/ray/tune/tests/test_api.py b/python/ray/tune/tests/test_api.py index fa0213dd8571..3dc3d9fb25bd 100644 --- a/python/ray/tune/tests/test_api.py +++ b/python/ray/tune/tests/test_api.py @@ -520,7 +520,8 @@ def train(config, reporter): analysis = tune.run(train, num_samples=10, stop=stopper) self.assertTrue( all(t.status == Trial.TERMINATED for t in analysis.trials)) - self.assertTrue(len(analysis.dataframe()) <= top) + self.assertTrue( + len(analysis.dataframe(metric="test", mode="max")) <= top) patience = 5 stopper = EarlyStopping("test", top=top, mode="min", patience=patience) @@ -528,14 +529,16 @@ def train(config, reporter): analysis = tune.run(train, num_samples=20, stop=stopper) self.assertTrue( all(t.status == Trial.TERMINATED for t in analysis.trials)) - self.assertTrue(len(analysis.dataframe()) <= patience) + self.assertTrue( + len(analysis.dataframe(metric="test", mode="max")) <= patience) stopper = EarlyStopping("test", top=top, mode="min") analysis = tune.run(train, num_samples=10, stop=stopper) self.assertTrue( all(t.status == Trial.TERMINATED for t in analysis.trials)) - self.assertTrue(len(analysis.dataframe()) <= top) + self.assertTrue( + len(analysis.dataframe(metric="test", mode="max")) <= top) def testBadStoppingFunction(self): def train(config, reporter): diff --git a/python/ray/tune/tests/test_experiment_analysis.py b/python/ray/tune/tests/test_experiment_analysis.py index 5195c7825264..bac891cc965c 100644 --- 
a/python/ray/tune/tests/test_experiment_analysis.py +++ b/python/ray/tune/tests/test_experiment_analysis.py @@ -7,7 +7,7 @@ from numpy import nan import ray -from ray.tune import run, sample_from +from ray import tune from ray.tune.examples.async_hyperband_example import MyTrainableClass @@ -26,7 +26,7 @@ def tearDown(self): ray.shutdown() def run_test_exp(self): - self.ea = run( + self.ea = tune.run( MyTrainableClass, name=self.test_name, local_dir=self.test_dir, @@ -34,13 +34,14 @@ def run_test_exp(self): checkpoint_freq=1, num_samples=self.num_samples, config={ - "width": sample_from( + "width": tune.sample_from( lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + "height": tune.sample_from( + lambda spec: int(100 * random.random())), }) def nan_test_exp(self): - nan_ea = run( + nan_ea = tune.run( lambda x: nan, name="testing_nan", local_dir=self.test_dir, @@ -48,14 +49,15 @@ def nan_test_exp(self): checkpoint_freq=1, num_samples=self.num_samples, config={ - "width": sample_from( + "width": tune.sample_from( lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + "height": tune.sample_from( + lambda spec: int(100 * random.random())), }) return nan_ea def testDataframe(self): - df = self.ea.dataframe() + df = self.ea.dataframe(self.metric, mode="max") self.assertTrue(isinstance(df, pd.DataFrame)) self.assertEquals(df.shape[0], self.num_samples) @@ -143,21 +145,50 @@ def testAllDataframes(self): self.assertEqual(df.training_iteration.max(), 1) def testIgnoreOtherExperiment(self): - analysis = run( + analysis = tune.run( MyTrainableClass, name="test_example", local_dir=self.test_dir, stop={"training_iteration": 1}, num_samples=1, config={ - "width": sample_from( + "width": tune.sample_from( lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + "height": tune.sample_from( + lambda spec: 
int(100 * random.random())), }) - df = analysis.dataframe() + df = analysis.dataframe(self.metric, mode="max") self.assertEquals(df.shape[0], 1) +class ExperimentAnalysisPropertySuite(unittest.TestCase): + def testBestProperties(self): + def train(config): + for i in range(10): + with tune.checkpoint_dir(i): + pass + tune.report(res=config["base"] + i) + + ea = tune.run( + train, + config={"base": tune.grid_search([100, 200, 300])}, + metric="res", + mode="max") + + trials = ea.trials + + self.assertEquals(ea.best_trial, trials[2]) + self.assertEquals(ea.best_config, trials[2].config) + self.assertEquals(ea.best_logdir, trials[2].logdir) + self.assertEquals(ea.best_checkpoint, trials[2].checkpoint.value) + self.assertTrue( + all(ea.best_dataframe["trial_id"] == trials[2].trial_id)) + self.assertEquals(ea.results_df.loc[trials[2].trial_id, "res"], 309) + self.assertEquals(ea.best_result["res"], 309) + self.assertEquals(ea.best_result_df.loc[trials[2].trial_id, "res"], + 309) + + if __name__ == "__main__": import pytest import sys diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py index 4e299a758855..4ef9a51f8fd3 100644 --- a/python/ray/tune/tests/test_experiment_analysis_mem.py +++ b/python/ray/tune/tests/test_experiment_analysis_mem.py @@ -83,10 +83,10 @@ def testCompareTrials(self): num_samples=1, config={"id": grid_search(list(range(5)))}) - max_all = ea.get_best_trial("score", - "max").metric_analysis["score"]["max"] - min_all = ea.get_best_trial("score", - "min").metric_analysis["score"]["min"] + max_all = ea.get_best_trial("score", "max", + "all").metric_analysis["score"]["max"] + min_all = ea.get_best_trial("score", "min", + "all").metric_analysis["score"]["min"] max_last = ea.get_best_trial("score", "max", "last").metric_analysis["score"]["last"] max_avg = ea.get_best_trial("score", "max", @@ -149,7 +149,7 @@ def tearDown(self): def testDataframe(self): analysis = Analysis(self.test_dir) 
- df = analysis.dataframe() + df = analysis.dataframe(self.metric, mode="max") self.assertTrue(isinstance(df, pd.DataFrame)) self.assertEqual(df.shape[0], self.num_samples * 2) diff --git a/python/ray/tune/tests/test_trial_scheduler.py b/python/ray/tune/tests/test_trial_scheduler.py index 320e76af39d6..507ae81f0aee 100644 --- a/python/ray/tune/tests/test_trial_scheduler.py +++ b/python/ray/tune/tests/test_trial_scheduler.py @@ -60,7 +60,11 @@ def basicSetup(self, rule): return t1, t2 def testMedianStoppingConstantPerf(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -75,7 +79,11 @@ def testMedianStoppingConstantPerf(self): TrialScheduler.STOP) def testMedianStoppingOnCompleteOnly(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() self.assertEqual( @@ -87,7 +95,11 @@ def testMedianStoppingOnCompleteOnly(self): TrialScheduler.STOP) def testMedianStoppingGracePeriod(self): - rule = MedianStoppingRule(grace_period=2.5, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=2.5, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -104,7 +116,11 @@ def testMedianStoppingGracePeriod(self): TrialScheduler.STOP) def testMedianStoppingMinSamples(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=2) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=2) t1, t2 = self.basicSetup(rule) runner 
= mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -120,7 +136,11 @@ def testMedianStoppingMinSamples(self): TrialScheduler.STOP) def testMedianStoppingUsesMedian(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -135,7 +155,11 @@ def testMedianStoppingUsesMedian(self): def testMedianStoppingSoftStop(self): rule = MedianStoppingRule( - grace_period=0, min_samples_required=1, hard_stop=False) + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1, + hard_stop=False) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -265,7 +289,8 @@ def schedulerSetup(self, num_trials, max_t=81): (15, 9) -> (5, 27) -> (2, 45); (34, 3) -> (12, 9) -> (4, 27) -> (2, 42); (81, 1) -> (27, 3) -> (9, 9) -> (3, 27) -> (1, 41);""" - sched = HyperBandScheduler(max_t=max_t) + sched = HyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=max_t) for i in range(num_trials): t = Trial("__fake") sched.on_trial_add(None, t) @@ -321,7 +346,7 @@ def advancedSetup(self): return sched def testConfigSameEta(self): - sched = HyperBandScheduler() + sched = HyperBandScheduler(metric="episode_reward_mean", mode="max") i = 0 while not sched._cur_band_filled(): t = Trial("__fake") @@ -335,7 +360,10 @@ def testConfigSameEta(self): reduction_factor = 10 sched = HyperBandScheduler( - max_t=1000, reduction_factor=reduction_factor) + metric="episode_reward_mean", + mode="max", + max_t=1000, + reduction_factor=reduction_factor) i = 0 while not sched._cur_band_filled(): t = Trial("__fake") @@ -348,7 +376,8 @@ def testConfigSameEta(self): self.assertEqual(sched._hyperbands[0][-1]._r, 1) def testConfigSameEtaSmall(self): - 
sched = HyperBandScheduler(max_t=1) + sched = HyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=1) i = 0 while len(sched._hyperbands) < 2: t = Trial("__fake") @@ -627,7 +656,11 @@ def tearDown(self): _register_all() # re-register the evicted objects def testLargestBracketFirst(self): - sched = HyperBandForBOHB(max_t=3, reduction_factor=3) + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="max", + max_t=3, + reduction_factor=3) runner = _MockTrialRunner(sched) for i in range(3): t = Trial("__fake") @@ -642,7 +675,11 @@ def testCheckTrialInfoUpdate(self): def result(score, ts): return {"episode_reward_mean": score, TRAINING_ITERATION: ts} - sched = HyperBandForBOHB(max_t=3, reduction_factor=3) + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="max", + max_t=3, + reduction_factor=3) runner = _MockTrialRunner(sched) runner._search_alg = MagicMock() runner._search_alg.searcher = MagicMock() @@ -668,7 +705,11 @@ def testCheckTrialInfoUpdateMin(self): def result(score, ts): return {"episode_reward_mean": score, TRAINING_ITERATION: ts} - sched = HyperBandForBOHB(max_t=3, reduction_factor=3, mode="min") + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="min", + max_t=3, + reduction_factor=3) runner = _MockTrialRunner(sched) runner._search_alg = MagicMock() runner._search_alg.searcher = MagicMock() @@ -693,7 +734,11 @@ def testPauseResumeChooseTrial(self): def result(score, ts): return {"episode_reward_mean": score, TRAINING_ITERATION: ts} - sched = HyperBandForBOHB(max_t=10, reduction_factor=3, mode="min") + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="min", + max_t=10, + reduction_factor=3) runner = _MockTrialRunner(sched) runner._search_alg = MagicMock() runner._search_alg.searcher = MagicMock() @@ -761,6 +806,8 @@ def basicSetup(self, } pbt = PopulationBasedTraining( time_attr="training_iteration", + metric="episode_reward_mean", + mode="max", 
perturbation_interval=perturbation_interval, resample_probability=resample_prob, quantile_fraction=0.25, @@ -1675,6 +1722,7 @@ def basicSetup(self, } pbt = PopulationBasedTraining( metric="mean_accuracy", + mode="max", time_attr="training_iteration", perturbation_interval=perturbation_interval, resample_probability=resample_prob, @@ -1791,7 +1839,8 @@ def nanSetup(self, scheduler): return t1, t2 def testAsyncHBOnComplete(self): - scheduler = AsyncHyperBandScheduler(max_t=10, brackets=1) + scheduler = AsyncHyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=10, brackets=1) t1, t2 = self.basicSetup(scheduler) t3 = Trial("PPO") scheduler.on_trial_add(None, t3) @@ -1802,7 +1851,11 @@ def testAsyncHBOnComplete(self): def testAsyncHBGracePeriod(self): scheduler = AsyncHyperBandScheduler( - grace_period=2.5, reduction_factor=3, brackets=1) + metric="episode_reward_mean", + mode="max", + grace_period=2.5, + reduction_factor=3, + brackets=1) t1, t2 = self.basicSetup(scheduler) scheduler.on_trial_complete(None, t1, result(10, 1000)) scheduler.on_trial_complete(None, t2, result(10, 1000)) @@ -1819,7 +1872,8 @@ def testAsyncHBGracePeriod(self): TrialScheduler.STOP) def testAsyncHBAllCompletes(self): - scheduler = AsyncHyperBandScheduler(max_t=10, brackets=10) + scheduler = AsyncHyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=10, brackets=10) trials = [Trial("PPO") for i in range(10)] for t in trials: scheduler.on_trial_add(None, t) @@ -1831,7 +1885,12 @@ def testAsyncHBAllCompletes(self): def testAsyncHBUsesPercentile(self): scheduler = AsyncHyperBandScheduler( - grace_period=1, max_t=10, reduction_factor=2, brackets=1) + metric="episode_reward_mean", + mode="max", + grace_period=1, + max_t=10, + reduction_factor=2, + brackets=1) t1, t2 = self.basicSetup(scheduler) scheduler.on_trial_complete(None, t1, result(10, 1000)) scheduler.on_trial_complete(None, t2, result(10, 1000)) @@ -1846,7 +1905,12 @@ def testAsyncHBUsesPercentile(self): def 
testAsyncHBNanPercentile(self): scheduler = AsyncHyperBandScheduler( - grace_period=1, max_t=10, reduction_factor=2, brackets=1) + metric="episode_reward_mean", + mode="max", + grace_period=1, + max_t=10, + reduction_factor=2, + brackets=1) t1, t2 = self.nanSetup(scheduler) scheduler.on_trial_complete(None, t1, result(10, 450)) scheduler.on_trial_complete(None, t2, result(10, np.nan)) diff --git a/python/ray/tune/tests/test_trial_scheduler_pbt.py b/python/ray/tune/tests/test_trial_scheduler_pbt.py index 740616e8ce4d..5af7cb46724a 100644 --- a/python/ray/tune/tests/test_trial_scheduler_pbt.py +++ b/python/ray/tune/tests/test_trial_scheduler_pbt.py @@ -82,15 +82,24 @@ def synchSetup(self, synch, param=[10, 20, 30]): def testAsynchFail(self): analysis = self.synchSetup(False) - self.assertTrue(any(analysis.dataframe()["mean_accuracy"] != 33)) + self.assertTrue( + any( + analysis.dataframe(metric="mean_accuracy", mode="max") + ["mean_accuracy"] != 33)) def testSynchPass(self): analysis = self.synchSetup(True) - self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33)) + self.assertTrue( + all( + analysis.dataframe(metric="mean_accuracy", mode="max")[ + "mean_accuracy"] == 33)) def testSynchPassLast(self): analysis = self.synchSetup(True, param=[30, 20, 10]) - self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33)) + self.assertTrue( + all( + analysis.dataframe(metric="mean_accuracy", mode="max")[ + "mean_accuracy"] == 33)) class PopulationBasedTrainingConfigTest(unittest.TestCase): diff --git a/python/ray/tune/tests/tutorial.py b/python/ray/tune/tests/tutorial.py index f0e5fa5af1d2..2aa4422798c6 100644 --- a/python/ray/tune/tests/tutorial.py +++ b/python/ray/tune/tests/tutorial.py @@ -166,7 +166,7 @@ def train_mnist(config): # __run_analysis_begin__ import os -df = analysis.dataframe() +df = analysis.results_df logdir = analysis.get_best_logdir("mean_accuracy", mode="max") state_dict = torch.load(os.path.join(logdir, "model.pth")) diff --git 
a/python/ray/tune/tune.py b/python/ray/tune/tune.py index f331bebec2b3..075ba5c6910b 100644 --- a/python/ray/tune/tune.py +++ b/python/ray/tune/tune.py @@ -68,6 +68,8 @@ def _report_progress(runner, reporter, done=False): def run( run_or_experiment, name=None, + metric=None, + mode=None, stop=None, time_budget_s=None, config=None, @@ -147,6 +149,12 @@ def run( will need to first register the function: ``tune.register_trainable("lambda_id", lambda x: ...)``. You can then use ``tune.run("lambda_id")``. + metric (str): Metric to optimize. This metric should be reported + with `tune.report()`. If set, will be passed to the search + algorithm and scheduler. + mode (str): Must be one of [min, max]. Determines whether objective is + minimizing or maximizing the metric attribute. If set, will be + passed to the search algorithm and scheduler. name (str): Name of experiment. stop (dict | callable | :class:`Stopper`): Stopping criteria. If dict, the keys may be any field in the return result of 'train()', @@ -276,6 +284,11 @@ def run( "sync_config=SyncConfig(...)`. 
See `ray.tune.SyncConfig` for " "more details.") + if mode and mode not in ["min", "max"]: + raise ValueError( + "The `mode` parameter passed to `tune.run()` has to be one of " + "['min', 'max']") + config = config or {} sync_config = sync_config or SyncConfig() set_sync_periods(sync_config) @@ -329,8 +342,7 @@ def run( if not search_alg: search_alg = BasicVariantGenerator() - # TODO (krfricke): Introduce metric/mode as top level API - if config and not search_alg.set_search_properties(None, None, config): + if config and not search_alg.set_search_properties(metric, mode, config): if has_unresolved_values(config): raise ValueError( "You passed a `config` parameter to `tune.run()` with " @@ -339,9 +351,17 @@ def run( "does not contain any more parameter definitions - include " "them in the search algorithm's search space if necessary.") + scheduler = scheduler or FIFOScheduler() + if not scheduler.set_search_properties(metric, mode): + raise ValueError( + "You passed a `metric` or `mode` argument to `tune.run()`, but " + "the scheduler you are using was already instantiated with their " + "own `metric` and `mode` parameters. Either remove the arguments " + "from your scheduler or from your call to `tune.run()`") + runner = TrialRunner( search_alg=search_alg, - scheduler=scheduler or FIFOScheduler(), + scheduler=scheduler, local_checkpoint_dir=experiments[0].checkpoint_dir, remote_checkpoint_dir=experiments[0].remote_checkpoint_dir, sync_to_cloud=sync_config.sync_to_cloud, @@ -413,8 +433,8 @@ def run( return ExperimentAnalysis( runner.checkpoint_file, trials=trials, - default_metric=None, - default_mode=None) + default_metric=metric, + default_mode=mode) def run_experiments(experiments,