diff --git a/README.rst b/README.rst
index 1888eb37a68c..b4b48da3bbf3 100644
--- a/README.rst
+++ b/README.rst
@@ -120,7 +120,7 @@ This example runs a parallel grid search to optimize an example objective functi
     print("Best config: ", analysis.get_best_config(metric="mean_loss"))
 
     # Get a dataframe for analyzing trial results.
-    df = analysis.dataframe()
+    df = analysis.results_df
 
 If TensorBoard is installed, automatically visualize all trial results:
 
diff --git a/doc/source/tune/api_docs/analysis.rst b/doc/source/tune/api_docs/analysis.rst
index 2ec32f686c17..c9468fbab694 100644
--- a/doc/source/tune/api_docs/analysis.rst
+++ b/doc/source/tune/api_docs/analysis.rst
@@ -18,7 +18,7 @@ Here are some example operations for obtaining a summary of your experiment:
 .. code-block:: python
 
     # Get a dataframe for the last reported results of all of the trials
-    df = analysis.dataframe()
+    df = analysis.results_df
 
     # Get a dataframe for the max accuracy seen for each trial
     df = analysis.dataframe(metric="mean_accuracy", mode="max")
diff --git a/doc/source/tune/key-concepts.rst b/doc/source/tune/key-concepts.rst
index 11247895bd50..213d680a01c0 100644
--- a/doc/source/tune/key-concepts.rst
+++ b/doc/source/tune/key-concepts.rst
@@ -219,16 +219,24 @@ Analysis
 
     analysis = tune.run(trainable, search_alg=algo, stop={"training_iteration": 20})
 
-    # Get the best hyperparameters
-    best_hyperparameters = analysis.get_best_config()
+    best_trial = analysis.best_trial  # Get best trial
+    best_config = analysis.best_config  # Get best trial's hyperparameters
+    best_logdir = analysis.best_logdir  # Get best trial's logdir
+    best_checkpoint = analysis.best_checkpoint  # Get best trial's best checkpoint
+    best_result = analysis.best_result  # Get best trial's last results
+    best_result_df = analysis.best_result_df  # Get best result as pandas dataframe
 
 This object can also retrieve all training runs as dataframes,
 allowing you to do ad-hoc data analysis over your results.
 
 .. code-block:: python
 
-    # Get a dataframe for the max score seen for each trial
+    # Get a dataframe with the last results for each trial
+    df_results = analysis.results_df
+
+    # Get a dataframe of results for a specific score or mode
     df = analysis.dataframe(metric="score", mode="max")
+
 
 What's Next?
 -------------
diff --git a/python/ray/dashboard/dashboard.py b/python/ray/dashboard/dashboard.py
index 76a75d053168..ee82a5bad00f 100644
--- a/python/ray/dashboard/dashboard.py
+++ b/python/ray/dashboard/dashboard.py
@@ -806,7 +806,7 @@ def collect(self):
         # search through all the sub_directories in log directory
         analysis = Analysis(str(self._logdir))
-        df = analysis.dataframe()
+        df = analysis.dataframe(metric="episode_reward_mean", mode="max")
 
         if len(df) == 0 or "trial_id" not in df.columns:
             return
 
diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py
index 2da4c33e8883..afa5de622ceb 100644
--- a/python/ray/tune/analysis/experiment_analysis.py
+++ b/python/ray/tune/analysis/experiment_analysis.py
@@ -1,11 +1,17 @@
 import json
 import logging
 import os
+from typing import Dict
+
+from ray.tune.checkpoint_manager import Checkpoint
+from ray.tune.utils import flatten_dict
 
 try:
     import pandas as pd
+    from pandas import DataFrame
 except ImportError:
     pd = None
+    DataFrame = None
 
 from ray.tune.error import TuneError
 from ray.tune.result import EXPR_PROGRESS_FILE, EXPR_PARAM_FILE,\
@@ -80,6 +86,9 @@ def dataframe(self, metric=None, mode=None):
         Returns:
             pd.DataFrame: Constructed from a result dict of each trial.
""" + metric = self._validate_metric(metric) + mode = self._validate_mode(mode) + rows = self._retrieve_rows(metric=metric, mode=mode) all_configs = self.get_all_configs(prefix=True) for path, config in all_configs.items(): @@ -227,6 +236,9 @@ def get_best_checkpoint(self, trial, metric=None, mode=None): mode = self._validate_mode(mode) checkpoint_paths = self.get_trial_checkpoints_paths(trial, metric) + if not checkpoint_paths: + logger.error(f"No checkpoints have been found for trial {trial}.") + return None if mode == "max": return max(checkpoint_paths, key=lambda x: x[1])[0] else: @@ -316,7 +328,150 @@ def __init__(self, os.path.dirname(experiment_checkpoint_path), default_metric, default_mode) - def get_best_trial(self, metric=None, mode=None, scope="all"): + @property + def best_trial(self) -> Trial: + """Get the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_trial`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_trial(metric, mode)` method to set the metric " + "and mode explicitly.") + return self.get_best_trial(self.default_metric, self.default_mode) + + @property + def best_config(self) -> Dict: + """Get the config of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_config(metric, mode, scope)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_config`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_config(metric, mode)` method to set the metric " + "and mode explicitly.") + return self.get_best_config(self.default_metric, self.default_mode) + + @property + def best_checkpoint(self) -> Checkpoint: + """Get the checkpoint of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_checkpoint(trial, metric, mode)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_checkpoint`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_checkpoint(trial, metric, mode)` method to set the " + "metric and mode explicitly.") + best_trial = self.best_trial + return self.get_best_checkpoint(best_trial, self.default_metric, + self.default_mode) + + @property + def best_logdir(self) -> str: + """Get the logdir of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_logdir(metric, mode)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_logdir`, pass a `metric` and `mode` " + "parameter to `tune.run()`. 
Alternatively, use the " + "`get_best_logdir(metric, mode, scope)` method to set the " + "metric and mode explicitly.") + return self.get_best_logdir(self.default_metric, self.default_mode) + + @property + def best_dataframe(self) -> DataFrame: + """Get the full result dataframe of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_logdir(metric, mode)` and use it to look for the dataframe + in the `self.trial_dataframes` dict. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_result`, pass a `metric` and `mode` " + "parameter to `tune.run()`.") + best_logdir = self.best_logdir + return self.trial_dataframes[best_logdir] + + @property + def best_result(self) -> Dict: + """Get the last result of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope).last_result` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_result`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use " + "`get_best_trial(metric, mode).last_result` to set " + "the metric and mode explicitly and fetch the last result.") + return self.best_trial.last_result + + @property + def best_result_df(self) -> DataFrame: + """Get the best result of the experiment as a pandas dataframe. + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope).last_result` instead. + """ + if not pd: + raise ValueError("`best_result_df` requires pandas. Install with " + "`pip install pandas`.") + best_result = flatten_dict(self.best_result, delimiter=".") + return pd.DataFrame.from_records([best_result], index="trial_id") + + @property + def results(self) -> Dict[str, Dict]: + """Get the last result of the all trials of the experiment""" + return {trial.trial_id: trial.last_result for trial in self.trials} + + @property + def results_df(self) -> DataFrame: + if not pd: + raise ValueError("`best_result_df` requires pandas. Install with " + "`pip install pandas`.") + return pd.DataFrame.from_records( + [ + flatten_dict(trial.last_result, delimiter=".") + for trial in self.trials + ], + index="trial_id") + + def get_best_trial(self, metric=None, mode=None, scope="last"): """Retrieve the best trial object. Compares all trials' scores on ``metric``. @@ -380,7 +535,7 @@ def get_best_trial(self, metric=None, mode=None, scope="all"): "parameter?") return best_trial - def get_best_config(self, metric=None, mode=None, scope="all"): + def get_best_config(self, metric=None, mode=None, scope="last"): """Retrieve the best config corresponding to the trial. Compares all trials' scores on `metric`. @@ -407,7 +562,7 @@ def get_best_config(self, metric=None, mode=None, scope="all"): best_trial = self.get_best_trial(metric, mode, scope) return best_trial.config if best_trial else None - def get_best_logdir(self, metric=None, mode=None, scope="all"): + def get_best_logdir(self, metric=None, mode=None, scope="last"): """Retrieve the logdir corresponding to the best trial. 
Compares all trials' scores on `metric`. diff --git a/python/ray/tune/commands.py b/python/ray/tune/commands.py index 2ab17e609906..7fbbe9776bde 100644 --- a/python/ray/tune/commands.py +++ b/python/ray/tune/commands.py @@ -116,7 +116,8 @@ def list_trials(experiment_path, _check_tabulate() try: - checkpoints_df = Analysis(experiment_path).dataframe() + checkpoints_df = Analysis(experiment_path).dataframe( + metric="episode_reward_mean", mode="max") except TuneError: raise click.ClickException("No trial data found!") diff --git a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py index 1d6b3b7e3822..8dea4fbcdeeb 100644 --- a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py +++ b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py @@ -160,6 +160,6 @@ def _export_model(self, export_formats, export_dir): # demo of the trained Generators if not args.smoke_test: - logdirs = analysis.dataframe()["logdir"].tolist() + logdirs = analysis.results_df["logdir"].tolist() model_paths = [os.path.join(d, "exported_models") for d in logdirs] demo_gan(analysis, model_paths) diff --git a/python/ray/tune/suggest/bayesopt.py b/python/ray/tune/suggest/bayesopt.py index 8554d3a1d660..d5c7684c1ab4 100644 --- a/python/ray/tune/suggest/bayesopt.py +++ b/python/ray/tune/suggest/bayesopt.py @@ -285,8 +285,10 @@ def register_analysis(self, analysis): analysis (ExperimentAnalysis): Optionally, the previous analysis to integrate. """ - for (_, report), params in zip(analysis.dataframe().iterrows(), - analysis.get_all_configs().values()): + for (_, report), params in zip( + analysis.dataframe(metric=self._metric, + mode=self._mode).iterrows(), + analysis.get_all_configs().values()): # We add the obtained results to the # gaussian process optimizer self._register_result(params, report) diff --git a/python/ray/tune/tests/example.py b/python/ray/tune/tests/example.py index 69d1f854b577..383dd5ecb72a 100644 --- a/python/ray/tune/tests/example.py +++ b/python/ray/tune/tests/example.py @@ -39,5 +39,5 @@ def training_function(config): metric="mean_loss", mode="min")) # Get a dataframe for analyzing trial results. 
-df = analysis.dataframe()
+df = analysis.results_df
 # __quick_start_end__
diff --git a/python/ray/tune/tests/test_api.py b/python/ray/tune/tests/test_api.py
index fa0213dd8571..3dc3d9fb25bd 100644
--- a/python/ray/tune/tests/test_api.py
+++ b/python/ray/tune/tests/test_api.py
@@ -520,7 +520,8 @@ def train(config, reporter):
         analysis = tune.run(train, num_samples=10, stop=stopper)
         self.assertTrue(
             all(t.status == Trial.TERMINATED for t in analysis.trials))
-        self.assertTrue(len(analysis.dataframe()) <= top)
+        self.assertTrue(
+            len(analysis.dataframe(metric="test", mode="max")) <= top)
 
         patience = 5
         stopper = EarlyStopping("test", top=top, mode="min", patience=patience)
@@ -528,14 +529,16 @@ def train(config, reporter):
         analysis = tune.run(train, num_samples=20, stop=stopper)
         self.assertTrue(
             all(t.status == Trial.TERMINATED for t in analysis.trials))
-        self.assertTrue(len(analysis.dataframe()) <= patience)
+        self.assertTrue(
+            len(analysis.dataframe(metric="test", mode="max")) <= patience)
 
         stopper = EarlyStopping("test", top=top, mode="min")
 
         analysis = tune.run(train, num_samples=10, stop=stopper)
         self.assertTrue(
             all(t.status == Trial.TERMINATED for t in analysis.trials))
-        self.assertTrue(len(analysis.dataframe()) <= top)
+        self.assertTrue(
+            len(analysis.dataframe(metric="test", mode="max")) <= top)
 
     def testBadStoppingFunction(self):
         def train(config, reporter):
diff --git a/python/ray/tune/tests/test_experiment_analysis.py b/python/ray/tune/tests/test_experiment_analysis.py
index 5195c7825264..bac891cc965c 100644
--- a/python/ray/tune/tests/test_experiment_analysis.py
+++ b/python/ray/tune/tests/test_experiment_analysis.py
@@ -7,7 +7,7 @@
 from numpy import nan
 
 import ray
-from ray.tune import run, sample_from
+from ray import tune
 from ray.tune.examples.async_hyperband_example import MyTrainableClass
 
 
@@ -26,7 +26,7 @@ def tearDown(self):
         ray.shutdown()
 
     def run_test_exp(self):
-        self.ea = run(
+        self.ea = tune.run(
             MyTrainableClass,
             name=self.test_name,
             local_dir=self.test_dir,
@@ -34,13 +34,14 @@ def run_test_exp(self):
             checkpoint_freq=1,
             num_samples=self.num_samples,
             config={
-                "width": sample_from(
+                "width": tune.sample_from(
                     lambda spec: 10 + int(90 * random.random())),
-                "height": sample_from(lambda spec: int(100 * random.random())),
+                "height": tune.sample_from(
+                    lambda spec: int(100 * random.random())),
             })
 
     def nan_test_exp(self):
-        nan_ea = run(
+        nan_ea = tune.run(
             lambda x: nan,
             name="testing_nan",
             local_dir=self.test_dir,
@@ -48,14 +49,15 @@ def nan_test_exp(self):
             checkpoint_freq=1,
             num_samples=self.num_samples,
             config={
-                "width": sample_from(
+                "width": tune.sample_from(
                     lambda spec: 10 + int(90 * random.random())),
-                "height": sample_from(lambda spec: int(100 * random.random())),
+                "height": tune.sample_from(
+                    lambda spec: int(100 * random.random())),
             })
         return nan_ea
 
     def testDataframe(self):
-        df = self.ea.dataframe()
+        df = self.ea.dataframe(self.metric, mode="max")
 
         self.assertTrue(isinstance(df, pd.DataFrame))
         self.assertEquals(df.shape[0], self.num_samples)
@@ -143,21 +145,50 @@ def testAllDataframes(self):
         self.assertEqual(df.training_iteration.max(), 1)
 
     def testIgnoreOtherExperiment(self):
-        analysis = run(
+        analysis = tune.run(
             MyTrainableClass,
             name="test_example",
             local_dir=self.test_dir,
             stop={"training_iteration": 1},
             num_samples=1,
             config={
-                "width": sample_from(
+                "width": tune.sample_from(
                     lambda spec: 10 + int(90 * random.random())),
-                "height": sample_from(lambda spec: int(100 * random.random())),
+                "height": tune.sample_from(
+                    lambda spec: int(100 * random.random())),
             })
-        df = analysis.dataframe()
+        df = analysis.dataframe(self.metric, mode="max")
         self.assertEquals(df.shape[0], 1)
 
 
+class ExperimentAnalysisPropertySuite(unittest.TestCase):
+    def testBestProperties(self):
+        def train(config):
+            for i in range(10):
+                with tune.checkpoint_dir(i):
+                    pass
+                tune.report(res=config["base"] + i)
+
+        ea = tune.run(
+            train,
+            config={"base": tune.grid_search([100, 200, 300])},
+            metric="res",
+            mode="max")
+
+        trials = ea.trials
+
+        self.assertEquals(ea.best_trial, trials[2])
+        self.assertEquals(ea.best_config, trials[2].config)
+        self.assertEquals(ea.best_logdir, trials[2].logdir)
+        self.assertEquals(ea.best_checkpoint, trials[2].checkpoint.value)
+        self.assertTrue(
+            all(ea.best_dataframe["trial_id"] == trials[2].trial_id))
+        self.assertEquals(ea.results_df.loc[trials[2].trial_id, "res"], 309)
+        self.assertEquals(ea.best_result["res"], 309)
+        self.assertEquals(ea.best_result_df.loc[trials[2].trial_id, "res"],
+                          309)
+
+
 if __name__ == "__main__":
     import pytest
     import sys
diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py
index 4e299a758855..4ef9a51f8fd3 100644
--- a/python/ray/tune/tests/test_experiment_analysis_mem.py
+++ b/python/ray/tune/tests/test_experiment_analysis_mem.py
@@ -83,10 +83,10 @@ def testCompareTrials(self):
             num_samples=1,
             config={"id": grid_search(list(range(5)))})
 
-        max_all = ea.get_best_trial("score",
-                                    "max").metric_analysis["score"]["max"]
-        min_all = ea.get_best_trial("score",
-                                    "min").metric_analysis["score"]["min"]
+        max_all = ea.get_best_trial("score", "max",
+                                    "all").metric_analysis["score"]["max"]
+        min_all = ea.get_best_trial("score", "min",
+                                    "all").metric_analysis["score"]["min"]
         max_last = ea.get_best_trial("score", "max",
                                      "last").metric_analysis["score"]["last"]
         max_avg = ea.get_best_trial("score", "max",
@@ -149,7 +149,7 @@ def tearDown(self):
 
     def testDataframe(self):
         analysis = Analysis(self.test_dir)
-        df = analysis.dataframe()
+        df = analysis.dataframe(self.metric, mode="max")
 
         self.assertTrue(isinstance(df, pd.DataFrame))
         self.assertEqual(df.shape[0], self.num_samples * 2)
diff --git a/python/ray/tune/tests/test_trial_scheduler_pbt.py b/python/ray/tune/tests/test_trial_scheduler_pbt.py
index 740616e8ce4d..5af7cb46724a 100644
--- a/python/ray/tune/tests/test_trial_scheduler_pbt.py
+++ b/python/ray/tune/tests/test_trial_scheduler_pbt.py
@@ -82,15 +82,24 @@ def synchSetup(self, synch, param=[10, 20, 30]):
 
     def testAsynchFail(self):
         analysis = self.synchSetup(False)
-        self.assertTrue(any(analysis.dataframe()["mean_accuracy"] != 33))
+        self.assertTrue(
+            any(
+                analysis.dataframe(metric="mean_accuracy", mode="max")
+                ["mean_accuracy"] != 33))
 
     def testSynchPass(self):
         analysis = self.synchSetup(True)
-        self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33))
+        self.assertTrue(
+            all(
+                analysis.dataframe(metric="mean_accuracy", mode="max")[
+                    "mean_accuracy"] == 33))
 
     def testSynchPassLast(self):
         analysis = self.synchSetup(True, param=[30, 20, 10])
-        self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33))
+        self.assertTrue(
+            all(
+                analysis.dataframe(metric="mean_accuracy", mode="max")[
+                    "mean_accuracy"] == 33))
 
 
 class PopulationBasedTrainingConfigTest(unittest.TestCase):
diff --git a/python/ray/tune/tests/tutorial.py b/python/ray/tune/tests/tutorial.py
index f0e5fa5af1d2..2aa4422798c6 100644
--- a/python/ray/tune/tests/tutorial.py
+++ b/python/ray/tune/tests/tutorial.py
@@ -166,7 +166,7 @@ def train_mnist(config):
 
 # __run_analysis_begin__
 import os
 
-df = analysis.dataframe()
+df = analysis.results_df
 logdir = analysis.get_best_logdir("mean_accuracy", mode="max")
 state_dict = torch.load(os.path.join(logdir, "model.pth"))
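
Usage sketch (editor's illustration, not part of the patch): the snippet below shows one way the properties added by this change could be exercised end to end. The trainable `train_fn`, the `lr` search space, and the `mean_loss` metric are placeholder names invented for this example; only `tune.run`, `tune.report`, `tune.checkpoint_dir`, `tune.grid_search`, and the `ExperimentAnalysis` attributes introduced in the diff above are assumed.

.. code-block:: python

    from ray import tune

    def train_fn(config):
        # Placeholder trainable: checkpoints and reports one metric per step,
        # mirroring the pattern used in the test added by this change.
        for step in range(10):
            with tune.checkpoint_dir(step):
                pass  # a real trainable would write model state here
            tune.report(mean_loss=config["lr"] * step)

    # Passing `metric` and `mode` to tune.run() is what makes the
    # `best_*` shortcut properties usable without extra arguments.
    analysis = tune.run(
        train_fn,
        config={"lr": tune.grid_search([0.001, 0.01, 0.1])},
        metric="mean_loss",
        mode="min")

    best_trial = analysis.best_trial            # best Trial object
    best_config = analysis.best_config          # its hyperparameters
    best_logdir = analysis.best_logdir          # its log directory
    best_checkpoint = analysis.best_checkpoint  # its best checkpoint
    best_result = analysis.best_result          # its last reported result

    # Last reported result of every trial, indexed by trial_id.
    results_df = analysis.results_df

    # Per-metric aggregation still goes through dataframe(), which now
    # expects an explicit metric and mode.
    df = analysis.dataframe(metric="mean_loss", mode="min")

The design choice reflected in the diff is that the `best_*` properties only work when `metric` and `mode` were given to `tune.run()`; otherwise they raise a `ValueError` that points to the explicit `get_best_*` methods.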