From 56301b5495652b54c6c4714071c24c777e1f9ba2 Mon Sep 17 00:00:00 2001 From: Montgomery Alban Date: Tue, 20 Jun 2023 14:09:55 +0000 Subject: [PATCH] Final updates. --- CHANGELOG.md | 6 +++++ README.md | 2 ++ docs/ecosystem/rllib.rst | 7 ++++++ docs/sim/env.rst | 9 +++++--- examples/rl/rllib/configs.py | 20 +---------------- examples/rl/rllib/model/README.md | 2 +- examples/rl/rllib/pg_example.py | 16 ++++++-------- .../rl/rllib/{rllib.py => pg_pbt_example.py} | 22 +++++++++++++------ examples/tests/test_examples.py | 7 ++---- 9 files changed, 47 insertions(+), 44 deletions(-) rename examples/rl/rllib/{rllib.py => pg_pbt_example.py} (93%) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4de26f7c3..d5d39833eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,13 @@ Copy and pasting the git commit messages is __NOT__ enough. ## [Unreleased] ### Added +- Added `rllib/pg_example.py` to demonstrate a simple integration with `RLlib` and `tensorflow` for policy training. +- Added `rllib/pg_pbt_example.py` to demonstrate integration with `ray.RLlib`, `tensorflow`, and `ray.tune` for scheduled policy training. ### Changed +- Updated `smarts[ray]` (`ray==2.2`) and `smarts[rllib]` (`ray[rllib]==1.4`) to use `ray~=2.5`. +- Introduced `tensorflow-probability` to `smarts[rllib]`. +- Updated `RLlibHiWayEnv` to use the `gymnasium` interface. +- Renamed `rllib/rllib.py` to `rllib/pg_pbt_example.py`. ### Deprecated ### Fixed - Missing neighborhood vehicle state `'lane_id'` is now added to the `hiway-v1` formatted observations. diff --git a/README.md b/README.md index 3857ca5090..6067a0eabb 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ Several agent control policies and agent [action types](smarts/core/controllers/ ### RL Model 1. [Drive](examples/rl/drive). See [Driving SMARTS 2023.1 & 2023.2](https://smarts.readthedocs.io/en/latest/benchmarks/driving_smarts_2023_1.html) for more info. 1. [VehicleFollowing](examples/rl/platoon). See [Driving SMARTS 2023.3](https://smarts.readthedocs.io/en/latest/benchmarks/driving_smarts_2023_3.html) for more info. +1. [PG](examples/rl/rllib/pg_example.py). See [RLlib](https://smarts.readthedocs.io/en/latest/docs/ecosystem/rllib.html) for more info. +1. [PG Population Based Training](examples/rl/rllib/pg_pbt_example.py). See [RLlib](https://smarts.readthedocs.io/en/latest/docs/ecosystem/rllib.html) for more info. ### RL Environment 1. [ULTRA](https://github.com/smarts-project/smarts-project.rl/blob/master/ultra) provides a gym-based environment built upon SMARTS to tackle intersection navigation, specifically the unprotected left turn. diff --git a/docs/ecosystem/rllib.rst b/docs/ecosystem/rllib.rst index 8e4b031f1c..7ae554cb99 100644 --- a/docs/ecosystem/rllib.rst +++ b/docs/ecosystem/rllib.rst @@ -8,6 +8,13 @@ RLlib of applications. ``RLlib`` natively supports ``TensorFlow``, ``TensorFlow Eager``, and ``PyTorch``. Most of its internals are agnostic to such deep learning frameworks. +SMARTS contains two examples using `Policy Gradients (PG) `_. + +1. rllib/pg_example.py +This example shows the basics of using RLlib with SMARTS through :class:`~smarts.env.rllib_hiway_env.RLlibHiWayEnv`. +1. rllib/pg_pbt_example.py +This example combines Policy Gradients with `Population Based Training (PBT) `_ scheduling. + Recommended reads ----------------- diff --git a/docs/sim/env.rst b/docs/sim/env.rst index 5d6571830a..a3e4ac9cd1 100644 --- a/docs/sim/env.rst +++ b/docs/sim/env.rst @@ -9,7 +9,7 @@ Base environments SMARTS environment module is defined in :mod:`~smarts.env` package. Currently SMARTS provides two kinds of training environments, namely: -+ ``HiWayEnv`` utilizing ``gym.env`` style interface ++ ``HiWayEnv`` utilizing a ``gymnasium.Env`` interface + ``RLlibHiwayEnv`` customized for `RLlib `_ training .. image:: ../_static/env.png @@ -17,11 +17,12 @@ environments, namely: HiWayEnv ^^^^^^^^ -``HiWayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. An usage example is shown below. +``HiWayEnv`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. An usage example is shown below. Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details. .. code-block:: python + import gymnasium as gym # Make env env = gym.make( "smarts.env:hiway-v0", # Env entry name. @@ -53,6 +54,7 @@ exactly matches the `env.observation_space`, and `ObservationOptions.multi_agent .. code-block:: python + import gymnasium as gym # Make env env = gym.make( "smarts.env:hiway-v1", # Env entry name. @@ -81,6 +83,7 @@ This can be done with :class:`~smarts.env.gymnasium.wrappers.api_reversion.Api02 .. code-block:: python + import gymnasium as gym # Make env env = gym.make( "smarts.env:hiway-v1", # Env entry name. @@ -91,7 +94,7 @@ This can be done with :class:`~smarts.env.gymnasium.wrappers.api_reversion.Api02 RLlibHiwayEnv ^^^^^^^^^^^^^ -``RLlibHiwayEnv`` inherits class ``MultiAgentEnv``, which is defined in `RLlib `_. It also supports common env APIs like ``reset``, +``RLlibHiwayEnv`` inherits class ``MultiAgentEnv``, which is defined in `RLlib `_. It also supports common environment APIs like ``reset``, ``step``, ``close``. An usage example is shown below. Refer to :class:`~smarts.env.rllib_hiway_env.RLlibHiWayEnv` for more details. .. code-block:: python diff --git a/examples/rl/rllib/configs.py b/examples/rl/rllib/configs.py index 841b8f1cdc..266e39d841 100644 --- a/examples/rl/rllib/configs.py +++ b/examples/rl/rllib/configs.py @@ -61,9 +61,7 @@ def gen_pg_config( return algo_config -def gen_parser( - prog: str, default_result_dir: str, default_save_model_path: str -) -> argparse.ArgumentParser: +def gen_parser(prog: str, default_result_dir: str) -> argparse.ArgumentParser: parser = argparse.ArgumentParser(prog) parser.add_argument( "--scenario", @@ -76,12 +74,6 @@ def gen_parser( action="store_true", help="Run simulation with Envision display.", ) - parser.add_argument( - "--num_samples", - type=int, - default=1, - help="Number of times to sample from hyperparameter space", - ) parser.add_argument( "--rollout_fragment_length", type=str, @@ -133,17 +125,7 @@ def gen_parser( default="ERROR", help="Log level (DEBUG|INFO|WARN|ERROR)", ) - parser.add_argument( - "--checkpoint_num", type=int, default=None, help="Checkpoint number" - ) parser.add_argument( "--checkpoint_freq", type=int, default=3, help="Checkpoint frequency" ) - - parser.add_argument( - "--save_model_path", - type=str, - default=default_save_model_path, - help="Destination path of where to copy the model when training is over", - ) return parser diff --git a/examples/rl/rllib/model/README.md b/examples/rl/rllib/model/README.md index 646c9015c8..9afe8800ce 100644 --- a/examples/rl/rllib/model/README.md +++ b/examples/rl/rllib/model/README.md @@ -1,3 +1,3 @@ ## Model Binaries -The binaries located in this directory are the components of a trained rllib model. These are related to the `examples/rl/rllib/rllib.py` example script. Results from `examples/rl/rllib/rllib.py` are loaded and written to this directory. \ No newline at end of file +The binaries located in this directory are the components of a trained rllib model. These are related to the `examples/rl/rllib/pg_pbt_example.py` example script. Results from `examples/rl/rllib/pg_pbt_example.py` are loaded and written to this directory. \ No newline at end of file diff --git a/examples/rl/rllib/pg_example.py b/examples/rl/rllib/pg_example.py index f422dfa435..ec340bba05 100644 --- a/examples/rl/rllib/pg_example.py +++ b/examples/rl/rllib/pg_example.py @@ -81,7 +81,6 @@ def main( rollout_fragment_length, train_batch_size, seed, - num_samples, num_agents, num_workers, resume_training, @@ -89,7 +88,6 @@ def main( checkpoint_freq: int, checkpoint_num: Optional[int], log_level: Literal["DEBUG", "INFO", "WARN", "ERROR"], - save_model_path, ): agent_values = { "agent_specs": { @@ -162,12 +160,14 @@ def get_checkpoint_dir(num): if __name__ == "__main__": - default_save_model_path = str( - Path(__file__).expanduser().resolve().parent / "pg_model" - ) default_result_dir = str(Path(__file__).resolve().parent / "results" / "pg_results") - parser = gen_parser("rllib-example", default_result_dir, default_save_model_path) - + parser = gen_parser("rllib-example", default_result_dir) + parser.add_argument( + "--checkpoint_num", + type=int, + default=None, + help="The checkpoint number to restart from.", + ) args = parser.parse_args() build_scenario(scenario=args.scenario, clean=False, seed=42) @@ -178,7 +178,6 @@ def get_checkpoint_dir(num): rollout_fragment_length=args.rollout_fragment_length, train_batch_size=args.train_batch_size, seed=args.seed, - num_samples=args.num_samples, num_agents=args.num_agents, num_workers=args.num_workers, resume_training=args.resume_training, @@ -186,5 +185,4 @@ def get_checkpoint_dir(num): checkpoint_freq=max(args.checkpoint_freq, 1), checkpoint_num=args.checkpoint_num, log_level=args.log_level, - save_model_path=args.save_model_path, ) diff --git a/examples/rl/rllib/rllib.py b/examples/rl/rllib/pg_pbt_example.py similarity index 93% rename from examples/rl/rllib/rllib.py rename to examples/rl/rllib/pg_pbt_example.py index 6c3672251f..4fa50c53d0 100644 --- a/examples/rl/rllib/rllib.py +++ b/examples/rl/rllib/pg_pbt_example.py @@ -124,7 +124,6 @@ def main( resume_training, result_dir, checkpoint_freq: int, - checkpoint_num: Optional[int], log_level: Literal["DEBUG", "INFO", "WARN", "ERROR"], save_model_path, ): @@ -240,12 +239,22 @@ def main( if __name__ == "__main__": - default_save_model_path = str( - Path(__file__).expanduser().resolve().parent / "model" + default_result_dir = str( + Path(__file__).resolve().parent / "results" / "tune_pg_results" + ) + parser = gen_parser("rllib-example", default_result_dir) + parser.add_argument( + "--num_samples", + type=int, + default=1, + help="Number of times to sample from hyperparameter space", + ) + parser.add_argument( + "--save_model_path", + type=str, + default=str(Path(__file__).expanduser().resolve().parent / "model"), + help="Destination path of where to copy the model when training is over", ) - default_result_dir = str(Path(__file__).resolve().parent / "results" / "tune_pg_results") - parser = gen_parser("rllib-example", default_result_dir, default_save_model_path) - args = parser.parse_args() build_scenario(scenario=args.scenario, clean=False, seed=42) @@ -262,7 +271,6 @@ def main( resume_training=args.resume_training, result_dir=args.result_dir, checkpoint_freq=max(args.checkpoint_freq, 1), - checkpoint_num=args.checkpoint_num, log_level=args.log_level, save_model_path=args.save_model_path, ) diff --git a/examples/tests/test_examples.py b/examples/tests/test_examples.py index f148331d2c..3e836368f6 100644 --- a/examples/tests/test_examples.py +++ b/examples/tests/test_examples.py @@ -51,22 +51,20 @@ def test_rllib_pg_example(): rollout_fragment_length=200, train_batch_size=200, seed=42, - num_samples=1, num_agents=2, num_workers=1, resume_training=False, result_dir=result_dir, checkpoint_num=None, checkpoint_freq=1, - save_model_path=model_dir, log_level="WARN", ) def test_rllib_tune_pg_example(): - from examples.rl.rllib import rllib + from examples.rl.rllib import pg_pbt_example - main = rllib.main + main = pg_pbt_example.main with tempfile.TemporaryDirectory() as result_dir, tempfile.TemporaryDirectory() as model_dir: main( scenario="scenarios/sumo/loop", @@ -80,7 +78,6 @@ def test_rllib_tune_pg_example(): num_workers=1, resume_training=False, result_dir=result_dir, - checkpoint_num=None, checkpoint_freq=1, save_model_path=model_dir, log_level="WARN",