From ac345611485e4ffd5cb868cf5e9412e601e6fe48 Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Sat, 10 Jun 2023 15:19:29 -0400 Subject: [PATCH 1/5] Update RL examples. --- docs/benchmarks/driving_smarts_2023_1.rst | 22 +- docs/benchmarks/driving_smarts_2023_3.rst | 15 ++ examples/rl/drive/inference/__init__.py | 7 +- .../inference/contrib_policy/filter_obs.py | 85 ++++++-- .../drive/inference/contrib_policy/policy.py | 15 +- examples/rl/drive/train/config.yaml | 19 +- examples/rl/drive/train/env.py | 20 +- examples/rl/drive/train/preprocess.py | 15 +- examples/rl/drive/train/requirements.txt | 138 ++++-------- examples/rl/drive/train/reward.py | 7 +- examples/rl/drive/train/run.py | 176 +++++++++------ examples/rl/platoon/inference/__init__.py | 7 +- .../inference/contrib_policy/filter_obs.py | 85 ++++++-- .../inference/contrib_policy/format_action.py | 2 +- .../inference/contrib_policy/policy.py | 15 +- examples/rl/platoon/train/config.yaml | 18 +- examples/rl/platoon/train/env.py | 20 +- examples/rl/platoon/train/preprocess.py | 15 +- examples/rl/platoon/train/requirements.txt | 138 ++++-------- examples/rl/platoon/train/reward.py | 38 ++-- examples/rl/platoon/train/run.py | 178 +++++++++------ .../map.net.xml | 203 ------------------ .../scenario.py | 100 --------- .../merge_exit_sumo_t_agents_2/scenario.py | 2 +- .../driving_smarts/v2023/config_1.yaml | 5 +- .../driving_smarts/v2023/config_2.yaml | 3 +- .../driving_smarts/v2023/config_3.yaml | 9 +- smarts/core/route_cache.py | 3 +- 28 files changed, 595 insertions(+), 765 deletions(-) delete mode 100644 scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml delete mode 100644 scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py diff --git a/docs/benchmarks/driving_smarts_2023_1.rst b/docs/benchmarks/driving_smarts_2023_1.rst index b0f2dd03fb..d3706f3e91 100644 --- a/docs/benchmarks/driving_smarts_2023_1.rst +++ b/docs/benchmarks/driving_smarts_2023_1.rst @@ -48,9 +48,24 @@ predictive models, etc, may be used to develop the policy. Several scenarios are provided for training. Their names and tasks are as follows. The desired task execution is illustrated in a gif by a trained baseline agent. -.. todo:: - - Provide sample training scenarios and corresponding ``.gif`` images showing a baseline model traversing the map. 
+**Driving SMARTS 2023.1 scenarios** + ++ :scenarios:`cruise_2lane_agents_1 ` ++ :scenarios:`cutin_2lane_agents_1 ` ++ :scenarios:`merge_exit_sumo_t_agents_1 ` ++ :scenarios:`overtake_2lane_agents_1 ` ++ :scenarios:`00a445fb-7293-4be6-adbc-e30c949b6cf7_agents_1 ` ++ :scenarios:`0a53dd99-2946-4b4d-ab66-c4d6fef97be2_agents_1 ` ++ :scenarios:`0a576bf1-66ae-495a-9c87-236f3fc2aa01_agents_1 ` + +**Driving SMARTS 2023.2 scenarios** + ++ :scenarios:`1_to_3lane_left_turn_sumo_c_agents_1 ` ++ :scenarios:`1_to_3lane_left_turn_middle_lane_c_agents_1 ` ++ :scenarios:`00b15e74-04a8-4bd4-9a78-eb24f0c0a980_agents_1 ` ++ :scenarios:`0a60b442-56b0-46c3-be45-cf166a182b67_agents_1 ` ++ :scenarios:`0a764a82-b44e-481e-97e7-05e1f1f925f6_agents_1 ` ++ :scenarios:`0bf054e3-7698-4b86-9c98-626df2dee9f4_agents_1 ` Observation space ----------------- @@ -264,6 +279,7 @@ Evaluate $ python3.8 -m venv ./.venv $ source ./.venv/bin/activate $ pip install --upgrade pip + $ pip install wheel==0.38.4 $ pip install -e .[camera_obs,argoverse,envision] $ scl zoo install examples/rl/drive/inference # For Driving SMARTS 2023.1 diff --git a/docs/benchmarks/driving_smarts_2023_3.rst b/docs/benchmarks/driving_smarts_2023_3.rst index 20902af2af..e0d956c817 100644 --- a/docs/benchmarks/driving_smarts_2023_3.rst +++ b/docs/benchmarks/driving_smarts_2023_3.rst @@ -42,6 +42,20 @@ The desired task execution is illustrated in a gif by a trained baseline agent. .. image:: /_static/driving_smarts_2023/platoon_straight_2lane_agents_1.gif ++ :scenarios:`straight_2lane_sumo_agents_1 ` ++ :scenarios:`straight_2lane_sumo_t_agents_1 ` ++ :scenarios:`straight_3lanes_sumo_agents_1 ` ++ :scenarios:`straight_3lanes_sumo_t_agents_1 ` ++ :scenarios:`straight_3lanes_sumo_t_agents_2 ` ++ :scenarios:`merge_exit_sumo_agents_1 ` ++ :scenarios:`merge_exit_sumo_t_agents_1 ` ++ :scenarios:`merge_exit_sumo_t_agents_2 ` ++ :scenarios:`ff239c9d-e4ff-4acc-bad5-bd55648c212e_0_agents_1 ` ++ :scenarios:`ff239c9d-e4ff-4acc-bad5-bd55648c212e_agents_1 ` ++ :scenarios:`ff6dc43b-dd27-4fe4-94b6-5c1b3940daed_agents_1 ` ++ :scenarios:`ff9619b5-b0c0-4942-b5d8-df6a5814f8a2_agents_1 ` ++ :scenarios:`ffd10ec2-715b-48af-a89d-b11f79927f63_agents_1 ` + Observation space ----------------- @@ -254,6 +268,7 @@ Evaluate $ python3.8 -m venv ./.venv $ source ./.venv/bin/activate $ pip install --upgrade pip + $ pip install wheel==0.38.4 $ pip install -e .[camera_obs,argoverse,envision] $ scl zoo install examples/rl/platoon/inference $ scl benchmark run driving_smarts_2023_3 examples.rl.platoon.inference:contrib-agent-v0 --auto-install diff --git a/examples/rl/drive/inference/__init__.py b/examples/rl/drive/inference/__init__.py index 2692e0fcef..a330eaa041 100644 --- a/examples/rl/drive/inference/__init__.py +++ b/examples/rl/drive/inference/__init__.py @@ -16,9 +16,9 @@ def entry_point(**kwargs): road_waypoints=False, signals=False, top_down_rgb=RGB( - width=128, - height=128, - resolution=80 / 128, # m/pixels + width=200, + height=200, + resolution=80 / 200, # m/pixels ), ) @@ -26,6 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. 
} return AgentSpec( diff --git a/examples/rl/drive/inference/contrib_policy/filter_obs.py b/examples/rl/drive/inference/contrib_policy/filter_obs.py index 6bd1f3d259..e366355c16 100644 --- a/examples/rl/drive/inference/contrib_policy/filter_obs.py +++ b/examples/rl/drive/inference/contrib_policy/filter_obs.py @@ -1,5 +1,5 @@ import math -from typing import Any, Dict, Sequence +from typing import Any, Dict, Sequence, Tuple import gym import numpy as np @@ -11,11 +11,17 @@ class FilterObs: """Filter only the selected observation parameters.""" - def __init__(self, top_down_rgb: RGB): + def __init__( + self, top_down_rgb: RGB, crop: Tuple[int, int, int, int] = (0, 0, 0, 0) + ): self.observation_space = gym.spaces.Box( low=0, high=255, - shape=(3, top_down_rgb.height, top_down_rgb.width), + shape=( + 3, + top_down_rgb.height - crop[2] - crop[3], + top_down_rgb.width - crop[0] - crop[1], + ), dtype=np.uint8, ) @@ -26,7 +32,9 @@ def __init__(self, top_down_rgb: RGB): self._lane_divider_color = np.array(SceneColors.LaneDivider.value[0:3]) * 255 self._edge_divider_color = np.array(SceneColors.EdgeDivider.value[0:3]) * 255 self._ego_color = np.array(SceneColors.Agent.value[0:3]) * 255 + self._goal_color = np.array(Colors.Purple.value[0:3]) * 255 + self._blur_radius = 2 self._res = top_down_rgb.resolution h = top_down_rgb.height w = top_down_rgb.width @@ -43,6 +51,14 @@ def __init__(self, top_down_rgb: RGB): self._rgb_mask = np.zeros(shape=(h, w, 3), dtype=np.uint8) self._rgb_mask[shape[0][0] : shape[0][1], shape[1][0] : shape[1][1], :] = 1 + self._crop = crop + assert ( + self._crop[0] < np.floor(w / 2) + and self._crop[1] < np.floor(w / 2) + and self._crop[2] < np.floor(h / 2) + and self._crop[3] < np.floor(h / 2) + ), f"Expected crop to be less than half the image size, got crop={self._crop} for an image of size[h,w]=[{h},{w}]." 
+ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: """Adapts the environment's observation.""" # fmt: off @@ -62,8 +78,8 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: # Superimpose waypoints onto rgb image wps = obs["waypoint_paths"]["position"][0:11, 3:, 0:3] for path in wps[:]: - wps_valid = wps_to_pixels( - wps=path, + wps_valid = points_to_pixels( + points=path, ego_pos=ego_pos, ego_heading=ego_heading, w=w, @@ -75,6 +91,28 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: if all(rgb_ego[img_y, img_x, :] == self._no_color): rgb_ego[img_y, img_x, :] = self._wps_color + # Superimpose goal position onto rgb image + if not all((goal:=obs["ego_vehicle_state"]["mission"]["goal_position"]) == np.zeros((3,))): + goal_pixel = points_to_pixels( + points=np.expand_dims(goal,axis=0), + ego_pos=ego_pos, + ego_heading=ego_heading, + w=w, + h=h, + res=self._res, + ) + if len(goal_pixel) != 0: + img_x, img_y = goal_pixel[0][0], goal_pixel[0][1] + if all(rgb_ego[img_y, img_x, :] == self._no_color) or all(rgb_ego[img_y, img_x, :] == self._wps_color): + rgb_ego[ + max(img_y-self._blur_radius,0):min(img_y+self._blur_radius,h), + max(img_x-self._blur_radius,0):min(img_x+self._blur_radius,w), + :, + ] = self._goal_color + + # Crop image + rgb_ego = rgb_ego[self._crop[2]:h-self._crop[3],self._crop[0]:w-self._crop[1],:] + # Channel first rgb rgb_ego = rgb_ego.transpose(2, 0, 1) @@ -123,14 +161,19 @@ def replace_color( # fmt: on -def wps_to_pixels( - wps: np.ndarray, ego_pos: np.ndarray, ego_heading: float, w: int, h: int, res: float +def points_to_pixels( + points: np.ndarray, + ego_pos: np.ndarray, + ego_heading: float, + w: int, + h: int, + res: float, ) -> np.ndarray: - """Converts waypoints into pixel coordinates in order to superimpose the - waypoints onto the RGB image. + """Converts points into pixel coordinates in order to superimpose the + points onto the RGB image. Args: - wps (np.ndarray): Waypoints for a single route. Shape (n,3). + points (np.ndarray): Array of points. Shape (n,3). ego_pos (np.ndarray): Ego position. Shape = (3,). ego_heading (float): Ego heading in radians. w (int): Width of RGB image @@ -139,19 +182,19 @@ def wps_to_pixels( ground_size/image_size. Returns: - np.ndarray: Array of waypoint coordinates on the RGB image. Shape = (m,3). + np.ndarray: Array of point coordinates on the RGB image. Shape = (m,3). 
""" # fmt: off - mask = [False if all(point == np.zeros(3,)) else True for point in wps] - wps_nonzero = wps[mask] - wps_delta = wps_nonzero - ego_pos - wps_rotated = rotate_axes(wps_delta, theta=ego_heading) - wps_pixels = wps_rotated / np.array([res, res, res]) - wps_overlay = np.array([w / 2, h / 2, 0]) + wps_pixels * np.array([1, -1, 1]) - wps_rfloat = np.rint(wps_overlay) - wps_valid = wps_rfloat[(wps_rfloat[:,0] >= 0) & (wps_rfloat[:,0] < w) & (wps_rfloat[:,1] >= 0) & (wps_rfloat[:,1] < h)] - wps_rint = wps_valid.astype(int) - return wps_rint + mask = [False if all(point == np.zeros(3,)) else True for point in points] + points_nonzero = points[mask] + points_delta = points_nonzero - ego_pos + points_rotated = rotate_axes(points_delta, theta=ego_heading) + points_pixels = points_rotated / np.array([res, res, res]) + points_overlay = np.array([w / 2, h / 2, 0]) + points_pixels * np.array([1, -1, 1]) + points_rfloat = np.rint(points_overlay) + points_valid = points_rfloat[(points_rfloat[:,0] >= 0) & (points_rfloat[:,0] < w) & (points_rfloat[:,1] >= 0) & (points_rfloat[:,1] < h)] + points_rint = points_valid.astype(int) + return points_rint # fmt: on diff --git a/examples/rl/drive/inference/contrib_policy/policy.py b/examples/rl/drive/inference/contrib_policy/policy.py index f63a72e70c..8d80b0894e 100644 --- a/examples/rl/drive/inference/contrib_policy/policy.py +++ b/examples/rl/drive/inference/contrib_policy/policy.py @@ -11,22 +11,20 @@ class Policy(Agent): """Policy class to be submitted by the user. This class will be loaded and tested during evaluation.""" - def __init__(self, num_stack, top_down_rgb, action_space_type): + def __init__(self, num_stack, top_down_rgb, crop, action_space_type): """All policy initialization matters, including loading of model, is performed here. To be implemented by the user. 
""" - import stable_baselines3 as sb3lib from contrib_policy import network from contrib_policy.filter_obs import FilterObs from contrib_policy.format_action import FormatAction from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict - model_path = Path(__file__).resolve().parents[0] / "saved_model" - self.model = sb3lib.PPO.load(model_path) + self._model = self._get_model() - self._filter_obs = FilterObs(top_down_rgb=top_down_rgb) + self._filter_obs = FilterObs(top_down_rgb=top_down_rgb, crop=crop) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, num_stack=num_stack, @@ -43,7 +41,7 @@ def __init__(self, num_stack, top_down_rgb, action_space_type): def act(self, obs): """Mandatory act function to be implemented by user.""" processed_obs = self._process(obs) - action, _ = self.model.predict(observation=processed_obs, deterministic=True) + action, _ = self._model.predict(observation=processed_obs, deterministic=True) formatted_action = self._format_action.format( action=int(action), prev_heading=obs["ego_vehicle_state"]["heading"] ) @@ -57,3 +55,8 @@ def _process(self, obs): obs = self._frame_stack.stack(obs) obs = self._make_dict.make(obs) return obs + + def _get_model(self): + import stable_baselines3 as sb3lib + + return sb3lib.PPO.load(path=Path(__file__).resolve().parents[0] / "saved_model") diff --git a/examples/rl/drive/train/config.yaml b/examples/rl/drive/train/config.yaml index 7c0ea860a4..b09e83a428 100644 --- a/examples/rl/drive/train/config.yaml +++ b/examples/rl/drive/train/config.yaml @@ -5,20 +5,27 @@ smarts: agent_locator: inference:contrib-agent-v0 env_id: smarts.env:driving-smarts-v2023 scenarios: - - scenarios/sumo/intersections/1_to_1lane_left_turn_c_agents_1 + - scenarios/sumo/straight/cruise_2lane_agents_1 + - scenarios/sumo/straight/cutin_2lane_agents_1 + - scenarios/sumo/straight/merge_exit_sumo_t_agents_1 + - scenarios/sumo/straight/overtake_2lane_agents_1 + + - scenarios/sumo/intersections/1_to_3lane_left_turn_sumo_c_agents_1 + - scenarios/sumo/intersections/1_to_3lane_left_turn_middle_lane_c_agents_1 # PPO algorithm alg: - n_steps: 2048 - batch_size: 512 + n_steps: 1024 + batch_size: 64 n_epochs: 4 target_kl: 0.1 + # ent_coef: 0.01 # For exploration. Range = 0 to 0.01 # Training over all scenarios epochs: 500 # Number of training loops. # Training per scenario - train_steps: 10_000 - checkpoint_freq: 10_000 # Save a model every checkpoint_freq calls to env.step(). - eval_freq: 10_000 # Evaluate the trained model every eval_freq steps and save the best model. + train_steps: 4_096 + checkpoint_freq: 4_096 # Save a model every checkpoint_freq calls to env.step(). + eval_freq: 4_096 # Evaluate the trained model every eval_freq steps and save the best model. eval_eps: 5 # Number of evaluation epsiodes. 
diff --git a/examples/rl/drive/train/env.py b/examples/rl/drive/train/env.py index 6f1668b56f..a4dc0fea12 100644 --- a/examples/rl/drive/train/env.py +++ b/examples/rl/drive/train/env.py @@ -1,16 +1,18 @@ import sys from pathlib import Path -# To import submission folder -sys.path.insert(0, str(Path(__file__).resolve().parents[2])) +# To import train folder +sys.path.insert(0, str(Path(__file__).resolve().parents[0])) import gymnasium as gym +from smarts.zoo.agent_spec import AgentSpec -def make_env(env_id, scenario, agent_interface, config, seed): + +def make_env(env_id, scenario, agent_spec: AgentSpec, config, seed): from preprocess import Preprocess + from reward import Reward from stable_baselines3.common.monitor import Monitor - from train.reward import Reward from smarts.env.gymnasium.wrappers.api_reversion import Api021Reversion from smarts.env.gymnasium.wrappers.single_agent import SingleAgent @@ -18,15 +20,15 @@ def make_env(env_id, scenario, agent_interface, config, seed): env = gym.make( env_id, scenario=scenario, - agent_interface=agent_interface, + agent_interface=agent_spec.interface, seed=seed, sumo_headless=not config.sumo_gui, # If False, enables sumo-gui display. headless=not config.head, # If False, enables Envision display. ) - env = Reward(env) - env = SingleAgent(env) - env = Api021Reversion(env) - env = Preprocess(env, agent_interface) + env = Reward(env=env, crop=agent_spec.agent_params["crop"]) + env = SingleAgent(env=env) + env = Api021Reversion(env=env) + env = Preprocess(env=env, agent_spec=agent_spec) env = Monitor(env) return env diff --git a/examples/rl/drive/train/preprocess.py b/examples/rl/drive/train/preprocess.py index 38a59cb0f9..ec2f890809 100644 --- a/examples/rl/drive/train/preprocess.py +++ b/examples/rl/drive/train/preprocess.py @@ -4,17 +4,20 @@ from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict -from smarts.core.agent_interface import AgentInterface +from smarts.zoo.agent_spec import AgentSpec class Preprocess(gym.Wrapper): - def __init__(self, env: gym.Env, agent_interface: AgentInterface): + def __init__(self, env: gym.Env, agent_spec: AgentSpec): super().__init__(env) - self._filter_obs = FilterObs(top_down_rgb=agent_interface.top_down_rgb) + self._filter_obs = FilterObs( + top_down_rgb=agent_spec.interface.top_down_rgb, + crop=agent_spec.agent_params["crop"], + ) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, - num_stack=3, + num_stack=agent_spec.agent_params["num_stack"], stack_axis=0, ) self._frame_stack.reset() @@ -23,7 +26,9 @@ def __init__(self, env: gym.Env, agent_interface: AgentInterface): self.observation_space = self._make_dict.observation_space self._prev_heading: float - self._format_action = FormatAction(agent_interface.action) + self._format_action = FormatAction( + action_space_type=agent_spec.interface.action + ) self.action_space = self._format_action.action_space def _process(self, obs): diff --git a/examples/rl/drive/train/requirements.txt b/examples/rl/drive/train/requirements.txt index 72c1d1db69..90d5d558db 100644 --- a/examples/rl/drive/train/requirements.txt +++ b/examples/rl/drive/train/requirements.txt @@ -1,152 +1,98 @@ absl-py==1.4.0 -alabaster==0.7.13 -astroid==2.15.0 -asttokens==2.2.1 -attrs==22.2.0 +argcomplete==3.0.8 +attrs==23.1.0 Automat==22.10.0 -Babel==2.12.1 -backcall==0.2.0 -black==22.6.0 -cached-property==1.5.2 -cachetools==5.3.0 -certifi==2022.12.7 -cfgv==3.3.1 +av==10.0.0 +av2==0.2.1 +cachetools==5.3.1 +certifi==2023.5.7 
charset-normalizer==3.1.0 click==8.1.3 cloudpickle==1.6.0 +colorlog==6.7.0 constantly==15.1.0 contourpy==1.0.7 cycler==0.11.0 -decorator==5.1.1 -dill==0.3.6 distlib==0.3.6 -docutils==0.18.1 -eclipse-sumo==1.16.0 -executing==1.2.0 -filelock==3.9.1 -fonttools==4.39.0 +eclipse-sumo==1.17.0 +filelock==3.12.0 +fonttools==4.39.4 future==0.18.3 -google-auth==2.16.2 +google-auth==2.19.1 google-auth-oauthlib==0.4.6 -grpcio==1.52.0 -grpcio-tools==1.32.0 +grpcio==1.55.0 gym==0.21.0 gymnasium==0.27.0 gymnasium-notices==0.0.1 hyperlink==21.0.0 -identify==2.5.20 idna==3.4 ijson==3.2.0.post0 -imagesize==1.4.1 -importlab==0.8 importlib-metadata==4.13.0 importlib-resources==5.12.0 incremental==22.10.0 -ipython==8.11.0 -isort==5.7.0 -jax-jumpy==0.2.0 -jedi==0.18.2 -Jinja2==3.1.2 -jsonpatch==1.32 -jsonpointer==2.3 +jax-jumpy==1.0.0 +joblib==1.2.0 kiwisolver==1.4.4 -lazy-object-proxy==1.9.0 -libcst==0.4.9 -Markdown==3.4.1 +llvmlite==0.40.0 +Markdown==3.4.3 markdown-it-py==2.2.0 -MarkupSafe==2.1.2 +MarkupSafe==2.1.3 matplotlib==3.7.1 -matplotlib-inline==0.1.6 -mccabe==0.7.0 -mdit-py-plugins==0.3.5 mdurl==0.1.2 -mypy-extensions==1.0.0 -myst-parser==1.0.0 -networkx==3.0 -ninja==1.11.1 -nodeenv==1.7.0 +nox==2023.4.22 +numba==0.57.0 numpy==1.23.5 nvidia-cublas-cu11==11.10.3.66 nvidia-cuda-nvrtc-cu11==11.7.99 nvidia-cuda-runtime-cu11==11.7.99 nvidia-cudnn-cu11==8.5.0.96 oauthlib==3.2.2 -packaging==23.0 +opencv-python==4.7.0.72 +packaging==23.1 Panda3D==1.10.9 panda3d-gltf==0.13 panda3d-simplepbr==0.10 -pandas==1.5.3 -parso==0.8.3 -pathspec==0.11.1 -pbr==5.11.1 -pexpect==4.8.0 -pickleshare==0.7.5 -Pillow==9.4.0 -platformdirs==3.1.1 -pre-commit==2.16.0 -prompt-toolkit==3.0.38 +pandas==2.0.2 +Pillow==9.5.0 +platformdirs==3.5.1 protobuf==3.20.3 -psutil==5.9.4 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -pybullet==3.0.6 -Pygments==2.14.0 -pylint==2.17.0 +psutil==5.9.5 +pyarrow==12.0.0 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pybullet==3.2.5 +Pygments==2.15.1 pyparsing==3.0.9 +pyproj==3.5.0 python-dateutil==2.8.2 -pytype==2022.1.13 -pytz==2022.7.1 +pytz==2023.3 PyYAML==6.0 -requests==2.28.2 +requests==2.31.0 requests-oauthlib==1.3.1 -rich==13.3.2 +rich==13.4.1 rsa==4.9 Rtree==1.0.1 scipy==1.10.1 -sh==2.0.2 shapely==2.0.1 Shimmy==0.2.1 six==1.16.0 -snowballstemmer==2.2.0 -Sphinx==6.1.3 -sphinx-click==4.4.0 -sphinx-rtd-theme==1.2.0 -sphinxcontrib-apidoc==0.3.0 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jquery==4.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 stable-baselines3==1.7.0 -stack-data==0.6.2 tableprint==0.9.1 -tabulate==0.9.0 tensorboard==2.12.0 tensorboard-data-server==0.7.0 tensorboard-plugin-wit==1.8.1 -tokenize-rt==5.0.0 -toml==0.10.2 -tomli==2.0.1 -tomlkit==0.11.6 torch==1.13.1 torchinfo==1.7.2 -tornado==6.2 -traitlets==5.9.0 +tornado==6.3.2 trimesh==3.9.29 Twisted==22.10.0 -typing-inspect==0.8.0 -typing_extensions==4.5.0 -urllib3==1.26.15 -virtualenv==20.21.0 -visdom==0.2.4 +typing_extensions==4.6.3 +tzdata==2023.3 +urllib3==1.26.16 +virtualenv==20.23.0 wcwidth==0.2.6 -websocket-client==1.5.1 -Werkzeug==2.2.3 -wrapt==1.15.0 +websocket-client==1.5.2 +Werkzeug==2.3.5 yattag==1.15.1 zipp==3.15.0 -zope.interface==5.5.2 +zope.interface==6.0 diff --git a/examples/rl/drive/train/reward.py b/examples/rl/drive/train/reward.py index cee2825e26..ebffc2c7ee 100644 --- a/examples/rl/drive/train/reward.py +++ b/examples/rl/drive/train/reward.py @@ -1,14 +1,15 @@ import gymnasium 
as gym import numpy as np -from smarts.core.colors import SceneColors +from typing import Tuple class Reward(gym.Wrapper): - def __init__(self, env): + def __init__(self, env: gym.Env, crop: Tuple[int, int, int, int]): """Constructor for the Reward wrapper.""" super().__init__(env) self._total_dist = {} + self._crop = crop def reset(self, *, seed=None, options=None): self._total_dist = {} @@ -85,7 +86,7 @@ def _reward(self, obs, env_reward): if agent_obs["events"]["reached_goal"]: reward[agent_id] += np.float64(30) - # Reward for distance travelled by driving + # Reward for distance travelled in one step reward[agent_id] += np.float64(env_reward[agent_id]) return reward diff --git a/examples/rl/drive/train/run.py b/examples/rl/drive/train/run.py index eba4f2ff07..b4f2221cd2 100644 --- a/examples/rl/drive/train/run.py +++ b/examples/rl/drive/train/run.py @@ -22,23 +22,35 @@ from contrib_policy import network from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback from stable_baselines3.common.evaluation import evaluate_policy +from torchinfo import summary from train.env import make_env from train.utils import ObjDict from smarts.zoo import registry +from smarts.zoo.agent_spec import AgentSpec -print("\nTorch cuda is available: ", th.cuda.is_available(), "\n") +print("\n") +print(f"Torch cuda is available: {th.cuda.is_available()}") +device = th.device("cuda" if th.cuda.is_available() else "cpu") +print(f"Torch device: {device}") +print("\n") warnings.simplefilter("ignore", category=DeprecationWarning) warnings.simplefilter("ignore", category=ResourceWarning) # pytype: disable=attribute-error -def main(args: argparse.Namespace): - # Load config file. +def load_config(): + """Load config file.""" parent_dir = Path(__file__).resolve().parent config_file = yaml.safe_load((parent_dir / "config.yaml").read_text()) config = ObjDict(config_file["smarts"]) + return config + + +def main(args: argparse.Namespace): + parent_dir = Path(__file__).resolve().parent + config = load_config() # Load env config. config.mode = args.mode @@ -76,22 +88,28 @@ def main(args: argparse.Namespace): envs_train[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) envs_eval[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) # Run training or evaluation. 
- run(envs_train=envs_train, envs_eval=envs_eval, config=config) - - print("Finished training ...") + if config.mode == "train": + train( + envs_train=envs_train, + envs_eval=envs_eval, + config=config, + agent_spec=agent_spec, + ) + else: + evaluate(envs=envs_eval, config=config, agent_spec=agent_spec) # Close all environments for env in envs_train.values(): @@ -100,79 +118,95 @@ def main(args: argparse.Namespace): env.close() -def run( +def train( envs_train: Dict[str, gym.Env], envs_eval: Dict[str, gym.Env], config: Dict[str, Any], + agent_spec: AgentSpec, ): + print("\nStart training.\n") + scenarios_iter = cycle(config.scenarios) + save_dir = config.logdir / "train" + save_dir.mkdir(parents=True, exist_ok=True) + checkpoint_callback = CheckpointCallback( + save_freq=config.checkpoint_freq, + save_path=config.logdir / "checkpoint", + name_prefix="PPO", + ) - if config.mode == "train": - print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) - model = sb3lib.PPO( - env=envs_train[next(scenarios_iter)], - tensorboard_log=config.logdir / "tensorboard", - verbose=1, - **network.combined_extractor(config), - ) + for index in range(config.epochs): + scen = next(scenarios_iter) + env_train = envs_train[scen] + env_eval = envs_eval[scen] + print(f"\nTraining on {scen}.\n") - # Print model summary - # from torchinfo import summary - # td = {"rgb":th.zeros(1,9,128,128)} - # summary(model.policy, input_data=[td], depth=5) - # input("Press any key to continue ...") - - for index in range(config.epochs): - scen = next(scenarios_iter) - env_train = envs_train[scen] - env_eval = envs_eval[scen] - print(f"\nTraining on {scen}.\n") - checkpoint_callback = CheckpointCallback( - save_freq=config.checkpoint_freq, - save_path=config.logdir / "checkpoint", - name_prefix=f"PPO", - ) - eval_callback = EvalCallback( - env_eval, - best_model_save_path=config.logdir / "eval", - n_eval_episodes=3, - eval_freq=config.eval_freq, - deterministic=True, - render=False, + if index == 0: + model = sb3lib.PPO( + env=env_train, + tensorboard_log=config.logdir / "tensorboard", verbose=1, + **network.combined_extractor(config), ) - model.set_env(env_train) - model.learn( - total_timesteps=config.train_steps, - callback=[checkpoint_callback, eval_callback], - reset_num_timesteps=False, - ) + else: + model = sb3lib.PPO.load(save_dir / "intermediate") + + eval_callback = EvalCallback( + env_eval, + best_model_save_path=config.logdir / "eval", + n_eval_episodes=3, + eval_freq=config.eval_freq, + deterministic=True, + render=False, + verbose=1, + ) + model.set_env(env_train) + model.learn( + total_timesteps=config.train_steps, + callback=[checkpoint_callback, eval_callback], + reset_num_timesteps=False, + ) + model.save(save_dir / "intermediate") - # Save trained model. 
- save_dir = config.logdir / "train" - save_dir.mkdir(parents=True, exist_ok=True) - time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") - model.save(save_dir / ("model_" + time)) - print("\nSaved trained model.\n") + print("Finished training.") - if config.mode == "evaluate": - print("\nEvaluate policy.\n") - device = th.device("cpu") - model = sb3lib.PPO.load(config.model, print_system_info=True, device=device) - - # Print model summary - # from torchinfo import summary - # td = {"rgb":th.zeros(1,9,128,128)} - # summary(model.policy, input_data=[td], depth=5) - # input("Press any key to continue ...") - - for env_name, env_eval in envs_eval.items(): - print(f"\nEvaluating env {env_name}.") - mean_reward, std_reward = evaluate_policy( - model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True - ) - print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n") - print("\nFinished evaluating.\n") + # Save trained model. + time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + model.save(save_dir / ("model_" + time)) + print("\nSaved trained model.\n") + + # Print model summary + crop = agent_spec.agent_params["crop"] + top_down_rgb = agent_spec.interface.top_down_rgb + h = top_down_rgb.height - crop[2] - crop[3] + w = top_down_rgb.width - crop[0] - crop[1] + td = {"rgb": th.zeros(1, 9, h, w)} + summary(model.policy, input_data=[td], depth=5) + + +def evaluate( + envs: Dict[str, gym.Env], + config: Dict[str, Any], + agent_spec: AgentSpec, +): + print("\nEvaluate policy.\n") + device = th.device("cpu") + model = sb3lib.PPO.load(config.model, print_system_info=True, device=device) + + # Print model summary + crop = agent_spec.agent_params["crop"] + top_down_rgb = agent_spec.interface.top_down_rgb + h = top_down_rgb.height - crop[2] - crop[3] + w = top_down_rgb.width - crop[0] - crop[1] + td = {"rgb": th.zeros(1, 9, h, w)} + summary(model.policy, input_data=[td], depth=5) + + for env_name, env_eval in envs.items(): + print(f"\nEvaluating env {env_name}.") + mean_reward, std_reward = evaluate_policy( + model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True + ) + print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n") + print("\nFinished evaluating.\n") if __name__ == "__main__": diff --git a/examples/rl/platoon/inference/__init__.py b/examples/rl/platoon/inference/__init__.py index a63298644b..e153510ffa 100644 --- a/examples/rl/platoon/inference/__init__.py +++ b/examples/rl/platoon/inference/__init__.py @@ -16,9 +16,9 @@ def entry_point(**kwargs): road_waypoints=False, signals=False, top_down_rgb=RGB( - width=128, - height=128, - resolution=80 / 128, # m/pixels + width=200, + height=200, + resolution=80 / 200, # m/pixels ), ) @@ -26,6 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. 
} return AgentSpec( diff --git a/examples/rl/platoon/inference/contrib_policy/filter_obs.py b/examples/rl/platoon/inference/contrib_policy/filter_obs.py index 6bd1f3d259..e366355c16 100644 --- a/examples/rl/platoon/inference/contrib_policy/filter_obs.py +++ b/examples/rl/platoon/inference/contrib_policy/filter_obs.py @@ -1,5 +1,5 @@ import math -from typing import Any, Dict, Sequence +from typing import Any, Dict, Sequence, Tuple import gym import numpy as np @@ -11,11 +11,17 @@ class FilterObs: """Filter only the selected observation parameters.""" - def __init__(self, top_down_rgb: RGB): + def __init__( + self, top_down_rgb: RGB, crop: Tuple[int, int, int, int] = (0, 0, 0, 0) + ): self.observation_space = gym.spaces.Box( low=0, high=255, - shape=(3, top_down_rgb.height, top_down_rgb.width), + shape=( + 3, + top_down_rgb.height - crop[2] - crop[3], + top_down_rgb.width - crop[0] - crop[1], + ), dtype=np.uint8, ) @@ -26,7 +32,9 @@ def __init__(self, top_down_rgb: RGB): self._lane_divider_color = np.array(SceneColors.LaneDivider.value[0:3]) * 255 self._edge_divider_color = np.array(SceneColors.EdgeDivider.value[0:3]) * 255 self._ego_color = np.array(SceneColors.Agent.value[0:3]) * 255 + self._goal_color = np.array(Colors.Purple.value[0:3]) * 255 + self._blur_radius = 2 self._res = top_down_rgb.resolution h = top_down_rgb.height w = top_down_rgb.width @@ -43,6 +51,14 @@ def __init__(self, top_down_rgb: RGB): self._rgb_mask = np.zeros(shape=(h, w, 3), dtype=np.uint8) self._rgb_mask[shape[0][0] : shape[0][1], shape[1][0] : shape[1][1], :] = 1 + self._crop = crop + assert ( + self._crop[0] < np.floor(w / 2) + and self._crop[1] < np.floor(w / 2) + and self._crop[2] < np.floor(h / 2) + and self._crop[3] < np.floor(h / 2) + ), f"Expected crop to be less than half the image size, got crop={self._crop} for an image of size[h,w]=[{h},{w}]." 
+ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: """Adapts the environment's observation.""" # fmt: off @@ -62,8 +78,8 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: # Superimpose waypoints onto rgb image wps = obs["waypoint_paths"]["position"][0:11, 3:, 0:3] for path in wps[:]: - wps_valid = wps_to_pixels( - wps=path, + wps_valid = points_to_pixels( + points=path, ego_pos=ego_pos, ego_heading=ego_heading, w=w, @@ -75,6 +91,28 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: if all(rgb_ego[img_y, img_x, :] == self._no_color): rgb_ego[img_y, img_x, :] = self._wps_color + # Superimpose goal position onto rgb image + if not all((goal:=obs["ego_vehicle_state"]["mission"]["goal_position"]) == np.zeros((3,))): + goal_pixel = points_to_pixels( + points=np.expand_dims(goal,axis=0), + ego_pos=ego_pos, + ego_heading=ego_heading, + w=w, + h=h, + res=self._res, + ) + if len(goal_pixel) != 0: + img_x, img_y = goal_pixel[0][0], goal_pixel[0][1] + if all(rgb_ego[img_y, img_x, :] == self._no_color) or all(rgb_ego[img_y, img_x, :] == self._wps_color): + rgb_ego[ + max(img_y-self._blur_radius,0):min(img_y+self._blur_radius,h), + max(img_x-self._blur_radius,0):min(img_x+self._blur_radius,w), + :, + ] = self._goal_color + + # Crop image + rgb_ego = rgb_ego[self._crop[2]:h-self._crop[3],self._crop[0]:w-self._crop[1],:] + # Channel first rgb rgb_ego = rgb_ego.transpose(2, 0, 1) @@ -123,14 +161,19 @@ def replace_color( # fmt: on -def wps_to_pixels( - wps: np.ndarray, ego_pos: np.ndarray, ego_heading: float, w: int, h: int, res: float +def points_to_pixels( + points: np.ndarray, + ego_pos: np.ndarray, + ego_heading: float, + w: int, + h: int, + res: float, ) -> np.ndarray: - """Converts waypoints into pixel coordinates in order to superimpose the - waypoints onto the RGB image. + """Converts points into pixel coordinates in order to superimpose the + points onto the RGB image. Args: - wps (np.ndarray): Waypoints for a single route. Shape (n,3). + points (np.ndarray): Array of points. Shape (n,3). ego_pos (np.ndarray): Ego position. Shape = (3,). ego_heading (float): Ego heading in radians. w (int): Width of RGB image @@ -139,19 +182,19 @@ def wps_to_pixels( ground_size/image_size. Returns: - np.ndarray: Array of waypoint coordinates on the RGB image. Shape = (m,3). + np.ndarray: Array of point coordinates on the RGB image. Shape = (m,3). 
""" # fmt: off - mask = [False if all(point == np.zeros(3,)) else True for point in wps] - wps_nonzero = wps[mask] - wps_delta = wps_nonzero - ego_pos - wps_rotated = rotate_axes(wps_delta, theta=ego_heading) - wps_pixels = wps_rotated / np.array([res, res, res]) - wps_overlay = np.array([w / 2, h / 2, 0]) + wps_pixels * np.array([1, -1, 1]) - wps_rfloat = np.rint(wps_overlay) - wps_valid = wps_rfloat[(wps_rfloat[:,0] >= 0) & (wps_rfloat[:,0] < w) & (wps_rfloat[:,1] >= 0) & (wps_rfloat[:,1] < h)] - wps_rint = wps_valid.astype(int) - return wps_rint + mask = [False if all(point == np.zeros(3,)) else True for point in points] + points_nonzero = points[mask] + points_delta = points_nonzero - ego_pos + points_rotated = rotate_axes(points_delta, theta=ego_heading) + points_pixels = points_rotated / np.array([res, res, res]) + points_overlay = np.array([w / 2, h / 2, 0]) + points_pixels * np.array([1, -1, 1]) + points_rfloat = np.rint(points_overlay) + points_valid = points_rfloat[(points_rfloat[:,0] >= 0) & (points_rfloat[:,0] < w) & (points_rfloat[:,1] >= 0) & (points_rfloat[:,1] < h)] + points_rint = points_valid.astype(int) + return points_rint # fmt: on diff --git a/examples/rl/platoon/inference/contrib_policy/format_action.py b/examples/rl/platoon/inference/contrib_policy/format_action.py index 9c3ba69f79..532770b3a1 100644 --- a/examples/rl/platoon/inference/contrib_policy/format_action.py +++ b/examples/rl/platoon/inference/contrib_policy/format_action.py @@ -13,7 +13,7 @@ def __init__(self, action_space_type: ActionSpaceType): else: raise Exception(f"Unknown action space type {action_space_type}.") - def format(self, action): + def format(self, action: int): """Adapts the action input to the wrapped environment. Note: Users should not directly call this method. diff --git a/examples/rl/platoon/inference/contrib_policy/policy.py b/examples/rl/platoon/inference/contrib_policy/policy.py index 10d81044d4..979fdffa17 100644 --- a/examples/rl/platoon/inference/contrib_policy/policy.py +++ b/examples/rl/platoon/inference/contrib_policy/policy.py @@ -11,22 +11,20 @@ class Policy(Agent): """Policy class to be submitted by the user. This class will be loaded and tested during evaluation.""" - def __init__(self, num_stack, top_down_rgb, action_space_type): + def __init__(self, num_stack, top_down_rgb, crop, action_space_type): """All policy initialization matters, including loading of model, is performed here. To be implemented by the user. 
""" - import stable_baselines3 as sb3lib from contrib_policy import network from contrib_policy.filter_obs import FilterObs from contrib_policy.format_action import FormatAction from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict - model_path = Path(__file__).resolve().parents[0] / "saved_model" - self.model = sb3lib.PPO.load(model_path) + self._model = self._get_model() - self._filter_obs = FilterObs(top_down_rgb=top_down_rgb) + self._filter_obs = FilterObs(top_down_rgb=top_down_rgb, crop=crop) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, num_stack=num_stack, @@ -43,7 +41,7 @@ def __init__(self, num_stack, top_down_rgb, action_space_type): def act(self, obs): """Mandatory act function to be implemented by user.""" processed_obs = self._process(obs) - action, _ = self.model.predict(observation=processed_obs, deterministic=True) + action, _ = self._model.predict(observation=processed_obs, deterministic=True) formatted_action = self._format_action.format(action=int(action)) return formatted_action @@ -55,3 +53,8 @@ def _process(self, obs): obs = self._frame_stack.stack(obs) obs = self._make_dict.make(obs) return obs + + def _get_model(self): + import stable_baselines3 as sb3lib + + return sb3lib.PPO.load(path=Path(__file__).resolve().parents[0] / "saved_model") diff --git a/examples/rl/platoon/train/config.yaml b/examples/rl/platoon/train/config.yaml index 0eb347937f..8aaa324203 100644 --- a/examples/rl/platoon/train/config.yaml +++ b/examples/rl/platoon/train/config.yaml @@ -5,20 +5,26 @@ smarts: agent_locator: inference:contrib-agent-v0 env_id: smarts.env:platoon-v0 scenarios: - - scenarios/sumo/platoon/merge_exit_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_1 # PPO algorithm alg: - n_steps: 2048 - batch_size: 512 + n_steps: 1024 + batch_size: 64 n_epochs: 4 target_kl: 0.1 + # ent_coef: 0.01 # For exploration. Range = 0 to 0.01 # Training over all scenarios epochs: 500 # Number of training loops. # Training per scenario - train_steps: 10_000 - checkpoint_freq: 10_000 # Save a model every checkpoint_freq calls to env.step(). - eval_freq: 10_000 # Evaluate the trained model every eval_freq steps and save the best model. + train_steps: 4_096 + checkpoint_freq: 4_096 # Save a model every checkpoint_freq calls to env.step(). + eval_freq: 4_096 # Evaluate the trained model every eval_freq steps and save the best model. eval_eps: 5 # Number of evaluation epsiodes. 
diff --git a/examples/rl/platoon/train/env.py b/examples/rl/platoon/train/env.py index 6f1668b56f..a4dc0fea12 100644 --- a/examples/rl/platoon/train/env.py +++ b/examples/rl/platoon/train/env.py @@ -1,16 +1,18 @@ import sys from pathlib import Path -# To import submission folder -sys.path.insert(0, str(Path(__file__).resolve().parents[2])) +# To import train folder +sys.path.insert(0, str(Path(__file__).resolve().parents[0])) import gymnasium as gym +from smarts.zoo.agent_spec import AgentSpec -def make_env(env_id, scenario, agent_interface, config, seed): + +def make_env(env_id, scenario, agent_spec: AgentSpec, config, seed): from preprocess import Preprocess + from reward import Reward from stable_baselines3.common.monitor import Monitor - from train.reward import Reward from smarts.env.gymnasium.wrappers.api_reversion import Api021Reversion from smarts.env.gymnasium.wrappers.single_agent import SingleAgent @@ -18,15 +20,15 @@ def make_env(env_id, scenario, agent_interface, config, seed): env = gym.make( env_id, scenario=scenario, - agent_interface=agent_interface, + agent_interface=agent_spec.interface, seed=seed, sumo_headless=not config.sumo_gui, # If False, enables sumo-gui display. headless=not config.head, # If False, enables Envision display. ) - env = Reward(env) - env = SingleAgent(env) - env = Api021Reversion(env) - env = Preprocess(env, agent_interface) + env = Reward(env=env, crop=agent_spec.agent_params["crop"]) + env = SingleAgent(env=env) + env = Api021Reversion(env=env) + env = Preprocess(env=env, agent_spec=agent_spec) env = Monitor(env) return env diff --git a/examples/rl/platoon/train/preprocess.py b/examples/rl/platoon/train/preprocess.py index 5d9079a9d9..5a79d78960 100644 --- a/examples/rl/platoon/train/preprocess.py +++ b/examples/rl/platoon/train/preprocess.py @@ -4,17 +4,20 @@ from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict -from smarts.core.agent_interface import AgentInterface +from smarts.zoo.agent_spec import AgentSpec class Preprocess(gym.Wrapper): - def __init__(self, env: gym.Env, agent_interface: AgentInterface): + def __init__(self, env: gym.Env, agent_spec: AgentSpec): super().__init__(env) - self._filter_obs = FilterObs(top_down_rgb=agent_interface.top_down_rgb) + self._filter_obs = FilterObs( + top_down_rgb=agent_spec.interface.top_down_rgb, + crop=agent_spec.agent_params["crop"], + ) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, - num_stack=3, + num_stack=agent_spec.agent_params["num_stack"], stack_axis=0, ) self._frame_stack.reset() @@ -22,7 +25,9 @@ def __init__(self, env: gym.Env, agent_interface: AgentInterface): self.observation_space = self._make_dict.observation_space - self._format_action = FormatAction(agent_interface.action) + self._format_action = FormatAction( + action_space_type=agent_spec.interface.action + ) self.action_space = self._format_action.action_space def _process(self, obs): diff --git a/examples/rl/platoon/train/requirements.txt b/examples/rl/platoon/train/requirements.txt index 72c1d1db69..90d5d558db 100644 --- a/examples/rl/platoon/train/requirements.txt +++ b/examples/rl/platoon/train/requirements.txt @@ -1,152 +1,98 @@ absl-py==1.4.0 -alabaster==0.7.13 -astroid==2.15.0 -asttokens==2.2.1 -attrs==22.2.0 +argcomplete==3.0.8 +attrs==23.1.0 Automat==22.10.0 -Babel==2.12.1 -backcall==0.2.0 -black==22.6.0 -cached-property==1.5.2 -cachetools==5.3.0 -certifi==2022.12.7 -cfgv==3.3.1 +av==10.0.0 +av2==0.2.1 +cachetools==5.3.1 +certifi==2023.5.7 
charset-normalizer==3.1.0 click==8.1.3 cloudpickle==1.6.0 +colorlog==6.7.0 constantly==15.1.0 contourpy==1.0.7 cycler==0.11.0 -decorator==5.1.1 -dill==0.3.6 distlib==0.3.6 -docutils==0.18.1 -eclipse-sumo==1.16.0 -executing==1.2.0 -filelock==3.9.1 -fonttools==4.39.0 +eclipse-sumo==1.17.0 +filelock==3.12.0 +fonttools==4.39.4 future==0.18.3 -google-auth==2.16.2 +google-auth==2.19.1 google-auth-oauthlib==0.4.6 -grpcio==1.52.0 -grpcio-tools==1.32.0 +grpcio==1.55.0 gym==0.21.0 gymnasium==0.27.0 gymnasium-notices==0.0.1 hyperlink==21.0.0 -identify==2.5.20 idna==3.4 ijson==3.2.0.post0 -imagesize==1.4.1 -importlab==0.8 importlib-metadata==4.13.0 importlib-resources==5.12.0 incremental==22.10.0 -ipython==8.11.0 -isort==5.7.0 -jax-jumpy==0.2.0 -jedi==0.18.2 -Jinja2==3.1.2 -jsonpatch==1.32 -jsonpointer==2.3 +jax-jumpy==1.0.0 +joblib==1.2.0 kiwisolver==1.4.4 -lazy-object-proxy==1.9.0 -libcst==0.4.9 -Markdown==3.4.1 +llvmlite==0.40.0 +Markdown==3.4.3 markdown-it-py==2.2.0 -MarkupSafe==2.1.2 +MarkupSafe==2.1.3 matplotlib==3.7.1 -matplotlib-inline==0.1.6 -mccabe==0.7.0 -mdit-py-plugins==0.3.5 mdurl==0.1.2 -mypy-extensions==1.0.0 -myst-parser==1.0.0 -networkx==3.0 -ninja==1.11.1 -nodeenv==1.7.0 +nox==2023.4.22 +numba==0.57.0 numpy==1.23.5 nvidia-cublas-cu11==11.10.3.66 nvidia-cuda-nvrtc-cu11==11.7.99 nvidia-cuda-runtime-cu11==11.7.99 nvidia-cudnn-cu11==8.5.0.96 oauthlib==3.2.2 -packaging==23.0 +opencv-python==4.7.0.72 +packaging==23.1 Panda3D==1.10.9 panda3d-gltf==0.13 panda3d-simplepbr==0.10 -pandas==1.5.3 -parso==0.8.3 -pathspec==0.11.1 -pbr==5.11.1 -pexpect==4.8.0 -pickleshare==0.7.5 -Pillow==9.4.0 -platformdirs==3.1.1 -pre-commit==2.16.0 -prompt-toolkit==3.0.38 +pandas==2.0.2 +Pillow==9.5.0 +platformdirs==3.5.1 protobuf==3.20.3 -psutil==5.9.4 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -pybullet==3.0.6 -Pygments==2.14.0 -pylint==2.17.0 +psutil==5.9.5 +pyarrow==12.0.0 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pybullet==3.2.5 +Pygments==2.15.1 pyparsing==3.0.9 +pyproj==3.5.0 python-dateutil==2.8.2 -pytype==2022.1.13 -pytz==2022.7.1 +pytz==2023.3 PyYAML==6.0 -requests==2.28.2 +requests==2.31.0 requests-oauthlib==1.3.1 -rich==13.3.2 +rich==13.4.1 rsa==4.9 Rtree==1.0.1 scipy==1.10.1 -sh==2.0.2 shapely==2.0.1 Shimmy==0.2.1 six==1.16.0 -snowballstemmer==2.2.0 -Sphinx==6.1.3 -sphinx-click==4.4.0 -sphinx-rtd-theme==1.2.0 -sphinxcontrib-apidoc==0.3.0 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jquery==4.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 stable-baselines3==1.7.0 -stack-data==0.6.2 tableprint==0.9.1 -tabulate==0.9.0 tensorboard==2.12.0 tensorboard-data-server==0.7.0 tensorboard-plugin-wit==1.8.1 -tokenize-rt==5.0.0 -toml==0.10.2 -tomli==2.0.1 -tomlkit==0.11.6 torch==1.13.1 torchinfo==1.7.2 -tornado==6.2 -traitlets==5.9.0 +tornado==6.3.2 trimesh==3.9.29 Twisted==22.10.0 -typing-inspect==0.8.0 -typing_extensions==4.5.0 -urllib3==1.26.15 -virtualenv==20.21.0 -visdom==0.2.4 +typing_extensions==4.6.3 +tzdata==2023.3 +urllib3==1.26.16 +virtualenv==20.23.0 wcwidth==0.2.6 -websocket-client==1.5.1 -Werkzeug==2.2.3 -wrapt==1.15.0 +websocket-client==1.5.2 +Werkzeug==2.3.5 yattag==1.15.1 zipp==3.15.0 -zope.interface==5.5.2 +zope.interface==6.0 diff --git a/examples/rl/platoon/train/reward.py b/examples/rl/platoon/train/reward.py index 99b4ea492c..166f858512 100644 --- a/examples/rl/platoon/train/reward.py +++ b/examples/rl/platoon/train/reward.py @@ -2,14 +2,15 @@ import 
numpy as np from smarts.core.colors import SceneColors - +from typing import Tuple class Reward(gym.Wrapper): - def __init__(self, env): + def __init__(self, env: gym.Env, crop: Tuple[int, int, int, int]): """Constructor for the Reward wrapper.""" super().__init__(env) self._leader_color = np.array(SceneColors.SocialAgent.value[0:3]) * 255 self._total_dist = {} + self._crop = crop def reset(self, *, seed=None, options=None): self._total_dist = {} @@ -40,7 +41,9 @@ def step(self, action): elif agent_obs["events"]["reached_max_episode_steps"]: print(f"{agent_id}: Reached max episode steps.") elif ( - agent_obs["events"]["collisions"] | agent_obs["events"]["off_road"] + agent_obs["events"]["collisions"] + | agent_obs["events"]["off_road"] + | agent_obs["events"]["wrong_way"] ): pass elif agent_obs["events"]["interest_done"]: @@ -78,32 +81,38 @@ def _reward(self, obs, env_reward): if agent_obs["events"]["collisions"]: reward[agent_id] -= np.float64(10) print(f"{agent_id}: Collided.") - break + continue # Penalty for driving off road if agent_obs["events"]["off_road"]: reward[agent_id] -= np.float64(10) print(f"{agent_id}: Went off road.") - break + continue - # Reward for reaching goal - # if agent_obs["events"]["reached_goal"]: - # reward[agent_id] += np.float64(30) + # Penalty for driving on wrong way + if obs[agent_id]["events"]["wrong_way"]: + reward[agent_id] -= np.float64(10) + print(f"{agent_id}: Went wrong way.") + continue - # Reward for distance travelled by driving - reward[agent_id] += np.float64(env_reward[agent_id]) + # Reward for distance travelled in one step + # reward[agent_id] += np.float64(env_reward[agent_id]) # Check if leader is in front and within the rgb observation if leader: rgb = agent_obs["top_down_rgb"] h, w, d = rgb.shape rgb_masked = rgb[0 : h // 2, :, :] + rgb_cropped = rgb_masked[ + self._crop[2] :, self._crop[0] : w - self._crop[1], : + ] leader_in_rgb = ( - (rgb_masked == self._leader_color.reshape((1, 1, 3))) + (rgb_cropped == self._leader_color.reshape((1, 1, 3))) .all(axis=-1) .any() ) + # Rewards specific to vehicle following task if leader and leader_in_rgb: # Get agent's waypoints waypoints = agent_obs["waypoint_paths"]["position"] @@ -119,15 +128,12 @@ def _reward(self, obs, env_reward): np.array([leader["position"]]), ) - # Rewards specific to "platooning" and "following" tasks - if leader and leader_in_rgb: - # Reward for being in the same lane as the leader if (leader_ind is not None) and (leader_wp_ind[0] in ego_wp_inds): - reward[agent_id] += np.float64(1) + reward[agent_id] += np.float64(3) else: - reward[agent_id] -= np.float64(0.2) + reward[agent_id] -= np.float64(0.3) return reward diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index eba4f2ff07..ca0ef11bfa 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -6,6 +6,7 @@ # Required to load inference module sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + import argparse import warnings from datetime import datetime @@ -22,23 +23,36 @@ from contrib_policy import network from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback from stable_baselines3.common.evaluation import evaluate_policy +from torchinfo import summary from train.env import make_env from train.utils import ObjDict from smarts.zoo import registry +from smarts.zoo.agent_spec import AgentSpec + -print("\nTorch cuda is available: ", th.cuda.is_available(), "\n") +print("\n") +print(f"Torch cuda is available: 
{th.cuda.is_available()}") +device = th.device("cuda" if th.cuda.is_available() else "cpu") +print(f"Torch device: {device}") +print("\n") warnings.simplefilter("ignore", category=DeprecationWarning) warnings.simplefilter("ignore", category=ResourceWarning) # pytype: disable=attribute-error -def main(args: argparse.Namespace): - # Load config file. +def load_config(): + """Load config file.""" parent_dir = Path(__file__).resolve().parent config_file = yaml.safe_load((parent_dir / "config.yaml").read_text()) config = ObjDict(config_file["smarts"]) + return config + + +def main(args: argparse.Namespace): + parent_dir = Path(__file__).resolve().parent + config = load_config() # Load env config. config.mode = args.mode @@ -76,22 +90,28 @@ def main(args: argparse.Namespace): envs_train[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) envs_eval[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) # Run training or evaluation. - run(envs_train=envs_train, envs_eval=envs_eval, config=config) - - print("Finished training ...") + if config.mode == "train": + train( + envs_train=envs_train, + envs_eval=envs_eval, + config=config, + agent_spec=agent_spec, + ) + else: + evaluate(envs=envs_eval, config=config, agent_spec=agent_spec) # Close all environments for env in envs_train.values(): @@ -100,79 +120,95 @@ def main(args: argparse.Namespace): env.close() -def run( +def train( envs_train: Dict[str, gym.Env], envs_eval: Dict[str, gym.Env], config: Dict[str, Any], + agent_spec: AgentSpec, ): + print("\nStart training.\n") + scenarios_iter = cycle(config.scenarios) + save_dir = config.logdir / "train" + save_dir.mkdir(parents=True, exist_ok=True) + checkpoint_callback = CheckpointCallback( + save_freq=config.checkpoint_freq, + save_path=config.logdir / "checkpoint", + name_prefix="PPO", + ) - if config.mode == "train": - print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) - model = sb3lib.PPO( - env=envs_train[next(scenarios_iter)], - tensorboard_log=config.logdir / "tensorboard", - verbose=1, - **network.combined_extractor(config), - ) + for index in range(config.epochs): + scen = next(scenarios_iter) + env_train = envs_train[scen] + env_eval = envs_eval[scen] + print(f"\nTraining on {scen}.\n") - # Print model summary - # from torchinfo import summary - # td = {"rgb":th.zeros(1,9,128,128)} - # summary(model.policy, input_data=[td], depth=5) - # input("Press any key to continue ...") - - for index in range(config.epochs): - scen = next(scenarios_iter) - env_train = envs_train[scen] - env_eval = envs_eval[scen] - print(f"\nTraining on {scen}.\n") - checkpoint_callback = CheckpointCallback( - save_freq=config.checkpoint_freq, - save_path=config.logdir / "checkpoint", - name_prefix=f"PPO", - ) - eval_callback = EvalCallback( - env_eval, - best_model_save_path=config.logdir / "eval", - n_eval_episodes=3, - eval_freq=config.eval_freq, - deterministic=True, - render=False, + if index == 0: + model = sb3lib.PPO( + env=env_train, + tensorboard_log=config.logdir / "tensorboard", verbose=1, + **network.combined_extractor(config), ) - model.set_env(env_train) - model.learn( - total_timesteps=config.train_steps, - callback=[checkpoint_callback, eval_callback], - reset_num_timesteps=False, - ) + else: + model = sb3lib.PPO.load(save_dir / "intermediate") + + 
eval_callback = EvalCallback(
+            env_eval,
+            best_model_save_path=config.logdir / "eval",
+            n_eval_episodes=3,
+            eval_freq=config.eval_freq,
+            deterministic=True,
+            render=False,
+            verbose=1,
+        )
+        model.set_env(env_train)
+        model.learn(
+            total_timesteps=config.train_steps,
+            callback=[checkpoint_callback, eval_callback],
+            reset_num_timesteps=False,
+        )
+        model.save(save_dir / "intermediate")

-        # Save trained model.
-        save_dir = config.logdir / "train"
-        save_dir.mkdir(parents=True, exist_ok=True)
-        time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
-        model.save(save_dir / ("model_" + time))
-        print("\nSaved trained model.\n")
+    print("Finished training.")

-    if config.mode == "evaluate":
-        print("\nEvaluate policy.\n")
-        device = th.device("cpu")
-        model = sb3lib.PPO.load(config.model, print_system_info=True, device=device)
-
-        # Print model summary
-        # from torchinfo import summary
-        # td = {"rgb":th.zeros(1,9,128,128)}
-        # summary(model.policy, input_data=[td], depth=5)
-        # input("Press any key to continue ...")
-
-        for env_name, env_eval in envs_eval.items():
-            print(f"\nEvaluating env {env_name}.")
-            mean_reward, std_reward = evaluate_policy(
-                model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True
-            )
-            print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n")
-        print("\nFinished evaluating.\n")
+    # Save trained model.
+    time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+    model.save(save_dir / ("model_" + time))
+    print("\nSaved trained model.\n")
+
+    # Print model summary
+    crop = agent_spec.agent_params["crop"]
+    top_down_rgb = agent_spec.interface.top_down_rgb
+    h = top_down_rgb.height - crop[2] - crop[3]
+    w = top_down_rgb.width - crop[0] - crop[1]
+    td = {"rgb": th.zeros(1, 9, h, w)}
+    summary(model.policy, input_data=[td], depth=5)
+
+
+def evaluate(
+    envs: Dict[str, gym.Env],
+    config: Dict[str, Any],
+    agent_spec: AgentSpec,
+):
+    print("\nEvaluate policy.\n")
+    device = th.device("cpu")
+    model = sb3lib.PPO.load(config.model, print_system_info=True, device=device)
+
+    # Print model summary
+    crop = agent_spec.agent_params["crop"]
+    top_down_rgb = agent_spec.interface.top_down_rgb
+    h = top_down_rgb.height - crop[2] - crop[3]
+    w = top_down_rgb.width - crop[0] - crop[1]
+    td = {"rgb": th.zeros(1, 9, h, w)}
+    summary(model.policy, input_data=[td], depth=5)
+
+    for env_name, env_eval in envs.items():
+        print(f"\nEvaluating env {env_name}.")
+        mean_reward, std_reward = evaluate_policy(
+            model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True
+        )
+        print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n")
+    print("\nFinished evaluating.\n")


 if __name__ == "__main__":
diff --git a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml b/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml
deleted file mode 100644
index 3f66d5c793..0000000000
--- a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml
+++ /dev/null
@@ -1,203 +0,0 @@
[203 lines of SUMO road-network XML deleted with this file; markup omitted]
diff --git a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py b/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py
deleted file mode 100644
index 000193efa2..0000000000
--- 
a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -import random -from itertools import combinations -from pathlib import Path - -from smarts.sstudio import gen_scenario -from smarts.sstudio.types import Flow, Mission, Route, Scenario, Traffic, TrafficActor - -normal = TrafficActor( - name="car", -) - -vertical_routes = [ - ("E2", 0, "E7", 0), - ("E8", 0, "E1", 1), -] - -horizontal_routes = [ - ("E3", 0, "E5", 0), - ("E3", 1, "E5", 1), - ("E3", 2, "E5", 2), - ("E6", 1, "E4", 1), - ("E6", 0, "E4", 0), -] - -turn_left_routes = [ - ("E8", 0, "E5", 2), - ("E6", 1, "E1", 1), - ("E2", 1, "E4", 1), - ("E3", 2, "E7", 0), -] - -turn_right_routes = [ - ("E6", 0, "E7", 0), - ("E3", 0, "E1", 0), - ("E2", 0, "E5", 0), - ("E8", 0, "E4", 0), -] - -# Total route combinations = 14C1 + 14C2 + 14C3 + 14C4 = 1470 -all_routes = vertical_routes + horizontal_routes + turn_left_routes + turn_right_routes -route_comb = [com for elems in range(4, 5) for com in combinations(all_routes, elems)] -traffic = {} -for name, routes in enumerate(route_comb): - traffic[str(name)] = Traffic( - flows=[ - Flow( - route=Route( - begin=(f"{r[0]}", r[1], 0), - end=(f"{r[2]}", r[3], "max"), - ), - # Random flow rate, between x and y vehicles per minute. - rate=60 * random.uniform(2, 5), - # Random flow start time, between x and y seconds. - begin=random.uniform(0, 3), - # For an episode with maximum_episode_steps=3000 and step - # time=0.1s, maximum episode time=300s. Hence, traffic set to - # end at 900s, which is greater than maximum episode time of - # 300s. - end=60 * 15, - actors={normal: 1}, - ) - for r in routes - ] - ) - -route = Route(begin=("E8", 0, 5), end=("E5", 0, "max")) -ego_missions = [ - Mission( - route=route, - start_time=15, # Delayed start, to ensure road has prior traffic. 
- ) -] -# runtime = 14 -gen_scenario( - scenario=Scenario( - traffic=traffic, - ego_missions=ego_missions, - ), - output_dir=Path(__file__).parent, -) diff --git a/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py b/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py index 7f2eacd983..d39c58b9c0 100644 --- a/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py +++ b/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py @@ -125,7 +125,7 @@ scenario_metadata=ScenarioMetadata( actor_of_interest_re_filter=leader_id, actor_of_interest_color=Colors.Blue, - scenario_difficulty=3, + scenario_difficulty=0.9, scenario_duration=duration, ), ), diff --git a/smarts/benchmark/driving_smarts/v2023/config_1.yaml b/smarts/benchmark/driving_smarts/v2023/config_1.yaml index 6b3a57f675..e19c3cc9c2 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_1.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_1.yaml @@ -15,7 +15,10 @@ benchmark: standard: loc: "smarts.env:driving-smarts-v2023" scenarios: - - scenarios/sumo/straight/3lane_cruise_agents_1 + - scenarios/sumo/straight/cruise_2lane_agents_1 + - scenarios/sumo/straight/cutin_2lane_agents_1 + - scenarios/sumo/straight/merge_exit_sumo_t_agents_1 + - scenarios/sumo/straight/overtake_2lane_agents_1 kwargs: seed: 42 metric_formula: smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py \ No newline at end of file diff --git a/smarts/benchmark/driving_smarts/v2023/config_2.yaml b/smarts/benchmark/driving_smarts/v2023/config_2.yaml index e7131ef475..9f00597da0 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_2.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_2.yaml @@ -15,7 +15,8 @@ benchmark: turns: loc: "smarts.env:driving-smarts-v2023" scenarios: - - scenarios/sumo/intersections/1_to_2lane_left_turn_c_agents_1 + - scenarios/sumo/intersections/1_to_3lane_left_turn_sumo_c_agents_1 + - scenarios/sumo/intersections/1_to_3lane_left_turn_middle_lane_c_agents_1 kwargs: seed: 42 metric_formula: smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py \ No newline at end of file diff --git a/smarts/benchmark/driving_smarts/v2023/config_3.yaml b/smarts/benchmark/driving_smarts/v2023/config_3.yaml index d2e89b43f4..5ecabfe4b0 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_3.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_3.yaml @@ -15,7 +15,14 @@ benchmark: platoon: loc: "smarts.env:platoon-v0" scenarios: - - scenarios/sumo/platoon/merge_exit_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_t_agents_2 + - scenarios/sumo/vehicle_following/merge_exit_sumo_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2 kwargs: seed: 42 metric_formula: smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py diff --git a/smarts/core/route_cache.py b/smarts/core/route_cache.py index 39321ce45a..313a3eceb1 100644 --- a/smarts/core/route_cache.py +++ b/smarts/core/route_cache.py @@ -81,7 +81,8 @@ def road_ids(self) -> List[str]: """Get the road IDs for this route. 
Returns: - (List[str]): A list of the road IDs for the Roads in this Route.""" + (List[str]): A list of the road IDs for the Roads in this Route. + """ return [road.road_id for road in self.roads] @staticmethod From 38faceba17c7c77fac073916709a8c3151a33ecd Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Mon, 12 Jun 2023 15:48:14 -0400 Subject: [PATCH 2/5] Address reviews. --- examples/rl/drive/inference/__init__.py | 2 +- examples/rl/platoon/inference/__init__.py | 2 +- examples/rl/platoon/train/reward.py | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/rl/drive/inference/__init__.py b/examples/rl/drive/inference/__init__.py index a330eaa041..307416f915 100644 --- a/examples/rl/drive/inference/__init__.py +++ b/examples/rl/drive/inference/__init__.py @@ -26,7 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. - "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. Units: pixels. } return AgentSpec( diff --git a/examples/rl/platoon/inference/__init__.py b/examples/rl/platoon/inference/__init__.py index e153510ffa..38eb05cab3 100644 --- a/examples/rl/platoon/inference/__init__.py +++ b/examples/rl/platoon/inference/__init__.py @@ -26,7 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. - "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. Units: pixels. } return AgentSpec( diff --git a/examples/rl/platoon/train/reward.py b/examples/rl/platoon/train/reward.py index 166f858512..be23854afe 100644 --- a/examples/rl/platoon/train/reward.py +++ b/examples/rl/platoon/train/reward.py @@ -95,9 +95,6 @@ def _reward(self, obs, env_reward): print(f"{agent_id}: Went wrong way.") continue - # Reward for distance travelled in one step - # reward[agent_id] += np.float64(env_reward[agent_id]) - # Check if leader is in front and within the rgb observation if leader: rgb = agent_obs["top_down_rgb"] From b64fdd6c7844722f625a48f713914cfd61da6c26 Mon Sep 17 00:00:00 2001 From: adai Date: Mon, 12 Jun 2023 16:07:27 -0400 Subject: [PATCH 3/5] Use slice iterator. Co-authored-by: Tucker Alban --- examples/rl/platoon/train/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index ca0ef11bfa..b563cdf2d3 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -136,8 +136,8 @@ def train( name_prefix="PPO", ) - for index in range(config.epochs): - scen = next(scenarios_iter) + scenarios_iter = islice(cycle(config.scenarios), config.epocs) + for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen] print(f"\nTraining on {scen}.\n") From 551684295aa24480b0a4df74c4f4ccadf7fdca23 Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Mon, 12 Jun 2023 16:18:04 -0400 Subject: [PATCH 4/5] Use islice tool. 
--- examples/rl/drive/train/run.py | 7 +++---- examples/rl/platoon/train/run.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/rl/drive/train/run.py b/examples/rl/drive/train/run.py index b4f2221cd2..b3139a7dbd 100644 --- a/examples/rl/drive/train/run.py +++ b/examples/rl/drive/train/run.py @@ -9,7 +9,7 @@ import argparse import warnings from datetime import datetime -from itertools import cycle +from itertools import cycle, islice from typing import Any, Dict import gym @@ -125,7 +125,6 @@ def train( agent_spec: AgentSpec, ): print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) save_dir = config.logdir / "train" save_dir.mkdir(parents=True, exist_ok=True) checkpoint_callback = CheckpointCallback( @@ -134,8 +133,8 @@ def train( name_prefix="PPO", ) - for index in range(config.epochs): - scen = next(scenarios_iter) + scenarios_iter = islice(cycle(config.scenarios), config.epocs) + for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen] print(f"\nTraining on {scen}.\n") diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index b563cdf2d3..32da66172b 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -10,7 +10,7 @@ import argparse import warnings from datetime import datetime -from itertools import cycle +from itertools import cycle, islice from typing import Any, Dict import gym @@ -127,7 +127,6 @@ def train( agent_spec: AgentSpec, ): print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) save_dir = config.logdir / "train" save_dir.mkdir(parents=True, exist_ok=True) checkpoint_callback = CheckpointCallback( From 4e948d9ca9ebcb161fc870c690909d886cf01658 Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Mon, 12 Jun 2023 16:21:12 -0400 Subject: [PATCH 5/5] Rectify spelling. --- examples/rl/drive/train/run.py | 2 +- examples/rl/platoon/train/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/rl/drive/train/run.py b/examples/rl/drive/train/run.py index b3139a7dbd..c42b9cab7c 100644 --- a/examples/rl/drive/train/run.py +++ b/examples/rl/drive/train/run.py @@ -133,7 +133,7 @@ def train( name_prefix="PPO", ) - scenarios_iter = islice(cycle(config.scenarios), config.epocs) + scenarios_iter = islice(cycle(config.scenarios), config.epochs) for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen] diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index 32da66172b..d86e41f587 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -135,7 +135,7 @@ def train( name_prefix="PPO", ) - scenarios_iter = islice(cycle(config.scenarios), config.epocs) + scenarios_iter = islice(cycle(config.scenarios), config.epochs) for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen]
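
As a standalone reference for the two ideas the run.py changes above revolve around, the following sketch exercises the islice(cycle(...)) scenario rotation and the cropped-observation shape calculation in isolation. It uses only the Python standard library; the scenario names, epoch count, and frame size are illustrative assumptions, while the crop tuple and the 9-channel stacked input mirror values that appear in the patches.

from itertools import cycle, islice
from types import SimpleNamespace

# Illustrative stand-in for the training config; these values are assumptions.
config = SimpleNamespace(
    scenarios=["scenario_a", "scenario_b", "scenario_c"],
    epochs=7,
)

# Scenario rotation, as in the patched train() loop: cycle() repeats the
# scenario list endlessly and islice() caps the rotation at `epochs` rounds,
# so no separate range()/next() bookkeeping is needed.
scenarios_iter = islice(cycle(config.scenarios), config.epochs)
for index, scen in enumerate(scenarios_iter):
    print(f"epoch {index}: training on {scen}")

# Cropped-observation shape, as computed before the model summary: crop is
# (left, right, top, bottom) in pixels, and with num_stack = 3 stacked RGB
# frames the policy input carries 3 * 3 = 9 channels.
width, height = 200, 200         # example frame size (assumed)
crop = (50, 50, 0, 70)           # left, right, top, bottom (from the patch)
h = height - crop[2] - crop[3]   # 130
w = width - crop[0] - crop[1]    # 100
print(f"policy input shape: (1, 9, {h}, {w})")

Running this prints seven epoch lines cycling through the three placeholder scenarios and reports a policy input shape of (1, 9, 130, 100) for the assumed 200 x 200 frame.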