From ac345611485e4ffd5cb868cf5e9412e601e6fe48 Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Sat, 10 Jun 2023 15:19:29 -0400 Subject: [PATCH 1/5] Update RL examples. --- docs/benchmarks/driving_smarts_2023_1.rst | 22 +- docs/benchmarks/driving_smarts_2023_3.rst | 15 ++ examples/rl/drive/inference/__init__.py | 7 +- .../inference/contrib_policy/filter_obs.py | 85 ++++++-- .../drive/inference/contrib_policy/policy.py | 15 +- examples/rl/drive/train/config.yaml | 19 +- examples/rl/drive/train/env.py | 20 +- examples/rl/drive/train/preprocess.py | 15 +- examples/rl/drive/train/requirements.txt | 138 ++++-------- examples/rl/drive/train/reward.py | 7 +- examples/rl/drive/train/run.py | 176 +++++++++------ examples/rl/platoon/inference/__init__.py | 7 +- .../inference/contrib_policy/filter_obs.py | 85 ++++++-- .../inference/contrib_policy/format_action.py | 2 +- .../inference/contrib_policy/policy.py | 15 +- examples/rl/platoon/train/config.yaml | 18 +- examples/rl/platoon/train/env.py | 20 +- examples/rl/platoon/train/preprocess.py | 15 +- examples/rl/platoon/train/requirements.txt | 138 ++++-------- examples/rl/platoon/train/reward.py | 38 ++-- examples/rl/platoon/train/run.py | 178 +++++++++------ .../map.net.xml | 203 ------------------ .../scenario.py | 100 --------- .../merge_exit_sumo_t_agents_2/scenario.py | 2 +- .../driving_smarts/v2023/config_1.yaml | 5 +- .../driving_smarts/v2023/config_2.yaml | 3 +- .../driving_smarts/v2023/config_3.yaml | 9 +- smarts/core/route_cache.py | 3 +- 28 files changed, 595 insertions(+), 765 deletions(-) delete mode 100644 scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml delete mode 100644 scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py diff --git a/docs/benchmarks/driving_smarts_2023_1.rst b/docs/benchmarks/driving_smarts_2023_1.rst index b0f2dd03fb..d3706f3e91 100644 --- a/docs/benchmarks/driving_smarts_2023_1.rst +++ b/docs/benchmarks/driving_smarts_2023_1.rst @@ -48,9 +48,24 @@ predictive models, etc, may be used to develop the policy. Several scenarios are provided for training. Their names and tasks are as follows. The desired task execution is illustrated in a gif by a trained baseline agent. -.. todo:: - - Provide sample training scenarios and corresponding ``.gif`` images showing a baseline model traversing the map. 
+**Driving SMARTS 2023.1 scenarios** + ++ :scenarios:`cruise_2lane_agents_1 ` ++ :scenarios:`cutin_2lane_agents_1 ` ++ :scenarios:`merge_exit_sumo_t_agents_1 ` ++ :scenarios:`overtake_2lane_agents_1 ` ++ :scenarios:`00a445fb-7293-4be6-adbc-e30c949b6cf7_agents_1 ` ++ :scenarios:`0a53dd99-2946-4b4d-ab66-c4d6fef97be2_agents_1 ` ++ :scenarios:`0a576bf1-66ae-495a-9c87-236f3fc2aa01_agents_1 ` + +**Driving SMARTS 2023.2 scenarios** + ++ :scenarios:`1_to_3lane_left_turn_sumo_c_agents_1 ` ++ :scenarios:`1_to_3lane_left_turn_middle_lane_c_agents_1 ` ++ :scenarios:`00b15e74-04a8-4bd4-9a78-eb24f0c0a980_agents_1 ` ++ :scenarios:`0a60b442-56b0-46c3-be45-cf166a182b67_agents_1 ` ++ :scenarios:`0a764a82-b44e-481e-97e7-05e1f1f925f6_agents_1 ` ++ :scenarios:`0bf054e3-7698-4b86-9c98-626df2dee9f4_agents_1 ` Observation space ----------------- @@ -264,6 +279,7 @@ Evaluate $ python3.8 -m venv ./.venv $ source ./.venv/bin/activate $ pip install --upgrade pip + $ pip install wheel==0.38.4 $ pip install -e .[camera_obs,argoverse,envision] $ scl zoo install examples/rl/drive/inference # For Driving SMARTS 2023.1 diff --git a/docs/benchmarks/driving_smarts_2023_3.rst b/docs/benchmarks/driving_smarts_2023_3.rst index 20902af2af..e0d956c817 100644 --- a/docs/benchmarks/driving_smarts_2023_3.rst +++ b/docs/benchmarks/driving_smarts_2023_3.rst @@ -42,6 +42,20 @@ The desired task execution is illustrated in a gif by a trained baseline agent. .. image:: /_static/driving_smarts_2023/platoon_straight_2lane_agents_1.gif ++ :scenarios:`straight_2lane_sumo_agents_1 ` ++ :scenarios:`straight_2lane_sumo_t_agents_1 ` ++ :scenarios:`straight_3lanes_sumo_agents_1 ` ++ :scenarios:`straight_3lanes_sumo_t_agents_1 ` ++ :scenarios:`straight_3lanes_sumo_t_agents_2 ` ++ :scenarios:`merge_exit_sumo_agents_1 ` ++ :scenarios:`merge_exit_sumo_t_agents_1 ` ++ :scenarios:`merge_exit_sumo_t_agents_2 ` ++ :scenarios:`ff239c9d-e4ff-4acc-bad5-bd55648c212e_0_agents_1 ` ++ :scenarios:`ff239c9d-e4ff-4acc-bad5-bd55648c212e_agents_1 ` ++ :scenarios:`ff6dc43b-dd27-4fe4-94b6-5c1b3940daed_agents_1 ` ++ :scenarios:`ff9619b5-b0c0-4942-b5d8-df6a5814f8a2_agents_1 ` ++ :scenarios:`ffd10ec2-715b-48af-a89d-b11f79927f63_agents_1 ` + Observation space ----------------- @@ -254,6 +268,7 @@ Evaluate $ python3.8 -m venv ./.venv $ source ./.venv/bin/activate $ pip install --upgrade pip + $ pip install wheel==0.38.4 $ pip install -e .[camera_obs,argoverse,envision] $ scl zoo install examples/rl/platoon/inference $ scl benchmark run driving_smarts_2023_3 examples.rl.platoon.inference:contrib-agent-v0 --auto-install diff --git a/examples/rl/drive/inference/__init__.py b/examples/rl/drive/inference/__init__.py index 2692e0fcef..a330eaa041 100644 --- a/examples/rl/drive/inference/__init__.py +++ b/examples/rl/drive/inference/__init__.py @@ -16,9 +16,9 @@ def entry_point(**kwargs): road_waypoints=False, signals=False, top_down_rgb=RGB( - width=128, - height=128, - resolution=80 / 128, # m/pixels + width=200, + height=200, + resolution=80 / 200, # m/pixels ), ) @@ -26,6 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. 
} return AgentSpec( diff --git a/examples/rl/drive/inference/contrib_policy/filter_obs.py b/examples/rl/drive/inference/contrib_policy/filter_obs.py index 6bd1f3d259..e366355c16 100644 --- a/examples/rl/drive/inference/contrib_policy/filter_obs.py +++ b/examples/rl/drive/inference/contrib_policy/filter_obs.py @@ -1,5 +1,5 @@ import math -from typing import Any, Dict, Sequence +from typing import Any, Dict, Sequence, Tuple import gym import numpy as np @@ -11,11 +11,17 @@ class FilterObs: """Filter only the selected observation parameters.""" - def __init__(self, top_down_rgb: RGB): + def __init__( + self, top_down_rgb: RGB, crop: Tuple[int, int, int, int] = (0, 0, 0, 0) + ): self.observation_space = gym.spaces.Box( low=0, high=255, - shape=(3, top_down_rgb.height, top_down_rgb.width), + shape=( + 3, + top_down_rgb.height - crop[2] - crop[3], + top_down_rgb.width - crop[0] - crop[1], + ), dtype=np.uint8, ) @@ -26,7 +32,9 @@ def __init__(self, top_down_rgb: RGB): self._lane_divider_color = np.array(SceneColors.LaneDivider.value[0:3]) * 255 self._edge_divider_color = np.array(SceneColors.EdgeDivider.value[0:3]) * 255 self._ego_color = np.array(SceneColors.Agent.value[0:3]) * 255 + self._goal_color = np.array(Colors.Purple.value[0:3]) * 255 + self._blur_radius = 2 self._res = top_down_rgb.resolution h = top_down_rgb.height w = top_down_rgb.width @@ -43,6 +51,14 @@ def __init__(self, top_down_rgb: RGB): self._rgb_mask = np.zeros(shape=(h, w, 3), dtype=np.uint8) self._rgb_mask[shape[0][0] : shape[0][1], shape[1][0] : shape[1][1], :] = 1 + self._crop = crop + assert ( + self._crop[0] < np.floor(w / 2) + and self._crop[1] < np.floor(w / 2) + and self._crop[2] < np.floor(h / 2) + and self._crop[3] < np.floor(h / 2) + ), f"Expected crop to be less than half the image size, got crop={self._crop} for an image of size[h,w]=[{h},{w}]." 
+ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: """Adapts the environment's observation.""" # fmt: off @@ -62,8 +78,8 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: # Superimpose waypoints onto rgb image wps = obs["waypoint_paths"]["position"][0:11, 3:, 0:3] for path in wps[:]: - wps_valid = wps_to_pixels( - wps=path, + wps_valid = points_to_pixels( + points=path, ego_pos=ego_pos, ego_heading=ego_heading, w=w, @@ -75,6 +91,28 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: if all(rgb_ego[img_y, img_x, :] == self._no_color): rgb_ego[img_y, img_x, :] = self._wps_color + # Superimpose goal position onto rgb image + if not all((goal:=obs["ego_vehicle_state"]["mission"]["goal_position"]) == np.zeros((3,))): + goal_pixel = points_to_pixels( + points=np.expand_dims(goal,axis=0), + ego_pos=ego_pos, + ego_heading=ego_heading, + w=w, + h=h, + res=self._res, + ) + if len(goal_pixel) != 0: + img_x, img_y = goal_pixel[0][0], goal_pixel[0][1] + if all(rgb_ego[img_y, img_x, :] == self._no_color) or all(rgb_ego[img_y, img_x, :] == self._wps_color): + rgb_ego[ + max(img_y-self._blur_radius,0):min(img_y+self._blur_radius,h), + max(img_x-self._blur_radius,0):min(img_x+self._blur_radius,w), + :, + ] = self._goal_color + + # Crop image + rgb_ego = rgb_ego[self._crop[2]:h-self._crop[3],self._crop[0]:w-self._crop[1],:] + # Channel first rgb rgb_ego = rgb_ego.transpose(2, 0, 1) @@ -123,14 +161,19 @@ def replace_color( # fmt: on -def wps_to_pixels( - wps: np.ndarray, ego_pos: np.ndarray, ego_heading: float, w: int, h: int, res: float +def points_to_pixels( + points: np.ndarray, + ego_pos: np.ndarray, + ego_heading: float, + w: int, + h: int, + res: float, ) -> np.ndarray: - """Converts waypoints into pixel coordinates in order to superimpose the - waypoints onto the RGB image. + """Converts points into pixel coordinates in order to superimpose the + points onto the RGB image. Args: - wps (np.ndarray): Waypoints for a single route. Shape (n,3). + points (np.ndarray): Array of points. Shape (n,3). ego_pos (np.ndarray): Ego position. Shape = (3,). ego_heading (float): Ego heading in radians. w (int): Width of RGB image @@ -139,19 +182,19 @@ def wps_to_pixels( ground_size/image_size. Returns: - np.ndarray: Array of waypoint coordinates on the RGB image. Shape = (m,3). + np.ndarray: Array of point coordinates on the RGB image. Shape = (m,3). 
""" # fmt: off - mask = [False if all(point == np.zeros(3,)) else True for point in wps] - wps_nonzero = wps[mask] - wps_delta = wps_nonzero - ego_pos - wps_rotated = rotate_axes(wps_delta, theta=ego_heading) - wps_pixels = wps_rotated / np.array([res, res, res]) - wps_overlay = np.array([w / 2, h / 2, 0]) + wps_pixels * np.array([1, -1, 1]) - wps_rfloat = np.rint(wps_overlay) - wps_valid = wps_rfloat[(wps_rfloat[:,0] >= 0) & (wps_rfloat[:,0] < w) & (wps_rfloat[:,1] >= 0) & (wps_rfloat[:,1] < h)] - wps_rint = wps_valid.astype(int) - return wps_rint + mask = [False if all(point == np.zeros(3,)) else True for point in points] + points_nonzero = points[mask] + points_delta = points_nonzero - ego_pos + points_rotated = rotate_axes(points_delta, theta=ego_heading) + points_pixels = points_rotated / np.array([res, res, res]) + points_overlay = np.array([w / 2, h / 2, 0]) + points_pixels * np.array([1, -1, 1]) + points_rfloat = np.rint(points_overlay) + points_valid = points_rfloat[(points_rfloat[:,0] >= 0) & (points_rfloat[:,0] < w) & (points_rfloat[:,1] >= 0) & (points_rfloat[:,1] < h)] + points_rint = points_valid.astype(int) + return points_rint # fmt: on diff --git a/examples/rl/drive/inference/contrib_policy/policy.py b/examples/rl/drive/inference/contrib_policy/policy.py index f63a72e70c..8d80b0894e 100644 --- a/examples/rl/drive/inference/contrib_policy/policy.py +++ b/examples/rl/drive/inference/contrib_policy/policy.py @@ -11,22 +11,20 @@ class Policy(Agent): """Policy class to be submitted by the user. This class will be loaded and tested during evaluation.""" - def __init__(self, num_stack, top_down_rgb, action_space_type): + def __init__(self, num_stack, top_down_rgb, crop, action_space_type): """All policy initialization matters, including loading of model, is performed here. To be implemented by the user. 
""" - import stable_baselines3 as sb3lib from contrib_policy import network from contrib_policy.filter_obs import FilterObs from contrib_policy.format_action import FormatAction from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict - model_path = Path(__file__).resolve().parents[0] / "saved_model" - self.model = sb3lib.PPO.load(model_path) + self._model = self._get_model() - self._filter_obs = FilterObs(top_down_rgb=top_down_rgb) + self._filter_obs = FilterObs(top_down_rgb=top_down_rgb, crop=crop) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, num_stack=num_stack, @@ -43,7 +41,7 @@ def __init__(self, num_stack, top_down_rgb, action_space_type): def act(self, obs): """Mandatory act function to be implemented by user.""" processed_obs = self._process(obs) - action, _ = self.model.predict(observation=processed_obs, deterministic=True) + action, _ = self._model.predict(observation=processed_obs, deterministic=True) formatted_action = self._format_action.format( action=int(action), prev_heading=obs["ego_vehicle_state"]["heading"] ) @@ -57,3 +55,8 @@ def _process(self, obs): obs = self._frame_stack.stack(obs) obs = self._make_dict.make(obs) return obs + + def _get_model(self): + import stable_baselines3 as sb3lib + + return sb3lib.PPO.load(path=Path(__file__).resolve().parents[0] / "saved_model") diff --git a/examples/rl/drive/train/config.yaml b/examples/rl/drive/train/config.yaml index 7c0ea860a4..b09e83a428 100644 --- a/examples/rl/drive/train/config.yaml +++ b/examples/rl/drive/train/config.yaml @@ -5,20 +5,27 @@ smarts: agent_locator: inference:contrib-agent-v0 env_id: smarts.env:driving-smarts-v2023 scenarios: - - scenarios/sumo/intersections/1_to_1lane_left_turn_c_agents_1 + - scenarios/sumo/straight/cruise_2lane_agents_1 + - scenarios/sumo/straight/cutin_2lane_agents_1 + - scenarios/sumo/straight/merge_exit_sumo_t_agents_1 + - scenarios/sumo/straight/overtake_2lane_agents_1 + + - scenarios/sumo/intersections/1_to_3lane_left_turn_sumo_c_agents_1 + - scenarios/sumo/intersections/1_to_3lane_left_turn_middle_lane_c_agents_1 # PPO algorithm alg: - n_steps: 2048 - batch_size: 512 + n_steps: 1024 + batch_size: 64 n_epochs: 4 target_kl: 0.1 + # ent_coef: 0.01 # For exploration. Range = 0 to 0.01 # Training over all scenarios epochs: 500 # Number of training loops. # Training per scenario - train_steps: 10_000 - checkpoint_freq: 10_000 # Save a model every checkpoint_freq calls to env.step(). - eval_freq: 10_000 # Evaluate the trained model every eval_freq steps and save the best model. + train_steps: 4_096 + checkpoint_freq: 4_096 # Save a model every checkpoint_freq calls to env.step(). + eval_freq: 4_096 # Evaluate the trained model every eval_freq steps and save the best model. eval_eps: 5 # Number of evaluation epsiodes. 
diff --git a/examples/rl/drive/train/env.py b/examples/rl/drive/train/env.py index 6f1668b56f..a4dc0fea12 100644 --- a/examples/rl/drive/train/env.py +++ b/examples/rl/drive/train/env.py @@ -1,16 +1,18 @@ import sys from pathlib import Path -# To import submission folder -sys.path.insert(0, str(Path(__file__).resolve().parents[2])) +# To import train folder +sys.path.insert(0, str(Path(__file__).resolve().parents[0])) import gymnasium as gym +from smarts.zoo.agent_spec import AgentSpec -def make_env(env_id, scenario, agent_interface, config, seed): + +def make_env(env_id, scenario, agent_spec: AgentSpec, config, seed): from preprocess import Preprocess + from reward import Reward from stable_baselines3.common.monitor import Monitor - from train.reward import Reward from smarts.env.gymnasium.wrappers.api_reversion import Api021Reversion from smarts.env.gymnasium.wrappers.single_agent import SingleAgent @@ -18,15 +20,15 @@ def make_env(env_id, scenario, agent_interface, config, seed): env = gym.make( env_id, scenario=scenario, - agent_interface=agent_interface, + agent_interface=agent_spec.interface, seed=seed, sumo_headless=not config.sumo_gui, # If False, enables sumo-gui display. headless=not config.head, # If False, enables Envision display. ) - env = Reward(env) - env = SingleAgent(env) - env = Api021Reversion(env) - env = Preprocess(env, agent_interface) + env = Reward(env=env, crop=agent_spec.agent_params["crop"]) + env = SingleAgent(env=env) + env = Api021Reversion(env=env) + env = Preprocess(env=env, agent_spec=agent_spec) env = Monitor(env) return env diff --git a/examples/rl/drive/train/preprocess.py b/examples/rl/drive/train/preprocess.py index 38a59cb0f9..ec2f890809 100644 --- a/examples/rl/drive/train/preprocess.py +++ b/examples/rl/drive/train/preprocess.py @@ -4,17 +4,20 @@ from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict -from smarts.core.agent_interface import AgentInterface +from smarts.zoo.agent_spec import AgentSpec class Preprocess(gym.Wrapper): - def __init__(self, env: gym.Env, agent_interface: AgentInterface): + def __init__(self, env: gym.Env, agent_spec: AgentSpec): super().__init__(env) - self._filter_obs = FilterObs(top_down_rgb=agent_interface.top_down_rgb) + self._filter_obs = FilterObs( + top_down_rgb=agent_spec.interface.top_down_rgb, + crop=agent_spec.agent_params["crop"], + ) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, - num_stack=3, + num_stack=agent_spec.agent_params["num_stack"], stack_axis=0, ) self._frame_stack.reset() @@ -23,7 +26,9 @@ def __init__(self, env: gym.Env, agent_interface: AgentInterface): self.observation_space = self._make_dict.observation_space self._prev_heading: float - self._format_action = FormatAction(agent_interface.action) + self._format_action = FormatAction( + action_space_type=agent_spec.interface.action + ) self.action_space = self._format_action.action_space def _process(self, obs): diff --git a/examples/rl/drive/train/requirements.txt b/examples/rl/drive/train/requirements.txt index 72c1d1db69..90d5d558db 100644 --- a/examples/rl/drive/train/requirements.txt +++ b/examples/rl/drive/train/requirements.txt @@ -1,152 +1,98 @@ absl-py==1.4.0 -alabaster==0.7.13 -astroid==2.15.0 -asttokens==2.2.1 -attrs==22.2.0 +argcomplete==3.0.8 +attrs==23.1.0 Automat==22.10.0 -Babel==2.12.1 -backcall==0.2.0 -black==22.6.0 -cached-property==1.5.2 -cachetools==5.3.0 -certifi==2022.12.7 -cfgv==3.3.1 +av==10.0.0 +av2==0.2.1 +cachetools==5.3.1 +certifi==2023.5.7 
charset-normalizer==3.1.0 click==8.1.3 cloudpickle==1.6.0 +colorlog==6.7.0 constantly==15.1.0 contourpy==1.0.7 cycler==0.11.0 -decorator==5.1.1 -dill==0.3.6 distlib==0.3.6 -docutils==0.18.1 -eclipse-sumo==1.16.0 -executing==1.2.0 -filelock==3.9.1 -fonttools==4.39.0 +eclipse-sumo==1.17.0 +filelock==3.12.0 +fonttools==4.39.4 future==0.18.3 -google-auth==2.16.2 +google-auth==2.19.1 google-auth-oauthlib==0.4.6 -grpcio==1.52.0 -grpcio-tools==1.32.0 +grpcio==1.55.0 gym==0.21.0 gymnasium==0.27.0 gymnasium-notices==0.0.1 hyperlink==21.0.0 -identify==2.5.20 idna==3.4 ijson==3.2.0.post0 -imagesize==1.4.1 -importlab==0.8 importlib-metadata==4.13.0 importlib-resources==5.12.0 incremental==22.10.0 -ipython==8.11.0 -isort==5.7.0 -jax-jumpy==0.2.0 -jedi==0.18.2 -Jinja2==3.1.2 -jsonpatch==1.32 -jsonpointer==2.3 +jax-jumpy==1.0.0 +joblib==1.2.0 kiwisolver==1.4.4 -lazy-object-proxy==1.9.0 -libcst==0.4.9 -Markdown==3.4.1 +llvmlite==0.40.0 +Markdown==3.4.3 markdown-it-py==2.2.0 -MarkupSafe==2.1.2 +MarkupSafe==2.1.3 matplotlib==3.7.1 -matplotlib-inline==0.1.6 -mccabe==0.7.0 -mdit-py-plugins==0.3.5 mdurl==0.1.2 -mypy-extensions==1.0.0 -myst-parser==1.0.0 -networkx==3.0 -ninja==1.11.1 -nodeenv==1.7.0 +nox==2023.4.22 +numba==0.57.0 numpy==1.23.5 nvidia-cublas-cu11==11.10.3.66 nvidia-cuda-nvrtc-cu11==11.7.99 nvidia-cuda-runtime-cu11==11.7.99 nvidia-cudnn-cu11==8.5.0.96 oauthlib==3.2.2 -packaging==23.0 +opencv-python==4.7.0.72 +packaging==23.1 Panda3D==1.10.9 panda3d-gltf==0.13 panda3d-simplepbr==0.10 -pandas==1.5.3 -parso==0.8.3 -pathspec==0.11.1 -pbr==5.11.1 -pexpect==4.8.0 -pickleshare==0.7.5 -Pillow==9.4.0 -platformdirs==3.1.1 -pre-commit==2.16.0 -prompt-toolkit==3.0.38 +pandas==2.0.2 +Pillow==9.5.0 +platformdirs==3.5.1 protobuf==3.20.3 -psutil==5.9.4 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -pybullet==3.0.6 -Pygments==2.14.0 -pylint==2.17.0 +psutil==5.9.5 +pyarrow==12.0.0 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pybullet==3.2.5 +Pygments==2.15.1 pyparsing==3.0.9 +pyproj==3.5.0 python-dateutil==2.8.2 -pytype==2022.1.13 -pytz==2022.7.1 +pytz==2023.3 PyYAML==6.0 -requests==2.28.2 +requests==2.31.0 requests-oauthlib==1.3.1 -rich==13.3.2 +rich==13.4.1 rsa==4.9 Rtree==1.0.1 scipy==1.10.1 -sh==2.0.2 shapely==2.0.1 Shimmy==0.2.1 six==1.16.0 -snowballstemmer==2.2.0 -Sphinx==6.1.3 -sphinx-click==4.4.0 -sphinx-rtd-theme==1.2.0 -sphinxcontrib-apidoc==0.3.0 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jquery==4.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 stable-baselines3==1.7.0 -stack-data==0.6.2 tableprint==0.9.1 -tabulate==0.9.0 tensorboard==2.12.0 tensorboard-data-server==0.7.0 tensorboard-plugin-wit==1.8.1 -tokenize-rt==5.0.0 -toml==0.10.2 -tomli==2.0.1 -tomlkit==0.11.6 torch==1.13.1 torchinfo==1.7.2 -tornado==6.2 -traitlets==5.9.0 +tornado==6.3.2 trimesh==3.9.29 Twisted==22.10.0 -typing-inspect==0.8.0 -typing_extensions==4.5.0 -urllib3==1.26.15 -virtualenv==20.21.0 -visdom==0.2.4 +typing_extensions==4.6.3 +tzdata==2023.3 +urllib3==1.26.16 +virtualenv==20.23.0 wcwidth==0.2.6 -websocket-client==1.5.1 -Werkzeug==2.2.3 -wrapt==1.15.0 +websocket-client==1.5.2 +Werkzeug==2.3.5 yattag==1.15.1 zipp==3.15.0 -zope.interface==5.5.2 +zope.interface==6.0 diff --git a/examples/rl/drive/train/reward.py b/examples/rl/drive/train/reward.py index cee2825e26..ebffc2c7ee 100644 --- a/examples/rl/drive/train/reward.py +++ b/examples/rl/drive/train/reward.py @@ -1,14 +1,15 @@ import gymnasium 
as gym import numpy as np -from smarts.core.colors import SceneColors +from typing import Tuple class Reward(gym.Wrapper): - def __init__(self, env): + def __init__(self, env: gym.Env, crop: Tuple[int, int, int, int]): """Constructor for the Reward wrapper.""" super().__init__(env) self._total_dist = {} + self._crop = crop def reset(self, *, seed=None, options=None): self._total_dist = {} @@ -85,7 +86,7 @@ def _reward(self, obs, env_reward): if agent_obs["events"]["reached_goal"]: reward[agent_id] += np.float64(30) - # Reward for distance travelled by driving + # Reward for distance travelled in one step reward[agent_id] += np.float64(env_reward[agent_id]) return reward diff --git a/examples/rl/drive/train/run.py b/examples/rl/drive/train/run.py index eba4f2ff07..b4f2221cd2 100644 --- a/examples/rl/drive/train/run.py +++ b/examples/rl/drive/train/run.py @@ -22,23 +22,35 @@ from contrib_policy import network from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback from stable_baselines3.common.evaluation import evaluate_policy +from torchinfo import summary from train.env import make_env from train.utils import ObjDict from smarts.zoo import registry +from smarts.zoo.agent_spec import AgentSpec -print("\nTorch cuda is available: ", th.cuda.is_available(), "\n") +print("\n") +print(f"Torch cuda is available: {th.cuda.is_available()}") +device = th.device("cuda" if th.cuda.is_available() else "cpu") +print(f"Torch device: {device}") +print("\n") warnings.simplefilter("ignore", category=DeprecationWarning) warnings.simplefilter("ignore", category=ResourceWarning) # pytype: disable=attribute-error -def main(args: argparse.Namespace): - # Load config file. +def load_config(): + """Load config file.""" parent_dir = Path(__file__).resolve().parent config_file = yaml.safe_load((parent_dir / "config.yaml").read_text()) config = ObjDict(config_file["smarts"]) + return config + + +def main(args: argparse.Namespace): + parent_dir = Path(__file__).resolve().parent + config = load_config() # Load env config. config.mode = args.mode @@ -76,22 +88,28 @@ def main(args: argparse.Namespace): envs_train[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) envs_eval[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) # Run training or evaluation. 
- run(envs_train=envs_train, envs_eval=envs_eval, config=config) - - print("Finished training ...") + if config.mode == "train": + train( + envs_train=envs_train, + envs_eval=envs_eval, + config=config, + agent_spec=agent_spec, + ) + else: + evaluate(envs=envs_eval, config=config, agent_spec=agent_spec) # Close all environments for env in envs_train.values(): @@ -100,79 +118,95 @@ def main(args: argparse.Namespace): env.close() -def run( +def train( envs_train: Dict[str, gym.Env], envs_eval: Dict[str, gym.Env], config: Dict[str, Any], + agent_spec: AgentSpec, ): + print("\nStart training.\n") + scenarios_iter = cycle(config.scenarios) + save_dir = config.logdir / "train" + save_dir.mkdir(parents=True, exist_ok=True) + checkpoint_callback = CheckpointCallback( + save_freq=config.checkpoint_freq, + save_path=config.logdir / "checkpoint", + name_prefix="PPO", + ) - if config.mode == "train": - print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) - model = sb3lib.PPO( - env=envs_train[next(scenarios_iter)], - tensorboard_log=config.logdir / "tensorboard", - verbose=1, - **network.combined_extractor(config), - ) + for index in range(config.epochs): + scen = next(scenarios_iter) + env_train = envs_train[scen] + env_eval = envs_eval[scen] + print(f"\nTraining on {scen}.\n") - # Print model summary - # from torchinfo import summary - # td = {"rgb":th.zeros(1,9,128,128)} - # summary(model.policy, input_data=[td], depth=5) - # input("Press any key to continue ...") - - for index in range(config.epochs): - scen = next(scenarios_iter) - env_train = envs_train[scen] - env_eval = envs_eval[scen] - print(f"\nTraining on {scen}.\n") - checkpoint_callback = CheckpointCallback( - save_freq=config.checkpoint_freq, - save_path=config.logdir / "checkpoint", - name_prefix=f"PPO", - ) - eval_callback = EvalCallback( - env_eval, - best_model_save_path=config.logdir / "eval", - n_eval_episodes=3, - eval_freq=config.eval_freq, - deterministic=True, - render=False, + if index == 0: + model = sb3lib.PPO( + env=env_train, + tensorboard_log=config.logdir / "tensorboard", verbose=1, + **network.combined_extractor(config), ) - model.set_env(env_train) - model.learn( - total_timesteps=config.train_steps, - callback=[checkpoint_callback, eval_callback], - reset_num_timesteps=False, - ) + else: + model = sb3lib.PPO.load(save_dir / "intermediate") + + eval_callback = EvalCallback( + env_eval, + best_model_save_path=config.logdir / "eval", + n_eval_episodes=3, + eval_freq=config.eval_freq, + deterministic=True, + render=False, + verbose=1, + ) + model.set_env(env_train) + model.learn( + total_timesteps=config.train_steps, + callback=[checkpoint_callback, eval_callback], + reset_num_timesteps=False, + ) + model.save(save_dir / "intermediate") - # Save trained model. 
- save_dir = config.logdir / "train" - save_dir.mkdir(parents=True, exist_ok=True) - time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") - model.save(save_dir / ("model_" + time)) - print("\nSaved trained model.\n") + print("Finished training.") - if config.mode == "evaluate": - print("\nEvaluate policy.\n") - device = th.device("cpu") - model = sb3lib.PPO.load(config.model, print_system_info=True, device=device) - - # Print model summary - # from torchinfo import summary - # td = {"rgb":th.zeros(1,9,128,128)} - # summary(model.policy, input_data=[td], depth=5) - # input("Press any key to continue ...") - - for env_name, env_eval in envs_eval.items(): - print(f"\nEvaluating env {env_name}.") - mean_reward, std_reward = evaluate_policy( - model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True - ) - print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n") - print("\nFinished evaluating.\n") + # Save trained model. + time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + model.save(save_dir / ("model_" + time)) + print("\nSaved trained model.\n") + + # Print model summary + crop = agent_spec.agent_params["crop"] + top_down_rgb = agent_spec.interface.top_down_rgb + h = top_down_rgb.height - crop[2] - crop[3] + w = top_down_rgb.width - crop[0] - crop[1] + td = {"rgb": th.zeros(1, 9, h, w)} + summary(model.policy, input_data=[td], depth=5) + + +def evaluate( + envs: Dict[str, gym.Env], + config: Dict[str, Any], + agent_spec: AgentSpec, +): + print("\nEvaluate policy.\n") + device = th.device("cpu") + model = sb3lib.PPO.load(config.model, print_system_info=True, device=device) + + # Print model summary + crop = agent_spec.agent_params["crop"] + top_down_rgb = agent_spec.interface.top_down_rgb + h = top_down_rgb.height - crop[2] - crop[3] + w = top_down_rgb.width - crop[0] - crop[1] + td = {"rgb": th.zeros(1, 9, h, w)} + summary(model.policy, input_data=[td], depth=5) + + for env_name, env_eval in envs.items(): + print(f"\nEvaluating env {env_name}.") + mean_reward, std_reward = evaluate_policy( + model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True + ) + print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n") + print("\nFinished evaluating.\n") if __name__ == "__main__": diff --git a/examples/rl/platoon/inference/__init__.py b/examples/rl/platoon/inference/__init__.py index a63298644b..e153510ffa 100644 --- a/examples/rl/platoon/inference/__init__.py +++ b/examples/rl/platoon/inference/__init__.py @@ -16,9 +16,9 @@ def entry_point(**kwargs): road_waypoints=False, signals=False, top_down_rgb=RGB( - width=128, - height=128, - resolution=80 / 128, # m/pixels + width=200, + height=200, + resolution=80 / 200, # m/pixels ), ) @@ -26,6 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. 
} return AgentSpec( diff --git a/examples/rl/platoon/inference/contrib_policy/filter_obs.py b/examples/rl/platoon/inference/contrib_policy/filter_obs.py index 6bd1f3d259..e366355c16 100644 --- a/examples/rl/platoon/inference/contrib_policy/filter_obs.py +++ b/examples/rl/platoon/inference/contrib_policy/filter_obs.py @@ -1,5 +1,5 @@ import math -from typing import Any, Dict, Sequence +from typing import Any, Dict, Sequence, Tuple import gym import numpy as np @@ -11,11 +11,17 @@ class FilterObs: """Filter only the selected observation parameters.""" - def __init__(self, top_down_rgb: RGB): + def __init__( + self, top_down_rgb: RGB, crop: Tuple[int, int, int, int] = (0, 0, 0, 0) + ): self.observation_space = gym.spaces.Box( low=0, high=255, - shape=(3, top_down_rgb.height, top_down_rgb.width), + shape=( + 3, + top_down_rgb.height - crop[2] - crop[3], + top_down_rgb.width - crop[0] - crop[1], + ), dtype=np.uint8, ) @@ -26,7 +32,9 @@ def __init__(self, top_down_rgb: RGB): self._lane_divider_color = np.array(SceneColors.LaneDivider.value[0:3]) * 255 self._edge_divider_color = np.array(SceneColors.EdgeDivider.value[0:3]) * 255 self._ego_color = np.array(SceneColors.Agent.value[0:3]) * 255 + self._goal_color = np.array(Colors.Purple.value[0:3]) * 255 + self._blur_radius = 2 self._res = top_down_rgb.resolution h = top_down_rgb.height w = top_down_rgb.width @@ -43,6 +51,14 @@ def __init__(self, top_down_rgb: RGB): self._rgb_mask = np.zeros(shape=(h, w, 3), dtype=np.uint8) self._rgb_mask[shape[0][0] : shape[0][1], shape[1][0] : shape[1][1], :] = 1 + self._crop = crop + assert ( + self._crop[0] < np.floor(w / 2) + and self._crop[1] < np.floor(w / 2) + and self._crop[2] < np.floor(h / 2) + and self._crop[3] < np.floor(h / 2) + ), f"Expected crop to be less than half the image size, got crop={self._crop} for an image of size[h,w]=[{h},{w}]." 
+ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: """Adapts the environment's observation.""" # fmt: off @@ -62,8 +78,8 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: # Superimpose waypoints onto rgb image wps = obs["waypoint_paths"]["position"][0:11, 3:, 0:3] for path in wps[:]: - wps_valid = wps_to_pixels( - wps=path, + wps_valid = points_to_pixels( + points=path, ego_pos=ego_pos, ego_heading=ego_heading, w=w, @@ -75,6 +91,28 @@ def filter(self, obs: Dict[str, Any]) -> Dict[str, Any]: if all(rgb_ego[img_y, img_x, :] == self._no_color): rgb_ego[img_y, img_x, :] = self._wps_color + # Superimpose goal position onto rgb image + if not all((goal:=obs["ego_vehicle_state"]["mission"]["goal_position"]) == np.zeros((3,))): + goal_pixel = points_to_pixels( + points=np.expand_dims(goal,axis=0), + ego_pos=ego_pos, + ego_heading=ego_heading, + w=w, + h=h, + res=self._res, + ) + if len(goal_pixel) != 0: + img_x, img_y = goal_pixel[0][0], goal_pixel[0][1] + if all(rgb_ego[img_y, img_x, :] == self._no_color) or all(rgb_ego[img_y, img_x, :] == self._wps_color): + rgb_ego[ + max(img_y-self._blur_radius,0):min(img_y+self._blur_radius,h), + max(img_x-self._blur_radius,0):min(img_x+self._blur_radius,w), + :, + ] = self._goal_color + + # Crop image + rgb_ego = rgb_ego[self._crop[2]:h-self._crop[3],self._crop[0]:w-self._crop[1],:] + # Channel first rgb rgb_ego = rgb_ego.transpose(2, 0, 1) @@ -123,14 +161,19 @@ def replace_color( # fmt: on -def wps_to_pixels( - wps: np.ndarray, ego_pos: np.ndarray, ego_heading: float, w: int, h: int, res: float +def points_to_pixels( + points: np.ndarray, + ego_pos: np.ndarray, + ego_heading: float, + w: int, + h: int, + res: float, ) -> np.ndarray: - """Converts waypoints into pixel coordinates in order to superimpose the - waypoints onto the RGB image. + """Converts points into pixel coordinates in order to superimpose the + points onto the RGB image. Args: - wps (np.ndarray): Waypoints for a single route. Shape (n,3). + points (np.ndarray): Array of points. Shape (n,3). ego_pos (np.ndarray): Ego position. Shape = (3,). ego_heading (float): Ego heading in radians. w (int): Width of RGB image @@ -139,19 +182,19 @@ def wps_to_pixels( ground_size/image_size. Returns: - np.ndarray: Array of waypoint coordinates on the RGB image. Shape = (m,3). + np.ndarray: Array of point coordinates on the RGB image. Shape = (m,3). 
""" # fmt: off - mask = [False if all(point == np.zeros(3,)) else True for point in wps] - wps_nonzero = wps[mask] - wps_delta = wps_nonzero - ego_pos - wps_rotated = rotate_axes(wps_delta, theta=ego_heading) - wps_pixels = wps_rotated / np.array([res, res, res]) - wps_overlay = np.array([w / 2, h / 2, 0]) + wps_pixels * np.array([1, -1, 1]) - wps_rfloat = np.rint(wps_overlay) - wps_valid = wps_rfloat[(wps_rfloat[:,0] >= 0) & (wps_rfloat[:,0] < w) & (wps_rfloat[:,1] >= 0) & (wps_rfloat[:,1] < h)] - wps_rint = wps_valid.astype(int) - return wps_rint + mask = [False if all(point == np.zeros(3,)) else True for point in points] + points_nonzero = points[mask] + points_delta = points_nonzero - ego_pos + points_rotated = rotate_axes(points_delta, theta=ego_heading) + points_pixels = points_rotated / np.array([res, res, res]) + points_overlay = np.array([w / 2, h / 2, 0]) + points_pixels * np.array([1, -1, 1]) + points_rfloat = np.rint(points_overlay) + points_valid = points_rfloat[(points_rfloat[:,0] >= 0) & (points_rfloat[:,0] < w) & (points_rfloat[:,1] >= 0) & (points_rfloat[:,1] < h)] + points_rint = points_valid.astype(int) + return points_rint # fmt: on diff --git a/examples/rl/platoon/inference/contrib_policy/format_action.py b/examples/rl/platoon/inference/contrib_policy/format_action.py index 9c3ba69f79..532770b3a1 100644 --- a/examples/rl/platoon/inference/contrib_policy/format_action.py +++ b/examples/rl/platoon/inference/contrib_policy/format_action.py @@ -13,7 +13,7 @@ def __init__(self, action_space_type: ActionSpaceType): else: raise Exception(f"Unknown action space type {action_space_type}.") - def format(self, action): + def format(self, action: int): """Adapts the action input to the wrapped environment. Note: Users should not directly call this method. diff --git a/examples/rl/platoon/inference/contrib_policy/policy.py b/examples/rl/platoon/inference/contrib_policy/policy.py index 10d81044d4..979fdffa17 100644 --- a/examples/rl/platoon/inference/contrib_policy/policy.py +++ b/examples/rl/platoon/inference/contrib_policy/policy.py @@ -11,22 +11,20 @@ class Policy(Agent): """Policy class to be submitted by the user. This class will be loaded and tested during evaluation.""" - def __init__(self, num_stack, top_down_rgb, action_space_type): + def __init__(self, num_stack, top_down_rgb, crop, action_space_type): """All policy initialization matters, including loading of model, is performed here. To be implemented by the user. 
""" - import stable_baselines3 as sb3lib from contrib_policy import network from contrib_policy.filter_obs import FilterObs from contrib_policy.format_action import FormatAction from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict - model_path = Path(__file__).resolve().parents[0] / "saved_model" - self.model = sb3lib.PPO.load(model_path) + self._model = self._get_model() - self._filter_obs = FilterObs(top_down_rgb=top_down_rgb) + self._filter_obs = FilterObs(top_down_rgb=top_down_rgb, crop=crop) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, num_stack=num_stack, @@ -43,7 +41,7 @@ def __init__(self, num_stack, top_down_rgb, action_space_type): def act(self, obs): """Mandatory act function to be implemented by user.""" processed_obs = self._process(obs) - action, _ = self.model.predict(observation=processed_obs, deterministic=True) + action, _ = self._model.predict(observation=processed_obs, deterministic=True) formatted_action = self._format_action.format(action=int(action)) return formatted_action @@ -55,3 +53,8 @@ def _process(self, obs): obs = self._frame_stack.stack(obs) obs = self._make_dict.make(obs) return obs + + def _get_model(self): + import stable_baselines3 as sb3lib + + return sb3lib.PPO.load(path=Path(__file__).resolve().parents[0] / "saved_model") diff --git a/examples/rl/platoon/train/config.yaml b/examples/rl/platoon/train/config.yaml index 0eb347937f..8aaa324203 100644 --- a/examples/rl/platoon/train/config.yaml +++ b/examples/rl/platoon/train/config.yaml @@ -5,20 +5,26 @@ smarts: agent_locator: inference:contrib-agent-v0 env_id: smarts.env:platoon-v0 scenarios: - - scenarios/sumo/platoon/merge_exit_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_1 # PPO algorithm alg: - n_steps: 2048 - batch_size: 512 + n_steps: 1024 + batch_size: 64 n_epochs: 4 target_kl: 0.1 + # ent_coef: 0.01 # For exploration. Range = 0 to 0.01 # Training over all scenarios epochs: 500 # Number of training loops. # Training per scenario - train_steps: 10_000 - checkpoint_freq: 10_000 # Save a model every checkpoint_freq calls to env.step(). - eval_freq: 10_000 # Evaluate the trained model every eval_freq steps and save the best model. + train_steps: 4_096 + checkpoint_freq: 4_096 # Save a model every checkpoint_freq calls to env.step(). + eval_freq: 4_096 # Evaluate the trained model every eval_freq steps and save the best model. eval_eps: 5 # Number of evaluation epsiodes. 
diff --git a/examples/rl/platoon/train/env.py b/examples/rl/platoon/train/env.py index 6f1668b56f..a4dc0fea12 100644 --- a/examples/rl/platoon/train/env.py +++ b/examples/rl/platoon/train/env.py @@ -1,16 +1,18 @@ import sys from pathlib import Path -# To import submission folder -sys.path.insert(0, str(Path(__file__).resolve().parents[2])) +# To import train folder +sys.path.insert(0, str(Path(__file__).resolve().parents[0])) import gymnasium as gym +from smarts.zoo.agent_spec import AgentSpec -def make_env(env_id, scenario, agent_interface, config, seed): + +def make_env(env_id, scenario, agent_spec: AgentSpec, config, seed): from preprocess import Preprocess + from reward import Reward from stable_baselines3.common.monitor import Monitor - from train.reward import Reward from smarts.env.gymnasium.wrappers.api_reversion import Api021Reversion from smarts.env.gymnasium.wrappers.single_agent import SingleAgent @@ -18,15 +20,15 @@ def make_env(env_id, scenario, agent_interface, config, seed): env = gym.make( env_id, scenario=scenario, - agent_interface=agent_interface, + agent_interface=agent_spec.interface, seed=seed, sumo_headless=not config.sumo_gui, # If False, enables sumo-gui display. headless=not config.head, # If False, enables Envision display. ) - env = Reward(env) - env = SingleAgent(env) - env = Api021Reversion(env) - env = Preprocess(env, agent_interface) + env = Reward(env=env, crop=agent_spec.agent_params["crop"]) + env = SingleAgent(env=env) + env = Api021Reversion(env=env) + env = Preprocess(env=env, agent_spec=agent_spec) env = Monitor(env) return env diff --git a/examples/rl/platoon/train/preprocess.py b/examples/rl/platoon/train/preprocess.py index 5d9079a9d9..5a79d78960 100644 --- a/examples/rl/platoon/train/preprocess.py +++ b/examples/rl/platoon/train/preprocess.py @@ -4,17 +4,20 @@ from contrib_policy.frame_stack import FrameStack from contrib_policy.make_dict import MakeDict -from smarts.core.agent_interface import AgentInterface +from smarts.zoo.agent_spec import AgentSpec class Preprocess(gym.Wrapper): - def __init__(self, env: gym.Env, agent_interface: AgentInterface): + def __init__(self, env: gym.Env, agent_spec: AgentSpec): super().__init__(env) - self._filter_obs = FilterObs(top_down_rgb=agent_interface.top_down_rgb) + self._filter_obs = FilterObs( + top_down_rgb=agent_spec.interface.top_down_rgb, + crop=agent_spec.agent_params["crop"], + ) self._frame_stack = FrameStack( input_space=self._filter_obs.observation_space, - num_stack=3, + num_stack=agent_spec.agent_params["num_stack"], stack_axis=0, ) self._frame_stack.reset() @@ -22,7 +25,9 @@ def __init__(self, env: gym.Env, agent_interface: AgentInterface): self.observation_space = self._make_dict.observation_space - self._format_action = FormatAction(agent_interface.action) + self._format_action = FormatAction( + action_space_type=agent_spec.interface.action + ) self.action_space = self._format_action.action_space def _process(self, obs): diff --git a/examples/rl/platoon/train/requirements.txt b/examples/rl/platoon/train/requirements.txt index 72c1d1db69..90d5d558db 100644 --- a/examples/rl/platoon/train/requirements.txt +++ b/examples/rl/platoon/train/requirements.txt @@ -1,152 +1,98 @@ absl-py==1.4.0 -alabaster==0.7.13 -astroid==2.15.0 -asttokens==2.2.1 -attrs==22.2.0 +argcomplete==3.0.8 +attrs==23.1.0 Automat==22.10.0 -Babel==2.12.1 -backcall==0.2.0 -black==22.6.0 -cached-property==1.5.2 -cachetools==5.3.0 -certifi==2022.12.7 -cfgv==3.3.1 +av==10.0.0 +av2==0.2.1 +cachetools==5.3.1 +certifi==2023.5.7 
charset-normalizer==3.1.0 click==8.1.3 cloudpickle==1.6.0 +colorlog==6.7.0 constantly==15.1.0 contourpy==1.0.7 cycler==0.11.0 -decorator==5.1.1 -dill==0.3.6 distlib==0.3.6 -docutils==0.18.1 -eclipse-sumo==1.16.0 -executing==1.2.0 -filelock==3.9.1 -fonttools==4.39.0 +eclipse-sumo==1.17.0 +filelock==3.12.0 +fonttools==4.39.4 future==0.18.3 -google-auth==2.16.2 +google-auth==2.19.1 google-auth-oauthlib==0.4.6 -grpcio==1.52.0 -grpcio-tools==1.32.0 +grpcio==1.55.0 gym==0.21.0 gymnasium==0.27.0 gymnasium-notices==0.0.1 hyperlink==21.0.0 -identify==2.5.20 idna==3.4 ijson==3.2.0.post0 -imagesize==1.4.1 -importlab==0.8 importlib-metadata==4.13.0 importlib-resources==5.12.0 incremental==22.10.0 -ipython==8.11.0 -isort==5.7.0 -jax-jumpy==0.2.0 -jedi==0.18.2 -Jinja2==3.1.2 -jsonpatch==1.32 -jsonpointer==2.3 +jax-jumpy==1.0.0 +joblib==1.2.0 kiwisolver==1.4.4 -lazy-object-proxy==1.9.0 -libcst==0.4.9 -Markdown==3.4.1 +llvmlite==0.40.0 +Markdown==3.4.3 markdown-it-py==2.2.0 -MarkupSafe==2.1.2 +MarkupSafe==2.1.3 matplotlib==3.7.1 -matplotlib-inline==0.1.6 -mccabe==0.7.0 -mdit-py-plugins==0.3.5 mdurl==0.1.2 -mypy-extensions==1.0.0 -myst-parser==1.0.0 -networkx==3.0 -ninja==1.11.1 -nodeenv==1.7.0 +nox==2023.4.22 +numba==0.57.0 numpy==1.23.5 nvidia-cublas-cu11==11.10.3.66 nvidia-cuda-nvrtc-cu11==11.7.99 nvidia-cuda-runtime-cu11==11.7.99 nvidia-cudnn-cu11==8.5.0.96 oauthlib==3.2.2 -packaging==23.0 +opencv-python==4.7.0.72 +packaging==23.1 Panda3D==1.10.9 panda3d-gltf==0.13 panda3d-simplepbr==0.10 -pandas==1.5.3 -parso==0.8.3 -pathspec==0.11.1 -pbr==5.11.1 -pexpect==4.8.0 -pickleshare==0.7.5 -Pillow==9.4.0 -platformdirs==3.1.1 -pre-commit==2.16.0 -prompt-toolkit==3.0.38 +pandas==2.0.2 +Pillow==9.5.0 +platformdirs==3.5.1 protobuf==3.20.3 -psutil==5.9.4 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -pybullet==3.0.6 -Pygments==2.14.0 -pylint==2.17.0 +psutil==5.9.5 +pyarrow==12.0.0 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pybullet==3.2.5 +Pygments==2.15.1 pyparsing==3.0.9 +pyproj==3.5.0 python-dateutil==2.8.2 -pytype==2022.1.13 -pytz==2022.7.1 +pytz==2023.3 PyYAML==6.0 -requests==2.28.2 +requests==2.31.0 requests-oauthlib==1.3.1 -rich==13.3.2 +rich==13.4.1 rsa==4.9 Rtree==1.0.1 scipy==1.10.1 -sh==2.0.2 shapely==2.0.1 Shimmy==0.2.1 six==1.16.0 -snowballstemmer==2.2.0 -Sphinx==6.1.3 -sphinx-click==4.4.0 -sphinx-rtd-theme==1.2.0 -sphinxcontrib-apidoc==0.3.0 -sphinxcontrib-applehelp==1.0.4 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.1 -sphinxcontrib-jquery==4.1 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 stable-baselines3==1.7.0 -stack-data==0.6.2 tableprint==0.9.1 -tabulate==0.9.0 tensorboard==2.12.0 tensorboard-data-server==0.7.0 tensorboard-plugin-wit==1.8.1 -tokenize-rt==5.0.0 -toml==0.10.2 -tomli==2.0.1 -tomlkit==0.11.6 torch==1.13.1 torchinfo==1.7.2 -tornado==6.2 -traitlets==5.9.0 +tornado==6.3.2 trimesh==3.9.29 Twisted==22.10.0 -typing-inspect==0.8.0 -typing_extensions==4.5.0 -urllib3==1.26.15 -virtualenv==20.21.0 -visdom==0.2.4 +typing_extensions==4.6.3 +tzdata==2023.3 +urllib3==1.26.16 +virtualenv==20.23.0 wcwidth==0.2.6 -websocket-client==1.5.1 -Werkzeug==2.2.3 -wrapt==1.15.0 +websocket-client==1.5.2 +Werkzeug==2.3.5 yattag==1.15.1 zipp==3.15.0 -zope.interface==5.5.2 +zope.interface==6.0 diff --git a/examples/rl/platoon/train/reward.py b/examples/rl/platoon/train/reward.py index 99b4ea492c..166f858512 100644 --- a/examples/rl/platoon/train/reward.py +++ b/examples/rl/platoon/train/reward.py @@ -2,14 +2,15 @@ import 
numpy as np from smarts.core.colors import SceneColors - +from typing import Tuple class Reward(gym.Wrapper): - def __init__(self, env): + def __init__(self, env: gym.Env, crop: Tuple[int, int, int, int]): """Constructor for the Reward wrapper.""" super().__init__(env) self._leader_color = np.array(SceneColors.SocialAgent.value[0:3]) * 255 self._total_dist = {} + self._crop = crop def reset(self, *, seed=None, options=None): self._total_dist = {} @@ -40,7 +41,9 @@ def step(self, action): elif agent_obs["events"]["reached_max_episode_steps"]: print(f"{agent_id}: Reached max episode steps.") elif ( - agent_obs["events"]["collisions"] | agent_obs["events"]["off_road"] + agent_obs["events"]["collisions"] + | agent_obs["events"]["off_road"] + | agent_obs["events"]["wrong_way"] ): pass elif agent_obs["events"]["interest_done"]: @@ -78,32 +81,38 @@ def _reward(self, obs, env_reward): if agent_obs["events"]["collisions"]: reward[agent_id] -= np.float64(10) print(f"{agent_id}: Collided.") - break + continue # Penalty for driving off road if agent_obs["events"]["off_road"]: reward[agent_id] -= np.float64(10) print(f"{agent_id}: Went off road.") - break + continue - # Reward for reaching goal - # if agent_obs["events"]["reached_goal"]: - # reward[agent_id] += np.float64(30) + # Penalty for driving on wrong way + if obs[agent_id]["events"]["wrong_way"]: + reward[agent_id] -= np.float64(10) + print(f"{agent_id}: Went wrong way.") + continue - # Reward for distance travelled by driving - reward[agent_id] += np.float64(env_reward[agent_id]) + # Reward for distance travelled in one step + # reward[agent_id] += np.float64(env_reward[agent_id]) # Check if leader is in front and within the rgb observation if leader: rgb = agent_obs["top_down_rgb"] h, w, d = rgb.shape rgb_masked = rgb[0 : h // 2, :, :] + rgb_cropped = rgb_masked[ + self._crop[2] :, self._crop[0] : w - self._crop[1], : + ] leader_in_rgb = ( - (rgb_masked == self._leader_color.reshape((1, 1, 3))) + (rgb_cropped == self._leader_color.reshape((1, 1, 3))) .all(axis=-1) .any() ) + # Rewards specific to vehicle following task if leader and leader_in_rgb: # Get agent's waypoints waypoints = agent_obs["waypoint_paths"]["position"] @@ -119,15 +128,12 @@ def _reward(self, obs, env_reward): np.array([leader["position"]]), ) - # Rewards specific to "platooning" and "following" tasks - if leader and leader_in_rgb: - # Reward for being in the same lane as the leader if (leader_ind is not None) and (leader_wp_ind[0] in ego_wp_inds): - reward[agent_id] += np.float64(1) + reward[agent_id] += np.float64(3) else: - reward[agent_id] -= np.float64(0.2) + reward[agent_id] -= np.float64(0.3) return reward diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index eba4f2ff07..ca0ef11bfa 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -6,6 +6,7 @@ # Required to load inference module sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + import argparse import warnings from datetime import datetime @@ -22,23 +23,36 @@ from contrib_policy import network from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback from stable_baselines3.common.evaluation import evaluate_policy +from torchinfo import summary from train.env import make_env from train.utils import ObjDict from smarts.zoo import registry +from smarts.zoo.agent_spec import AgentSpec + -print("\nTorch cuda is available: ", th.cuda.is_available(), "\n") +print("\n") +print(f"Torch cuda is available: 
{th.cuda.is_available()}") +device = th.device("cuda" if th.cuda.is_available() else "cpu") +print(f"Torch device: {device}") +print("\n") warnings.simplefilter("ignore", category=DeprecationWarning) warnings.simplefilter("ignore", category=ResourceWarning) # pytype: disable=attribute-error -def main(args: argparse.Namespace): - # Load config file. +def load_config(): + """Load config file.""" parent_dir = Path(__file__).resolve().parent config_file = yaml.safe_load((parent_dir / "config.yaml").read_text()) config = ObjDict(config_file["smarts"]) + return config + + +def main(args: argparse.Namespace): + parent_dir = Path(__file__).resolve().parent + config = load_config() # Load env config. config.mode = args.mode @@ -76,22 +90,28 @@ def main(args: argparse.Namespace): envs_train[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) envs_eval[f"{scenario}"] = make_env( env_id=config.env_id, scenario=scenario_path, - agent_interface=agent_spec.interface, + agent_spec=agent_spec, config=config, seed=config.seed, ) # Run training or evaluation. - run(envs_train=envs_train, envs_eval=envs_eval, config=config) - - print("Finished training ...") + if config.mode == "train": + train( + envs_train=envs_train, + envs_eval=envs_eval, + config=config, + agent_spec=agent_spec, + ) + else: + evaluate(envs=envs_eval, config=config, agent_spec=agent_spec) # Close all environments for env in envs_train.values(): @@ -100,79 +120,95 @@ def main(args: argparse.Namespace): env.close() -def run( +def train( envs_train: Dict[str, gym.Env], envs_eval: Dict[str, gym.Env], config: Dict[str, Any], + agent_spec: AgentSpec, ): + print("\nStart training.\n") + scenarios_iter = cycle(config.scenarios) + save_dir = config.logdir / "train" + save_dir.mkdir(parents=True, exist_ok=True) + checkpoint_callback = CheckpointCallback( + save_freq=config.checkpoint_freq, + save_path=config.logdir / "checkpoint", + name_prefix="PPO", + ) - if config.mode == "train": - print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) - model = sb3lib.PPO( - env=envs_train[next(scenarios_iter)], - tensorboard_log=config.logdir / "tensorboard", - verbose=1, - **network.combined_extractor(config), - ) + for index in range(config.epochs): + scen = next(scenarios_iter) + env_train = envs_train[scen] + env_eval = envs_eval[scen] + print(f"\nTraining on {scen}.\n") - # Print model summary - # from torchinfo import summary - # td = {"rgb":th.zeros(1,9,128,128)} - # summary(model.policy, input_data=[td], depth=5) - # input("Press any key to continue ...") - - for index in range(config.epochs): - scen = next(scenarios_iter) - env_train = envs_train[scen] - env_eval = envs_eval[scen] - print(f"\nTraining on {scen}.\n") - checkpoint_callback = CheckpointCallback( - save_freq=config.checkpoint_freq, - save_path=config.logdir / "checkpoint", - name_prefix=f"PPO", - ) - eval_callback = EvalCallback( - env_eval, - best_model_save_path=config.logdir / "eval", - n_eval_episodes=3, - eval_freq=config.eval_freq, - deterministic=True, - render=False, + if index == 0: + model = sb3lib.PPO( + env=env_train, + tensorboard_log=config.logdir / "tensorboard", verbose=1, + **network.combined_extractor(config), ) - model.set_env(env_train) - model.learn( - total_timesteps=config.train_steps, - callback=[checkpoint_callback, eval_callback], - reset_num_timesteps=False, - ) + else: + model = sb3lib.PPO.load(save_dir / "intermediate") + + 
eval_callback = EvalCallback(
+            env_eval,
+            best_model_save_path=config.logdir / "eval",
+            n_eval_episodes=3,
+            eval_freq=config.eval_freq,
+            deterministic=True,
+            render=False,
+            verbose=1,
+        )
+        model.set_env(env_train)
+        model.learn(
+            total_timesteps=config.train_steps,
+            callback=[checkpoint_callback, eval_callback],
+            reset_num_timesteps=False,
+        )
+        model.save(save_dir / "intermediate")

-        # Save trained model.
-        save_dir = config.logdir / "train"
-        save_dir.mkdir(parents=True, exist_ok=True)
-        time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
-        model.save(save_dir / ("model_" + time))
-        print("\nSaved trained model.\n")
+    print("Finished training.")

-    if config.mode == "evaluate":
-        print("\nEvaluate policy.\n")
-        device = th.device("cpu")
-        model = sb3lib.PPO.load(config.model, print_system_info=True, device=device)
-
-        # Print model summary
-        # from torchinfo import summary
-        # td = {"rgb":th.zeros(1,9,128,128)}
-        # summary(model.policy, input_data=[td], depth=5)
-        # input("Press any key to continue ...")
-
-        for env_name, env_eval in envs_eval.items():
-            print(f"\nEvaluating env {env_name}.")
-            mean_reward, std_reward = evaluate_policy(
-                model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True
-            )
-            print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n")
-        print("\nFinished evaluating.\n")
+    # Save trained model.
+    time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+    model.save(save_dir / ("model_" + time))
+    print("\nSaved trained model.\n")
+
+    # Print model summary
+    crop = agent_spec.agent_params["crop"]
+    top_down_rgb = agent_spec.interface.top_down_rgb
+    h = top_down_rgb.height - crop[2] - crop[3]
+    w = top_down_rgb.width - crop[0] - crop[1]
+    td = {"rgb": th.zeros(1, 9, h, w)}
+    summary(model.policy, input_data=[td], depth=5)
+
+
+def evaluate(
+    envs: Dict[str, gym.Env],
+    config: Dict[str, Any],
+    agent_spec: AgentSpec,
+):
+    print("\nEvaluate policy.\n")
+    device = th.device("cpu")
+    model = sb3lib.PPO.load(config.model, print_system_info=True, device=device)
+
+    # Print model summary
+    crop = agent_spec.agent_params["crop"]
+    top_down_rgb = agent_spec.interface.top_down_rgb
+    h = top_down_rgb.height - crop[2] - crop[3]
+    w = top_down_rgb.width - crop[0] - crop[1]
+    td = {"rgb": th.zeros(1, 9, h, w)}
+    summary(model.policy, input_data=[td], depth=5)
+
+    for env_name, env_eval in envs.items():
+        print(f"\nEvaluating env {env_name}.")
+        mean_reward, std_reward = evaluate_policy(
+            model, env_eval, n_eval_episodes=config.eval_eps, deterministic=True
+        )
+        print(f"Mean reward:{mean_reward:.2f} +/- {std_reward:.2f}\n")
+    print("\nFinished evaluating.\n")


 if __name__ == "__main__":
diff --git a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml b/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml
deleted file mode 100644
index 3f66d5c793..0000000000
--- a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/map.net.xml
+++ /dev/null
@@ -1,203 +0,0 @@
[203 lines of SUMO road-network XML deleted with this file; markup omitted]
diff --git a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py b/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py
deleted file mode 100644
index 000193efa2..0000000000
--- 
a/scenarios/sumo/intersections/1_to_3lane_left_turn_c_agents_1/scenario.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -import random -from itertools import combinations -from pathlib import Path - -from smarts.sstudio import gen_scenario -from smarts.sstudio.types import Flow, Mission, Route, Scenario, Traffic, TrafficActor - -normal = TrafficActor( - name="car", -) - -vertical_routes = [ - ("E2", 0, "E7", 0), - ("E8", 0, "E1", 1), -] - -horizontal_routes = [ - ("E3", 0, "E5", 0), - ("E3", 1, "E5", 1), - ("E3", 2, "E5", 2), - ("E6", 1, "E4", 1), - ("E6", 0, "E4", 0), -] - -turn_left_routes = [ - ("E8", 0, "E5", 2), - ("E6", 1, "E1", 1), - ("E2", 1, "E4", 1), - ("E3", 2, "E7", 0), -] - -turn_right_routes = [ - ("E6", 0, "E7", 0), - ("E3", 0, "E1", 0), - ("E2", 0, "E5", 0), - ("E8", 0, "E4", 0), -] - -# Total route combinations = 14C1 + 14C2 + 14C3 + 14C4 = 1470 -all_routes = vertical_routes + horizontal_routes + turn_left_routes + turn_right_routes -route_comb = [com for elems in range(4, 5) for com in combinations(all_routes, elems)] -traffic = {} -for name, routes in enumerate(route_comb): - traffic[str(name)] = Traffic( - flows=[ - Flow( - route=Route( - begin=(f"{r[0]}", r[1], 0), - end=(f"{r[2]}", r[3], "max"), - ), - # Random flow rate, between x and y vehicles per minute. - rate=60 * random.uniform(2, 5), - # Random flow start time, between x and y seconds. - begin=random.uniform(0, 3), - # For an episode with maximum_episode_steps=3000 and step - # time=0.1s, maximum episode time=300s. Hence, traffic set to - # end at 900s, which is greater than maximum episode time of - # 300s. - end=60 * 15, - actors={normal: 1}, - ) - for r in routes - ] - ) - -route = Route(begin=("E8", 0, 5), end=("E5", 0, "max")) -ego_missions = [ - Mission( - route=route, - start_time=15, # Delayed start, to ensure road has prior traffic. 
- ) -] -# runtime = 14 -gen_scenario( - scenario=Scenario( - traffic=traffic, - ego_missions=ego_missions, - ), - output_dir=Path(__file__).parent, -) diff --git a/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py b/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py index 7f2eacd983..d39c58b9c0 100644 --- a/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py +++ b/scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2/scenario.py @@ -125,7 +125,7 @@ scenario_metadata=ScenarioMetadata( actor_of_interest_re_filter=leader_id, actor_of_interest_color=Colors.Blue, - scenario_difficulty=3, + scenario_difficulty=0.9, scenario_duration=duration, ), ), diff --git a/smarts/benchmark/driving_smarts/v2023/config_1.yaml b/smarts/benchmark/driving_smarts/v2023/config_1.yaml index 6b3a57f675..e19c3cc9c2 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_1.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_1.yaml @@ -15,7 +15,10 @@ benchmark: standard: loc: "smarts.env:driving-smarts-v2023" scenarios: - - scenarios/sumo/straight/3lane_cruise_agents_1 + - scenarios/sumo/straight/cruise_2lane_agents_1 + - scenarios/sumo/straight/cutin_2lane_agents_1 + - scenarios/sumo/straight/merge_exit_sumo_t_agents_1 + - scenarios/sumo/straight/overtake_2lane_agents_1 kwargs: seed: 42 metric_formula: smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py \ No newline at end of file diff --git a/smarts/benchmark/driving_smarts/v2023/config_2.yaml b/smarts/benchmark/driving_smarts/v2023/config_2.yaml index e7131ef475..9f00597da0 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_2.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_2.yaml @@ -15,7 +15,8 @@ benchmark: turns: loc: "smarts.env:driving-smarts-v2023" scenarios: - - scenarios/sumo/intersections/1_to_2lane_left_turn_c_agents_1 + - scenarios/sumo/intersections/1_to_3lane_left_turn_sumo_c_agents_1 + - scenarios/sumo/intersections/1_to_3lane_left_turn_middle_lane_c_agents_1 kwargs: seed: 42 metric_formula: smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py \ No newline at end of file diff --git a/smarts/benchmark/driving_smarts/v2023/config_3.yaml b/smarts/benchmark/driving_smarts/v2023/config_3.yaml index d2e89b43f4..5ecabfe4b0 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_3.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_3.yaml @@ -15,7 +15,14 @@ benchmark: platoon: loc: "smarts.env:platoon-v0" scenarios: - - scenarios/sumo/platoon/merge_exit_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_2lane_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/straight_3lanes_sumo_t_agents_2 + - scenarios/sumo/vehicle_following/merge_exit_sumo_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_1 + - scenarios/sumo/vehicle_following/merge_exit_sumo_t_agents_2 kwargs: seed: 42 metric_formula: smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py diff --git a/smarts/core/route_cache.py b/smarts/core/route_cache.py index 39321ce45a..313a3eceb1 100644 --- a/smarts/core/route_cache.py +++ b/smarts/core/route_cache.py @@ -81,7 +81,8 @@ def road_ids(self) -> List[str]: """Get the road IDs for this route. 
Returns: - (List[str]): A list of the road IDs for the Roads in this Route.""" + (List[str]): A list of the road IDs for the Roads in this Route. + """ return [road.road_id for road in self.roads] @staticmethod From 38faceba17c7c77fac073916709a8c3151a33ecd Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Mon, 12 Jun 2023 15:48:14 -0400 Subject: [PATCH 2/5] Address reviews. --- examples/rl/drive/inference/__init__.py | 2 +- examples/rl/platoon/inference/__init__.py | 2 +- examples/rl/platoon/train/reward.py | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/rl/drive/inference/__init__.py b/examples/rl/drive/inference/__init__.py index a330eaa041..307416f915 100644 --- a/examples/rl/drive/inference/__init__.py +++ b/examples/rl/drive/inference/__init__.py @@ -26,7 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. - "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. Units: pixels. } return AgentSpec( diff --git a/examples/rl/platoon/inference/__init__.py b/examples/rl/platoon/inference/__init__.py index e153510ffa..38eb05cab3 100644 --- a/examples/rl/platoon/inference/__init__.py +++ b/examples/rl/platoon/inference/__init__.py @@ -26,7 +26,7 @@ def entry_point(**kwargs): "top_down_rgb": interface.top_down_rgb, "action_space_type": interface.action, "num_stack": 3, # Number of frames to stack as input to policy network. - "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. + "crop": (50, 50, 0, 70), # Crop image from left, right, top, and bottom. Units: pixels. } return AgentSpec( diff --git a/examples/rl/platoon/train/reward.py b/examples/rl/platoon/train/reward.py index 166f858512..be23854afe 100644 --- a/examples/rl/platoon/train/reward.py +++ b/examples/rl/platoon/train/reward.py @@ -95,9 +95,6 @@ def _reward(self, obs, env_reward): print(f"{agent_id}: Went wrong way.") continue - # Reward for distance travelled in one step - # reward[agent_id] += np.float64(env_reward[agent_id]) - # Check if leader is in front and within the rgb observation if leader: rgb = agent_obs["top_down_rgb"] From b64fdd6c7844722f625a48f713914cfd61da6c26 Mon Sep 17 00:00:00 2001 From: adai Date: Mon, 12 Jun 2023 16:07:27 -0400 Subject: [PATCH 3/5] Use slice iterator. Co-authored-by: Tucker Alban --- examples/rl/platoon/train/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index ca0ef11bfa..b563cdf2d3 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -136,8 +136,8 @@ def train( name_prefix="PPO", ) - for index in range(config.epochs): - scen = next(scenarios_iter) + scenarios_iter = islice(cycle(config.scenarios), config.epocs) + for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen] print(f"\nTraining on {scen}.\n") From 551684295aa24480b0a4df74c4f4ccadf7fdca23 Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Mon, 12 Jun 2023 16:18:04 -0400 Subject: [PATCH 4/5] Use islice tool. 
--- examples/rl/drive/train/run.py | 7 +++---- examples/rl/platoon/train/run.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/rl/drive/train/run.py b/examples/rl/drive/train/run.py index b4f2221cd2..b3139a7dbd 100644 --- a/examples/rl/drive/train/run.py +++ b/examples/rl/drive/train/run.py @@ -9,7 +9,7 @@ import argparse import warnings from datetime import datetime -from itertools import cycle +from itertools import cycle, islice from typing import Any, Dict import gym @@ -125,7 +125,6 @@ def train( agent_spec: AgentSpec, ): print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) save_dir = config.logdir / "train" save_dir.mkdir(parents=True, exist_ok=True) checkpoint_callback = CheckpointCallback( @@ -134,8 +133,8 @@ def train( name_prefix="PPO", ) - for index in range(config.epochs): - scen = next(scenarios_iter) + scenarios_iter = islice(cycle(config.scenarios), config.epocs) + for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen] print(f"\nTraining on {scen}.\n") diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index b563cdf2d3..32da66172b 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -10,7 +10,7 @@ import argparse import warnings from datetime import datetime -from itertools import cycle +from itertools import cycle, islice from typing import Any, Dict import gym @@ -127,7 +127,6 @@ def train( agent_spec: AgentSpec, ): print("\nStart training.\n") - scenarios_iter = cycle(config.scenarios) save_dir = config.logdir / "train" save_dir.mkdir(parents=True, exist_ok=True) checkpoint_callback = CheckpointCallback( From 4e948d9ca9ebcb161fc870c690909d886cf01658 Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Mon, 12 Jun 2023 16:21:12 -0400 Subject: [PATCH 5/5] Rectify spelling. --- examples/rl/drive/train/run.py | 2 +- examples/rl/platoon/train/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/rl/drive/train/run.py b/examples/rl/drive/train/run.py index b3139a7dbd..c42b9cab7c 100644 --- a/examples/rl/drive/train/run.py +++ b/examples/rl/drive/train/run.py @@ -133,7 +133,7 @@ def train( name_prefix="PPO", ) - scenarios_iter = islice(cycle(config.scenarios), config.epocs) + scenarios_iter = islice(cycle(config.scenarios), config.epochs) for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen] diff --git a/examples/rl/platoon/train/run.py b/examples/rl/platoon/train/run.py index 32da66172b..d86e41f587 100644 --- a/examples/rl/platoon/train/run.py +++ b/examples/rl/platoon/train/run.py @@ -135,7 +135,7 @@ def train( name_prefix="PPO", ) - scenarios_iter = islice(cycle(config.scenarios), config.epocs) + scenarios_iter = islice(cycle(config.scenarios), config.epochs) for index, scen in enumerate(scenarios_iter): env_train = envs_train[scen] env_eval = envs_eval[scen]
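
As a standalone reference for the two ideas the run.py changes above revolve around, the following sketch exercises the islice(cycle(...)) scenario rotation and the cropped-observation shape calculation in isolation. It uses only the Python standard library; the scenario names, epoch count, and frame size are illustrative assumptions, while the crop tuple and the 9-channel stacked input mirror values that appear in the patches.

from itertools import cycle, islice
from types import SimpleNamespace

# Illustrative stand-in for the training config; these values are assumptions.
config = SimpleNamespace(
    scenarios=["scenario_a", "scenario_b", "scenario_c"],
    epochs=7,
)

# Scenario rotation, as in the patched train() loop: cycle() repeats the
# scenario list endlessly and islice() caps the rotation at `epochs` rounds,
# so no separate range()/next() bookkeeping is needed.
scenarios_iter = islice(cycle(config.scenarios), config.epochs)
for index, scen in enumerate(scenarios_iter):
    print(f"epoch {index}: training on {scen}")

# Cropped-observation shape, as computed before the model summary: crop is
# (left, right, top, bottom) in pixels, and with num_stack = 3 stacked RGB
# frames the policy input carries 3 * 3 = 9 channels.
width, height = 200, 200         # example frame size (assumed)
crop = (50, 50, 0, 70)           # left, right, top, bottom (from the patch)
h = height - crop[2] - crop[3]   # 130
w = width - crop[0] - crop[1]    # 100
print(f"policy input shape: (1, 9, {h}, {w})")

Running this prints seven epoch lines cycling through the three placeholder scenarios and reports a policy input shape of (1, 9, 130, 100) for the assumed 200 x 200 frame.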