
Add RelativeTargetPose action space to platoon-v0 env #1974

Merged · 9 commits · Apr 26, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -59,6 +59,7 @@ Copy and pasting the git commit messages is __NOT__ enough.
- The `Panda3d` implementation of `Renderer` has been extracted from the interface and moved to `smarts.p3d`.
- Made all metrics functions to be minimised, except the overall score, which is to be maximised.
- The Driving SMARTS 2023.3 benchmark and the metrics module now use `actor_of_interest_re_filter` from scenario metadata to identify the lead vehicle.
- Added the `RelativeTargetPose` action space to the set of allowed action spaces in the `platoon-v0` env.
### Deprecated
### Fixed
- Fixed issues related to waypoints in junctions on Argoverse maps. Waypoints will now be generated for all paths leading through the lane(s) the vehicle is on.
20 changes: 13 additions & 7 deletions docs/benchmarks/driving_smarts_2023_3.rst
@@ -6,7 +6,7 @@ Driving SMARTS 2023.3
Objective
---------

Objective is to develop a single-ego policy capable of controlling a single ego to perform a platooning task in the
Objective is to develop a single-ego policy capable of controlling a single ego to perform a vehicle-following task in the
``platoon-v0`` environment. Refer to :func:`~smarts.env.gymnasium.platoon_env.platoon_env` for environment details.

.. important::
@@ -16,18 +16,20 @@ Objective is to develop a single-ego policy capable of controlling a single ego
policies are executed in a distributed manner. The single-ego policy should be capable of accounting for and
interacting with other ego vehicles, if any are present.

Each ego is supposed to track and follow its specified leader (i.e., lead vehicle) in a single file or in a
platoon fashion. The name identifier of the lead vehicle to be followed is given to the ego through the configuration
of the :attr:`~smarts.core.agent_interface.InterestDoneCriteria.actors_of_interest` attribute.
All ego agents should track and follow the leader (i.e., lead vehicle) in a single-file fashion. The lead vehicle is
marked as a vehicle of interest and may be found by filtering the
:attr:`~smarts.core.observations.VehicleObservation.interest` attribute of the neighborhood vehicles in the
observation.
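
As a rough illustration (a sketch only, assuming access to the unformatted
:class:`~smarts.core.observations.Observation` object; the variable names below are hypothetical),
the leader can be located as follows:

.. code-block:: python

    # Find the neighbour flagged as a vehicle of interest, i.e., the leader.
    leader = next(
        (v for v in obs.neighborhood_vehicle_states if v.interest),
        None,  # None when the leader is not currently observed
    )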

.. figure:: ../_static/driving_smarts_2023/vehicle_following.png

Here, egos are in red, the lead vehicle is in blue, and background traffic is in silver.
(Left) At the start of the episode, the egos start tracking the lead vehicle. (Right) After a while, the egos
follow the lead vehicle in a single-file fashion.

An ego is terminated when its assigned leader reaches the leader's destination. Egos do not have prior knowledge of
the assigned leader's destination.
The episode ends when the leader reaches its destination. Ego agents do not have prior knowledge of the leader's
destination. Additionally, an ego terminates whenever it collides, drives off the road, or exceeds the maximum
number of steps per episode.

Any method, such as reinforcement learning, offline reinforcement learning, behaviour cloning, generative models,
predictive models, etc., may be used to develop the policy.
@@ -52,7 +54,9 @@ a sample formatted observation data structure.
Action space
------------

Action space for each ego is :attr:`~smarts.core.controllers.ActionSpaceType.Continuous`.
The action space for an ego can be either :attr:`~smarts.core.controllers.ActionSpaceType.Continuous`
or :attr:`~smarts.core.controllers.ActionSpaceType.RelativeTargetPose`. The user should choose one of
the two action spaces and specify it through the ego's agent interface, as shown in the sketch below.
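
A minimal sketch of selecting the ``RelativeTargetPose`` action space is given below; the exact
keyword arguments of :class:`~smarts.core.agent_interface.AgentInterface` and of the environment
builder are assumptions and may differ, and the scenario name is a placeholder.

.. code-block:: python

    from smarts.core.agent_interface import AgentInterface
    from smarts.core.controllers import ActionSpaceType
    from smarts.env.gymnasium.platoon_env import platoon_env

    # Request the RelativeTargetPose action space through the agent interface.
    agent_interface = AgentInterface(
        action=ActionSpaceType.RelativeTargetPose,
    )
    env = platoon_env(
        scenario="<scenario_name>",  # placeholder scenario
        agent_interface=agent_interface,
    )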

Code structure
--------------
@@ -166,6 +170,7 @@ the user.
+ User may fill in the ``<...>`` spaces in the template.
+ User should provide a name for their policy and describe it in the ``name`` and ``long_description`` sections, respectively.
+ Do **not** add SMARTS package as a dependency in the ``install_requires`` section.
+ Dependencies in the ``install_requires`` section **must** have an exact package version specified using ``==`` (see the sketch after this list).

6. inference/setup.py
+ Keep this file and its default contents unchanged.
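
For illustration, pinned dependencies look like the following; the package names and versions are
purely hypothetical and belong in the template's dependency section.

.. code-block:: python

    # Illustrative only: every dependency pinned to an exact version with `==`.
    install_requires = [
        "stable-baselines3==1.7.0",
        "torch==1.13.1",
    ]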
@@ -184,6 +189,7 @@ Example
Example training and inference code is provided for this benchmark.
See the :examples:`rl/platoon` example. The example uses the PPO algorithm from the
`Stable Baselines3 <https://github.com/DLR-RM/stable-baselines3>`_ reinforcement learning library.
It uses the :attr:`~smarts.core.controllers.ActionSpaceType.Continuous` action space.
Instructions for training and evaluating the example are as follows.

Train
2 changes: 1 addition & 1 deletion examples/rl/platoon/train/Dockerfile
@@ -34,6 +34,6 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt
COPY . /SMARTS
WORKDIR /SMARTS
RUN pip install -e .[camera_obs,argoverse]
RUN pip install -e ./examples/rl/baseline/inference
RUN pip install -e ./examples/rl/platoon/inference

SHELL ["/bin/bash", "-c", "-l"]
2 changes: 0 additions & 2 deletions examples/rl/platoon/train/reward.py
@@ -8,8 +8,6 @@ class Reward(gym.Wrapper):
    def __init__(self, env):
        """Constructor for the Reward wrapper."""
        super().__init__(env)
        self._half_pi = np.pi / 2
        self._two_pi = 2 * np.pi
        self._leader_color = np.array(SceneColors.SocialAgent.value[0:3]) * 255
        self._total_dist = {}

29 changes: 20 additions & 9 deletions smarts/env/gymnasium/platoon_env.py
@@ -33,14 +33,20 @@
)
from smarts.core.controllers import ActionSpaceType
from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1, SumoOptions
from smarts.env.gymnasium.wrappers.limit_relative_target_pose import (
    LimitRelativeTargetPose,
)
from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.env.utils.scenario import get_scenario_specs
from smarts.sstudio.scenario_construction import build_scenario

logger = logging.getLogger(__file__)
logger.setLevel(logging.WARNING)

SUPPORTED_ACTION_TYPES = (ActionSpaceType.Continuous,)
SUPPORTED_ACTION_TYPES = (
    ActionSpaceType.Continuous,
    ActionSpaceType.RelativeTargetPose,
)


def platoon_env(
@@ -52,12 +58,13 @@ def platoon_env(
    sumo_headless: bool = True,
    envision_record_data_replay_path: Optional[str] = None,
):
"""Each ego is supposed to track and follow its specified leader (i.e., lead
vehicle) in a single file or in a platoon fashion. The name of the lead vehicle
to track is given to the ego through its
:attr:`~smarts.core.agent_interface.InterestDoneCriteria.actors_of_interest` attribute.
The episode ends for an ego when its assigned leader reaches the leader's
destination. Egos do not have prior knowledge of their assigned leader's destination.
"""All ego agents should track and follow the leader (i.e., lead vehicle) in a
single-file fashion. The lead vehicle is marked as a vehicle of interest
and may be found by filtering the
:attr:`~smarts.core.observations.VehicleObservation.interest` attribute of
the neighborhood vehicles in the observation. The episode ends when the
leader reaches its destination. Ego agents do not have prior knowledge of the
leader's destination.

Observation space for each agent:
Formatted :class:`~smarts.core.observations.Observation` using
@@ -67,7 +74,9 @@ def platoon_env(
a sample formatted observation data structure.

Action space for each agent:
Action space for each agent is :attr:`~smarts.core.controllers.ActionSpaceType.Continuous`.
The action space for an ego can be either :attr:`~smarts.core.controllers.ActionSpaceType.Continuous`
or :attr:`~smarts.core.controllers.ActionSpaceType.RelativeTargetPose`. The user should choose one of
the two action spaces and specify it through the ego's agent interface.

Agent interface:
Using the input argument agent_interface, users may configure any field of
@@ -137,14 +146,16 @@
    env = HiWayEnvV1(
        scenarios=[env_specs["scenario"]],
        agent_interfaces=agent_interfaces,
        sim_name="Platoon",
        sim_name="VehicleFollowing",
        headless=headless,
        visdom=visdom,
        seed=seed,
        sumo_options=SumoOptions(headless=sumo_headless),
        visualization_client_builder=visualization_client_builder,
        observation_options=ObservationOptions.multi_agent,
    )
    if resolved_agent_interface.action == ActionSpaceType.RelativeTargetPose:
        env = LimitRelativeTargetPose(env)

    return env

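
For context, a minimal sketch of the kind of per-step limiting such a wrapper might apply is given
below. The class name, bounds, and dict-keyed action handling are illustrative assumptions, not the
actual `LimitRelativeTargetPose` implementation.

import math

import gymnasium as gym
import numpy as np


class ClipRelativeTargetPose(gym.ActionWrapper):
    """Illustrative only: clip each ego's (dx, dy, dheading) action to an assumed per-step envelope."""

    def __init__(self, env, max_dist: float = 2.8, max_dheading: float = math.pi / 4):
        super().__init__(env)
        self._max_dist = max_dist          # assumed metres travelled per step
        self._max_dheading = max_dheading  # assumed radians turned per step

    def action(self, action):
        clipped = {}
        for agent_id, (dx, dy, dh) in action.items():
            # Scale the positional displacement down to the assumed maximum distance.
            dist = math.hypot(dx, dy)
            if dist > self._max_dist:
                scale = self._max_dist / dist
                dx, dy = dx * scale, dy * scale
            # Clip the heading change to the assumed maximum turn per step.
            dh = float(np.clip(dh, -self._max_dheading, self._max_dheading))
            clipped[agent_id] = np.array([dx, dy, dh], dtype=np.float32)
        return clipped

With either action space, a multi-agent action is a mapping from agent id to that agent's action;
for `RelativeTargetPose` the per-agent action is assumed here to be `[dx, dy, dheading]` expressed
in the ego's frame.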