From 371e2aab47f16c9d4b0df053eb613abe642d0484 Mon Sep 17 00:00:00 2001 From: Tucker Date: Mon, 13 Feb 2023 19:51:25 -0500 Subject: [PATCH 1/4] Add reset options. --- CHANGELOG.md | 4 ++ docs/sim/env.rst | 13 ++-- smarts/env/gymnasium/hiway_env_v1.py | 22 +++++- smarts/env/tests/test_hiway_env_v1.py | 99 +++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 smarts/env/tests/test_hiway_env_v1.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 40f44dd2a4..5c6a2c0f43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,11 @@ Copy and pasting the git commit messages is __NOT__ enough. ### Added - Added a zoo agent, named Control-and-Supervised-Learning, from NeurIPS 2022 submission. This zoo agent runs in benchmark `driving_smarts==0.0`. - Added a zoo agent, named Discrete Soft Actor Critic, from NeurIPS 2022 submission. This zoo agent runs in benchmark `driving_smarts==0.0`. +- Added basic tests for `hiway-v1` resetting and unformatted observations and actions. ### Changed +- `HiWayEnvV1` derived environments now allow an explicit scenario through `reset(options["scenario"])`. +- `HiWayEnvV1` derived environments now allow an explicit simulation start time through `reset(options["start_time"])`. +- Exposed `smarts` as a property on `HiWayEnvV1`. ### Deprecated ### Fixed ### Removed diff --git a/docs/sim/env.rst b/docs/sim/env.rst index 5c984612d1..9b1c7a4e9b 100644 --- a/docs/sim/env.rst +++ b/docs/sim/env.rst @@ -9,15 +9,15 @@ Base environments SMARTS environment module is defined in :mod:`~smarts.env` package. Currently SMARTS provides two kinds of training environments, namely: -+ ``HiwayEnv`` utilising ``gym.env`` style interface ++ ``HiWayEnv`` utilising ``gym.env`` style interface + ``RLlibHiwayEnv`` customized for `RLlib `_ training .. image:: ../_static/env.png -HiwayEnv +HiWayEnv ^^^^^^^^ -``HiwayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. 
An usage example is shown below. +``HiWayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below. Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details. .. code-block:: python @@ -44,10 +44,10 @@ Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details. # Close env. env.close() -HiwayEnvV1 +HiWayEnvV1 ^^^^^^^^^^ -``HiwayEnvV1`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. An usage example is shown below. +``HiWayEnvV1`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below. This version has two configurations of observation output: `ObservationOptions.full` which provides padded agents in the observations which exactly matches the `env.observation_space`, and `ObservationOptions.multi_agent` which provides only agents as are currently active. Refer to :class:`~smarts.env.gymnasium.hiway_env_v1.HiWayEnvV1` for more details. @@ -66,7 +66,8 @@ exactly matches the `env.observation_space`, and `ObservationOptions.multi_agent ) # Reset env and build agent. - observations, infos = env.reset() + ## `reset(options)` forwards "scenario" and "start_time" to `smarts.reset()` + observations, infos = env.reset() agent = agent_spec.build_agent() # Step env. diff --git a/smarts/env/gymnasium/hiway_env_v1.py b/smarts/env/gymnasium/hiway_env_v1.py index 73fc1895d6..2411037c4c 100644 --- a/smarts/env/gymnasium/hiway_env_v1.py +++ b/smarts/env/gymnasium/hiway_env_v1.py @@ -338,7 +338,9 @@ def reset( If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. options (optional dict): Additional information to specify how the environment is reset (optional, - depending on the specific environment) + depending on the specific environment). 
Forwards to :meth:`~smarts.core.smarts.SMARTS.reset`. + - "scenario" (smarts.sstudio.Scenario): An explicit scenario to reset to. The default is a scenario from the scenario iter. + - "start_time" (float): Forwards the start time of the current scenario. The default is 0. Returns: observation (dict): Observation of the initial state. This will be an element of :attr:`observation_space` @@ -347,10 +349,15 @@ def reset( the ``info`` returned by :meth:`step`. """ super().reset(seed=seed, options=options) - scenario = next(self._scenarios_iterator) + options = options or {} + scenario = options.get("scenario") + if scenario is None: + scenario = next(self._scenarios_iterator) self._dones_registered = 0 - observations = self._smarts.reset(scenario) + observations = self._smarts.reset( + scenario, start_time=options.get("start_time", 0) + ) info = {"map_source": self._smarts.scenario.road_map.source} if self._env_renderer is not None: @@ -497,3 +504,12 @@ def scenario(self) -> Scenario: scenario.Scenario: Current simulated scenario. """ return self._smarts.scenario + + @property + def smarts(self): + """Gives access to the underlying simulator. Use this carefully. + + Returns: + smarts.core.smarts.SMARTS: The smarts simulator instance. + """ + return self._smarts diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py new file mode 100644 index 0000000000..0ac83817cf --- /dev/null +++ b/smarts/env/tests/test_hiway_env_v1.py @@ -0,0 +1,99 @@ +# MIT License +# +# Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+import gymnasium as gym +import pytest + +from smarts.core.agent_interface import AgentInterface, AgentType +from smarts.core.utils.episodes import episodes +from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1 +from smarts.core.scenario import Scenario + +AGENT_ID = "Agent-007" +MAX_EPISODES = 3 + + +@pytest.fixture +def env(): + agent_interfaces = { + AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100) + } + agent_ids = set(agent_interfaces) + env: HiWayEnvV1 = gym.make( + "smarts.env:hiway-v1", + scenarios=["scenarios/sumo/loop"], + agent_interfaces=agent_interfaces, + action_options="unformatted", + observation_options="unformatted", + headless=True, + visdom=False, + fixed_timestep_sec=0.01, + disable_env_checker=True, + ) + assert isinstance(env.unwrapped, HiWayEnvV1) + assert not (agent_ids - set(env.agent_interfaces)) + matching_items = [ + env.agent_interfaces[k] == agent_interfaces[k] + for k in env.agent_interfaces + if k in agent_interfaces + ] + assert all(matching_items) + assert len(env.agent_interfaces) == len(agent_interfaces) + assert not (agent_ids - env.agent_ids) + yield env + env.close() + + +def test_hiway_env_v1_unformatted(env: HiWayEnvV1): + episode = None + for episode in episodes(n=MAX_EPISODES): + observations = env.reset() + episode.record_scenario(env.scenario_log) + + terminated = {"__all__": False} + while not terminated["__all__"]: + observations, rewards, terminated, truncated, infos = env.step( + {AGENT_ID: "keep_lane"} + ) + + # Reward is currently the delta in distance travelled by the agent. + # Ensure that it is infact a delta and not total distance travelled + # since this bug has appeared a few times. Verify by ensuring the + # reward does not grow unbounded. + assert all( + [-3 < reward < 3 for reward in rewards.values()] + ), f"Expected bounded reward per timestep, but got {rewards}." 
+ + episode.record_step(observations, rewards, terminated, infos) + + assert episode is not None and episode.index == ( + MAX_EPISODES - 1 + ), "Simulation must cycle through to the final episode." + + +def test_hiway_env_v1_reset_with_scenario(env: HiWayEnvV1): + scenarios = ["scenarios/sumo/loop"] + scenario: Scenario = next(Scenario.scenario_variations(scenarios, [AGENT_ID])) + + env.reset(options={"scenario": scenario, "start_time": 100}) + assert "loop" in env.scenario.root_filepath + assert env.smarts.elapsed_sim_time >= 100 + env.step({AGENT_ID: "keep_lane"}) From b661dffde3b33cbc87ee058c27b18ac8d6847cad Mon Sep 17 00:00:00 2001 From: Tucker Date: Tue, 14 Feb 2023 09:08:46 -0500 Subject: [PATCH 2/4] Fix type test. --- smarts/env/tests/test_hiway_env_v1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py index 0ac83817cf..46020ce3d0 100644 --- a/smarts/env/tests/test_hiway_env_v1.py +++ b/smarts/env/tests/test_hiway_env_v1.py @@ -78,6 +78,7 @@ def test_hiway_env_v1_unformatted(env: HiWayEnvV1): # Ensure that it is infact a delta and not total distance travelled # since this bug has appeared a few times. Verify by ensuring the # reward does not grow unbounded. + assert isinstance(rewards, dict) assert all( [-3 < reward < 3 for reward in rewards.values()] ), f"Expected bounded reward per timestep, but got {rewards}." From f25fadab172c20e640eef6cd402bf3ff2299194c Mon Sep 17 00:00:00 2001 From: Tucker Date: Wed, 15 Feb 2023 10:36:34 -0500 Subject: [PATCH 3/4] Clean up test. 
--- smarts/env/tests/test_hiway_env_v1.py | 49 ++++++++++++++++----------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py index 46020ce3d0..0d09673395 100644 --- a/smarts/env/tests/test_hiway_env_v1.py +++ b/smarts/env/tests/test_hiway_env_v1.py @@ -19,24 +19,27 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. +from typing import Set + import gymnasium as gym import pytest from smarts.core.agent_interface import AgentInterface, AgentType +from smarts.core.scenario import Scenario from smarts.core.utils.episodes import episodes from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1 -from smarts.core.scenario import Scenario AGENT_ID = "Agent-007" MAX_EPISODES = 3 @pytest.fixture -def env(): - agent_interfaces = { - AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100) - } - agent_ids = set(agent_interfaces) +def agent_interfaces(): + return {AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)} + + +@pytest.fixture +def env(agent_interfaces): env: HiWayEnvV1 = gym.make( "smarts.env:hiway-v1", scenarios=["scenarios/sumo/loop"], @@ -48,20 +51,26 @@ def env(): fixed_timestep_sec=0.01, disable_env_checker=True, ) - assert isinstance(env.unwrapped, HiWayEnvV1) - assert not (agent_ids - set(env.agent_interfaces)) - matching_items = [ - env.agent_interfaces[k] == agent_interfaces[k] - for k in env.agent_interfaces - if k in agent_interfaces - ] - assert all(matching_items) - assert len(env.agent_interfaces) == len(agent_interfaces) - assert not (agent_ids - env.agent_ids) yield env env.close() +def test_hiway_env_v1_type(env: gym.Env): + # is base environment (also passes up correct environment) + assert isinstance(env.unwrapped, HiWayEnvV1) + # inherits gym.Env + assert isinstance(env.unwrapped, gym.Env) + + 
+def test_hiway_env_v1_interface_generation( + env: HiWayEnvV1, agent_interfaces: Set[AgentInterface] +): + agent_ids = set(agent_interfaces) + assert agent_ids == set(env.agent_interfaces) + assert all([env.agent_interfaces[k] == agent_interfaces[k] for k in agent_ids]) + assert not (agent_ids - env.agent_ids) + + def test_hiway_env_v1_unformatted(env: HiWayEnvV1): episode = None for episode in episodes(n=MAX_EPISODES): @@ -91,10 +100,10 @@ def test_hiway_env_v1_unformatted(env: HiWayEnvV1): def test_hiway_env_v1_reset_with_scenario(env: HiWayEnvV1): - scenarios = ["scenarios/sumo/loop"] + scenarios = ["scenarios/sumo/figure_eight"] scenario: Scenario = next(Scenario.scenario_variations(scenarios, [AGENT_ID])) - env.reset(options={"scenario": scenario, "start_time": 100}) - assert "loop" in env.scenario.root_filepath - assert env.smarts.elapsed_sim_time >= 100 + env.reset(options={"scenario": scenario, "start_time": 1000}) + assert "figure_eight" in env.scenario.root_filepath + assert env.smarts.elapsed_sim_time >= 1000 env.step({AGENT_ID: "keep_lane"}) From 844096207abeb9d59677ebbfaf31122e7f289e01 Mon Sep 17 00:00:00 2001 From: Tucker Date: Wed, 15 Feb 2023 10:37:59 -0500 Subject: [PATCH 4/4] Fix type --- smarts/env/tests/test_hiway_env_v1.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py index 0d09673395..c6d24a6784 100644 --- a/smarts/env/tests/test_hiway_env_v1.py +++ b/smarts/env/tests/test_hiway_env_v1.py @@ -19,7 +19,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-from typing import Set +from typing import Dict import gymnasium as gym import pytest @@ -39,7 +39,7 @@ def agent_interfaces(): @pytest.fixture -def env(agent_interfaces): +def env(agent_interfaces: Dict[str, AgentInterface]): env: HiWayEnvV1 = gym.make( "smarts.env:hiway-v1", scenarios=["scenarios/sumo/loop"], @@ -63,7 +63,7 @@ def test_hiway_env_v1_type(env: gym.Env): def test_hiway_env_v1_interface_generation( - env: HiWayEnvV1, agent_interfaces: Set[AgentInterface] + env: HiWayEnvV1, agent_interfaces: Dict[str, AgentInterface] ): agent_ids = set(agent_interfaces) assert agent_ids == set(env.agent_interfaces)