
Add reset options to "hiway-v1". #1862

Merged: 4 commits, Feb 15, 2023
Changes from 2 commits
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -12,7 +12,11 @@ Copy and pasting the git commit messages is __NOT__ enough.
### Added
- Added a zoo agent, named Control-and-Supervised-Learning, from NeurIPS 2022 submission. This zoo agent runs in benchmark `driving_smarts==0.0`.
- Added a zoo agent, named Discrete Soft Actor Critic, from NeurIPS 2022 submission. This zoo agent runs in benchmark `driving_smarts==0.0`.
- Added basic tests for `hiway-v1` resetting and unformatted observations and actions.
### Changed
- `HiWayEnvV1`-derived environments now allow an explicit scenario through `reset(options["scenario"])`.
- `HiWayEnvV1`-derived environments now allow an explicit simulation start time through `reset(options["start_time"])`.
- Exposed `smarts` as a property on `HiWayEnvV1`.
### Deprecated
### Fixed
### Removed
13 changes: 7 additions & 6 deletions docs/sim/env.rst
@@ -9,15 +9,15 @@ Base environments
The SMARTS environment module is defined in the :mod:`~smarts.env` package. Currently, SMARTS provides two kinds of training
environments, namely:

+ ``HiwayEnv`` utilising ``gym.env`` style interface
+ ``HiWayEnv`` utilising ``gym.env`` style interface
+ ``RLlibHiwayEnv`` customized for `RLlib <https://docs.ray.io/en/latest/rllib/index.html>`_ training

.. image:: ../_static/env.png

HiwayEnv
HiWayEnv
^^^^^^^^

``HiwayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below.
``HiWayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below.
Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details.

.. code-block:: python
@@ -44,10 +44,10 @@ Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details.
# Close env.
env.close()

HiwayEnvV1
HiWayEnvV1
^^^^^^^^^^

``HiwayEnvV1`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below.
``HiWayEnvV1`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below.
This version has two configurations of observation output: `ObservationOptions.full`, which provides padded agents in the observations so that they
exactly match the `env.observation_space`, and `ObservationOptions.multi_agent`, which provides only the currently active agents. Refer to
:class:`~smarts.env.gymnasium.hiway_env_v1.HiWayEnvV1` for more details.
@@ -66,7 +66,8 @@ exactly matches the `env.observation_space`, and `ObservationOptions.multi_agent
)

# Reset env and build agent.
observations, infos = env.reset()
## `reset(options)` forwards "scenario" and "start_time" to `smarts.reset()`
observations, infos = env.reset()
agent = agent_spec.build_agent()

# Step env.
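A minimal usage sketch of the reset options described in the docs excerpt above, assuming the `scenarios/sumo/loop` scenario and the `Agent-007` agent id used in the test added by this PR; construction kwargs not shown are left at their defaults.

import gymnasium as gym

from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.scenario import Scenario

agent_interfaces = {
    "Agent-007": AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)
}
env = gym.make(
    "smarts.env:hiway-v1",
    scenarios=["scenarios/sumo/loop"],
    agent_interfaces=agent_interfaces,
    headless=True,
)

# Default behaviour: reset() draws the next scenario from the internal iterator.
observations, infos = env.reset()

# New behaviour: pass an explicit scenario and simulation start time via `options`;
# both values are forwarded to the underlying SMARTS reset.
scenario = next(Scenario.scenario_variations(["scenarios/sumo/loop"], ["Agent-007"]))
observations, infos = env.reset(options={"scenario": scenario, "start_time": 100})

env.close()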
22 changes: 19 additions & 3 deletions smarts/env/gymnasium/hiway_env_v1.py
@@ -338,7 +338,9 @@ def reset(
If you pass an integer, the PRNG will be reset even if it already exists.
Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
options (optional dict): Additional information to specify how the environment is reset (optional,
depending on the specific environment)
depending on the specific environment). Forwards to :meth:`~smarts.core.smarts.SMARTS.reset`.
- "scenario" (smarts.sstudio.Scenario): An explicit scenario to reset to. The default is a scenario from the scenario iter.
- "start_time" (float): Forwards the start time of the current scenario. The default is 0.

Returns:
observation (dict): Observation of the initial state. This will be an element of :attr:`observation_space`
@@ -347,10 +349,15 @@
the ``info`` returned by :meth:`step`.
"""
super().reset(seed=seed, options=options)
scenario = next(self._scenarios_iterator)
options = options or {}
scenario = options.get("scenario")
if scenario is None:
scenario = next(self._scenarios_iterator)

self._dones_registered = 0
observations = self._smarts.reset(scenario)
observations = self._smarts.reset(
scenario, start_time=options.get("start_time", 0)
)
info = {"map_source": self._smarts.scenario.road_map.source}

if self._env_renderer is not None:
@@ -497,3 +504,12 @@ def scenario(self) -> Scenario:
scenario.Scenario: Current simulated scenario.
"""
return self._smarts.scenario

@property
def smarts(self):
"""Gives access to the underlying simulator. Use this carefully.

Returns:
smarts.core.smarts.SMARTS: The smarts simulator instance.
"""
return self._smarts
Review comment from a Member on lines +508 to +515:

Considering we have previously provided `def scenario()`, what is the use case for additionally providing `def smarts()`?

Reply from @Gamenot (Collaborator, Author), Feb 15, 2023:

Generally just for direct access to SMARTS. I am somewhat cautious about allowing this, but if we do not allow it there may be little exposure to actually using SMARTS directly.

I am partially hoping people will be tempted to use it.
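A minimal illustration of the direct access the new `smarts` property enables, assuming an environment built and reset as in the sketch above; `elapsed_sim_time` and `scenario.road_map.source` are the same attributes touched elsewhere in this PR.

# Direct access to the underlying simulator instance (use carefully, per the docstring).
simulator = env.smarts  # smarts.core.smarts.SMARTS
print(simulator.elapsed_sim_time)          # how far the simulation has advanced
print(simulator.scenario.road_map.source)  # same value surfaced as info["map_source"] in reset()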

100 changes: 100 additions & 0 deletions smarts/env/tests/test_hiway_env_v1.py
@@ -0,0 +1,100 @@
# MIT License
#
# Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import gymnasium as gym
import pytest

from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.utils.episodes import episodes
from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1
from smarts.core.scenario import Scenario

AGENT_ID = "Agent-007"
MAX_EPISODES = 3


@pytest.fixture
def env():
agent_interfaces = {
AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)
}
agent_ids = set(agent_interfaces)
env: HiWayEnvV1 = gym.make(
"smarts.env:hiway-v1",
scenarios=["scenarios/sumo/loop"],
agent_interfaces=agent_interfaces,
action_options="unformatted",
observation_options="unformatted",
headless=True,
visdom=False,
fixed_timestep_sec=0.01,
Review comment from a Member:

Maybe we should simply make `fixed_timestep_sec=0.01` the default value in HiWayEnvV1.

Reply from the Collaborator Author:

Why 0.01? The fixed time-step for the environment is 0.1 if not already given.

disable_env_checker=True,
)
assert isinstance(env.unwrapped, HiWayEnvV1)
assert not (agent_ids - set(env.agent_interfaces))
matching_items = [
env.agent_interfaces[k] == agent_interfaces[k]
for k in env.agent_interfaces
if k in agent_interfaces
]
assert all(matching_items)
assert len(env.agent_interfaces) == len(agent_interfaces)
assert not (agent_ids - env.agent_ids)
Review comment from a Member:

Wondering whether these checks could be simplified.

Reply from the Collaborator Author:

I also note that these checks should not be in the fixture. I am moving and simplifying them.

yield env
env.close()


def test_hiway_env_v1_unformatted(env: HiWayEnvV1):
episode = None
for episode in episodes(n=MAX_EPISODES):
observations = env.reset()
episode.record_scenario(env.scenario_log)

terminated = {"__all__": False}
while not terminated["__all__"]:
observations, rewards, terminated, truncated, infos = env.step(
{AGENT_ID: "keep_lane"}
)

# Reward is currently the delta in distance travelled by the agent.
# Ensure that it is in fact a delta and not total distance travelled
# since this bug has appeared a few times. Verify by ensuring the
# reward does not grow unbounded.
assert isinstance(rewards, dict)
assert all(
[-3 < reward < 3 for reward in rewards.values()]
), f"Expected bounded reward per timestep, but got {rewards}."

episode.record_step(observations, rewards, terminated, infos)

assert episode is not None and episode.index == (
MAX_EPISODES - 1
), "Simulation must cycle through to the final episode."


def test_hiway_env_v1_reset_with_scenario(env: HiWayEnvV1):
scenarios = ["scenarios/sumo/loop"]
scenario: Scenario = next(Scenario.scenario_variations(scenarios, [AGENT_ID]))

env.reset(options={"scenario": scenario, "start_time": 100})
assert "loop" in env.scenario.root_filepath
Review comment from a Member:

Given the env originally only had one scenario, namely scenarios/sumo/loop, testing reset functionality by resetting to the same scenario might be an insufficient test. We could try resetting the env to a different scenario to check whether the functionality works.

Reply from the Collaborator Author:

Right, I was grabbing something quick; I will try "cloverleaf".

Reply from the Collaborator Author:

Used figure_eight.

assert env.smarts.elapsed_sim_time >= 100
env.step({AGENT_ID: "keep_lane"})
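As suggested in the review above, a follow-up sketch that resets to a scenario different from the one the environment was built with; the author switched the test to figure_eight, and the exact path `scenarios/sumo/figure_eight` is assumed here for illustration.

# Reset to a different scenario than the env was constructed with (path assumed).
other_scenario: Scenario = next(
    Scenario.scenario_variations(["scenarios/sumo/figure_eight"], [AGENT_ID])
)
env.reset(options={"scenario": other_scenario})
assert "figure_eight" in env.scenario.root_filepath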