From 371e2aab47f16c9d4b0df053eb613abe642d0484 Mon Sep 17 00:00:00 2001
From: Tucker <montgomery.alban@huawei.com>
Date: Mon, 13 Feb 2023 19:51:25 -0500
Subject: [PATCH 1/4] Add reset options.

---
 CHANGELOG.md                          |  4 ++
 docs/sim/env.rst                      | 13 ++--
 smarts/env/gymnasium/hiway_env_v1.py  | 22 +++++-
 smarts/env/tests/test_hiway_env_v1.py | 99 +++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 9 deletions(-)
 create mode 100644 smarts/env/tests/test_hiway_env_v1.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 40f44dd2a4..5c6a2c0f43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,11 @@ Copy and pasting the git commit messages is __NOT__ enough.
 ### Added
 - Added a zoo agent, named Control-and-Supervised-Learning, from NeurIPS 2022 submission. This zoo agent runs in benchmark `driving_smarts==0.0`.
 - Added a zoo agent, named Discrete Soft Actor Critic, from NeurIPS 2022 submission. This zoo agent runs in benchmark `driving_smarts==0.0`.
+- Added basic tests for `hiway-v1` resetting and unformatted observations and actions.
 ### Changed
+- `HiWayEnvV1` derived environments now allow an explicit scenario through `reset(options={"scenario": <scenario>})`.
+- `HiWayEnvV1` derived environments now allow an explicit simulation start time through `reset(options={"start_time": <start_time>})`.
+- Exposed `smarts` as a property on `HiWayEnvV1`.
 ### Deprecated
 ### Fixed
 ### Removed
diff --git a/docs/sim/env.rst b/docs/sim/env.rst
index 5c984612d1..9b1c7a4e9b 100644
--- a/docs/sim/env.rst
+++ b/docs/sim/env.rst
@@ -9,15 +9,15 @@ Base environments
 SMARTS environment module is defined in :mod:`~smarts.env` package. Currently SMARTS provides two kinds of training 
 environments, namely:
 
-+ ``HiwayEnv`` utilising ``gym.env`` style interface 
++ ``HiWayEnv`` utilising ``gym.env`` style interface 
 + ``RLlibHiwayEnv`` customized for `RLlib <https://docs.ray.io/en/latest/rllib/index.html>`_ training
 
 .. image:: ../_static/env.png
 
-HiwayEnv
+HiWayEnv
 ^^^^^^^^
 
-``HiwayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. An usage example is shown below.
+``HiWayEnv`` inherits class ``gym.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below.
 Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details.
 
 .. code-block:: python
@@ -44,10 +44,10 @@ Refer to :class:`~smarts.env.hiway_env.HiWayEnv` for more details.
     # Close env.
     env.close()
 
-HiwayEnvV1
+HiWayEnvV1
 ^^^^^^^^^^
 
-``HiwayEnvV1`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. An usage example is shown below.
+``HiWayEnvV1`` inherits class ``gymnasium.Env`` and supports gym APIs like ``reset``, ``step``, ``close``. A usage example is shown below.
 This version has two configurations of observation output: `ObservationOptions.full` which provides padded agents in the observations which
 exactly matches the `env.observation_space`, and `ObservationOptions.multi_agent` which provides only agents as are currently active. Refer to
 :class:`~smarts.env.gymnasium.hiway_env_v1.HiWayEnvV1` for more details.
@@ -66,7 +66,8 @@ exactly matches the `env.observation_space`, and `ObservationOptions.multi_agent
         )
 
     # Reset env and build agent.
-    observations, infos = env.reset()
+    ## `reset(options)` forwards "scenario" and "start_time" to `smarts.reset()`.
+    observations, infos = env.reset() 
     agent = agent_spec.build_agent()
 
     # Step env.
diff --git a/smarts/env/gymnasium/hiway_env_v1.py b/smarts/env/gymnasium/hiway_env_v1.py
index 73fc1895d6..2411037c4c 100644
--- a/smarts/env/gymnasium/hiway_env_v1.py
+++ b/smarts/env/gymnasium/hiway_env_v1.py
@@ -338,7 +338,9 @@ def reset(
                 If you pass an integer, the PRNG will be reset even if it already exists.
                 Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
             options (optional dict): Additional information to specify how the environment is reset (optional,
-                depending on the specific environment)
+                depending on the specific environment). Forwards to :meth:`~smarts.core.smarts.SMARTS.reset`.
+                - "scenario" (smarts.core.scenario.Scenario): An explicit scenario to reset to. The default is the next scenario from the scenario iterator.
+                - "start_time" (float): The simulation time at which the scenario starts. The default is 0.
 
         Returns:
             observation (dict): Observation of the initial state. This will be an element of :attr:`observation_space`
@@ -347,10 +349,15 @@ def reset(
                 the ``info`` returned by :meth:`step`.
         """
         super().reset(seed=seed, options=options)
-        scenario = next(self._scenarios_iterator)
+        options = options or {}
+        scenario = options.get("scenario")
+        if scenario is None:
+            scenario = next(self._scenarios_iterator)
 
         self._dones_registered = 0
-        observations = self._smarts.reset(scenario)
+        observations = self._smarts.reset(
+            scenario, start_time=options.get("start_time", 0)
+        )
         info = {"map_source": self._smarts.scenario.road_map.source}
 
         if self._env_renderer is not None:
@@ -497,3 +504,12 @@ def scenario(self) -> Scenario:
             scenario.Scenario: Current simulated scenario.
         """
         return self._smarts.scenario
+
+    @property
+    def smarts(self):
+        """Gives access to the underlying simulator. Use this carefully.
+
+        Returns:
+            smarts.core.smarts.SMARTS: The smarts simulator instance.
+        """
+        return self._smarts
diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py
new file mode 100644
index 0000000000..0ac83817cf
--- /dev/null
+++ b/smarts/env/tests/test_hiway_env_v1.py
@@ -0,0 +1,99 @@
+# MIT License
+#
+# Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+import gymnasium as gym
+import pytest
+
+from smarts.core.agent_interface import AgentInterface, AgentType
+from smarts.core.utils.episodes import episodes
+from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1
+from smarts.core.scenario import Scenario
+
+AGENT_ID = "Agent-007"
+MAX_EPISODES = 3
+
+
+@pytest.fixture
+def env():
+    agent_interfaces = {
+        AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)
+    }
+    agent_ids = set(agent_interfaces)
+    env: HiWayEnvV1 = gym.make(
+        "smarts.env:hiway-v1",
+        scenarios=["scenarios/sumo/loop"],
+        agent_interfaces=agent_interfaces,
+        action_options="unformatted",
+        observation_options="unformatted",
+        headless=True,
+        visdom=False,
+        fixed_timestep_sec=0.01,
+        disable_env_checker=True,
+    )
+    assert isinstance(env.unwrapped, HiWayEnvV1)
+    assert not (agent_ids - set(env.agent_interfaces))
+    matching_items = [
+        env.agent_interfaces[k] == agent_interfaces[k]
+        for k in env.agent_interfaces
+        if k in agent_interfaces
+    ]
+    assert all(matching_items)
+    assert len(env.agent_interfaces) == len(agent_interfaces)
+    assert not (agent_ids - env.agent_ids)
+    yield env
+    env.close()
+
+
+def test_hiway_env_v1_unformatted(env: HiWayEnvV1):
+    episode = None
+    for episode in episodes(n=MAX_EPISODES):
+        observations = env.reset()
+        episode.record_scenario(env.scenario_log)
+
+        terminated = {"__all__": False}
+        while not terminated["__all__"]:
+            observations, rewards, terminated, truncated, infos = env.step(
+                {AGENT_ID: "keep_lane"}
+            )
+
+            # Reward is currently the delta in distance travelled by the agent.
+            # Ensure that it is infact a delta and not total distance travelled
+            # since this bug has appeared a few times. Verify by ensuring the
+            # reward does not grow unbounded.
+            assert all(
+                [-3 < reward < 3 for reward in rewards.values()]
+            ), f"Expected bounded reward per timestep, but got {rewards}."
+
+            episode.record_step(observations, rewards, terminated, infos)
+
+    assert episode is not None and episode.index == (
+        MAX_EPISODES - 1
+    ), "Simulation must cycle through to the final episode."
+
+
+def test_hiway_env_v1_reset_with_scenario(env: HiWayEnvV1):
+    scenarios = ["scenarios/sumo/loop"]
+    scenario: Scenario = next(Scenario.scenario_variations(scenarios, [AGENT_ID]))
+
+    env.reset(options={"scenario": scenario, "start_time": 100})
+    assert "loop" in env.scenario.root_filepath
+    assert env.smarts.elapsed_sim_time >= 100
+    env.step({AGENT_ID: "keep_lane"})

From b661dffde3b33cbc87ee058c27b18ac8d6847cad Mon Sep 17 00:00:00 2001
From: Tucker <montgomery.alban@huawei.com>
Date: Tue, 14 Feb 2023 09:08:46 -0500
Subject: [PATCH 2/4] Fix type test.

---
 smarts/env/tests/test_hiway_env_v1.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py
index 0ac83817cf..46020ce3d0 100644
--- a/smarts/env/tests/test_hiway_env_v1.py
+++ b/smarts/env/tests/test_hiway_env_v1.py
@@ -78,6 +78,7 @@ def test_hiway_env_v1_unformatted(env: HiWayEnvV1):
             # Ensure that it is infact a delta and not total distance travelled
             # since this bug has appeared a few times. Verify by ensuring the
             # reward does not grow unbounded.
+            assert isinstance(rewards, dict)
             assert all(
                 [-3 < reward < 3 for reward in rewards.values()]
             ), f"Expected bounded reward per timestep, but got {rewards}."

From f25fadab172c20e640eef6cd402bf3ff2299194c Mon Sep 17 00:00:00 2001
From: Tucker <montgomery.alban@huawei.com>
Date: Wed, 15 Feb 2023 10:36:34 -0500
Subject: [PATCH 3/4] Clean up test.

---
 smarts/env/tests/test_hiway_env_v1.py | 49 ++++++++++++++++-----------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py
index 46020ce3d0..0d09673395 100644
--- a/smarts/env/tests/test_hiway_env_v1.py
+++ b/smarts/env/tests/test_hiway_env_v1.py
@@ -19,24 +19,27 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
+from typing import Set
+
 import gymnasium as gym
 import pytest
 
 from smarts.core.agent_interface import AgentInterface, AgentType
+from smarts.core.scenario import Scenario
 from smarts.core.utils.episodes import episodes
 from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1
-from smarts.core.scenario import Scenario
 
 AGENT_ID = "Agent-007"
 MAX_EPISODES = 3
 
 
 @pytest.fixture
-def env():
-    agent_interfaces = {
-        AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)
-    }
-    agent_ids = set(agent_interfaces)
+def agent_interfaces():
+    return {AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)}
+
+
+@pytest.fixture
+def env(agent_interfaces):
     env: HiWayEnvV1 = gym.make(
         "smarts.env:hiway-v1",
         scenarios=["scenarios/sumo/loop"],
@@ -48,20 +51,26 @@ def env():
         fixed_timestep_sec=0.01,
         disable_env_checker=True,
     )
-    assert isinstance(env.unwrapped, HiWayEnvV1)
-    assert not (agent_ids - set(env.agent_interfaces))
-    matching_items = [
-        env.agent_interfaces[k] == agent_interfaces[k]
-        for k in env.agent_interfaces
-        if k in agent_interfaces
-    ]
-    assert all(matching_items)
-    assert len(env.agent_interfaces) == len(agent_interfaces)
-    assert not (agent_ids - env.agent_ids)
     yield env
     env.close()
 
 
+def test_hiway_env_v1_type(env: gym.Env):
+    # is base environment (also passes up correct environment)
+    assert isinstance(env.unwrapped, HiWayEnvV1)
+    # inherits gym.Env
+    assert isinstance(env.unwrapped, gym.Env)
+
+
+def test_hiway_env_v1_interface_generation(
+    env: HiWayEnvV1, agent_interfaces: Set[AgentInterface]
+):
+    agent_ids = set(agent_interfaces)
+    assert agent_ids == set(env.agent_interfaces)
+    assert all([env.agent_interfaces[k] == agent_interfaces[k] for k in agent_ids])
+    assert not (agent_ids - env.agent_ids)
+
+
 def test_hiway_env_v1_unformatted(env: HiWayEnvV1):
     episode = None
     for episode in episodes(n=MAX_EPISODES):
@@ -91,10 +100,10 @@ def test_hiway_env_v1_unformatted(env: HiWayEnvV1):
 
 
 def test_hiway_env_v1_reset_with_scenario(env: HiWayEnvV1):
-    scenarios = ["scenarios/sumo/loop"]
+    scenarios = ["scenarios/sumo/figure_eight"]
     scenario: Scenario = next(Scenario.scenario_variations(scenarios, [AGENT_ID]))
 
-    env.reset(options={"scenario": scenario, "start_time": 100})
-    assert "loop" in env.scenario.root_filepath
-    assert env.smarts.elapsed_sim_time >= 100
+    env.reset(options={"scenario": scenario, "start_time": 1000})
+    assert "figure_eight" in env.scenario.root_filepath
+    assert env.smarts.elapsed_sim_time >= 1000
     env.step({AGENT_ID: "keep_lane"})

From 844096207abeb9d59677ebbfaf31122e7f289e01 Mon Sep 17 00:00:00 2001
From: Tucker <montgomery.alban@huawei.com>
Date: Wed, 15 Feb 2023 10:37:59 -0500
Subject: [PATCH 4/4] Fix type

---
 smarts/env/tests/test_hiway_env_v1.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py
index 0d09673395..c6d24a6784 100644
--- a/smarts/env/tests/test_hiway_env_v1.py
+++ b/smarts/env/tests/test_hiway_env_v1.py
@@ -19,7 +19,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
-from typing import Set
+from typing import Dict
 
 import gymnasium as gym
 import pytest
@@ -39,7 +39,7 @@ def agent_interfaces():
 
 
 @pytest.fixture
-def env(agent_interfaces):
+def env(agent_interfaces: Dict[str, AgentInterface]):
     env: HiWayEnvV1 = gym.make(
         "smarts.env:hiway-v1",
         scenarios=["scenarios/sumo/loop"],
@@ -63,7 +63,7 @@ def test_hiway_env_v1_type(env: gym.Env):
 
 
 def test_hiway_env_v1_interface_generation(
-    env: HiWayEnvV1, agent_interfaces: Set[AgentInterface]
+    env: HiWayEnvV1, agent_interfaces: Dict[str, AgentInterface]
 ):
     agent_ids = set(agent_interfaces)
     assert agent_ids == set(env.agent_interfaces)