@@ -1,7 +1,8 @@
 from gymnasium.spaces import Dict
 
-from abmarl.sim.agent_based_simulation import ActingAgent, ObservingAgent, is_agent
+from abmarl.sim.agent_based_simulation import ActingAgent, ObservingAgent, Agent, \
+    is_agent, AgentBasedSimulation
 
 
 try:
     from ray.rllib import MultiAgentEnv
@@ -61,6 +62,116 @@ def render(self, *args, **kwargs):
             """See SimulationManager."""
             return self.sim.render(*args, **kwargs)
 
+
+    class MultiAgentABS(AgentBasedSimulation):
+        """
+        Wraps an RLlib MultiAgentEnv and leverages it for implementing the ABS interface.
+
+        Args:
+            multi_agent_env: The MultiAgentEnv to convert to an AgentBasedSimulation.
+            null_observation: Optional dictionary mapping each agent's id to a null
+                observation within that agent's observation space.
+            null_action: Optional dictionary mapping each agent's id to a null
+                action within that agent's action space.
+        """
+        def __init__(self, multi_agent_env, null_observation=None, null_action=None, **kwargs):
+            assert isinstance(multi_agent_env, MultiAgentEnv), \
+                "multi_agent_env must be a MultiAgentEnv."
+            assert multi_agent_env._action_space_in_preferred_format and \
+                multi_agent_env._obs_space_in_preferred_format, \
+                "The action and observation spaces must be in the preferred format."
+            self._env = multi_agent_env
+            if not null_action:
+                null_action = {}
+            if not null_observation:
+                null_observation = {}
+            agents = {
+                agent_id: Agent(
+                    id=agent_id,
+                    observation_space=multi_agent_env.observation_space[agent_id],
+                    null_observation=null_observation.get(agent_id),
+                    action_space=multi_agent_env.action_space[agent_id],
+                    null_action=null_action.get(agent_id),
+                ) for agent_id in multi_agent_env._agent_ids
+            }
+            super().__init__(agents=agents, **kwargs)
+            # ABS storage
+            self._obs = None
+            self._reward = None
+            self._done = None
+            self._info = None
+
+        def reset(self, **kwargs):
+            """
+            Reset the simulation and store the observation and info.
+            """
+            self._obs, self._info = self._env.reset()
+
+        def step(self, action_dict, *args, **kwargs):
+            """
+            Step the simulation and store the relevant data.
+
+            Args:
+                action_dict: The agents' actions. Because this is an AgentBasedSimulation,
+                    the action will come in the form of a dictionary mapping the agents'
+                    ids to their actions.
+            """
+            self._obs, self._reward, term, trunc, self._info = self._env.step(
+                action_dict, *args, **kwargs
+            )
+            # An agent is done if it has either terminated or been truncated.
+            self._done = {**term, **trunc}
+            for agent in self._done:
+                self._done[agent] = term.get(agent, False) or trunc.get(agent, False)
+
+        def render(self, **kwargs):
+            self._env.render(**kwargs)
+
+        def get_obs(self, agent_id, **kwargs):
+            """
+            Return the stored observation, either from reset or step, whichever was last called.
+            """
+            return self._obs[agent_id]
+
+        def get_reward(self, agent_id, **kwargs):
+            """
+            Return the reward stored from the most recent step.
+            """
+            return self._reward[agent_id]
+
+        def get_done(self, agent_id, **kwargs):
+            """
+            Return the done status stored from the most recent step.
+            """
+            return self._done[agent_id]
+
+        def get_all_done(self, **kwargs):
+            """
+            Return the stored done status under "__all__".
+            """
+            return self._done['__all__']
+
+        def get_info(self, agent_id, **kwargs):
+            """
+            Return the stored info, either from reset or step, whichever was last called.
+            """
+            return self._info[agent_id]
+
+    def multi_agent_to_abmarl(multi_agent_env, null_observation=None, null_action=None):
+        """
+        Convert a MultiAgentEnv to an AgentBasedSimulation.
+
+        Args:
+            multi_agent_env: The MultiAgentEnv to be converted.
+            null_observation: Optional dictionary mapping each agent's id to a null
+                observation within that agent's observation space.
+            null_action: Optional dictionary mapping each agent's id to a null
+                action within that agent's action space.
+        """
+        return MultiAgentABS(
+            multi_agent_env,
+            null_observation,
+            null_action
+        )
+
 except ImportError:
     class MultiAgentWrapper:
         """
@@ -71,3 +182,22 @@ def __init__(self, sim):
                 "Cannot use MultiAgentWrapper without RLlib. Please install the "
                 "RLlib extra with, for example, pip install abmarl[rllib]."
             )
+
+    class MultiAgentABS(AgentBasedSimulation):
+        """
+        Stub for MultiAgentABS class, which is not implemented without RLlib.
+        """
+        def __init__(self, sim):
+            raise NotImplementedError(
+                "Cannot use MultiAgentABS without RLlib. Please install the "
+                "RLlib extra with, for example, pip install abmarl[rllib]."
+            )
+
+    def multi_agent_to_abmarl(*args, **kwargs):
+        """
+        Stub for multi_agent_to_abmarl function, which is not implemented without RLlib.
+        """
+        raise NotImplementedError(
+            "Cannot use multi_agent_to_abmarl without RLlib. Please install the "
+            "RLlib extra with, for example, pip install abmarl[rllib]."
+        )
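
For orientation, a minimal usage sketch of the conversion path this commit adds (not part of the diff itself). MyMultiAgentEnv stands in for a hypothetical RLlib MultiAgentEnv defined elsewhere whose observation_space and action_space are per-agent Dict spaces in the preferred format and which exposes an _agent_ids set; the sketch also assumes the agents dictionary built in MultiAgentABS.__init__ is available as sim.agents, and the sampled null values are placeholders only.

    # Hypothetical MultiAgentEnv subclass, defined elsewhere, with per-agent Dict
    # observation and action spaces (the preferred format asserted in __init__).
    env = MyMultiAgentEnv()

    # The optional null dictionaries map each agent id to a fallback point inside
    # that agent's space; random samples stand in as placeholders here.
    sim = multi_agent_to_abmarl(
        env,
        null_observation={aid: env.observation_space[aid].sample() for aid in env._agent_ids},
        null_action={aid: env.action_space[aid].sample() for aid in env._agent_ids},
    )

    # Drive the wrapped environment through the AgentBasedSimulation interface.
    sim.reset()
    actions = {aid: agent.action_space.sample() for aid, agent in sim.agents.items()}
    sim.step(actions)
    obs = {aid: sim.get_obs(aid) for aid in sim.agents}
    all_done = sim.get_all_done()

From here the wrapped simulation can be handed to an Abmarl simulation manager like any other AgentBasedSimulation.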