add in offline baseline code
cz0204 committed Jul 20, 2022
1 parent 76ff466 commit 373267e
Showing 11 changed files with 983 additions and 0 deletions.
32 changes: 32 additions & 0 deletions competition/track-2/submission/README.md
@@ -0,0 +1,32 @@
# Submission
Once a model has been trained for the `multi-scenario-v0` environments, place all files necessary to run the trained model inside this `submission` folder.

The files `policy.py`, `requirements.txt`, and `explanation.md` must be included with the submission. Their contents are explained below.

## Policy
+ The file `policy.py` must include a `Policy` class which inherits from the `BasePolicy` class.
+ The `Policy` class must implement an `act` method which accepts observations and returns actions.
+ Any policy initialization, including loading of the model, may be performed inside the `__init__` method of the `Policy` class.
+ A random policy, implemented as the `RandomPolicy` class, is provided for reference.

## Wrappers
+ The file `policy.py` must include a `submitted_wrappers()` function.
+ The function `submitted_wrappers()` must return a list of callable wrappers, if any are used, else return an empty list `[]`.
+ Use of wrappers is optional. A minimal sketch of `policy.py` illustrating the `Policy` class and `submitted_wrappers()` is shown below.
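
For illustration only, here is a minimal sketch of what `policy.py` could look like. The random-action logic, the `BasePolicy` stub, and the commented wrapper imports are assumptions used to show the required structure; replace them with the trained model and the wrappers actually used by the submission.

```python
from typing import Any, Dict, List

import numpy as np


class BasePolicy:
    """Sketch of the interface expected by the evaluator; the starter kit
    provides the actual `BasePolicy` class."""

    def act(self, obs: Dict[str, Any]) -> Dict[str, Any]:
        raise NotImplementedError


class Policy(BasePolicy):
    """Example policy returning a random discrete action for every agent."""

    def __init__(self):
        # Perform any initialization here, e.g. load a trained model:
        # self.model = torch.load("model.pt")  # hypothetical file name
        self._num_actions = 4  # matches the discrete space in `action.py`

    def act(self, obs: Dict[str, Any]) -> Dict[str, Any]:
        """Map each agent's observation to an action."""
        return {
            agent_id: np.random.randint(0, self._num_actions)
            for agent_id in obs.keys()
        }


def submitted_wrappers() -> List:
    """Return a list of wrapper callables, or an empty list if none are used."""
    # e.g. from action import Action; from observation import FilterObs, SaveObs
    # return [SaveObs, FilterObs, Action]
    return []
```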

## Requirements
+ Create a `requirements.txt` file containing all the dependencies needed to run the submitted model.
+ The dependencies will be installed prior to evaluating the submitted model.

## Explanation
+ Include an `explanation.md` file explaining the key techniques used in developing the submitted model.

## Submit to Codalab
+ Zip the `submission` folder.
+ If the `submission` folder is located at `<path>/SMARTS/competition/submission`, run `make submission.zip` from the `<path>/SMARTS/competition` directory to create the zipped submission folder.
+ Upload the `submission.zip` to CodaLab.
+ Go to the [CodaLab competition page](https://codalab.lisn.upsaclay.fr/).
+ Click `My Competitions -> Competitions I'm In`.
+ Select the SMARTS competition.
+ Click `Participate -> Submit/View Results -> Submit`.
+ Upload the zipped submission folder.
Empty file.
84 changes: 84 additions & 0 deletions competition/track-2/submission/action.py
@@ -0,0 +1,84 @@
from typing import Any, Callable, Dict, Tuple

import gym
import numpy as np


class Action(gym.ActionWrapper):
"""Modifies the action space."""

def __init__(self, env: gym.Env):
"""Sets identical action space, denoted by `space`, for all agents.
Args:
env (gym.Env): Gym env to be wrapped.
"""
super().__init__(env)
self._wrapper, action_space = _discrete()

self.action_space = gym.spaces.Dict(
{agent_id: action_space for agent_id in env.action_space.spaces.keys()}
)

def action(self, action):
"""Adapts the action input to the wrapped environment.
`self.saved_obs` is retrieved from SaveObs wrapper. It contains previously
saved observation parameters.
Note: Users should not directly call this method.
"""
wrapped_act = self._wrapper(action, self.saved_obs)

return wrapped_act


def _discrete() -> Tuple[
Callable[[Dict[str, int], Dict[str, Any]], Dict[str, np.ndarray]], gym.Space
]:
space = gym.spaces.Discrete(n=4)

time_delta = 0.1 # Time, in seconds, between steps.
angle = 30 / 180 * np.pi # Turning angle in radians
speed = 40 # Speed in km/h
dist = (
speed * 1000 / 3600 * time_delta
) # Distance, in meters, travelled in time_delta seconds

action_map = {
# key: [magnitude, angle]
0: [0, 0], # slow_down
1: [dist, 0], # keep_direction
2: [dist, angle], # turn_left
3: [dist, -angle], # turn_right
}

def wrapper(
action: Dict[str, int], saved_obs: Dict[str, Any]
) -> Dict[str, np.ndarray]:
wrapped_act = {}
for agent_id, agent_action in action.items():
new_heading = saved_obs[agent_id]["heading"] + action_map[agent_action][1]
new_heading = (new_heading + np.pi) % (2 * np.pi) - np.pi

magnitude = action_map[agent_action][0]
cur_coord = (
saved_obs[agent_id]["pos"][0] + 1j * saved_obs[agent_id]["pos"][1]
)
# Note: On the map, angle is zero at positive y axis, and increases anti-clockwise.
# In np.exp(), angle is zero at positive x axis, and increases anti-clockwise.
# Hence, numpy_angle = map_angle + π/2
new_pos = cur_coord + magnitude * np.exp(1j * (new_heading + np.pi / 2))
x_coord = np.real(new_pos)
y_coord = np.imag(new_pos)

wrapped_act.update(
{
agent_id: np.array(
[x_coord, y_coord, new_heading, time_delta], dtype=np.float32
)
}
)

return wrapped_act

return wrapper, space
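
As a quick sanity check on the angle convention noted in the comments above (map heading is zero along the positive y-axis, while `np.exp` measures angles from the positive x-axis), the following standalone snippet, which is not part of the baseline, reproduces one position update for a vehicle at the origin with heading zero:

```python
import numpy as np

# Reproduce one position update from `_discrete()` for heading 0 (facing +y).
time_delta = 0.1                          # seconds between steps
speed = 40                                # km/h
dist = speed * 1000 / 3600 * time_delta   # ~1.11 m travelled per step

heading = 0.0                             # map heading 0 == positive y-axis
pos = 0.0 + 0.0j                          # ego position as a complex number

# numpy_angle = map_angle + pi/2, so heading 0 becomes pi/2 (the +y direction).
new_pos = pos + dist * np.exp(1j * (heading + np.pi / 2))
print(round(np.real(new_pos), 3), round(np.imag(new_pos), 3))  # 0.0 1.111
```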
6 changes: 6 additions & 0 deletions competition/track-2/submission/explanation.md
@@ -0,0 +1,6 @@
# Explanation

Include explanation of your model here.

Provide a GitHub link to your code.

54 changes: 54 additions & 0 deletions competition/track-2/submission/network.py
@@ -0,0 +1,54 @@
import gym
import torch as th
import torch.nn as nn
from stable_baselines3.common.preprocessing import get_flattened_obs_dim
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, NatureCNN
from stable_baselines3.common.type_aliases import TensorDict


class CombinedExtractor(BaseFeaturesExtractor):
"""
:param observation_space: (gym.Space)
:param cnn_output_dim: (int) Number of features extracted from the `rgb` image by the CNN.
This corresponds to the number of units in its last layer.
"""

def __init__(self, observation_space: gym.spaces.Dict, cnn_output_dim: int = 256):
super(CombinedExtractor, self).__init__(observation_space, features_dim=1)
# We assume CxHxW images (channels first)

extractors = {}

total_concat_size = 0
for key, subspace in observation_space.spaces.items():
if key == "rgb":
extractors[key] = NatureCNN(subspace, features_dim=cnn_output_dim)
total_concat_size += cnn_output_dim
else:
# The observation key is a vector, flatten it if needed
extractors[key] = nn.Flatten()
total_concat_size += get_flattened_obs_dim(subspace)

self.extractors = nn.ModuleDict(extractors)

# Update the features dim manually
self._features_dim = total_concat_size

def forward(self, observations: TensorDict) -> th.Tensor:
encoded_tensor_list = []

for key, extractor in self.extractors.items():
encoded_tensor_list.append(extractor(observations[key]))
return th.cat(encoded_tensor_list, dim=1)


def combined_extractor(config):
kwargs = {}
kwargs["policy"] = "MultiInputPolicy"
kwargs["policy_kwargs"] = dict(
features_extractor_class=CombinedExtractor,
features_extractor_kwargs=dict(cnn_output_dim=256),
net_arch=[],
)
kwargs["target_kl"] = 0.1
return kwargs
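
For context, the dictionary returned by `combined_extractor` matches keyword arguments of Stable-Baselines3's `PPO` constructor (`policy`, `policy_kwargs`, `target_kl`). The sketch below shows one plausible way to consume it; `make_env` is a hypothetical helper standing in for whatever code builds the dict-observation environment, which is not part of this commit.

```python
# Hedged usage sketch: passing the kwargs from `combined_extractor` to PPO.
from stable_baselines3 import PPO

from network import combined_extractor

config = {}                        # placeholder; `combined_extractor` ignores it
kwargs = combined_extractor(config)

env = make_env()                   # hypothetical: a gym.Env with Dict observations
model = PPO(
    env=env,
    verbose=1,
    **kwargs,                      # supplies policy, policy_kwargs, and target_kl
)
model.learn(total_timesteps=1_000_000)
model.save("ppo_baseline")         # hypothetical output path
```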
187 changes: 187 additions & 0 deletions competition/track-2/submission/observation.py
@@ -0,0 +1,187 @@
from typing import Any, Dict
import copy
import gym
import numpy as np


class SaveObs(gym.ObservationWrapper):
"""Saves several selected observation parameters."""

def __init__(self, env: gym.Env):
"""
Args:
env (gym.Env): Environment to be wrapped.
"""
super().__init__(env)
self.saved_obs: Dict[str, Dict[str, Any]]

def observation(self, obs: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
"""Saves the wrapped environment's observation.
Note: Users should not directly call this method.
"""

obs_data = {}
for agent_id, agent_obs in obs.items():
obs_data.update(
{
agent_id: {
"pos": copy.deepcopy(agent_obs["ego"]["pos"]),
"heading": copy.deepcopy(agent_obs["ego"]["heading"]),
}
}
)
self.saved_obs = obs_data

return obs


class FilterObs(gym.ObservationWrapper):
"""Filter only the selected observation parameters."""

def __init__(self, env: gym.Env):
"""
Args:
env (gym.Env): Environment to be wrapped.
"""
super().__init__(env)
self.observation_space = gym.spaces.Dict(
{
agent_id: gym.spaces.Dict(
{
"rgb": gym.spaces.Box(
low=0,
high=255,
shape=(agent_obs_space["rgb"].shape[-1],)
+ agent_obs_space["rgb"].shape[:-1],
dtype=np.uint8,
),
"goal_distance": gym.spaces.Box(
low=-1e10,
high=+1e10,
shape=(1, 1),
dtype=np.float32,
),
"goal_heading": gym.spaces.Box(
low=-np.pi,
high=np.pi,
shape=(1, 1),
dtype=np.float32,
),
}
)
for agent_id, agent_obs_space in env.observation_space.spaces.items()
}
)

def observation(self, obs: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
"""Adapts the wrapped environment's observation.
Note: Users should not directly call this method.
"""
wrapped_obs = {}
for agent_id, agent_obs in obs.items():
# Channel first rgb
rgb = agent_obs["rgb"]
rgb = rgb.transpose(2, 0, 1)

# Distance between ego and goal.
goal_distance = np.array(
[
[
np.linalg.norm(
agent_obs["mission"]["goal_pos"] - agent_obs["ego"]["pos"]
)
]
],
dtype=np.float32,
)

# Ego's heading with respect to the map's coordinate system.
# Note: All angles returned by SMARTS are with respect to the map's coordinate system.
# On the map, angle is zero at positive y axis, and increases anti-clockwise.
ego_heading = (agent_obs["ego"]["heading"] + np.pi) % (2 * np.pi) - np.pi
ego_pos = agent_obs["ego"]["pos"]

# Goal's angle with respect to the ego's position.
# Note: In np.angle(), angle is zero at positive x axis, and increases anti-clockwise.
# Hence, map_angle = np.angle() - π/2
goal_pos = agent_obs["mission"]["goal_pos"]
rel_pos = goal_pos - ego_pos
goal_angle = np.angle(rel_pos[0] + 1j * rel_pos[1]) - np.pi / 2
goal_angle = (goal_angle + np.pi) % (2 * np.pi) - np.pi

# Goal heading is the angle correction required by ego agent to face the goal.
goal_heading = goal_angle - ego_heading
goal_heading = (goal_heading + np.pi) % (2 * np.pi) - np.pi
goal_heading = np.array([[goal_heading]], dtype=np.float32)

wrapped_obs.update(
{
agent_id: {
"rgb": np.uint8(rgb),
"goal_distance": goal_distance,
"goal_heading": goal_heading,
}
}
)

return wrapped_obs


class Concatenate(gym.ObservationWrapper):
"""Concatenates data from stacked dictionaries. Only works with nested gym.spaces.Box .
Dimension to stack over is determined by `channels_order`.
"""

def __init__(self, env: gym.Env, channels_order: str = "first"):
"""
Args:
env (gym.Env): Environment to be wrapped.
channels_order (str): A string, either "first" or "last", specifying
the dimension over which to stack each observation.
"""
super().__init__(env)

self._repeat_axis = {
"first": 0,
"last": -1,
}.get(channels_order)

for agent_name, agent_space in env.observation_space.spaces.items():
for subspaces in agent_space:
for key, space in subspaces.spaces.items():
assert isinstance(space, gym.spaces.Box), (
f"Concatenate only works with nested gym.spaces.Box. "
f"Got agent {agent_name} with key {key} and space {space}."
)

_, agent_space = next(iter(env.observation_space.spaces.items()))
self._num_stack = len(agent_space)
self._keys = agent_space[0].spaces.keys()

obs_space = {}
for agent_name, agent_space in env.observation_space.spaces.items():
subspaces = {}
for key, space in agent_space[0].spaces.items():
low = np.repeat(space.low, self._num_stack, axis=self._repeat_axis)
high = np.repeat(space.high, self._num_stack, axis=self._repeat_axis)
subspaces[key] = gym.spaces.Box(low=low, high=high, dtype=space.dtype)
obs_space.update({agent_name: gym.spaces.Dict(subspaces)})
self.observation_space = gym.spaces.Dict(obs_space)

def observation(self, obs):
"""Adapts the wrapped environment's observation.
Note: Users should not directly call this method.
"""

wrapped_obs = {}
for agent_id, agent_obs in obs.items():
stacked_obs = {}
for key in self._keys:
val = [frame_obs[key] for frame_obs in agent_obs]
stacked_obs[key] = np.concatenate(val, axis=self._repeat_axis)
wrapped_obs.update({agent_id: stacked_obs})

return wrapped_obs
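
To show how these observation wrappers are intended to compose with the `Action` wrapper, here is a hedged sketch of a wrapper chain. The environment id and the frame-stacking step are assumptions, since the env-construction code is not part of this commit; `Concatenate` expects each agent's observation to arrive as a stack (tuple) of per-step dicts produced by such a frame-stacking wrapper.

```python
# Hedged sketch of a possible wrapper chain; the env id and FrameStack wrapper
# are assumptions and may differ from the actual evaluation setup.
import gym

from action import Action
from observation import Concatenate, FilterObs, SaveObs

env = gym.make("smarts.env:multi-scenario-v0")  # assumed env id
env = SaveObs(env)       # caches each ego's pos/heading for the Action wrapper
env = FilterObs(env)     # keeps only rgb, goal_distance, goal_heading
# A frame-stacking wrapper would go here so that each agent observation becomes
# a tuple of per-step dicts, which Concatenate then merges along the channel axis:
# env = FrameStack(env, num_stack=3)          # hypothetical wrapper
# env = Concatenate(env, channels_order="first")
env = Action(env)        # maps discrete actions to [x, y, heading, time_delta]

obs = env.reset()
obs, rewards, dones, infos = env.step({agent_id: 0 for agent_id in obs})
```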