add in offline baseline code
cz0204 committed Jul 20, 2022
1 parent 76ff466 commit 373267e
Showing 11 changed files with 983 additions and 0 deletions.
32 changes: 32 additions & 0 deletions competition/track-2/submission/README.md
@@ -0,0 +1,32 @@
# Submission
Once a model has been trained for the `multi-scenario-v0` environments, place all files necessary to run the trained model inside this `submission` folder.

The files `policy.py`, `requirements.txt`, and `explanation.md` must be included with the submission. Their contents are explained below.

## Policy
+ The file `policy.py` must include a `Policy` class which inherits from the `BasePolicy` class.
+ The `Policy` class must implement an `act` method which accepts observations and returns actions.
+ Any policy initialization, including loading of the model, may be performed inside the `__init__` method of the `Policy` class.
+ A random policy, implemented as the `RandomPolicy` class, is provided for reference.

## Wrappers
+ The file `policy.py` must include a `submitted_wrappers()` function.
+ The function `submitted_wrappers()` must return a list of callable wrappers, if any are used, else return an empty list `[]`.
+ Use of wrappers is optional. A minimal sketch of `policy.py` illustrating the `Policy` class and `submitted_wrappers()` is shown below.
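
For illustration only, here is a minimal sketch of what `policy.py` could look like. The random-action logic, the `BasePolicy` stub, and the commented wrapper imports are assumptions used to show the required structure; replace them with the trained model and the wrappers actually used by the submission.

```python
from typing import Any, Dict, List

import numpy as np


class BasePolicy:
    """Sketch of the interface expected by the evaluator; the starter kit
    provides the actual `BasePolicy` class."""

    def act(self, obs: Dict[str, Any]) -> Dict[str, Any]:
        raise NotImplementedError


class Policy(BasePolicy):
    """Example policy returning a random discrete action for every agent."""

    def __init__(self):
        # Perform any initialization here, e.g. load a trained model:
        # self.model = torch.load("model.pt")  # hypothetical file name
        self._num_actions = 4  # matches the discrete space in `action.py`

    def act(self, obs: Dict[str, Any]) -> Dict[str, Any]:
        """Map each agent's observation to an action."""
        return {
            agent_id: np.random.randint(0, self._num_actions)
            for agent_id in obs.keys()
        }


def submitted_wrappers() -> List:
    """Return a list of wrapper callables, or an empty list if none are used."""
    # e.g. from action import Action; from observation import FilterObs, SaveObs
    # return [SaveObs, FilterObs, Action]
    return []
```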

## Requirements
+ Create a `requirements.txt` file containing all the dependencies needed to run the submitted model.
+ The dependencies will be installed prior to evaluating the submitted model.

## Explanation
+ Include an `explanation.md` file explaining the key techniques used in developing the submitted model.

## Submit to Codalab
+ Zip the `submission` folder.
+ If the `submission` folder is located at `<path>/SMARTS/competition/submission`, run `make submission.zip` from the `<path>/SMARTS/competition` directory to create the zipped submission folder.
+ Upload the `submission.zip` to CodaLab.
+ Go to the [CodaLab competition page](https://codalab.lisn.upsaclay.fr/).
+ Click `My Competitions -> Competitions I'm In`.
+ Select the SMARTS competition.
+ Click `Participate -> Submit/View Results -> Submit`.
+ Upload the zipped submission folder.
Empty file.
84 changes: 84 additions & 0 deletions competition/track-2/submission/action.py
@@ -0,0 +1,84 @@
from typing import Any, Callable, Dict, Tuple

import gym
import numpy as np


class Action(gym.ActionWrapper):
"""Modifies the action space."""

def __init__(self, env: gym.Env):
"""Sets identical action space, denoted by `space`, for all agents.
Args:
env (gym.Env): Gym env to be wrapped.
"""
super().__init__(env)
self._wrapper, action_space = _discrete()

self.action_space = gym.spaces.Dict(
{agent_id: action_space for agent_id in env.action_space.spaces.keys()}
)

def action(self, action):
"""Adapts the action input to the wrapped environment.
`self.saved_obs` is retrieved from SaveObs wrapper. It contains previously
saved observation parameters.
Note: Users should not directly call this method.
"""
wrapped_act = self._wrapper(action, self.saved_obs)

return wrapped_act


def _discrete() -> Tuple[
Callable[[Dict[str, int], Dict[str, Any]], Dict[str, np.ndarray]], gym.Space
]:
space = gym.spaces.Discrete(n=4)

time_delta = 0.1 # Time, in seconds, between steps.
angle = 30 / 180 * np.pi # Turning angle in radians
speed = 40 # Speed in km/h
dist = (
speed * 1000 / 3600 * time_delta
) # Distance, in meters, travelled in time_delta seconds

action_map = {
# key: [magnitude, angle]
0: [0, 0], # slow_down
1: [dist, 0], # keep_direction
2: [dist, angle], # turn_left
3: [dist, -angle], # turn_right
}

def wrapper(
action: Dict[str, int], saved_obs: Dict[str, Any]
) -> Dict[str, np.ndarray]:
wrapped_act = {}
for agent_id, agent_action in action.items():
new_heading = saved_obs[agent_id]["heading"] + action_map[agent_action][1]
new_heading = (new_heading + np.pi) % (2 * np.pi) - np.pi

magnitude = action_map[agent_action][0]
cur_coord = (
saved_obs[agent_id]["pos"][0] + 1j * saved_obs[agent_id]["pos"][1]
)
# Note: On the map, angle is zero at positive y axis, and increases anti-clockwise.
# In np.exp(), angle is zero at positive x axis, and increases anti-clockwise.
# Hence, numpy_angle = map_angle + π/2
new_pos = cur_coord + magnitude * np.exp(1j * (new_heading + np.pi / 2))
x_coord = np.real(new_pos)
y_coord = np.imag(new_pos)

wrapped_act.update(
{
agent_id: np.array(
[x_coord, y_coord, new_heading, time_delta], dtype=np.float32
)
}
)

return wrapped_act

return wrapper, space
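
As a quick sanity check on the angle convention noted in the comments above (map heading is zero along the positive y-axis, while `np.exp` measures angles from the positive x-axis), the following standalone snippet, which is not part of the baseline, reproduces one position update for a vehicle at the origin with heading zero:

```python
import numpy as np

# Reproduce one position update from `_discrete()` for heading 0 (facing +y).
time_delta = 0.1                          # seconds between steps
speed = 40                                # km/h
dist = speed * 1000 / 3600 * time_delta   # ~1.11 m travelled per step

heading = 0.0                             # map heading 0 == positive y-axis
pos = 0.0 + 0.0j                          # ego position as a complex number

# numpy_angle = map_angle + pi/2, so heading 0 becomes pi/2 (the +y direction).
new_pos = pos + dist * np.exp(1j * (heading + np.pi / 2))
print(round(np.real(new_pos), 3), round(np.imag(new_pos), 3))  # 0.0 1.111
```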
6 changes: 6 additions & 0 deletions competition/track-2/submission/explanation.md
@@ -0,0 +1,6 @@
# Explanation

Include explanation of your model here.

Provide a GitHub link to your code.

54 changes: 54 additions & 0 deletions competition/track-2/submission/network.py
@@ -0,0 +1,54 @@
import gym
import torch as th
import torch.nn as nn
from stable_baselines3.common.preprocessing import get_flattened_obs_dim
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, NatureCNN
from stable_baselines3.common.type_aliases import TensorDict


class CombinedExtractor(BaseFeaturesExtractor):
"""
:param observation_space: (gym.Space)
:param cnn_output_dim: (int) Number of features extracted from the `rgb` image by the CNN.
This corresponds to the number of units in its last layer.
"""

def __init__(self, observation_space: gym.spaces.Dict, cnn_output_dim: int = 256):
super(CombinedExtractor, self).__init__(observation_space, features_dim=1)
# We assume CxHxW images (channels first)

extractors = {}

total_concat_size = 0
for key, subspace in observation_space.spaces.items():
if key == "rgb":
extractors[key] = NatureCNN(subspace, features_dim=cnn_output_dim)
total_concat_size += cnn_output_dim
else:
# The observation key is a vector, flatten it if needed
extractors[key] = nn.Flatten()
total_concat_size += get_flattened_obs_dim(subspace)

self.extractors = nn.ModuleDict(extractors)

# Update the features dim manually
self._features_dim = total_concat_size

def forward(self, observations: TensorDict) -> th.Tensor:
encoded_tensor_list = []

for key, extractor in self.extractors.items():
encoded_tensor_list.append(extractor(observations[key]))
return th.cat(encoded_tensor_list, dim=1)


def combined_extractor(config):
kwargs = {}
kwargs["policy"] = "MultiInputPolicy"
kwargs["policy_kwargs"] = dict(
features_extractor_class=CombinedExtractor,
features_extractor_kwargs=dict(cnn_output_dim=256),
net_arch=[],
)
kwargs["target_kl"] = 0.1
return kwargs
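
For context, the dictionary returned by `combined_extractor` matches keyword arguments of Stable-Baselines3's `PPO` constructor (`policy`, `policy_kwargs`, `target_kl`). The sketch below shows one plausible way to consume it; `make_env` is a hypothetical helper standing in for whatever code builds the dict-observation environment, which is not part of this commit.

```python
# Hedged usage sketch: passing the kwargs from `combined_extractor` to PPO.
from stable_baselines3 import PPO

from network import combined_extractor

config = {}                        # placeholder; `combined_extractor` ignores it
kwargs = combined_extractor(config)

env = make_env()                   # hypothetical: a gym.Env with Dict observations
model = PPO(
    env=env,
    verbose=1,
    **kwargs,                      # supplies policy, policy_kwargs, and target_kl
)
model.learn(total_timesteps=1_000_000)
model.save("ppo_baseline")         # hypothetical output path
```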
187 changes: 187 additions & 0 deletions competition/track-2/submission/observation.py
@@ -0,0 +1,187 @@
from typing import Any, Dict
import copy
import gym
import numpy as np


class SaveObs(gym.ObservationWrapper):
"""Saves several selected observation parameters."""

def __init__(self, env: gym.Env):
"""
Args:
env (gym.Env): Environment to be wrapped.
"""
super().__init__(env)
self.saved_obs: Dict[str, Dict[str, Any]]

def observation(self, obs: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
"""Saves the wrapped environment's observation.
Note: Users should not directly call this method.
"""

obs_data = {}
for agent_id, agent_obs in obs.items():
obs_data.update(
{
agent_id: {
"pos": copy.deepcopy(agent_obs["ego"]["pos"]),
"heading": copy.deepcopy(agent_obs["ego"]["heading"]),
}
}
)
self.saved_obs = obs_data

return obs


class FilterObs(gym.ObservationWrapper):
"""Filter only the selected observation parameters."""

def __init__(self, env: gym.Env):
"""
Args:
env (gym.Env): Environment to be wrapped.
"""
super().__init__(env)
self.observation_space = gym.spaces.Dict(
{
agent_id: gym.spaces.Dict(
{
"rgb": gym.spaces.Box(
low=0,
high=255,
shape=(agent_obs_space["rgb"].shape[-1],)
+ agent_obs_space["rgb"].shape[:-1],
dtype=np.uint8,
),
"goal_distance": gym.spaces.Box(
low=-1e10,
high=+1e10,
shape=(1, 1),
dtype=np.float32,
),
"goal_heading": gym.spaces.Box(
low=-np.pi,
high=np.pi,
shape=(1, 1),
dtype=np.float32,
),
}
)
for agent_id, agent_obs_space in env.observation_space.spaces.items()
}
)

def observation(self, obs: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
"""Adapts the wrapped environment's observation.
Note: Users should not directly call this method.
"""
wrapped_obs = {}
for agent_id, agent_obs in obs.items():
# Channel first rgb
rgb = agent_obs["rgb"]
rgb = rgb.transpose(2, 0, 1)

# Distance between ego and goal.
goal_distance = np.array(
[
[
np.linalg.norm(
agent_obs["mission"]["goal_pos"] - agent_obs["ego"]["pos"]
)
]
],
dtype=np.float32,
)

# Ego's heading with respect to the map's coordinate system.
# Note: All angles returned by SMARTS are with respect to the map's coordinate system.
# On the map, angle is zero at positive y axis, and increases anti-clockwise.
ego_heading = (agent_obs["ego"]["heading"] + np.pi) % (2 * np.pi) - np.pi
ego_pos = agent_obs["ego"]["pos"]

# Goal's angle with respect to the ego's position.
# Note: In np.angle(), angle is zero at positive x axis, and increases anti-clockwise.
# Hence, map_angle = np.angle() - π/2
goal_pos = agent_obs["mission"]["goal_pos"]
rel_pos = goal_pos - ego_pos
goal_angle = np.angle(rel_pos[0] + 1j * rel_pos[1]) - np.pi / 2
goal_angle = (goal_angle + np.pi) % (2 * np.pi) - np.pi

# Goal heading is the angle correction required by ego agent to face the goal.
goal_heading = goal_angle - ego_heading
goal_heading = (goal_heading + np.pi) % (2 * np.pi) - np.pi
goal_heading = np.array([[goal_heading]], dtype=np.float32)

wrapped_obs.update(
{
agent_id: {
"rgb": np.uint8(rgb),
"goal_distance": goal_distance,
"goal_heading": goal_heading,
}
}
)

return wrapped_obs


class Concatenate(gym.ObservationWrapper):
"""Concatenates data from stacked dictionaries. Only works with nested gym.spaces.Box .
Dimension to stack over is determined by `channels_order`.
"""

def __init__(self, env: gym.Env, channels_order: str = "first"):
"""
Args:
env (gym.Env): Environment to be wrapped.
channels_order (str): A string, either "first" or "last", specifying
the dimension over which to stack each observation.
"""
super().__init__(env)

self._repeat_axis = {
"first": 0,
"last": -1,
}.get(channels_order)

for agent_name, agent_space in env.observation_space.spaces.items():
for subspaces in agent_space:
for key, space in subspaces.spaces.items():
assert isinstance(space, gym.spaces.Box), (
f"Concatenate only works with nested gym.spaces.Box. "
f"Got agent {agent_name} with key {key} and space {space}."
)

_, agent_space = next(iter(env.observation_space.spaces.items()))
self._num_stack = len(agent_space)
self._keys = agent_space[0].spaces.keys()

obs_space = {}
for agent_name, agent_space in env.observation_space.spaces.items():
subspaces = {}
for key, space in agent_space[0].spaces.items():
low = np.repeat(space.low, self._num_stack, axis=self._repeat_axis)
high = np.repeat(space.high, self._num_stack, axis=self._repeat_axis)
subspaces[key] = gym.spaces.Box(low=low, high=high, dtype=space.dtype)
obs_space.update({agent_name: gym.spaces.Dict(subspaces)})
self.observation_space = gym.spaces.Dict(obs_space)

def observation(self, obs):
"""Adapts the wrapped environment's observation.
Note: Users should not directly call this method.
"""

wrapped_obs = {}
for agent_id, agent_obs in obs.items():
stacked_obs = {}
for key in self._keys:
val = [frame_obs[key] for frame_obs in agent_obs]
stacked_obs[key] = np.concatenate(val, axis=self._repeat_axis)
wrapped_obs.update({agent_id: stacked_obs})

return wrapped_obs
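
To show how these observation wrappers are intended to compose with the `Action` wrapper, here is a hedged sketch of a wrapper chain. The environment id and the frame-stacking step are assumptions, since the env-construction code is not part of this commit; `Concatenate` expects each agent's observation to arrive as a stack (tuple) of per-step dicts produced by such a frame-stacking wrapper.

```python
# Hedged sketch of a possible wrapper chain; the env id and FrameStack wrapper
# are assumptions and may differ from the actual evaluation setup.
import gym

from action import Action
from observation import Concatenate, FilterObs, SaveObs

env = gym.make("smarts.env:multi-scenario-v0")  # assumed env id
env = SaveObs(env)       # caches each ego's pos/heading for the Action wrapper
env = FilterObs(env)     # keeps only rgb, goal_distance, goal_heading
# A frame-stacking wrapper would go here so that each agent observation becomes
# a tuple of per-step dicts, which Concatenate then merges along the channel axis:
# env = FrameStack(env, num_stack=3)          # hypothetical wrapper
# env = Concatenate(env, channels_order="first")
env = Action(env)        # maps discrete actions to [x, y, heading, time_delta]

obs = env.reset()
obs, rewards, dones, infos = env.step({agent_id: 0 for agent_id in obs})
```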