diff --git a/competition/track-2/submission/README.md b/competition/track-2/submission/README.md
new file mode 100644
index 0000000000..c85a94bca4
--- /dev/null
+++ b/competition/track-2/submission/README.md
@@ -0,0 +1,32 @@
+# Submission
+Once a model has been trained for the `multi-scenario-v0` environments, place all files necessary to run the trained model inside this folder, named `submission`.
+
+The files `policy.py`, `requirements.txt`, and `explanation.md` must be included in the submission. Their contents are explained below.
+
+## Policy
++ The file `policy.py` must include a `Policy` class which inherits from the `BasePolicy` class.
++ The `Policy` class must implement an `act` method which accepts observations and returns actions.
++ Any policy initialization, including loading of the model, may be performed inside the `__init__` method of the `Policy` class.
++ A random policy class named `RandomPolicy` is provided for reference.
+
+## Wrappers
++ The file `policy.py` must include a `submitted_wrappers()` function.
++ The function `submitted_wrappers()` must return a list of callable wrappers if any are used, or an empty list `[]` otherwise.
++ Use of wrappers is optional.
+
+## Requirements
++ Create a `requirements.txt` file containing all the dependencies needed to run the submitted model.
++ The dependencies will be installed prior to evaluating the submitted model.
+
+## Explanation
++ Include an `explanation.md` file explaining the key techniques used in developing the submitted model.
+
+## Submit to CodaLab
++ Zip the `submission` folder.
+    + If the `submission` folder is located at `/SMARTS/competition/submission`, then run `make submission.zip` from the `/SMARTS/competition` directory to easily create a zipped submission folder.
++ Upload the `submission.zip` to CodaLab.
+    + Go to the [CodaLab competition page](https://codalab.lisn.upsaclay.fr/).
+    + Click `My Competitions -> Competitions I'm In`.
+    + Select the SMARTS competition.
+    + Click `Participate -> Submit/View Results -> Submit`.
+    + Upload the zipped submission folder.
diff --git a/competition/track-2/submission/__init__.py b/competition/track-2/submission/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/competition/track-2/submission/action.py b/competition/track-2/submission/action.py
new file mode 100644
index 0000000000..f340504148
--- /dev/null
+++ b/competition/track-2/submission/action.py
@@ -0,0 +1,84 @@
+from typing import Any, Callable, Dict, Tuple
+
+import gym
+import numpy as np
+
+
+class Action(gym.ActionWrapper):
+    """Modifies the action space."""
+
+    def __init__(self, env: gym.Env):
+        """Sets an identical action space, denoted by `space`, for all agents.
+        Args:
+            env (gym.Env): Gym env to be wrapped.
+        """
+        super().__init__(env)
+        self._wrapper, action_space = _discrete()
+
+        self.action_space = gym.spaces.Dict(
+            {agent_id: action_space for agent_id in env.action_space.spaces.keys()}
+        )
+
+    def action(self, action):
+        """Adapts the action input to the wrapped environment.
+
+        `self.saved_obs` is retrieved from the SaveObs wrapper. It contains previously
+        saved observation parameters.
+
+        Note: Users should not directly call this method.
+        """
+        wrapped_act = self._wrapper(action, self.saved_obs)
+
+        return wrapped_act
+
+
+def _discrete() -> Tuple[
+    Callable[[Dict[str, int], Dict[str, Any]], Dict[str, np.ndarray]], gym.Space
+]:
+    space = gym.spaces.Discrete(n=4)
+
+    time_delta = 0.1  # Time, in seconds, between steps.
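+    # The four discrete actions defined below are mapped to (travel distance, heading change)
+    # pairs, which `wrapper` converts into TargetPose actions of the form
+    # (x, y, heading, time_delta). At 40 km/h (~11.1 m/s), the ego advances ~1.1 m per 0.1 s step.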
+ angle = 30 / 180 * np.pi # Turning angle in radians + speed = 40 # Speed in km/h + dist = ( + speed * 1000 / 3600 * time_delta + ) # Distance, in meter, travelled in time_delta seconds + + action_map = { + # key: [magnitude, angle] + 0: [0, 0], # slow_down + 1: [dist, 0], # keep_direction + 2: [dist, angle], # turn_left + 3: [dist, -angle], # turn_right + } + + def wrapper( + action: Dict[str, int], saved_obs: Dict[str, Any] + ) -> Dict[str, np.ndarray]: + wrapped_obs = {} + for agent_id, agent_action in action.items(): + new_heading = saved_obs[agent_id]["heading"] + action_map[agent_action][1] + new_heading = (new_heading + np.pi) % (2 * np.pi) - np.pi + + magnitude = action_map[agent_action][0] + cur_coord = ( + saved_obs[agent_id]["pos"][0] + 1j * saved_obs[agent_id]["pos"][1] + ) + # Note: On the map, angle is zero at positive y axis, and increases anti-clockwise. + # In np.exp(), angle is zero at positive x axis, and increases anti-clockwise. + # Hence, numpy_angle = map_angle + π/2 + new_pos = cur_coord + magnitude * np.exp(1j * (new_heading + np.pi / 2)) + x_coord = np.real(new_pos) + y_coord = np.imag(new_pos) + + wrapped_obs.update( + { + agent_id: np.array( + [x_coord, y_coord, new_heading, time_delta], dtype=np.float32 + ) + } + ) + + return wrapped_obs + + return wrapper, space diff --git a/competition/track-2/submission/explanation.md b/competition/track-2/submission/explanation.md new file mode 100644 index 0000000000..a52e277e27 --- /dev/null +++ b/competition/track-2/submission/explanation.md @@ -0,0 +1,6 @@ +# Explanation + +Include explanation of your model here. + +Provide a GitHub link to your code. + \ No newline at end of file diff --git a/competition/track-2/submission/network.py b/competition/track-2/submission/network.py new file mode 100644 index 0000000000..ab9220fe3e --- /dev/null +++ b/competition/track-2/submission/network.py @@ -0,0 +1,54 @@ +import gym +import torch as th +import torch.nn as nn +from stable_baselines3.common.preprocessing import get_flattened_obs_dim +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, NatureCNN +from stable_baselines3.common.type_aliases import TensorDict + + +class CombinedExtractor(BaseFeaturesExtractor): + """ + :param observation_space: (gym.Space) + :param features_dim: (int) Number of features extracted. + This corresponds to the number of unit for the last layer. 
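+    :param cnn_output_dim: (int) Output dimension of the NatureCNN applied to the
+        "rgb" key; all other keys are flattened and concatenated with it.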
+ """ + + def __init__(self, observation_space: gym.spaces.Dict, cnn_output_dim: int = 256): + super(CombinedExtractor, self).__init__(observation_space, features_dim=1) + # We assume CxHxW images (channels first) + + extractors = {} + + total_concat_size = 0 + for key, subspace in observation_space.spaces.items(): + if key == "rgb": + extractors[key] = NatureCNN(subspace, features_dim=cnn_output_dim) + total_concat_size += cnn_output_dim + else: + # The observation key is a vector, flatten it if needed + extractors[key] = nn.Flatten() + total_concat_size += get_flattened_obs_dim(subspace) + + self.extractors = nn.ModuleDict(extractors) + + # Update the features dim manually + self._features_dim = total_concat_size + + def forward(self, observations: TensorDict) -> th.Tensor: + encoded_tensor_list = [] + + for key, extractor in self.extractors.items(): + encoded_tensor_list.append(extractor(observations[key])) + return th.cat(encoded_tensor_list, dim=1) + + +def combined_extractor(config): + kwargs = {} + kwargs["policy"] = "MultiInputPolicy" + kwargs["policy_kwargs"] = dict( + features_extractor_class=CombinedExtractor, + features_extractor_kwargs=dict(cnn_output_dim=256), + net_arch=[], + ) + kwargs["target_kl"] = 0.1 + return kwargs diff --git a/competition/track-2/submission/observation.py b/competition/track-2/submission/observation.py new file mode 100644 index 0000000000..ce572d71f6 --- /dev/null +++ b/competition/track-2/submission/observation.py @@ -0,0 +1,187 @@ +from typing import Any, Dict +import copy +import gym +import numpy as np + + +class SaveObs(gym.ObservationWrapper): + """Saves several selected observation parameters.""" + + def __init__(self, env: gym.Env): + """ + Args: + env (gym.Env): Environment to be wrapped. + """ + super().__init__(env) + self.saved_obs: Dict[str, Dict[str, Any]] + + def observation(self, obs: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + """Saves the wrapped environment's observation. + + Note: Users should not directly call this method. + """ + + obs_data = {} + for agent_id, agent_obs in obs.items(): + obs_data.update( + { + agent_id: { + "pos": copy.deepcopy(agent_obs["ego"]["pos"]), + "heading": copy.deepcopy(agent_obs["ego"]["heading"]), + } + } + ) + self.saved_obs = obs_data + + return obs + + +class FilterObs(gym.ObservationWrapper): + """Filter only the selected observation parameters.""" + + def __init__(self, env: gym.Env): + """ + Args: + env (gym.Env): Environment to be wrapped. + """ + super().__init__(env) + self.observation_space = gym.spaces.Dict( + { + agent_id: gym.spaces.Dict( + { + "rgb": gym.spaces.Box( + low=0, + high=255, + shape=(agent_obs_space["rgb"].shape[-1],) + + agent_obs_space["rgb"].shape[:-1], + dtype=np.uint8, + ), + "goal_distance": gym.spaces.Box( + low=-1e10, + high=+1e10, + shape=(1, 1), + dtype=np.float32, + ), + "goal_heading": gym.spaces.Box( + low=-np.pi, + high=np.pi, + shape=(1, 1), + dtype=np.float32, + ), + } + ) + for agent_id, agent_obs_space in env.observation_space.spaces.items() + } + ) + + def observation(self, obs: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + """Adapts the wrapped environment's observation. + + Note: Users should not directly call this method. + """ + wrapped_obs = {} + for agent_id, agent_obs in obs.items(): + # Channel first rgb + rgb = agent_obs["rgb"] + rgb = rgb.transpose(2, 0, 1) + + # Distance between ego and goal. 
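+            # (Euclidean norm of the difference between goal and ego positions,
+            # wrapped to shape (1, 1) to match the observation space declared above.)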
+ goal_distance = np.array( + [ + [ + np.linalg.norm( + agent_obs["mission"]["goal_pos"] - agent_obs["ego"]["pos"] + ) + ] + ], + dtype=np.float32, + ) + + # Ego's heading with respect to the map's coordinate system. + # Note: All angles returned by smarts is with respect to the map's coordinate system. + # On the map, angle is zero at positive y axis, and increases anti-clockwise. + ego_heading = (agent_obs["ego"]["heading"] + np.pi) % (2 * np.pi) - np.pi + ego_pos = agent_obs["ego"]["pos"] + + # Goal's angle with respect to the ego's position. + # Note: In np.angle(), angle is zero at positive x axis, and increases anti-clockwise. + # Hence, map_angle = np.angle() - π/2 + goal_pos = agent_obs["mission"]["goal_pos"] + rel_pos = goal_pos - ego_pos + goal_angle = np.angle(rel_pos[0] + 1j * rel_pos[1]) - np.pi / 2 + goal_angle = (goal_angle + np.pi) % (2 * np.pi) - np.pi + + # Goal heading is the angle correction required by ego agent to face the goal. + goal_heading = goal_angle - ego_heading + goal_heading = (goal_heading + np.pi) % (2 * np.pi) - np.pi + goal_heading = np.array([[goal_heading]], dtype=np.float32) + + wrapped_obs.update( + { + agent_id: { + "rgb": np.uint8(rgb), + "goal_distance": goal_distance, + "goal_heading": goal_heading, + } + } + ) + + return wrapped_obs + + +class Concatenate(gym.ObservationWrapper): + """Concatenates data from stacked dictionaries. Only works with nested gym.spaces.Box . + Dimension to stack over is determined by `channels_order`. + """ + + def __init__(self, env: gym.Env, channels_order: str = "first"): + """ + Args: + env (gym.Env): Environment to be wrapped. + channels_order (str): A string, either "first" or "last", specifying + the dimension over which to stack each observation. + """ + super().__init__(env) + + self._repeat_axis = { + "first": 0, + "last": -1, + }.get(channels_order) + + for agent_name, agent_space in env.observation_space.spaces.items(): + for subspaces in agent_space: + for key, space in subspaces.spaces.items(): + assert isinstance(space, gym.spaces.Box), ( + f"Concatenate only works with nested gym.spaces.Box. " + f"Got agent {agent_name} with key {key} and space {space}." + ) + + _, agent_space = next(iter(env.observation_space.spaces.items())) + self._num_stack = len(agent_space) + self._keys = agent_space[0].spaces.keys() + + obs_space = {} + for agent_name, agent_space in env.observation_space.spaces.items(): + subspaces = {} + for key, space in agent_space[0].spaces.items(): + low = np.repeat(space.low, self._num_stack, axis=self._repeat_axis) + high = np.repeat(space.high, self._num_stack, axis=self._repeat_axis) + subspaces[key] = gym.spaces.Box(low=low, high=high, dtype=space.dtype) + obs_space.update({agent_name: gym.spaces.Dict(subspaces)}) + self.observation_space = gym.spaces.Dict(obs_space) + + def observation(self, obs): + """Adapts the wrapped environment's observation. + + Note: Users should not directly call this method. 
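+
+        For every agent, the stacked frames are concatenated per key along the
+        axis selected by `channels_order`.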
+ """ + + wrapped_obs = {} + for agent_id, agent_obs in obs.items(): + stacked_obs = {} + for key in self._keys: + val = [obs[key] for obs in agent_obs] + stacked_obs[key] = np.concatenate(val, axis=self._repeat_axis) + wrapped_obs.update({agent_id: stacked_obs}) + + return wrapped_obs diff --git a/competition/track-2/submission/policy.py b/competition/track-2/submission/policy.py new file mode 100644 index 0000000000..189cd05935 --- /dev/null +++ b/competition/track-2/submission/policy.py @@ -0,0 +1,118 @@ +from pathlib import Path +from typing import Any, Dict +import numpy as np + +# Environment variables (optional) +IMG_METERS = 50 # Observation image area size in meters. +IMG_PIXELS = 256 # Observation image size in pixels. + + +class BasePolicy: + def act(self, obs: Dict[str, Any]): + """Act function to be implemented by user. + + Args: + obs (Dict[str, Any]): A dictionary of observation for each ego agent step. + + Returns: + Dict[str, Any]: A dictionary of actions for each ego agent. + """ + raise NotImplementedError + + +def submitted_wrappers(): + """Return environment wrappers for wrapping the evaluation environment. + Each wrapper is of the form: Callable[[env], env]. Use of wrappers is + optional. If wrappers are not used, return empty list []. + + Returns: + List[wrappers]: List of wrappers. Default is empty list []. + """ + + from action import Action as DiscreteAction + from observation import Concatenate, FilterObs, SaveObs + from reward import Reward + + from smarts.core.controllers import ActionSpaceType + from smarts.env.wrappers.format_action import FormatAction + from smarts.env.wrappers.format_obs import FormatObs + from smarts.env.wrappers.frame_stack import FrameStack + + # fmt: off + wrappers = [ + FormatObs, + lambda env: FormatAction(env=env, space=ActionSpaceType["TargetPose"]), + Reward, + SaveObs, + # lambda env: DiscreteAction(env=env, space='Discrete'), + #DiscreteAction, + #FilterObs, + #lambda env: FrameStack(env=env, num_stack=3), + #lambda env: Concatenate(env=env, channels_order="first"), + ] + # fmt: on + + return wrappers + + +class Policy(BasePolicy): + """Policy class to be submitted by the user. This class will be loaded + and tested during evaluation.""" + + def __init__(self): + """All policy initialization matters, including loading of model, is + performed here. To be implemented by the user. + """ + + from d3rlpy.algos import CQL + + + self.model = CQL.from_json(Path(__file__).absolute().parents[0]/'model/params.json', use_gpu=True) + self.model.load_model(Path(__file__).absolute().parents[0]/'model/model.pt') + + + def act(self, obs: Dict[str, Any]): + """Act function to be implemented by user. + + Args: + obs (Dict[str, Any]): A dictionary of observation for each ego agent step. + + Returns: + Dict[str, Any]: A dictionary of actions for each ego agent. + """ + wrapped_act = {} + for agent_id, agent_obs in obs.items(): + action = self.model.predict(np.array([agent_obs['rgb'].reshape(3, 256, 256)]))[0] + print(action) + target_pose = np.array([(action[0] + agent_obs['ego']['pos'][0]), action[1] + agent_obs['ego']['pos'][1], action[2] + agent_obs['ego']['heading'], 0.1]) + wrapped_act.update({agent_id: target_pose}) + return wrapped_act + + +class RandomPolicy(BasePolicy): + """A sample policy with random actions. Note that only the class named `Policy` + will be tested during evaluation.""" + + def __init__(self): + """All policy initialization matters, including loading of model, is + performed here. To be implemented by the user. 
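+        Here, only a `Discrete(4)` action space is created; `act` then samples
+        an action from it uniformly for every agent.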
+ """ + import gym + + self._action_space = gym.spaces.Discrete(4) + + def act(self, obs: Dict[str, Any]): + """Act function to be implemented by user. + + Args: + obs (Dict[str, Any]): A dictionary of observation for each ego agent step. + + Returns: + Dict[str, Any]: A dictionary of actions for each ego agent. + """ + wrapped_act = {} + for agent_id, agent_obs in obs.items(): + action = self._action_space.sample() + wrapped_act.update({agent_id: action}) + + return wrapped_act diff --git a/competition/track-2/submission/requirements.txt b/competition/track-2/submission/requirements.txt new file mode 100644 index 0000000000..7e2f4035c1 --- /dev/null +++ b/competition/track-2/submission/requirements.txt @@ -0,0 +1,77 @@ +absl-py==1.1.0 +attrs==21.4.0 +Automat==20.2.0 +cached-property==1.5.2 +cachetools==4.2.4 +certifi==2022.6.15 +charset-normalizer==2.1.0 +click==8.1.3 +cloudpickle==1.3.0 +commonmark==0.9.1 +constantly==15.1.0 +cycler==0.11.0 +eclipse-sumo==1.10.0 +fonttools==4.34.4 +future==0.18.2 +google-auth==1.35.0 +google-auth-oauthlib==0.4.6 +grpcio==1.47.0 +gym==0.19.0 +hyperlink==21.0.0 +idna==3.3 +ijson==3.1.4 +importlib-metadata==4.12.0 +incremental==21.3.0 +jsonpatch==1.32 +jsonpointer==2.3 +kiwisolver==1.4.3 +Markdown==3.3.7 +matplotlib==3.5.2 +numpy==1.23.1 +oauthlib==3.2.0 +packaging==21.3 +Panda3D==1.10.9 +panda3d-gltf==0.13 +panda3d-simplepbr==0.10 +pandas==1.4.3 +Pillow==9.2.0 +protobuf==3.20.1 +psutil==5.9.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pybullet==3.0.6 +Pygments==2.12.0 +pyparsing==3.0.9 +python-dateutil==2.8.2 +pytz==2022.1 +PyYAML==6.0 +pyzmq==23.2.0 +requests==2.28.1 +requests-oauthlib==1.3.1 +rich==12.5.1 +rsa==4.8 +Rtree==1.0.0 +ruamel.yaml==0.17.17 +ruamel.yaml.clib==0.2.6 +scipy==1.8.1 +sh==1.14.2 +Shapely==1.8.2 +six==1.16.0 +stable-baselines3==1.4.0 +tableprint==0.9.1 +tensorboard==2.2.0 +tensorboard-plugin-wit==1.8.1 +torch==1.11.0 +torchfile==0.1.0 +tornado==6.2 +trimesh==3.9.29 +Twisted==22.4.0 +typing_extensions==4.3.0 +urllib3==1.26.10 +visdom==0.1.8.9 +wcwidth==0.2.5 +websocket-client==1.3.3 +Werkzeug==2.1.2 +yattag==1.14.0 +zipp==3.8.1 +zope.interface==5.4.0 diff --git a/competition/track-2/submission/reward.py b/competition/track-2/submission/reward.py new file mode 100644 index 0000000000..891b37fe0c --- /dev/null +++ b/competition/track-2/submission/reward.py @@ -0,0 +1,84 @@ +from typing import Any, Dict + +import gym +import numpy as np + + +class Reward(gym.Wrapper): + def __init__(self, env: gym.Env): + super().__init__(env) + + def reset(self, **kwargs): + return self.env.reset(**kwargs) + + def step(self, action): + """Adapts the wrapped environment's step. + + Note: Users should not directly call this method. + """ + obs, reward, done, info = self.env.step(action) + wrapped_reward = self._reward(obs, reward) + + for agent_id, agent_done in done.items(): + if agent_id != "__all__" and agent_done == True: + if obs[agent_id]["events"]["reached_goal"]: + print(f"{agent_id}: Hooray! 
Reached goal.") + elif obs[agent_id]["events"]["reached_max_episode_steps"]: + print(f"{agent_id}: Reached max episode steps.") + elif ( + obs[agent_id]["events"]["collisions"] + | obs[agent_id]["events"]["off_road"] + | obs[agent_id]["events"]["off_route"] + | obs[agent_id]["events"]["on_shoulder"] + | obs[agent_id]["events"]["wrong_way"] + ): + pass + else: + print("Events: ", obs[agent_id]["events"]) + raise Exception("Episode ended for unknown reason.") + + return obs, wrapped_reward, done, info + + def _reward( + self, obs: Dict[str, Dict[str, Any]], env_reward: Dict[str, np.float64] + ) -> Dict[str, np.float64]: + reward = {agent_id: np.float64(0) for agent_id in env_reward.keys()} + + for agent_id, agent_reward in env_reward.items(): + # Penalty for colliding + if obs[agent_id]["events"]["collisions"]: + reward[agent_id] -= np.float64(10) + print(f"{agent_id}: Collided.") + break + + # Penalty for driving off road + if obs[agent_id]["events"]["off_road"]: + reward[agent_id] -= np.float64(10) + print(f"{agent_id}: Went off road.") + break + + # Penalty for driving off route + if obs[agent_id]["events"]["off_route"]: + reward[agent_id] -= np.float64(10) + print(f"{agent_id}: Went off route.") + break + + # Penalty for driving on road shoulder + if obs[agent_id]["events"]["on_shoulder"]: + reward[agent_id] -= np.float64(1) + break + + # Penalty for driving on wrong way + if obs[agent_id]["events"]["wrong_way"]: + reward[agent_id] -= np.float64(10) + print(f"{agent_id}: Went wrong way.") + break + + # Reward for reaching goal + if obs[agent_id]["events"]["reached_goal"]: + reward[agent_id] += np.float64(30) + + # Reward for distance travelled + reward[agent_id] += np.float64(agent_reward) + + return reward diff --git a/competition/track-2/train/train.py b/competition/track-2/train/train.py new file mode 100644 index 0000000000..97425d208a --- /dev/null +++ b/competition/track-2/train/train.py @@ -0,0 +1,155 @@ +from utility import remote_operations +from utility import goal_region_reward +from utility import get_goal_layer +import paramiko +import getpass +import pickle +import numpy as np +import d3rlpy +from d3rlpy.dataset import MDPDataset +from d3rlpy.algos import CQL, BCQ +import os +from PIL import Image +import re +import torch +torch.cuda.empty_cache() +import pathlib +from d3rlpy.metrics.scorer import average_value_estimation_scorer +from d3rlpy.metrics.scorer import td_error_scorer +import glob + + + +path = '/net/storage-1/home/c84201475/waymo_bev/' +remote = remote_operations() +ip_add = input("Server IP: ") +user_name = input("Username: ") +pswd = getpass.getpass("Password: ") + +if ip_add == "gx1": + ip_add = "10.193.241.237" +elif ip_add == "gx2": + ip_add = "10.193.241.238" +elif ip_add == "gx3": + ip_add = "10.193.241.239" + +while True: + try: + client = remote.connect(ip_add, user_name, pswd) # ip, username, password + break + except paramiko.ssh_exception.AuthenticationException: + print("Authentication Failed") + pswd = getpass.getpass("Password: ") + + +scenarios = list() +for scenario_name in client.listdir(path): + scenarios.append(scenario_name) + + +if not os.listdir('d3rlpy_logs/'): # if empty + index = 0 +else: + index = len(os.listdir('d3rlpy_logs/')) + + +for scenario in scenarios[index:len(scenarios)]: + try: + obs = list() + actions = list() + rewards = list() + terminals = list() + print('processing scenario ' + scenario) + vehicle_ids = list() + + for filename in client.listdir(path + scenario): + if filename.endswith('.png'): + vehicle_id = 
re.search('vehicle-(.*).png', filename).group(1) + if vehicle_id not in vehicle_ids: + vehicle_ids.append(vehicle_id) + + for id in vehicle_ids[0:2]: + print('adding data for vehicle id ' + id + ' in scenario ' + scenario) + + with client.file(path + scenario + '/Agent-history-vehicle-' + id + '.pkl', 'rb') as f: + vehicle_data = pickle.load(f) + image_names = list() + + for filename in client.listdir(path + scenario): + if filename.endswith('-' + id + '.png'): + image_names.append(filename) + + image_names = sorted(image_names) + + goal_pos_x = vehicle_data[float(image_names[-1].split('_Agent')[0])]['ego']['pos'][0] + goal_pos_y = vehicle_data[float(image_names[-1].split('_Agent')[0])]['ego']['pos'][1] + threshold = 3 + + for i in range(len(image_names) - 1): + imgfile = client.open(path + scenario + '/' + image_names[i], 'r') + imgfile.seek(0) + image = Image.open(imgfile) + + sim_time = image_names[i].split('_Agent')[0] + sim_time_next = image_names[i + 1].split('_Agent')[0] + current_position = vehicle_data[float(sim_time)]['ego']['pos'] + current_heading = vehicle_data[float(sim_time)]['ego']['heading'] + next_position = vehicle_data[float(sim_time_next)]['ego']['pos'] + next_heading = vehicle_data[float(sim_time_next)]['ego']['heading'] + dx = next_position[0] - current_position[0] + dy = next_position[1] - current_position[1] + dheading = next_heading - current_heading + events = vehicle_data[float(sim_time)]['events'] + if all(value == 0 for value in events.values()): + terminal = 0 + else: + terminal = 1 + + img_obs = np.asarray(image).reshape(3,256,256) + goal_obs = get_goal_layer(goal_pos_x, goal_pos_y, current_position[0], current_position[1], current_heading) + obs.append(np.concatenate((img_obs, goal_obs), axis=0)) + + actions.append([dx, dy, dheading]) + + dist_reward = vehicle_data[float(sim_time)]['dist'] + goal_reward = goal_region_reward(threshold, goal_pos_x, goal_pos_y, current_position[0], current_position[1]) + rewards.append(dist_reward + goal_reward) + + terminals.append(terminal) + print(str(len(obs)) + ' pieces of data are added into dataset.' 
)
+
+        obs = np.array(obs, dtype=np.uint8)
+        actions = np.array(actions)
+        rewards = np.array(rewards)
+        terminals = np.array(terminals)
+        dataset = MDPDataset(obs, actions, rewards, terminals)
+
+        if index == 0:
+            model = d3rlpy.algos.CQL(use_gpu=True, batch_size=32)
+        else:
+            saved_models = glob.glob('d3rlpy_logs/*')
+            latest_model = max(saved_models, key=os.path.getctime)
+            model = CQL.from_json('d3rlpy_logs/1/params.json', use_gpu=True)
+            model.load_model(latest_model + '/model_1.pt')
+
+        model.fit(dataset,
+            eval_episodes=dataset,
+            n_steps_per_epoch = 1,
+            n_steps = 1,
+            scorers={
+                'td_error': td_error_scorer,
+                'value_scale': average_value_estimation_scorer,
+            }
+        )
+
+        saved_models = glob.glob('d3rlpy_logs/*')
+        latest_model = max(saved_models, key=os.path.getctime)
+        os.rename(latest_model, 'd3rlpy_logs/' + str(index + 1))
+        index += 1
+    except Exception as e:
+        print('Skipped scenario ' + scenario + ' due to error: ' + str(e))
+
+imgfile.close()
+client.close()
+print("Finish Processing")
+
diff --git a/competition/track-2/train/utility.py b/competition/track-2/train/utility.py
new file mode 100644
index 0000000000..7afcd47b43
--- /dev/null
+++ b/competition/track-2/train/utility.py
@@ -0,0 +1,186 @@
+import paramiko
+import math
+import numpy as np
+
+class remote_operations:
+    def __init__(self):
+        self.ssh_client = paramiko.SSHClient()
+
+    def connect(self, hostname, username, password):
+        self.ssh_client.load_system_host_keys()
+        self.ssh_client.connect(hostname, username=username, password=password)
+        sftp_client = self.ssh_client.open_sftp()
+        return sftp_client
+        # file = sftp_client.open(filename)
+        # files_list = sftp_client.listdir(path)
+
+def goal_region_reward(threshold, goal_x, goal_y, cur_x, cur_y):
+    eucl_distance = math.sqrt((goal_x - cur_x)**2 + (goal_y - cur_y)**2)
+
+    if eucl_distance <= threshold:
+        return 10
+    else:
+        return 0
+
+def inside_coor_to_pixel(goal_x, goal_y, cur_x, cur_y):
+    ratio = 256 / 50 # 256 pixels corresponds to 50 meters
+    x_diff = abs(goal_x - cur_x)
+    y_diff = abs(goal_y - cur_y)
+
+    # find true condition of first quadrant
+    if goal_x > cur_x and goal_y > cur_y:
+        x_pixel_loc = min(128 + round(x_diff * ratio), 255) # cap on 256 which is the right edge
+        y_pixel_loc = max(127 - round(y_diff * ratio), 0) # cap on 0 which is the upper edge
+
+    # find second quadrant
+    elif goal_x < cur_x and goal_y > cur_y:
+        x_pixel_loc = max(127 - round(x_diff * ratio), 0) # cap on 0 which is the left edge
+        y_pixel_loc = max(127 - round(y_diff * ratio), 0) # cap on 0 which is the upper edge
+
+    # To find third quadrant
+    elif goal_x < cur_x and goal_y < cur_y:
+        x_pixel_loc = max(127 - round(x_diff * ratio), 0) # cap on 0 which is the left edge
+        y_pixel_loc = min(128 + round(y_diff * ratio), 255) # cap on 256 which is the bottom edge
+
+    # To find Fourth quadrant
+    elif goal_x > cur_x and goal_y < cur_y:
+        x_pixel_loc = min(128 + round(x_diff * ratio), 255) # cap on 256 which is the right edge
+        y_pixel_loc = min(128 + round(y_diff * ratio), 255) # cap on 256 which is the bottom edge
+
+    # To find if goal is at cur
+    if (abs(0.98*cur_x) <= abs(goal_x) <= abs(1.02*cur_x)) and (abs(0.98*cur_y) <= abs(goal_y) <= abs(1.02*cur_y)):
+        x_pixel_loc = 128
+        y_pixel_loc = 128
+
+    # On x-axis
+    elif (abs(0.98*cur_y) <= abs(goal_y) <= abs(1.02*cur_y)) and goal_x != cur_x:
+        if goal_x >= cur_x:
+            x_pixel_loc = min(128 + round(x_diff * ratio), 255)
+        else:
+            x_pixel_loc = max(127 - round(x_diff * ratio), 0)
+        y_pixel_loc = min(128 + round(y_diff * ratio), 255)
+
+    # On y-axis
+    elif (abs(0.98*cur_x) <= abs(goal_x) <= abs(1.02*cur_x)) and
goal_y != cur_y: + if goal_y >= cur_y: + y_pixel_loc = max(127 - round(y_diff * ratio), 0) + else: + y_pixel_loc = min(128 + round(y_diff * ratio), 255) + x_pixel_loc = min(128 + round(x_diff * ratio), 255) + + goal_obs = np.zeros((1,256,256)) + goal_obs[0, y_pixel_loc, x_pixel_loc] = 255 + return goal_obs + +def outside_coor_to_pixel(goal_x, goal_y, cur_x, cur_y): + ratio = 256 / 50 # 256 pixels corresonds to 25 meters + x_diff = abs(goal_x - cur_x) + y_diff = abs(goal_y - cur_y) + + # find true condition of first quadrant + if goal_x > cur_x and goal_y > cur_y: + theta = math.atan(y_diff / x_diff) + if 0 < theta < (math.pi / 4): + x_pixel_loc = 255 + y_pixel_loc = max(127 - round((25 * (y_diff/x_diff)) * ratio), 0) + elif (math.pi / 4) < theta < (math.pi / 2): + x_pixel_loc = min(128 + round((25 / (y_diff/x_diff)) * ratio), 255) + y_pixel_loc = 0 + elif theta == (math.pi / 4): + x_pixel_loc = 255 + y_pixel_loc = 0 + + # find second quadrant + elif goal_x < cur_x and goal_y > cur_y: + theta = math.atan(y_diff / x_diff) + if 0 < theta < (math.pi / 4): + x_pixel_loc = 0 + y_pixel_loc = max(127 - round((25 * (y_diff/x_diff)) * ratio), 0) + elif (math.pi / 4) < theta < (math.pi / 2): + x_pixel_loc = max(127 - round((25 / (y_diff/x_diff)) * ratio), 0) + y_pixel_loc = 0 + elif theta == (math.pi / 4): + x_pixel_loc = 0 + y_pixel_loc = 0 + + # To find third quadrant + elif goal_x < cur_x and goal_y < cur_y: + theta = math.atan(y_diff / x_diff) + if 0 < theta < (math.pi / 4): + x_pixel_loc = 0 + y_pixel_loc = min(128 + round((25 * (y_diff/x_diff)) * ratio), 255) + elif (math.pi / 4) < theta < (math.pi / 2): + x_pixel_loc = max(127 - round((25 / (y_diff/x_diff)) * ratio), 0) + y_pixel_loc = 255 + elif theta == (math.pi / 4): + x_pixel_loc = 0 + y_pixel_loc = 255 + + # To find Fourth quadrant + elif goal_x > cur_x and goal_y < cur_y: + theta = math.atan(y_diff / x_diff) + if 0 < theta < (math.pi / 4): + x_pixel_loc = 255 + y_pixel_loc = min(128 + round((25 * (y_diff/x_diff)) * ratio), 255) + elif (math.pi / 4) < theta < (math.pi / 2): + x_pixel_loc = min(128 + round((25 / (y_diff/x_diff)) * ratio), 255) + y_pixel_loc = 255 + elif theta == (math.pi / 4): + x_pixel_loc = 255 + y_pixel_loc = 255 + + # On x-axis + if (abs(0.98*cur_y) <= abs(goal_y) <= abs(1.02*cur_y)) and goal_x != cur_x: + if goal_x >= cur_x: + x_pixel_loc = 255 + else: + x_pixel_loc = 0 + y_pixel_loc = 128 + + # On y-axis + elif (abs(0.98*cur_x) <= abs(goal_x) <= abs(1.02*cur_x)) and goal_y != cur_y: + if goal_y >= cur_y: + y_pixel_loc = 0 + else: + y_pixel_loc = 255 + x_pixel_loc = 128 + + goal_obs = np.zeros((1,256,256)) + goal_obs[0, y_pixel_loc, x_pixel_loc] = 255 + return goal_obs + +# mark goal position with integer 1, other entries are all 0 +def get_goal_layer(goal_x, goal_y, cur_x, cur_y, cur_heading): + + if 0 < cur_heading < math.pi: # Facing Left Half + theta = cur_heading + + elif -(math.pi) < cur_heading < 0: # Facing Right Half + theta = 2*math.pi + cur_heading + + elif cur_heading == 0: # Facing up North + theta = 0 + + elif (cur_heading == math.pi) or (cur_heading == -(math.pi)): # Facing South + theta = 2*math.pi + cur_heading + + trans_matrix = np.array([[math.cos(theta),math.sin(theta)],[-math.sin(theta),math.cos(theta)]]) + cur_pos = np.array([[cur_x],[cur_y]]) + goal_pos = np.array([[goal_x],[goal_y]]) + trans_cur = np.round(np.matmul(trans_matrix, cur_pos), 5) + trans_goal = np.round(np.matmul(trans_matrix, goal_pos), 5) + + if (trans_cur[0,0] - 25) <= trans_goal[0,0] <= (trans_cur[0,0] + 25): + if 
(trans_cur[1,0] - 25) <= trans_goal[1,0] <= (trans_cur[1,0] + 25): + inside = True + else: + inside = False + else: + inside = False + + if inside: + goal_obs = inside_coor_to_pixel(trans_goal[0,0], trans_goal[1,0], trans_cur[0,0], trans_cur[1,0]) + else: + goal_obs = outside_coor_to_pixel(trans_goal[0,0], trans_goal[1,0], trans_cur[0,0], trans_cur[1,0]) + + return goal_obs
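Note: the following is a minimal, self-contained sketch (not part of the diff above; all values are illustrative) showing how the goal layer produced by `get_goal_layer` is stacked with a 3x256x256 bird's-eye-view image to form the 4-channel observation that `train.py` feeds to CQL:

    import numpy as np
    from utility import get_goal_layer

    # Illustrative values only: ego at the map origin facing north (heading 0),
    # goal 10 m to the right and 5 m ahead, i.e. within the 50 m x 50 m image window.
    bev_image = np.zeros((3, 256, 256), dtype=np.uint8)     # stand-in for the rendered BEV image
    goal_layer = get_goal_layer(10.0, 5.0, 0.0, 0.0, 0.0)   # shape (1, 256, 256), goal pixel set to 255
    observation = np.concatenate((bev_image, goal_layer), axis=0)
    assert observation.shape == (4, 256, 256)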