diff --git a/competition/competition_bundle/track1_evaluation/config.yaml b/competition/competition_bundle/track1_evaluation/config.yaml index 462068d82b..93daf858dc 100644 --- a/competition/competition_bundle/track1_evaluation/config.yaml +++ b/competition/competition_bundle/track1_evaluation/config.yaml @@ -2,3 +2,4 @@ phase: track1 eval_episodes: 50 seed: 42 scenarios: [] +bubble_env_evaluation_seeds: [] \ No newline at end of file diff --git a/competition/competition_bundle/track2_evaluation/config.yaml b/competition/competition_bundle/track2_evaluation/config.yaml index 6e1f5ed330..6b65c7671e 100644 --- a/competition/competition_bundle/track2_evaluation/config.yaml +++ b/competition/competition_bundle/track2_evaluation/config.yaml @@ -1,4 +1,5 @@ phase: track2 eval_episodes: 50 seed: 42 -scenarios: [] \ No newline at end of file +scenarios: [] +bubble_env_evaluation_seeds: [] \ No newline at end of file diff --git a/competition/competition_bundle/validation_evaluation/config.yaml b/competition/competition_bundle/validation_evaluation/config.yaml index c4e72c56b4..4b969206f8 100644 --- a/competition/competition_bundle/validation_evaluation/config.yaml +++ b/competition/competition_bundle/validation_evaluation/config.yaml @@ -3,3 +3,4 @@ eval_episodes: 1 seed: 42 scenarios: - 1_to_2lane_left_turn_c +bubble_env_evaluation_seeds: [] diff --git a/competition/evaluation/copy_data.py b/competition/evaluation/copy_data.py index 3b793dcc15..6fd5b19937 100644 --- a/competition/evaluation/copy_data.py +++ b/competition/evaluation/copy_data.py @@ -1,5 +1,5 @@ import copy -from typing import Any, Dict, Iterable, Tuple +from typing import Any, Dict, Iterable, List, Tuple import gym @@ -10,7 +10,10 @@ def __init__(self): self._agent_names = None def __call__(self, **kwargs): - self._data = copy.deepcopy(dict(**kwargs)) + try: + self._data = copy.deepcopy(dict(**kwargs)) + except RecursionError: + self._data = copy.copy(dict(**kwargs)) @property def data(self): @@ -26,10 +29,10 @@ def agent_names(self, names: Iterable[str]): class CopyData(gym.Wrapper): - def __init__(self, env: gym.Env, datastore: DataStore): + def __init__(self, env: gym.Env, agent_ids: List[str], datastore: DataStore): super(CopyData, self).__init__(env) self._datastore = datastore - self._datastore.agent_names = list(env.agent_specs.keys()) + self._datastore.agent_names = agent_ids def step( self, action: Dict[str, Any] diff --git a/competition/evaluation/costs.py b/competition/evaluation/costs.py index 418b1beafc..8bc4454900 100644 --- a/competition/evaluation/costs.py +++ b/competition/evaluation/costs.py @@ -38,12 +38,12 @@ def _collisions(obs: Observation) -> Dict[str, int]: def _dist_to_goal(obs: Observation) -> Dict[str, float]: mission_goal = obs.ego_vehicle_state.mission.goal - assert hasattr( - mission_goal, "position" - ), "Mission has no goal position, thus `dist_to_goal` cannot be calculated." + if hasattr(mission_goal, "position"): + rel = obs.ego_vehicle_state.position[:2] - mission_goal.position[:2] + dist = sum(abs(rel)) + else: + dist = 0 - rel = obs.ego_vehicle_state.position[:2] - mission_goal.position[:2] - dist = sum(abs(rel)) return {"dist_to_goal": dist} @@ -169,7 +169,7 @@ def func(obs: Observation) -> Dict[str, float]: norm_dist_from_center = signed_dist_from_center / lane_hwidth # J_LC : Lane center offset - j_lc = norm_dist_from_center**2 + j_lc = norm_dist_from_center ** 2 ave, step = _running_ave(prev_ave=ave, prev_step=step, new_val=j_lc) return {"lane_center_offset": ave} @@ -199,7 +199,7 @@ def func(obs: Observation) -> Dict[str, float]: # Excess speed beyond speed limit. overspeed = ego.speed - speed_limit if ego.speed > speed_limit else 0 - j_v = overspeed**2 + j_v = overspeed ** 2 ave, step = _running_ave(prev_ave=ave, prev_step=step, new_val=j_v) return {"speed_limit": ave} diff --git a/competition/evaluation/evaluate.py b/competition/evaluation/evaluate.py index d38e744c6d..818914cfa2 100644 --- a/competition/evaluation/evaluate.py +++ b/competition/evaluation/evaluate.py @@ -4,7 +4,7 @@ import subprocess import sys from pathlib import Path -from typing import Any, Dict +from typing import Any, Dict, List, Optional logger = logging.getLogger(__file__) @@ -15,6 +15,7 @@ "eval_episodes", "seed", "scenarios", + "bubble_env_evaluation_seeds", } _DEFAULT_EVALUATION_CONFIG = dict( phase="track1", @@ -30,6 +31,7 @@ "3lane_cut_in", "3lane_overtake", ], + bubble_env_evaluation_seeds=[6], ) _SUBMISSION_CONFIG_KEYS = { "img_meters", @@ -41,36 +43,24 @@ ) -def make_env( - config: Dict[str, Any], - scenario: str, +def wrap_env( + env, + agent_ids: List[str], datastore: "DataStore", wrappers=[], ): """Make environment. Args: - config (Dict[str, Any]): A dictionary of config parameters. - scenario (str): Scenario + env (gym.Env): The environment to wrap. wrappers (List[gym.Wrapper], optional): Sequence of gym environment wrappers. Defaults to empty list []. Returns: - gym.Env: Environment corresponding to the `scenario`. + gym.Env: Environment wrapped for evaluation. """ - - # Create environment - env = gym.make( - "smarts.env:multi-scenario-v0", - scenario=scenario, - img_meters=int(config["img_meters"]), - img_pixels=int(config["img_pixels"]), - action_space="TargetPose", - sumo_headless=True, - ) - # Make a copy of original info. - env = CopyData(env, datastore) + env = CopyData(env, agent_ids, datastore) # Disallow modification of attributes starting with "_" by external users. env = gym.Wrapper(env) @@ -82,20 +72,44 @@ def make_env( def evaluate(config): - scenarios = config["scenarios"] - + base_scenarios = config["scenarios"] + shared_configs = dict( + action_space="TargetPose", + img_meters=int(config["img_meters"]), + img_pixels=int(config["img_pixels"]), + sumo_headless=True, + ) # Make evaluation environments. envs_eval = {} - for scen in scenarios: + for scenario in base_scenarios: + env = gym.make( + "smarts.env:multi-scenario-v0", scenario=scenario, **shared_configs + ) + datastore = DataStore() + envs_eval[f"{scenario}"] = ( + wrap_env( + env, + agent_ids=list(env.agent_specs.keys()), + datastore=datastore, + wrappers=submitted_wrappers(), + ), + datastore, + None, + ) + + bonus_eval_seeds = config.get("bubble_env_evaluation_seeds", []) + for seed in bonus_eval_seeds: + env = gym.make("bubble_env_contrib:bubble_env-v0", **shared_configs) datastore = DataStore() - envs_eval[f"{scen}"] = ( - make_env( - config=config, - scenario=scen, + envs_eval[f"bubble_env_{seed}"] = ( + wrap_env( + env, + agent_ids=list(env.agent_ids), datastore=datastore, wrappers=submitted_wrappers(), ), datastore, + seed, ) # Instantiate submitted policy. @@ -103,7 +117,7 @@ def evaluate(config): # Evaluate model for each scenario score = Score() - for index, (env_name, (env, datastore)) in enumerate(envs_eval.items()): + for index, (env_name, (env, datastore, seed)) in enumerate(envs_eval.items()): logger.info(f"\n{index}. Evaluating env {env_name}.\n") counts, costs = run( env=env, @@ -111,6 +125,7 @@ def evaluate(config): env_name=env_name, policy=policy, config=config, + seed=seed, ) score.add(counts, costs) @@ -119,18 +134,25 @@ def evaluate(config): logger.info("\nFinished evaluating.\n") # Close all environments - for env, _ in envs_eval.values(): + for env, _, _ in envs_eval.values(): env.close() return rank def run( - env, datastore: "DataStore", env_name: str, policy: "Policy", config: Dict[str, Any] + env, + datastore: "DataStore", + env_name: str, + policy: "Policy", + config: Dict[str, Any], + seed: Optional[int], ): # Instantiate metric for score calculation. metric = Metric(env_name=env_name, agent_names=datastore.agent_names) + # Ensure deterministic seeding + env.seed((seed or 0) + config["seed"]) for _ in range(config["eval_episodes"]): observations = env.reset() dones = {"__all__": False} @@ -203,6 +225,7 @@ def to_codalab_scores_string(rank) -> str: "pip", "install", "smarts[camera-obs] @ git+https://github.com/huawei-noah/SMARTS.git@comp-1", + "bubble_env @ git+https://bitbucket.org/malban/bubble_env.git@master", ] ) subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", req_file]) diff --git a/competition/track1/train/train/info.py b/competition/track1/train/train/info.py index c15b0fb980..2530ed7d91 100644 --- a/competition/track1/train/train/info.py +++ b/competition/track1/train/train/info.py @@ -28,6 +28,6 @@ def step( obs, reward, done, info = self.env.step(action) for agent_id in info.keys(): - info[agent_id]["is_success"] = bool(info[agent_id]["score"]) + info[agent_id]["is_success"] = bool(info[agent_id].get("score", True)) return obs, reward, done, info diff --git a/smarts/env/multi_scenario_v0_env.py b/smarts/env/multi_scenario_v0_env.py index fec8288cdf..93b57e6564 100644 --- a/smarts/env/multi_scenario_v0_env.py +++ b/smarts/env/multi_scenario_v0_env.py @@ -109,7 +109,7 @@ def multi_scenario_v0_env( agent_specs = { f"Agent_{i}": AgentSpec( - interface=_resolve_agent_interface(img_meters, img_pixels, action_space) + interface=resolve_agent_interface(img_meters, img_pixels, action_space) ) for i in range(env_specs["num_agent"]) } @@ -238,9 +238,11 @@ def _get_env_specs(scenario: str): raise Exception(f"Unknown scenario {scenario}.") -def _resolve_agent_interface( +def resolve_agent_interface( img_meters: int = 64, img_pixels: int = 256, action_space="TargetPose", **kwargs ): + """Resolve an agent interface for the environments in this module.""" + done_criteria = DoneCriteria( collision=True, off_road=True,