Improved metrics #1952

Merged
merged 18 commits into from
Apr 21, 2023

Changes from 8 commits
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -21,12 +21,18 @@ Copy and pasting the git commit messages is __NOT__ enough.
- The trap manager, `TrapManager`, is now a subclass of `ActorCaptureManager`.
- Considering lane-change durations of 3s to 6s at an assumed speed of 13.89m/s, the via sensor lane acquisition range was increased from 40m to 80m for better driving ability.
- Modified naming of benchmark used in NeurIPS 2022 from driving-smarts-competition-env to driving-smarts-v2022.
- Made the metrics module configurable by supplying parameters through a `Params` class.
- `Params` allows specifying vehicles to be ignored by the `dist_to_obstacles` cost function, which is applicable in platooning tasks.
- Unified computation of `dist_to_destination` (previously known as `completion`) and `steps` (i.e., time taken) as functions inside the cost functions module, instead of computing them separately in a different module.
- In the metrics module, the records (i.e., the raw metrics data) and the scoring (i.e., the formula used to compute the final results) are now separated, to provide greater flexibility when applying metrics to different environments.
- Changed `benchmark_runner_v0.py` to only average records across scenarios within an environment. Records are not averaged across environments, because the scoring formula may differ between environments.
### Deprecated
### Fixed
- Fixed an issue where Argoverse scenarios with a `Mission` would not run properly.
- `Trip.actor` field is now effective. Previously `actor` had no effect.
- Fixed an issue where building sumo scenarios would sometimes stall.
- `VehicleIndex` no longer segfaults when attempting to `repr()` it.
- Fixed CI tests for metrics.
### Removed
- Removed the deprecated `waymo_browser` utility.
### Security
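As a reading aid for the `Params` changes listed above, here is a minimal sketch of how the new configuration is meant to be used. The class and field names (`Params`, `DistToObstacles`, `active`, `ignore`) follow the `metric_formula_platoon.py` file added below; the sketch assumes the remaining `Params` fields have usable defaults.

```python
from smarts.env.gymnasium.wrappers.metric.params import DistToObstacles, Params

# Ignore the ego and the platoon leader when computing the dist_to_obstacles
# cost, mirroring the platooning formula added in this PR.
params = Params(
    dist_to_obstacles=DistToObstacles(
        active=True,
        ignore=["ego", "Leader-007"],
    ),
)
```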
160 changes: 160 additions & 0 deletions smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py
@@ -0,0 +1,160 @@
# MIT License

# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import functools
from typing import Dict

import numpy as np

from smarts.env.gymnasium.wrappers.metric.costs import Costs
from smarts.env.gymnasium.wrappers.metric.formula import FormulaBase, Score
from smarts.env.gymnasium.wrappers.metric.params import (
    Comfort,
    DistToObstacles,
    GapBetweenVehicles,
    Params,
    Steps,
)
from smarts.env.gymnasium.wrappers.metric.types import Record
from smarts.env.gymnasium.wrappers.metric.utils import (
    add_dataclass,
    divide,
    op_dataclass,
)


class Formula(FormulaBase):
    """Sets the (i) cost function parameters, and (ii) score computation formula,
    for an environment.
    """

    def __init__(self):
        pass

    def params(self) -> Params:
        """Return parameters to configure and initialize cost functions.

        Returns:
            Params: Cost function parameters.
        """
        params = Params(
            comfort=Comfort(
                active=False,
            ),  # TODO: Activate after implementing comfort cost function.
            dist_to_obstacles=DistToObstacles(
                active=True,
                ignore=[
                    "ego",  # TODO: Ignore other ego vehicles.
                    "Leader-007",
                ],
            ),
            gap_between_vehicles=GapBetweenVehicles(
                active=False,
                interest="Leader-007",
            ),  # TODO: Activate after implementing gap_between_vehicles cost function.
            steps=Steps(
                active=False,
            ),
        )
        return params

    def score(self, records_sum: Dict[str, Dict[str, Record]]) -> Score:
        """
        Computes several sub-component scores and one total combined score named
        "Overall" on the wrapped environment.

        +-------------------+--------+-----------------------------------------------------------+
        |                   | Range  | Remarks                                                   |
        +===================+========+===========================================================+
        | Overall           | [0, 1] | Total score. The higher, the better.                      |
        +-------------------+--------+-----------------------------------------------------------+
        | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. |
        +-------------------+--------+-----------------------------------------------------------+
        | GapBetweenVehicles| [0, 1] | Gap between vehicles in a platoon. The higher, the better.|
        +-------------------+--------+-----------------------------------------------------------+
        | Humanness         | [0, 1] | Humanness indicator. The higher, the better.              |
        +-------------------+--------+-----------------------------------------------------------+
        | Rules             | [0, 1] | Traffic rules compliance. The higher, the better.         |
        +-------------------+--------+-----------------------------------------------------------+

        Returns:
            Score: Contains "Overall", "DistToDestination", "GapBetweenVehicles",
            "Humanness", and "Rules" scores.
        """

        costs_total = Costs()
        episodes = 0
        for scen, val in records_sum.items():
            # Number of agents in scenario.
            agents_in_scenario = len(val.keys())
            costs_list, counts_list = zip(
                *[(record.costs, record.counts) for agent, record in val.items()]
            )
            # Sum costs over all agents in scenario.
            costs_sum_agent: Costs = functools.reduce(
                lambda a, b: add_dataclass(a, b), costs_list
            )
            # Average costs over number of agents in scenario.
            costs_mean_agent = op_dataclass(costs_sum_agent, agents_in_scenario, divide)
            # Sum costs over all scenarios.
            costs_total = add_dataclass(costs_total, costs_mean_agent)
            # Increment total number of episodes.
            episodes += counts_list[0].episodes

        # Average costs over total number of episodes.
        costs_final = op_dataclass(costs_total, episodes, divide)

        # Compute sub-components of score.
        dist_to_destination = costs_final.dist_to_destination
        humanness = _humanness(costs=costs_final)
        rules = _rules(costs=costs_final)
        gap_between_vehicles = costs_final.gap_between_vehicles
        overall = (
            0.50 * (1 - dist_to_destination)
            + 0.25 * gap_between_vehicles
            + 0.20 * humanness
            + 0.05 * rules
        )

        return Score(
            {
                "overall": overall,
                "dist_to_destination": dist_to_destination,
                "gap_between_vehicles": gap_between_vehicles,
                "humanness": humanness,
                "rules": rules,
            }
        )


def _humanness(costs: Costs) -> float:
    humanness = np.array(
        [costs.dist_to_obstacles, costs.jerk_linear, costs.lane_center_offset]
    )
    humanness = np.mean(humanness, dtype=float)
    return 1 - humanness


def _rules(costs: Costs) -> float:
    rules = np.array([costs.speed_limit, costs.wrong_way])
    rules = np.mean(rules, dtype=float)
    return 1 - rules
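To make the weighting in `Formula.score` above concrete, here is a small worked sketch with made-up averaged cost values. The weights (0.50, 0.25, 0.20, 0.05) and the humanness/rules definitions follow the code above; the numbers themselves are purely illustrative.

```python
# Hypothetical averaged costs, plugged into the scoring formula above.
dist_to_destination = 0.2                # fraction of route remaining; lower is better
gap_between_vehicles = 0.8               # hypothetical gap sub-score
humanness = 1 - (0.1 + 0.05 + 0.15) / 3  # 1 - mean(dist_to_obstacles, jerk_linear, lane_center_offset) = 0.9
rules = 1 - (0.0 + 0.0) / 2              # 1 - mean(speed_limit, wrong_way) = 1.0

overall = (
    0.50 * (1 - dist_to_destination)
    + 0.25 * gap_between_vehicles
    + 0.20 * humanness
    + 0.05 * rules
)
print(round(overall, 2))  # 0.40 + 0.20 + 0.18 + 0.05 = 0.83
```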
127 changes: 71 additions & 56 deletions smarts/benchmark/entrypoints/benchmark_runner_v0.py
@@ -21,16 +21,21 @@
# THE SOFTWARE.
import logging
import os
import pprint
from pathlib import Path
from typing import List, Tuple
from typing import Dict

import gymnasium as gym
import psutil
import ray

from smarts.benchmark.driving_smarts import load_config
from smarts.core.utils.import_utils import import_module_from_file
from smarts.core.utils.logging import suppress_output
from smarts.env.gymnasium.wrappers.metrics import Metrics, Score
from smarts.env.gymnasium.wrappers.metric.formula import Score
from smarts.env.gymnasium.wrappers.metric.metrics import Metrics
from smarts.env.gymnasium.wrappers.metric.types import Record
from smarts.env.gymnasium.wrappers.metric.utils import multiply, op_dataclass
from smarts.zoo import registry as agent_registry

LOG_WORKERS = False
@@ -52,20 +57,20 @@ def _eval_worker_local(name, env_config, episodes, agent_locator, error_tolerant
agent_interface=agent_registry.make(locator=agent_locator).interface,
**env_config["kwargs"],
)
env = Metrics(env)
env = Metrics(env, formula_path=env_config["metric_formula"])
agents = {
agent_id: agent_registry.make_agent(locator=agent_locator)
for agent_id in env.agent_ids
}

observation, info = env.reset()
obs, info = env.reset()
current_resets = 0
try:
while current_resets < episodes:
try:
action = {
agent_id: agents[agent_id].act(obs)
for agent_id, obs in observation.items()
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in obs.items()
}
# assert env.action_space.contains(action)
except Exception:
@@ -76,14 +81,14 @@ def _eval_worker_local(name, env_config, episodes, agent_locator, error_tolerant
raise
terminated, truncated = False, True
else:
observation, reward, terminated, truncated, info = env.step(action)
obs, reward, terminated, truncated, info = env.step(action)
if terminated["__all__"] or truncated["__all__"]:
current_resets += 1
observation, info = env.reset()
obs, info = env.reset()
finally:
score = env.score()
records = env.records()
env.close()
return name, score
return name, records


def _parallel_task_iterator(env_args, benchmark_args, agent_locator, log_workers):
@@ -97,9 +102,9 @@ def _parallel_task_iterator(env_args, benchmark_args, agent_locator, log_workers
for name, env_config in env_args.items():
if len(unfinished_refs) >= max_queued_tasks:
ready_refs, unfinished_refs = ray.wait(unfinished_refs, num_returns=1)
for name, score in ray.get(ready_refs):
yield name, score
print(f"Evaluating {name}...")
for name, records in ray.get(ready_refs):
yield name, records
print(f"\nEvaluating {name}...")
unfinished_refs.append(
_eval_worker.remote(
name=name,
@@ -109,23 +114,23 @@
error_tolerant=ERROR_TOLERANT,
)
)
for name, score in ray.get(unfinished_refs):
yield name, score
for name, records in ray.get(unfinished_refs):
yield name, records
finally:
ray.shutdown()


def _serial_task_iterator(env_args, benchmark_args, agent_locator, *args, **_):
for name, env_config in env_args.items():
print(f"Evaluating {name}...")
name, score = _eval_worker_local(
print(f"\nEvaluating {name}...")
name, records = _eval_worker_local(
name=name,
env_config=env_config,
episodes=benchmark_args["eval_episodes"],
agent_locator=agent_locator,
error_tolerant=ERROR_TOLERANT,
)
yield name, score
yield name, records


def benchmark(benchmark_args, agent_locator, log_workers=False):
@@ -135,59 +140,69 @@ def benchmark(benchmark_args, agent_locator, log_workers=False):
agent_locator(str): Locator string for the registered agent.
debug_log(bool): Whether the benchmark should log to stdout.
"""
print(f"Starting `{benchmark_args['name']}` benchmark.")
debug = benchmark_args.get("debug", {})
print(f"\n\n<-- Starting `{benchmark_args['name']}` benchmark -->\n")
message = benchmark_args.get("message")
if message is not None:
print(message)
env_args = {}

debug = benchmark_args.get("debug", {})
iterator = _serial_task_iterator if debug.get("serial") else _parallel_task_iterator

root_dir = Path(__file__).resolve().parents[3]
for env_name, env_config in benchmark_args["envs"].items():
metric_formula = (
root_dir / x
if (x := env_config.get("metric_formula", None)) != None
else None
)

env_args = {}
for scenario in env_config["scenarios"]:
scenario_path = str(Path(__file__).resolve().parents[3] / scenario)
kwargs = dict(benchmark_args.get("shared_env_kwargs", {}))
kwargs.update(env_config.get("kwargs", {}))
env_args[f"{env_name}-{scenario}"] = dict(
env=env_config["loc"],
scenario=scenario_path,
scenario=str(root_dir / scenario),
kwargs=kwargs,
metric_formula=metric_formula,
)
named_scores = []

iterator = _serial_task_iterator if debug.get("serial") else _parallel_task_iterator

for name, score in iterator(
env_args=env_args,
benchmark_args=benchmark_args,
agent_locator=agent_locator,
log_workers=log_workers,
):
named_scores.append((name, score))
print(f"Scoring {name}...")

def format_one_line_scores(named_scores: List[Tuple[str, Score]]):
name_just = 30
headers = "SCENARIO".ljust(name_just) + "SCORE"
return (
headers
+ "\n"
+ "\n".join(
f"- {name}:".ljust(name_just) + f"{score}"
for name, score in named_scores
records_cumulative: Dict[str, Dict[str, Record]] = {}
for name, records in iterator(
env_args=env_args,
benchmark_args=benchmark_args,
agent_locator=agent_locator,
log_workers=log_workers,
):
records_cumulative.update(records)
print(f"\nScoring {name} ...")

score = _get_score(records=records_cumulative, metric_formula=metric_formula)
print("\nSCORE")
pprint.pprint(score)

print("\n<-- Evaluation complete -->\n")


def _get_score(records: Dict[str, Dict[str, Record]], metric_formula: Path) -> Score:
# Convert averaged records into sum of records.
records_sum = {}
for scen, agents in records.items():
records_sum[scen] = {}
for agent, data in agents.items():
records_sum[scen][agent] = Record(
costs=op_dataclass(data.costs, data.counts.episodes, multiply),
counts=data.counts,
)
)

def format_scores_total(named_scores: List[Tuple[str, Score]], scenario_count):
score_sum = Score(*[sum(f) for f in zip(*[score for _, score in named_scores])])
return "\n".join(
f"- {k}: {v/scenario_count}" for k, v in score_sum._asdict().items()
)
# Import scoring formula
import_module_from_file("custom_formula", metric_formula)
from custom_formula import Formula

formula = Formula()

print("Evaluation complete...")
print()
print(format_one_line_scores(named_scores))
print()
print("`Driving SMARTS` averaged result:")
print(format_scores_total(named_scores, len(env_args)))
score = formula.score(records_sum=records_sum)
return score


def benchmark_from_configs(benchmark_config, agent_locator, debug_log=False):
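For context, the following is a minimal sketch of the `benchmark_args` structure that the refactored `benchmark(...)` above consumes. The key names (`name`, `message`, `debug.serial`, `shared_env_kwargs`, `eval_episodes`, `envs`, `loc`, `scenarios`, `kwargs`, and the new per-environment `metric_formula`) are taken from the code in this PR; the concrete values, locators, and scenario paths are hypothetical placeholders.

```python
# Hypothetical benchmark_args; key names follow benchmark_runner_v0.py above,
# values are placeholders only.
benchmark_args = {
    "name": "driving_smarts_v2023",
    "message": "Platooning benchmark with configurable metrics.",
    "eval_episodes": 2,
    "debug": {"serial": True},  # use _serial_task_iterator instead of ray workers
    "shared_env_kwargs": {"headless": True},  # placeholder shared env kwargs
    "envs": {
        "platoon_env": {
            "loc": "smarts.env:platoon-v0",  # hypothetical env locator
            "scenarios": ["scenarios/sumo/platoon/merge_exit"],  # hypothetical scenario path
            "kwargs": {},
            # Path, relative to the repository root, of the module providing `Formula`.
            "metric_formula": "smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py",
        },
    },
}

# benchmark(benchmark_args=benchmark_args, agent_locator="<your_agent_locator>")
```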