From e93fe7def79e6fdf303c1bd9193a88d5ff61553c Mon Sep 17 00:00:00 2001 From: adaickalavan Date: Sat, 3 Jun 2023 12:25:01 -0400 Subject: [PATCH] Cap the steps and weight the scores. --- CHANGELOG.md | 2 + .../driving_smarts/v2023/config_1.yaml | 2 - .../driving_smarts/v2023/config_2.yaml | 2 - .../driving_smarts/v2023/config_3.yaml | 2 - .../v2023/metric_formula_drive.py | 138 ++++++----- .../v2023/metric_formula_platoon.py | 142 ++++++------ .../entrypoints/benchmark_runner_v0.py | 48 ++-- .../env/gymnasium/driving_smarts_2023_env.py | 1 + smarts/env/gymnasium/platoon_env.py | 1 + smarts/env/gymnasium/wrappers/metric/costs.py | 8 +- .../env/gymnasium/wrappers/metric/formula.py | 214 +++++++++++------- .../env/gymnasium/wrappers/metric/metrics.py | 30 ++- smarts/env/gymnasium/wrappers/metric/types.py | 14 +- smarts/env/tests/test_metrics.py | 4 +- smarts/sstudio/types/scenario.py | 8 +- 15 files changed, 366 insertions(+), 250 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52b02584b2..46e3611340 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,8 @@ Copy and pasting the git commit messages is __NOT__ enough. - Changed instances of `hiway-v0` and `gym` to use `hiway-v1` and `gymnasium`, respectively. - `RoadMap.Route` now optionally stores the start and end lanes of the route. - `DistToDestination` metric is now computed by summing the (i) off-route distance driven by the vehicle from its last on-route position, and (ii) the distance to goal from the vehicle's last on-route position. +- `Steps` metric is capped by scenario duration set in the scenario metadata. +- Overall metric score is weighted by each agent's task difficulty. ### Deprecated - `visdom` is set to be removed from the SMARTS object parameters. - Deprecated `start_time` on missions. diff --git a/smarts/benchmark/driving_smarts/v2023/config_1.yaml b/smarts/benchmark/driving_smarts/v2023/config_1.yaml index 5f480d19d1..6b3a57f675 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_1.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_1.yaml @@ -2,8 +2,6 @@ benchmark: name: "Driving SMARTS 2023.1" message: | - This is the Driving SMARTS 2023.1 benchmark. - For context see: - https://smarts-project.github.io/competition/2023_driving_smarts/ - https://codalab.lisn.upsaclay.fr/competitions/ diff --git a/smarts/benchmark/driving_smarts/v2023/config_2.yaml b/smarts/benchmark/driving_smarts/v2023/config_2.yaml index de913eaa87..e7131ef475 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_2.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_2.yaml @@ -2,8 +2,6 @@ benchmark: name: "Driving SMARTS 2023.2" message: | - This is the Driving SMARTS 2023.2 benchmark. - For context see: - https://smarts-project.github.io/competition/2023_driving_smarts/ - https://codalab.lisn.upsaclay.fr/competitions/ diff --git a/smarts/benchmark/driving_smarts/v2023/config_3.yaml b/smarts/benchmark/driving_smarts/v2023/config_3.yaml index fd11954a73..d2e89b43f4 100644 --- a/smarts/benchmark/driving_smarts/v2023/config_3.yaml +++ b/smarts/benchmark/driving_smarts/v2023/config_3.yaml @@ -2,8 +2,6 @@ benchmark: name: "Driving SMARTS 2023.3" message: | - This is the Driving SMARTS 2023.3 benchmark. - For context see: - https://smarts-project.github.io/competition/2023_driving_smarts/ - https://codalab.lisn.upsaclay.fr/competitions/ diff --git a/smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py b/smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py index 8c06a81b01..925d9dec00 100644 --- a/smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py +++ b/smarts/benchmark/driving_smarts/v2023/metric_formula_drive.py @@ -25,13 +25,27 @@ import numpy as np from smarts.env.gymnasium.wrappers.metric.costs import Costs -from smarts.env.gymnasium.wrappers.metric.formula import FormulaBase, Score, avg_costs +from smarts.env.gymnasium.wrappers.metric.formula import ( + FormulaBase, + Score, + agent_scores, + agent_weights, + score_rule_violation, + weighted_score, +) from smarts.env.gymnasium.wrappers.metric.params import ( + Collisions, Comfort, DistToDestination, DistToObstacles, JerkLinear, + LaneCenterOffset, + OffRoad, Params, + SpeedLimit, + Steps, + VehicleGap, + WrongWay, ) from smarts.env.gymnasium.wrappers.metric.types import Record @@ -51,75 +65,83 @@ def params(self) -> Params: Params: Cost function parameters. """ params = Params( - comfort=Comfort( - active=True, - ), - dist_to_destination=DistToDestination( - active=True, - ), - dist_to_obstacles=DistToObstacles( - active=False, - ), + collisions=Collisions(active=False), + comfort=Comfort(active=True), + dist_to_destination=DistToDestination(active=True), + dist_to_obstacles=DistToObstacles(active=False), jerk_linear=JerkLinear(active=False), + lane_center_offset=LaneCenterOffset(active=True), + off_road=OffRoad(active=False), + speed_limit=SpeedLimit(active=True), + steps=Steps(active=True), + vehicle_gap=VehicleGap(active=False), + wrong_way=WrongWay(active=True), ) return params - def score(self, records_sum: Dict[str, Dict[str, Record]]) -> Score: + def score(self, records: Dict[str, Dict[str, Record]]) -> Score: """ Computes several sub-component scores and one total combined score named "Overall" on the wrapped environment. - +-------------------+--------+-----------------------------------------------------------+ - | | Range | Remarks | - +===================+========+===========================================================+ - | Overall | [0, 1] | Total score. The higher, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | Time | [0, 1] | Time taken to complete scenario. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | HumannessError | [0, 1] | Humanness indicator. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | RuleViolation | [0, 1] | Traffic rules compliance. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ + Args: + records (Dict[str, Dict[str, Record]]): Records. Returns: - Score: Contains "Overall", "DistToDestination", "VehicleGap", - "HumannessError", and "RuleViolation" scores. + Score: "Overall" score and other sub-component scores. """ - costs_final = avg_costs(records_sum=records_sum) - - # Compute sub-components of score. - dist_to_destination = costs_final.dist_to_destination - humanness_error = _humanness_error(costs=costs_final) - rule_violation = _rule_violation(costs=costs_final) - time = costs_final.steps - overall = ( - 0.25 * (1 - dist_to_destination) - + 0.25 * (1 - time) - + 0.25 * (1 - humanness_error) - + 0.25 * (1 - rule_violation) - ) - - return Score( - { - "overall": overall, - "dist_to_destination": dist_to_destination, - "time": time, - "humanness_error": humanness_error, - "rule_violation": rule_violation, - } - ) - - -def _humanness_error(costs: Costs) -> float: + agent_weight = agent_weights(records=records) + agent_score = agent_scores(records=records, func=costs_to_score) + return weighted_score(scores=agent_score, weights=agent_weight) + + +def costs_to_score(costs: Costs) -> Score: + """Compute score from costs. + + +-------------------+--------+-----------------------------------------------------------+ + | | Range | Remarks | + +===================+========+===========================================================+ + | Overall | [0, 1] | Total score. The higher, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | Time | [0, 1] | Time taken to complete scenario. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | HumannessError | [0, 1] | Humanness indicator. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | RuleViolation | [0, 1] | Traffic rules compliance. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + + Args: + costs (Costs): Costs. + + Returns: + Score: Score. + """ + dist_to_destination = costs.dist_to_destination + humanness_error = _score_humanness_error(costs=costs) + rule_violation = score_rule_violation(costs=costs) + time = costs.steps + overall = ( + 0.25 * (1 - dist_to_destination) + + 0.25 * (1 - time) + + 0.25 * (1 - humanness_error) + + 0.25 * (1 - rule_violation) + ) + + return Score( + { + "overall": overall, + "dist_to_destination": dist_to_destination, + "time": time, + "humanness_error": humanness_error, + "rule_violation": rule_violation, + } + ) + + +def _score_humanness_error(costs: Costs) -> float: humanness_error = np.array([costs.comfort, costs.lane_center_offset]) humanness_error = np.mean(humanness_error, dtype=float) return humanness_error - - -def _rule_violation(costs: Costs) -> float: - rule_violation = np.array([costs.speed_limit, costs.wrong_way]) - rule_violation = np.mean(rule_violation, dtype=float) - return rule_violation diff --git a/smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py b/smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py index 2b10d76fa7..a3391407cd 100644 --- a/smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py +++ b/smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py @@ -25,15 +25,27 @@ import numpy as np from smarts.env.gymnasium.wrappers.metric.costs import Costs -from smarts.env.gymnasium.wrappers.metric.formula import FormulaBase, Score, avg_costs +from smarts.env.gymnasium.wrappers.metric.formula import ( + FormulaBase, + Score, + agent_scores, + agent_weights, + score_rule_violation, + weighted_score, +) from smarts.env.gymnasium.wrappers.metric.params import ( + Collisions, Comfort, DistToDestination, DistToObstacles, JerkLinear, + LaneCenterOffset, + OffRoad, Params, + SpeedLimit, Steps, VehicleGap, + WrongWay, ) from smarts.env.gymnasium.wrappers.metric.types import Record @@ -53,81 +65,83 @@ def params(self) -> Params: Params: Cost function parameters. """ params = Params( - comfort=Comfort( - active=True, - ), - dist_to_destination=DistToDestination( - active=True, - ), - dist_to_obstacles=DistToObstacles( - active=False, - ), + collisions=Collisions(active=False), + comfort=Comfort(active=True), + dist_to_destination=DistToDestination(active=True), + dist_to_obstacles=DistToObstacles(active=False), jerk_linear=JerkLinear(active=False), - vehicle_gap=VehicleGap( - active=True, - ), - steps=Steps( - active=False, - ), + lane_center_offset=LaneCenterOffset(active=True), + off_road=OffRoad(active=False), + speed_limit=SpeedLimit(active=True), + steps=Steps(active=False), + vehicle_gap=VehicleGap(active=True), + wrong_way=WrongWay(active=True), ) return params - def score(self, records_sum: Dict[str, Dict[str, Record]]) -> Score: + def score(self, records: Dict[str, Dict[str, Record]]) -> Score: """ Computes several sub-component scores and one total combined score named "Overall" on the wrapped environment. - +-------------------+--------+-----------------------------------------------------------+ - | | Range | Remarks | - +===================+========+===========================================================+ - | Overall | [0, 1] | Total score. The higher, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | VehicleGap | [0, 1] | Gap between vehicles in a convoy. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | HumannessError | [0, 1] | Humanness indicator. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | RuleViolation | [0, 1] | Traffic rules compliance. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ + Args: + records (Dict[str, Dict[str, Record]]): Records. Returns: - Score: Contains "Overall", "DistToDestination", "VehicleGap", - "HumannessError", and "RuleViolation" scores. + Score: "Overall" score and other sub-component scores. """ - costs_final = avg_costs(records_sum=records_sum) - - # Compute sub-components of score. - dist_to_destination = costs_final.dist_to_destination - humanness_error = _humanness_error(costs=costs_final) - rule_violation = _rule_violation(costs=costs_final) - vehicle_gap = costs_final.vehicle_gap - overall = ( - 0.25 * (1 - dist_to_destination) - + 0.25 * (1 - vehicle_gap) - + 0.25 * (1 - humanness_error) - + 0.25 * (1 - rule_violation) - ) - - return Score( - { - "overall": overall, - "dist_to_destination": dist_to_destination, - "vehicle_gap": vehicle_gap, - "humanness_error": humanness_error, - "rule_violation": rule_violation, - } - ) - - -def _humanness_error(costs: Costs) -> float: + agent_weight = agent_weights(records=records) + agent_score = agent_scores(records=records, func=costs_to_score) + return weighted_score(scores=agent_score, weights=agent_weight) + + +def costs_to_score(costs: Costs) -> Score: + """Compute score from costs. + + +-------------------+--------+-----------------------------------------------------------+ + | | Range | Remarks | + +===================+========+===========================================================+ + | Overall | [0, 1] | Total score. The higher, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | VehicleGap | [0, 1] | Gap between vehicles in a convoy. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | HumannessError | [0, 1] | Humanness indicator. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | RuleViolation | [0, 1] | Traffic rules compliance. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + + Args: + costs (Costs): Costs. + + Returns: + Score: Score. + """ + dist_to_destination = costs.dist_to_destination + humanness_error = _score_humanness_error(costs=costs) + rule_violation = score_rule_violation(costs=costs) + vehicle_gap = costs.vehicle_gap + overall = ( + 0.25 * (1 - dist_to_destination) + + 0.25 * (1 - vehicle_gap) + + 0.25 * (1 - humanness_error) + + 0.25 * (1 - rule_violation) + ) + + return Score( + { + "overall": overall, + "dist_to_destination": dist_to_destination, + "vehicle_gap": vehicle_gap, + "humanness_error": humanness_error, + "rule_violation": rule_violation, + } + ) + + +def _score_humanness_error(costs: Costs) -> float: humanness_error = np.array([costs.comfort, costs.lane_center_offset]) humanness_error = np.mean(humanness_error, dtype=float) return humanness_error - - -def _rule_violation(costs: Costs) -> float: - rule_violation = np.array([costs.speed_limit, costs.wrong_way]) - rule_violation = np.mean(rule_violation, dtype=float) - return rule_violation diff --git a/smarts/benchmark/entrypoints/benchmark_runner_v0.py b/smarts/benchmark/entrypoints/benchmark_runner_v0.py index bedd9e2d53..294e79c70a 100644 --- a/smarts/benchmark/entrypoints/benchmark_runner_v0.py +++ b/smarts/benchmark/entrypoints/benchmark_runner_v0.py @@ -19,9 +19,9 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. +import json import logging import os -import pprint from pathlib import Path from typing import Dict @@ -35,7 +35,6 @@ from smarts.env.gymnasium.wrappers.metric.formula import FormulaBase, Score from smarts.env.gymnasium.wrappers.metric.metrics import Metrics from smarts.env.gymnasium.wrappers.metric.types import Record -from smarts.env.gymnasium.wrappers.metric.utils import multiply, op_dataclass from smarts.zoo import registry as agent_registry LOG_WORKERS = False @@ -171,40 +170,49 @@ def benchmark(benchmark_args, agent_locator, log_workers=False): ) records_cumulative: Dict[str, Dict[str, Record]] = {} - for name, records in iterator( + for _, records in iterator( env_args=env_args, benchmark_args=benchmark_args, agent_locator=agent_locator, log_workers=log_workers, ): records_cumulative.update(records) - print(f"\nScoring {name} ...") - score = _get_score(records=records_cumulative, metric_formula=metric_formula) - print("\nSCORE") - pprint.pprint(score) + score = _get_weighted_score( + records=records_cumulative, metric_formula=metric_formula + ) + print("\n\nOverall Weighted Score:\n") + print(json.dumps(score, indent=2)) + score = _get_agent_score( + records=records_cumulative, metric_formula=metric_formula + ) + print("\n\nIndividual Agent Score:\n") + print(json.dumps(score, indent=2)) print("\n<-- Evaluation complete -->\n") -def _get_score(records: Dict[str, Dict[str, Record]], metric_formula: Path) -> Score: - # Convert averaged records into sum of records. - records_sum = {} - for scen, agents in records.items(): - records_sum[scen] = {} - for agent, data in agents.items(): - records_sum[scen][agent] = Record( - costs=op_dataclass(data.costs, data.counts.episodes, multiply), - counts=data.counts, - ) - - # Import scoring formula +def _get_weighted_score( + records: Dict[str, Dict[str, Record]], metric_formula: Path +) -> Score: import_module_from_file("custom_formula", metric_formula) from custom_formula import Formula formula: FormulaBase = Formula() - score = formula.score(records_sum=records_sum) + score = formula.score(records=records) + return score + + +def _get_agent_score( + records: Dict[str, Dict[str, Record]], metric_formula: Path +) -> Dict[str, Dict[str, Score]]: + import_module_from_file("custom_formula", metric_formula) + from custom_formula import costs_to_score + + from smarts.env.gymnasium.wrappers.metric.formula import agent_scores + + score = agent_scores(records=records, func=costs_to_score) return score diff --git a/smarts/env/gymnasium/driving_smarts_2023_env.py b/smarts/env/gymnasium/driving_smarts_2023_env.py index 86cb84edbd..25127dddfd 100644 --- a/smarts/env/gymnasium/driving_smarts_2023_env.py +++ b/smarts/env/gymnasium/driving_smarts_2023_env.py @@ -165,6 +165,7 @@ def resolve_agent_interface(agent_interface: AgentInterface): wrong_way=False, not_moving=False, agents_alive=None, + interest=None, ) max_episode_steps = 1000 waypoints_lookahead = 80 diff --git a/smarts/env/gymnasium/platoon_env.py b/smarts/env/gymnasium/platoon_env.py index ff8e6662f5..1df1e23e98 100644 --- a/smarts/env/gymnasium/platoon_env.py +++ b/smarts/env/gymnasium/platoon_env.py @@ -169,6 +169,7 @@ def resolve_agent_interface(agent_interface: AgentInterface): on_shoulder=False, wrong_way=False, not_moving=False, + agents_alive=None, interest=InterestDoneCriteria( include_scenario_marked=True, strict=True, diff --git a/smarts/env/gymnasium/wrappers/metric/costs.py b/smarts/env/gymnasium/wrappers/metric/costs.py index 9c908275f0..f95fd1b37b 100644 --- a/smarts/env/gymnasium/wrappers/metric/costs.py +++ b/smarts/env/gymnasium/wrappers/metric/costs.py @@ -94,7 +94,7 @@ def func( T_u += u_t if not done: - return Costs(comfort=-np.inf) + return Costs(comfort=np.nan) else: T_trv = step for _ in range(T_p): @@ -148,7 +148,7 @@ def func( prev_route_displacement = cur_route_displacement prev_dist_travelled = tot_dist_travelled - return Costs(dist_to_destination=-np.inf) + return Costs(dist_to_destination=np.nan) elif obs.events.reached_goal: return Costs(dist_to_destination=0) else: @@ -422,10 +422,10 @@ def func( step = step + 1 if not done: - return Costs(steps=-np.inf) + return Costs(steps=np.nan) if obs.events.reached_goal or obs.events.interest_done: - return Costs(steps=step / max_episode_steps) + return Costs(steps=min(step, max_episode_steps) / max_episode_steps) elif ( len(obs.events.collisions) > 0 or obs.events.off_road diff --git a/smarts/env/gymnasium/wrappers/metric/formula.py b/smarts/env/gymnasium/wrappers/metric/formula.py index 0e947c8740..ed653d661d 100644 --- a/smarts/env/gymnasium/wrappers/metric/formula.py +++ b/smarts/env/gymnasium/wrappers/metric/formula.py @@ -21,18 +21,12 @@ # THE SOFTWARE. from __future__ import annotations -import functools -from typing import Dict, NewType +from typing import Callable, Dict, NewType import numpy as np from smarts.env.gymnasium.wrappers.metric.params import Params from smarts.env.gymnasium.wrappers.metric.types import Costs, Record -from smarts.env.gymnasium.wrappers.metric.utils import ( - add_dataclass, - divide, - op_dataclass, -) Score = NewType("Score", Dict[str, float]) @@ -53,10 +47,13 @@ def params(self) -> Params: """ raise NotImplementedError - def score(self, records_sum: Dict[str, Dict[str, Record]]) -> Score: + def score(self, records: Dict[str, Dict[str, Record]]) -> Score: """Computes sub-component scores and one total combined score named "Overall" on the wrapped environment. + Args: + records (Dict[str, Dict[str, Record]]): Records. + Returns: "Overall" score and other sub-component scores. """ @@ -79,57 +76,138 @@ def params(self) -> Params: """ return Params() - def score(self, records_sum: Dict[str, Dict[str, Record]]) -> Score: - """ - Computes four sub-component scores, namely, "DistanceToDestination", - "Time", "HumannessError", "RuleViolation", and one total combined score named + def score(self, records: Dict[str, Dict[str, Record]]) -> Score: + """Computes sub-component scores and one total combined score named "Overall" on the wrapped environment. - +-------------------+--------+-----------------------------------------------------------+ - | | Range | Remarks | - +===================+========+===========================================================+ - | Overall | [0, 1] | Total score. The higher, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | Time | [0, 1] | Time taken to complete scenario. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | HumannessError | [0, 1] | Humanness indicator. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ - | RuleViolation | [0, 1] | Traffic rules compliance. The lower, the better. | - +-------------------+--------+-----------------------------------------------------------+ + Args: + records (Dict[str, Dict[str, Record]]): Records. Returns: - "Overall", "DistToDestination", "Time", - "HumannessError", and "RuleViolation" scores. + Score: "Overall" score and other sub-component scores. """ - costs_final = avg_costs(records_sum=records_sum) - - # Compute sub-components of score. - dist_to_destination = costs_final.dist_to_destination - humanness_error = _humanness_error(costs=costs_final) - rule_violation = _rule_violation(costs=costs_final) - time = costs_final.steps - overall = ( - 0.25 * (1 - dist_to_destination) - + 0.25 * (1 - time) - + 0.25 * (1 - humanness_error) - + 0.25 * (1 - rule_violation) + agent_weight = agent_weights(records=records) + agent_score = agent_scores(records=records, func=costs_to_score) + return weighted_score(scores=agent_score, weights=agent_weight) + + +def agent_weights(records: Dict[str, Dict[str, Record]]) -> Dict[str, Dict[str, float]]: + """Retrieves weight for each agent in every scenario. + + Args: + records (Dict[str, Dict[str, Record]]): Records. + + Returns: + Dict[str,Dict[str,float]]: Weight for each agent in every scenario. + """ + + weights = {} + for scen, agents in records.items(): + weights[scen] = dict( + map(lambda i: (i[0], i[1].metadata.difficulty), agents.items()) ) - return Score( - { - "overall": overall, - "dist_to_destination": dist_to_destination, - "time": time, - "humanness_error": humanness_error, - "rule_violation": rule_violation, + return weights + + +def agent_scores( + records: Dict[str, Dict[str, Record]], func: Callable[[Costs], Score] +) -> Dict[str, Dict[str, Score]]: + """Computes score for each agent in every scenario. + + Args: + records (Dict[str, Dict[str, Record]]): Records. + func (Callable[[Costs],Score]): Function which computes Score given Costs. + + Returns: + Dict[str,Dict[str,Score]]: Score for each agent in every scenario. + """ + + scores = {} + for scen, agents in records.items(): + scores[scen] = dict(map(lambda i: (i[0], func(i[1].costs)), agents.items())) + + return scores + + +def weighted_score( + scores: Dict[str, Dict[str, Score]], weights: Dict[str, Dict[str, float]] +) -> Score: + """Computes single overall weighted score using `weights`. + + Args: + scores (Dict[str,Dict[str,Score]]): Score for each agent in every scenario. + weights (Dict[str,Dict[str,float]]): Weight for each agent in every scenario. + + Returns: + Score: Weighted score. + """ + cumulative_score = {} + total_weight = 0 + for scen, agent in scores.items(): + for agent_name, agent_score in agent.items(): + current_score = dict( + map( + lambda i: (i[0], i[1] * weights[scen][agent_name]), + agent_score.items(), + ) + ) + cumulative_score = { + score_name: score_val + cumulative_score.get(score_name, 0) + for score_name, score_val in current_score.items() } - ) + total_weight += weights[scen][agent_name] + + return Score({key: val / total_weight for key, val in cumulative_score.items()}) + + +def costs_to_score(costs: Costs) -> Score: + """Compute score from costs. + + +-------------------+--------+-----------------------------------------------------------+ + | | Range | Remarks | + +===================+========+===========================================================+ + | Overall | [0, 1] | Total score. The higher, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | Time | [0, 1] | Time taken to complete scenario. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | HumannessError | [0, 1] | Humanness indicator. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + | RuleViolation | [0, 1] | Traffic rules compliance. The lower, the better. | + +-------------------+--------+-----------------------------------------------------------+ + + Args: + costs (Costs): Costs. + + Returns: + Score: Score. + """ + dist_to_destination = costs.dist_to_destination + humanness_error = _score_humanness_error(costs=costs) + rule_violation = score_rule_violation(costs=costs) + time = costs.steps + overall = ( + 0.25 * (1 - dist_to_destination) + + 0.25 * (1 - time) + + 0.25 * (1 - humanness_error) + + 0.25 * (1 - rule_violation) + ) + + return Score( + { + "overall": overall, + "dist_to_destination": dist_to_destination, + "time": time, + "humanness_error": humanness_error, + "rule_violation": rule_violation, + } + ) -def _humanness_error(costs: Costs) -> float: +def _score_humanness_error(costs: Costs) -> float: humanness_error = np.array( [costs.dist_to_obstacles, costs.jerk_linear, costs.lane_center_offset] ) @@ -137,41 +215,15 @@ def _humanness_error(costs: Costs) -> float: return humanness_error -def _rule_violation(costs: Costs) -> float: - rule_violation = np.array([costs.speed_limit, costs.wrong_way]) - rule_violation = np.mean(rule_violation, dtype=float) - return rule_violation - - -def avg_costs(records_sum: Dict[str, Dict[str, Record]]) -> Costs: - """Averages costs over number of agents and number of episodes. +def score_rule_violation(costs: Costs) -> float: + """Default rule violation scoring formula. Args: - records_sum (Dict[str, Dict[str, Record]]): Raw costs. + costs (Costs): Costs. Returns: - Costs: Averaged costs. + float: Rule violation score. """ - costs_total = Costs() - episodes = 0 - for scen, val in records_sum.items(): - # Number of agents in scenario. - agents_in_scenario = len(val.keys()) - costs_list, counts_list = zip( - *[(record.costs, record.counts) for agent, record in val.items()] - ) - # Sum costs over all agents in scenario. - costs_sum_agent: Costs = functools.reduce( - lambda a, b: add_dataclass(a, b), costs_list - ) - # Average costs over number of agents in scenario. - costs_mean_agent = op_dataclass(costs_sum_agent, agents_in_scenario, divide) - # Sum costs over all scenarios. - costs_total = add_dataclass(costs_total, costs_mean_agent) - # Increment total number of episodes. - episodes += counts_list[0].episodes - - # Average costs over total number of episodes. - costs_final = op_dataclass(costs_total, episodes, divide) - - return costs_final + rule_violation = np.array([costs.speed_limit, costs.wrong_way]) + rule_violation = np.mean(rule_violation, dtype=float) + return rule_violation diff --git a/smarts/env/gymnasium/wrappers/metric/metrics.py b/smarts/env/gymnasium/wrappers/metric/metrics.py index 14fb0c0b6a..87b03d756c 100644 --- a/smarts/env/gymnasium/wrappers/metric/metrics.py +++ b/smarts/env/gymnasium/wrappers/metric/metrics.py @@ -46,7 +46,7 @@ ) from smarts.env.gymnasium.wrappers.metric.formula import FormulaBase, Score from smarts.env.gymnasium.wrappers.metric.params import Params -from smarts.env.gymnasium.wrappers.metric.types import Costs, Counts, Record +from smarts.env.gymnasium.wrappers.metric.types import Costs, Counts, Metadata, Record from smarts.env.gymnasium.wrappers.metric.utils import ( add_dataclass, divide, @@ -250,12 +250,14 @@ def reset(self, **kwargs): } }) + max_episode_steps = self._scen.metadata.get("scenario_duration",0) / self.env.smarts.fixed_timestep_sec + max_episode_steps = max_episode_steps or self.env.agent_interfaces[agent_name].max_episode_steps cost_funcs_kwargs.update({ "dist_to_obstacles": { "ignore": self._params.dist_to_obstacles.ignore }, "steps": { - "max_episode_steps": self.env.agent_interfaces[agent_name].max_episode_steps + "max_episode_steps": max_episode_steps }, }) self._cost_funcs[agent_name] = make_cost_funcs( @@ -268,6 +270,7 @@ def reset(self, **kwargs): agent_name: Record( costs=Costs(), counts=Counts(), + metadata=Metadata(difficulty=self._scen.metadata.get("scenario_difficulty",1)), ) for agent_name in self._cur_agents } @@ -284,11 +287,11 @@ def records(self) -> Dict[str, Dict[str, Record]]: $ env.records() $ { scen1: { - agent1: Record(costs, counts), - agent2: Record(costs, counts), + agent1: Record(costs, counts, metadata), + agent2: Record(costs, counts, metadata), }, scen2: { - agent1: Record(costs, counts), + agent1: Record(costs, counts, metadata), }, } @@ -307,6 +310,7 @@ def records(self) -> Dict[str, Dict[str, Record]]: data_copy.costs, data_copy.counts.episodes, divide ), counts=data_copy.counts, + metadata=data_copy.metadata, ) return records @@ -320,8 +324,7 @@ def score(self) -> Score: Dict[str, float]: Contains key-value pairs denoting score components. """ - records_sum_copy = copy.deepcopy(self._records_sum) - return self._formula.score(records_sum=records_sum_copy) + return self._formula.score(records=self.records()) def _get_end_and_dist( @@ -527,8 +530,17 @@ def _check_scen(scenario: Scenario, agent_interfaces: Dict[str, AgentInterface]) agent_interfaces (Dict[str,AgentInterface]): Agent interfaces. Raises: - AttributeError: If any agent's mission is not of type PositionGoal. + MetricsError: If (i) scenario difficulty is not properly normalized, + or (ii) any agent's goal is improperly configured. """ + + difficulty = scenario.metadata.get("scenario_difficulty", None) + if not ((difficulty is None) or (0 < difficulty <= 1)): + raise MetricsError( + "Expected scenario difficulty to be normalized within (0,1], but " + f"got difficulty={difficulty}." + ) + goal_types = { agent_name: type(agent_mission.goal) for agent_name, agent_mission in scenario.missions.items() @@ -545,7 +557,7 @@ def _check_scen(scenario: Scenario, agent_interfaces: Dict[str, AgentInterface]) and aoi != None ) ): - raise AttributeError( + raise MetricsError( "{0} has an unsupported goal type {1} and interest done criteria {2} " "combination.".format( agent_name, goal_types[agent_name], interest_criteria diff --git a/smarts/env/gymnasium/wrappers/metric/types.py b/smarts/env/gymnasium/wrappers/metric/types.py index 6ad22c17cd..06e97a7a0f 100644 --- a/smarts/env/gymnasium/wrappers/metric/types.py +++ b/smarts/env/gymnasium/wrappers/metric/types.py @@ -55,10 +55,20 @@ class Counts: """ +@dataclass(frozen=True) +class Metadata: + """Metadata of the record.""" + + difficulty: float = 1 + """Task difficulty value. + """ + + @dataclass class Record: - """Stores an agent's scenario-completion, performance-count, and - performance-cost values.""" + """Stores an agent's performance-cost, performance-count, and + performance-metadata values.""" costs: Costs counts: Counts + metadata: Metadata diff --git a/smarts/env/tests/test_metrics.py b/smarts/env/tests/test_metrics.py index 7893573d3f..a9d4394367 100644 --- a/smarts/env/tests/test_metrics.py +++ b/smarts/env/tests/test_metrics.py @@ -31,7 +31,7 @@ from smarts.core.controllers import ActionSpaceType from smarts.core.coordinates import Heading, Point from smarts.core.plan import EndlessGoal, Goal, Mission, PositionalGoal, Start -from smarts.env.gymnasium.wrappers.metric.metrics import Metrics +from smarts.env.gymnasium.wrappers.metric.metrics import Metrics, MetricsError from smarts.zoo.agent_spec import AgentSpec @@ -163,7 +163,7 @@ def test_reset(make_env): goal=EndlessGoal(), ), ): - with pytest.raises(AttributeError): + with pytest.raises(MetricsError): env = Metrics(env=make_env) env.reset() return diff --git a/smarts/sstudio/types/scenario.py b/smarts/sstudio/types/scenario.py index e0021190c0..eb0b0d5557 100644 --- a/smarts/sstudio/types/scenario.py +++ b/smarts/sstudio/types/scenario.py @@ -45,9 +45,9 @@ class ScenarioMetadataFields(IntEnum): actor_of_interest_re_filter = enum.auto() """Actors with names that match this pattern are actors of interest.""" scenario_difficulty = enum.auto() - """Custom difficulty marking values.""" + """Custom difficulty marking values, normalized to (0,1].""" scenario_duration = enum.auto() - """The expected scenario time length.""" + """The expected scenario time length in seconds.""" class ScenarioMetadata(StandardMetadata): @@ -58,8 +58,8 @@ def __init__( metadata: Optional[Dict[Union[str, ScenarioMetadataFields], Any]] = None, *, actor_of_interest_re_filter: Optional[str] = None, - actor_of_interest_color: Optional[Colors] = Colors.Blue, - scenario_difficulty: Optional[int] = None, + actor_of_interest_color: Optional[Colors] = None, + scenario_difficulty: Optional[float] = None, scenario_duration: Optional[float] = None, ) -> None: if metadata is None: