Improved metrics #1952

Merged
merged 18 commits into from
Apr 21, 2023

Changes from 8 commits
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -21,12 +21,18 @@ Copy and pasting the git commit messages is __NOT__ enough.
- The trap manager, `TrapManager`, is now a subclass of `ActorCaptureManager`.
- Considering lane-change durations of 3s to 6s at an assumed speed of 13.89m/s, the via sensor lane acquisition range was increased from 40m to 80m for better driving ability.
- Modified naming of benchmark used in NeurIPS 2022 from driving-smarts-competition-env to driving-smarts-v2022.
- Made the metrics module configurable by supplying parameters through a `Params` class.
- `Params` allows specifying vehicles to be ignored by the `dist_to_obstacles` cost function, which is applicable in platooning tasks.
- Unified computation of `dist_to_destination` (previously known as `completion`) and `steps` (i.e., time taken) as functions inside the cost functions module, instead of computing them separately in a different module.
- In the metrics module, the records (i.e., the raw metrics data) and the scoring (i.e., the formula used to compute the final results) are now separated, to provide greater flexibility when applying metrics to different environments.
- Changed `benchmark_runner_v0.py` to only average records across scenarios within an environment. Records are not averaged across environments, because the scoring formula may differ between environments.
### Deprecated
### Fixed
- Fixed an issue where Argoverse scenarios with a `Mission` would not run properly.
- `Trip.actor` field is now effective. Previously `actor` had no effect.
- Fixed an issue where building sumo scenarios would sometimes stall.
- `VehicleIndex` no longer segfaults when attempting to `repr()` it.
- Fixed CI tests for metrics.
### Removed
- Removed the deprecated `waymo_browser` utility.
### Security
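As a reading aid for the `Params` changes listed above, here is a minimal sketch of how the new configuration is meant to be used. The class and field names (`Params`, `DistToObstacles`, `active`, `ignore`) follow the `metric_formula_platoon.py` file added below; the sketch assumes the remaining `Params` fields have usable defaults.

```python
from smarts.env.gymnasium.wrappers.metric.params import DistToObstacles, Params

# Ignore the ego and the platoon leader when computing the dist_to_obstacles
# cost, mirroring the platooning formula added in this PR.
params = Params(
    dist_to_obstacles=DistToObstacles(
        active=True,
        ignore=["ego", "Leader-007"],
    ),
)
```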
160 changes: 160 additions & 0 deletions smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py
@@ -0,0 +1,160 @@
# MIT License

# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import functools
from typing import Dict

import numpy as np

from smarts.env.gymnasium.wrappers.metric.costs import Costs
from smarts.env.gymnasium.wrappers.metric.formula import FormulaBase, Score
from smarts.env.gymnasium.wrappers.metric.params import (
    Comfort,
    DistToObstacles,
    GapBetweenVehicles,
    Params,
    Steps,
)
from smarts.env.gymnasium.wrappers.metric.types import Record
from smarts.env.gymnasium.wrappers.metric.utils import (
    add_dataclass,
    divide,
    op_dataclass,
)


class Formula(FormulaBase):
    """Sets the (i) cost function parameters, and (ii) score computation formula,
    for an environment.
    """

    def __init__(self):
        pass

    def params(self) -> Params:
        """Return parameters to configure and initialize cost functions.

        Returns:
            Params: Cost function parameters.
        """
        params = Params(
            comfort=Comfort(
                active=False,
            ),  # TODO: Activate after implementing comfort cost function.
            dist_to_obstacles=DistToObstacles(
                active=True,
                ignore=[
                    "ego",  # TODO: Ignore other ego vehicles.
                    "Leader-007",
                ],
            ),
            gap_between_vehicles=GapBetweenVehicles(
                active=False,
                interest="Leader-007",
            ),  # TODO: Activate after implementing gap_between_vehicles cost function.
            steps=Steps(
                active=False,
            ),
        )
        return params

    def score(self, records_sum: Dict[str, Dict[str, Record]]) -> Score:
        """
        Computes several sub-component scores and one total combined score named
        "Overall" on the wrapped environment.

        +-------------------+--------+-----------------------------------------------------------+
        |                   | Range  | Remarks                                                   |
        +===================+========+===========================================================+
        | Overall           | [0, 1] | Total score. The higher, the better.                      |
        +-------------------+--------+-----------------------------------------------------------+
        | DistToDestination | [0, 1] | Remaining distance to destination. The lower, the better. |
        +-------------------+--------+-----------------------------------------------------------+
        | GapBetweenVehicles| [0, 1] | Gap between vehicles in a platoon. The higher, the better.|
        +-------------------+--------+-----------------------------------------------------------+
        | Humanness         | [0, 1] | Humanness indicator. The higher, the better.              |
        +-------------------+--------+-----------------------------------------------------------+
        | Rules             | [0, 1] | Traffic rules compliance. The higher, the better.         |
        +-------------------+--------+-----------------------------------------------------------+

        Returns:
            Score: Contains "Overall", "DistToDestination", "GapBetweenVehicles",
            "Humanness", and "Rules" scores.
        """

        costs_total = Costs()
        episodes = 0
        for scen, val in records_sum.items():
            # Number of agents in scenario.
            agents_in_scenario = len(val.keys())
            costs_list, counts_list = zip(
                *[(record.costs, record.counts) for agent, record in val.items()]
            )
            # Sum costs over all agents in scenario.
            costs_sum_agent: Costs = functools.reduce(
                lambda a, b: add_dataclass(a, b), costs_list
            )
            # Average costs over number of agents in scenario.
            costs_mean_agent = op_dataclass(costs_sum_agent, agents_in_scenario, divide)
            # Sum costs over all scenarios.
            costs_total = add_dataclass(costs_total, costs_mean_agent)
            # Increment total number of episodes.
            episodes += counts_list[0].episodes

        # Average costs over total number of episodes.
        costs_final = op_dataclass(costs_total, episodes, divide)

        # Compute sub-components of score.
        dist_to_destination = costs_final.dist_to_destination
        humanness = _humanness(costs=costs_final)
        rules = _rules(costs=costs_final)
        gap_between_vehicles = costs_final.gap_between_vehicles
        overall = (
            0.50 * (1 - dist_to_destination)
            + 0.25 * gap_between_vehicles
            + 0.20 * humanness
            + 0.05 * rules
        )

        return Score(
            {
                "overall": overall,
                "dist_to_destination": dist_to_destination,
                "gap_between_vehicles": gap_between_vehicles,
                "humanness": humanness,
                "rules": rules,
            }
        )


def _humanness(costs: Costs) -> float:
    humanness = np.array(
        [costs.dist_to_obstacles, costs.jerk_linear, costs.lane_center_offset]
    )
    humanness = np.mean(humanness, dtype=float)
    return 1 - humanness


def _rules(costs: Costs) -> float:
    rules = np.array([costs.speed_limit, costs.wrong_way])
    rules = np.mean(rules, dtype=float)
    return 1 - rules
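To make the weighting in `Formula.score` above concrete, here is a small worked sketch with made-up averaged cost values. The weights (0.50, 0.25, 0.20, 0.05) and the humanness/rules definitions follow the code above; the numbers themselves are purely illustrative.

```python
# Hypothetical averaged costs, plugged into the scoring formula above.
dist_to_destination = 0.2                # fraction of route remaining; lower is better
gap_between_vehicles = 0.8               # hypothetical gap sub-score
humanness = 1 - (0.1 + 0.05 + 0.15) / 3  # 1 - mean(dist_to_obstacles, jerk_linear, lane_center_offset) = 0.9
rules = 1 - (0.0 + 0.0) / 2              # 1 - mean(speed_limit, wrong_way) = 1.0

overall = (
    0.50 * (1 - dist_to_destination)
    + 0.25 * gap_between_vehicles
    + 0.20 * humanness
    + 0.05 * rules
)
print(round(overall, 2))  # 0.40 + 0.20 + 0.18 + 0.05 = 0.83
```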
127 changes: 71 additions & 56 deletions smarts/benchmark/entrypoints/benchmark_runner_v0.py
@@ -21,16 +21,21 @@
# THE SOFTWARE.
import logging
import os
import pprint
from pathlib import Path
from typing import List, Tuple
from typing import Dict

import gymnasium as gym
import psutil
import ray

from smarts.benchmark.driving_smarts import load_config
from smarts.core.utils.import_utils import import_module_from_file
from smarts.core.utils.logging import suppress_output
from smarts.env.gymnasium.wrappers.metrics import Metrics, Score
from smarts.env.gymnasium.wrappers.metric.formula import Score
from smarts.env.gymnasium.wrappers.metric.metrics import Metrics
from smarts.env.gymnasium.wrappers.metric.types import Record
from smarts.env.gymnasium.wrappers.metric.utils import multiply, op_dataclass
from smarts.zoo import registry as agent_registry

LOG_WORKERS = False
@@ -52,20 +57,20 @@ def _eval_worker_local(name, env_config, episodes, agent_locator, error_tolerant
agent_interface=agent_registry.make(locator=agent_locator).interface,
**env_config["kwargs"],
)
env = Metrics(env)
env = Metrics(env, formula_path=env_config["metric_formula"])
agents = {
agent_id: agent_registry.make_agent(locator=agent_locator)
for agent_id in env.agent_ids
}

observation, info = env.reset()
obs, info = env.reset()
current_resets = 0
try:
while current_resets < episodes:
try:
action = {
agent_id: agents[agent_id].act(obs)
for agent_id, obs in observation.items()
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in obs.items()
}
# assert env.action_space.contains(action)
except Exception:
@@ -76,14 +81,14 @@ def _eval_worker_local(name, env_config, episodes, agent_locator, error_tolerant
raise
terminated, truncated = False, True
else:
observation, reward, terminated, truncated, info = env.step(action)
obs, reward, terminated, truncated, info = env.step(action)
if terminated["__all__"] or truncated["__all__"]:
current_resets += 1
observation, info = env.reset()
obs, info = env.reset()
finally:
score = env.score()
records = env.records()
env.close()
return name, score
return name, records


def _parallel_task_iterator(env_args, benchmark_args, agent_locator, log_workers):
@@ -97,9 +102,9 @@ def _parallel_task_iterator(env_args, benchmark_args, agent_locator, log_workers
for name, env_config in env_args.items():
if len(unfinished_refs) >= max_queued_tasks:
ready_refs, unfinished_refs = ray.wait(unfinished_refs, num_returns=1)
for name, score in ray.get(ready_refs):
yield name, score
print(f"Evaluating {name}...")
for name, records in ray.get(ready_refs):
yield name, records
print(f"\nEvaluating {name}...")
unfinished_refs.append(
_eval_worker.remote(
name=name,
@@ -109,23 +114,23 @@
error_tolerant=ERROR_TOLERANT,
)
)
for name, score in ray.get(unfinished_refs):
yield name, score
for name, records in ray.get(unfinished_refs):
yield name, records
finally:
ray.shutdown()


def _serial_task_iterator(env_args, benchmark_args, agent_locator, *args, **_):
for name, env_config in env_args.items():
print(f"Evaluating {name}...")
name, score = _eval_worker_local(
print(f"\nEvaluating {name}...")
name, records = _eval_worker_local(
name=name,
env_config=env_config,
episodes=benchmark_args["eval_episodes"],
agent_locator=agent_locator,
error_tolerant=ERROR_TOLERANT,
)
yield name, score
yield name, records


def benchmark(benchmark_args, agent_locator, log_workers=False):
@@ -135,59 +140,69 @@ def benchmark(benchmark_args, agent_locator, log_workers=False):
agent_locator(str): Locator string for the registered agent.
debug_log(bool): Whether the benchmark should log to stdout.
"""
print(f"Starting `{benchmark_args['name']}` benchmark.")
debug = benchmark_args.get("debug", {})
print(f"\n\n<-- Starting `{benchmark_args['name']}` benchmark -->\n")
message = benchmark_args.get("message")
if message is not None:
print(message)
env_args = {}

debug = benchmark_args.get("debug", {})
iterator = _serial_task_iterator if debug.get("serial") else _parallel_task_iterator

root_dir = Path(__file__).resolve().parents[3]
for env_name, env_config in benchmark_args["envs"].items():
metric_formula = (
root_dir / x
if (x := env_config.get("metric_formula", None)) != None
else None
)

env_args = {}
for scenario in env_config["scenarios"]:
scenario_path = str(Path(__file__).resolve().parents[3] / scenario)
kwargs = dict(benchmark_args.get("shared_env_kwargs", {}))
kwargs.update(env_config.get("kwargs", {}))
env_args[f"{env_name}-{scenario}"] = dict(
env=env_config["loc"],
scenario=scenario_path,
scenario=str(root_dir / scenario),
kwargs=kwargs,
metric_formula=metric_formula,
)
named_scores = []

iterator = _serial_task_iterator if debug.get("serial") else _parallel_task_iterator

for name, score in iterator(
env_args=env_args,
benchmark_args=benchmark_args,
agent_locator=agent_locator,
log_workers=log_workers,
):
named_scores.append((name, score))
print(f"Scoring {name}...")

def format_one_line_scores(named_scores: List[Tuple[str, Score]]):
name_just = 30
headers = "SCENARIO".ljust(name_just) + "SCORE"
return (
headers
+ "\n"
+ "\n".join(
f"- {name}:".ljust(name_just) + f"{score}"
for name, score in named_scores
records_cumulative: Dict[str, Dict[str, Record]] = {}
for name, records in iterator(
env_args=env_args,
benchmark_args=benchmark_args,
agent_locator=agent_locator,
log_workers=log_workers,
):
records_cumulative.update(records)
print(f"\nScoring {name} ...")

score = _get_score(records=records_cumulative, metric_formula=metric_formula)
print("\nSCORE")
pprint.pprint(score)

print("\n<-- Evaluation complete -->\n")


def _get_score(records: Dict[str, Dict[str, Record]], metric_formula: Path) -> Score:
# Convert averaged records into sum of records.
records_sum = {}
for scen, agents in records.items():
records_sum[scen] = {}
for agent, data in agents.items():
records_sum[scen][agent] = Record(
costs=op_dataclass(data.costs, data.counts.episodes, multiply),
counts=data.counts,
)
)

def format_scores_total(named_scores: List[Tuple[str, Score]], scenario_count):
score_sum = Score(*[sum(f) for f in zip(*[score for _, score in named_scores])])
return "\n".join(
f"- {k}: {v/scenario_count}" for k, v in score_sum._asdict().items()
)
# Import scoring formula
import_module_from_file("custom_formula", metric_formula)
from custom_formula import Formula

formula = Formula()

print("Evaluation complete...")
print()
print(format_one_line_scores(named_scores))
print()
print("`Driving SMARTS` averaged result:")
print(format_scores_total(named_scores, len(env_args)))
score = formula.score(records_sum=records_sum)
return score


def benchmark_from_configs(benchmark_config, agent_locator, debug_log=False):
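For context, the following is a minimal sketch of the `benchmark_args` structure that the refactored `benchmark(...)` above consumes. The key names (`name`, `message`, `debug.serial`, `shared_env_kwargs`, `eval_episodes`, `envs`, `loc`, `scenarios`, `kwargs`, and the new per-environment `metric_formula`) are taken from the code in this PR; the concrete values, locators, and scenario paths are hypothetical placeholders.

```python
# Hypothetical benchmark_args; key names follow benchmark_runner_v0.py above,
# values are placeholders only.
benchmark_args = {
    "name": "driving_smarts_v2023",
    "message": "Platooning benchmark with configurable metrics.",
    "eval_episodes": 2,
    "debug": {"serial": True},  # use _serial_task_iterator instead of ray workers
    "shared_env_kwargs": {"headless": True},  # placeholder shared env kwargs
    "envs": {
        "platoon_env": {
            "loc": "smarts.env:platoon-v0",  # hypothetical env locator
            "scenarios": ["scenarios/sumo/platoon/merge_exit"],  # hypothetical scenario path
            "kwargs": {},
            # Path, relative to the repository root, of the module providing `Formula`.
            "metric_formula": "smarts/benchmark/driving_smarts/v2023/metric_formula_platoon.py",
        },
    },
}

# benchmark(benchmark_args=benchmark_args, agent_locator="<your_agent_locator>")
```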