diff --git a/docs/ecosystem/rllib.rst b/docs/ecosystem/rllib.rst
index 2a49341ae8..8e4b031f1c 100644
--- a/docs/ecosystem/rllib.rst
+++ b/docs/ecosystem/rllib.rst
@@ -28,7 +28,7 @@ many docs about ``Ray`` and ``RLlib``. We recommend to read the following pages
 Resume training
 ---------------
 
-With respect to ``SMARTS/examples/rl/rllib`` example, if you want to continue an aborted experiment, you can set ``resume=True`` in ``tune.run``. But note that ``resume=True`` will continue to use the same configuration as was set in the original experiment.
+With respect to ``SMARTS/examples/rl/rllib`` examples, if you want to continue an aborted experiment, you can set ``resume_training=True``. But note that ``resume_training=True`` will continue to use the same configuration as was set in the original experiment.
 To make changes to a started experiment, you can edit the latest experiment file in ``./results``.
-Or if you want to start a new experiment but train from an existing checkpoint, you can set ``restore=checkpoint_path`` in ``tune.run``.
+Or if you want to start a new experiment but train from an existing checkpoint, you will need to look into `How to Save and Load Trial Checkpoints `_.
diff --git a/examples/rl/rllib/configs.py b/examples/rl/rllib/configs.py
new file mode 100644
index 0000000000..841b8f1cdc
--- /dev/null
+++ b/examples/rl/rllib/configs.py
@@ -0,0 +1,149 @@
+import argparse
+import multiprocessing
+from pathlib import Path
+from typing import Any, Dict, Literal, Optional, Union
+
+try:
+    from ray.rllib.algorithms.algorithm import AlgorithmConfig
+    from ray.rllib.algorithms.callbacks import DefaultCallbacks
+    from ray.rllib.algorithms.pg import PGConfig
+    from ray.tune.search.sample import Integer as IntegerDomain
+except Exception as e:
+    from smarts.core.utils.custom_exceptions import RayException
+
+    raise RayException.required_to("rllib.py")
+
+
+def gen_pg_config(
+    scenario,
+    envision,
+    rollout_fragment_length,
+    train_batch_size,
+    num_workers,
+    log_level: Literal["DEBUG", "INFO", "WARN", "ERROR"],
+    seed: Union[int, IntegerDomain],
+    rllib_policies: Dict[str, Any],
+    agent_specs: Dict[str, Any],
+    callbacks: Optional[DefaultCallbacks],
+) -> AlgorithmConfig:
+    assert len(set(rllib_policies.keys()).difference(agent_specs)) == 0
+    algo_config = (
+        PGConfig()
+        .environment(
+            env="rllib_hiway-v0",
+            env_config={
+                "seed": seed,
+                "scenarios": [str(Path(scenario).expanduser().resolve().absolute())],
+                "headless": not envision,
+                "agent_specs": agent_specs,
+                "observation_options": "multi_agent",
+            },
+            disable_env_checking=True,
+        )
+        .framework(framework="tf2", eager_tracing=True)
+        .rollouts(
+            rollout_fragment_length=rollout_fragment_length,
+            num_rollout_workers=num_workers,
+            num_envs_per_worker=1,
+            enable_tf1_exec_eagerly=True,
+        )
+        .training(
+            lr_schedule=[(0, 1e-3), (1e3, 5e-4), (1e5, 1e-4), (1e7, 5e-5), (1e8, 1e-5)],
+            train_batch_size=train_batch_size,
+        )
+        .multi_agent(
+            policies=rllib_policies,
+            policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: f"{agent_id}",
+        )
+        .callbacks(callbacks_class=callbacks)
+        .debugging(log_level=log_level)
+    )
+    return algo_config
+
+
+def gen_parser(
+    prog: str, default_result_dir: str, default_save_model_path: str
+) -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(prog)
+    parser.add_argument(
+        "--scenario",
+        type=str,
+        default=str(Path(__file__).resolve().parents[3] / "scenarios/sumo/loop"),
+        help="Scenario to run (see scenarios/ for some samples you can use)",
+    )
+    parser.add_argument(
+        "--envision",
+        action="store_true",
+        help="Run simulation with Envision display.",
+    )
+    parser.add_argument(
+        "--num_samples",
+        type=int,
+        default=1,
+        help="Number of times to sample from hyperparameter space",
+    )
+    parser.add_argument(
+        "--rollout_fragment_length",
+        type=str,
+        default="auto",
+        help="Episodes are divided into fragments of this many steps for each rollout. In this example this will be ensured to be `1=
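
For reference, here is a minimal usage sketch of the gen_pg_config helper introduced by this patch. It is not part of the diff: the scenario path, agent id, observation/action spaces, and hyperparameter values below are placeholder assumptions, whereas the real example derives them from its argument parser and SMARTS agent interfaces.

    # Sketch only (assumes SMARTS and Ray RLlib are installed).
    import gymnasium as gym
    import numpy as np
    from ray.rllib.algorithms.callbacks import DefaultCallbacks

    from configs import gen_pg_config  # module added by this patch
    from smarts.core.agent_interface import AgentInterface, AgentType
    from smarts.zoo.agent_spec import AgentSpec

    agent_id = "Agent-0"  # illustrative agent id
    agent_specs = {
        agent_id: AgentSpec(
            interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=500)
        )
    }
    # Placeholder observation/action spaces for the single policy (assumption).
    obs_space = gym.spaces.Box(low=-1e10, high=1e10, shape=(3,), dtype=np.float32)
    act_space = gym.spaces.Discrete(4)
    rllib_policies = {agent_id: (None, obs_space, act_space, {})}

    algo_config = gen_pg_config(
        scenario="scenarios/sumo/loop",  # placeholder scenario path
        envision=False,
        rollout_fragment_length="auto",
        train_batch_size=2000,
        num_workers=2,
        log_level="WARN",
        seed=42,
        rllib_policies=rllib_policies,
        agent_specs=agent_specs,
        callbacks=DefaultCallbacks,
    )
    algo = algo_config.build()  # AlgorithmConfig.build() constructs the PG algorithm
    print(algo.train())         # run one training iteration

The returned AlgorithmConfig can also be handed to Ray Tune instead of being built directly, which is how the accompanying example script drives hyperparameter sampling.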