From e1c08d5dccd2ab720dc9f855334b48496d25c568 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sat, 2 Aug 2025 16:14:40 -0400 Subject: [PATCH 1/8] Make Isaac-Humanoid-v0 agent config identical as far as possible --- .../classic/humanoid/agents/rl_games_ppo_cfg.yaml | 6 +++--- .../manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py | 6 +++--- .../manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml | 4 ++-- .../manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml | 5 ++--- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml index 95e5f8c4b3b..7f4db42e170 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml @@ -45,13 +45,13 @@ params: device_name: 'cuda:0' multi_gpu: False ppo: True - mixed_precision: True + mixed_precision: False normalize_input: True normalize_value: True value_bootstrap: True num_actors: -1 reward_shaper: - scale_value: 0.6 + scale_value: 1.0 normalize_advantage: True gamma: 0.99 tau: 0.95 @@ -67,7 +67,7 @@ params: truncate_grads: True e_clip: 0.2 horizon_length: 32 - minibatch_size: 32768 + minibatch_size: 32768 # num_envs * horizon_length / num_mini_batches mini_epochs: 5 critic_coef: 4 clip_value: True diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py index ae44b8085a1..56be542e58f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py @@ -12,9 +12,9 @@ class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg): num_steps_per_env = 32 max_iterations = 1000 - save_interval = 50 + save_interval = 100 experiment_name = "humanoid" - empirical_normalization = False + empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, actor_hidden_dims=[400, 200, 100], @@ -22,7 +22,7 @@ class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg): activation="elu", ) algorithm = RslRlPpoAlgorithmCfg( - value_loss_coef=1.0, + value_loss_coef=2.0, use_clipped_value_loss=True, clip_param=0.2, entropy_coef=0.0, diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml index 0259e5240f8..23880465905 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml @@ -1,7 +1,7 @@ # Adapted from rsl_rl config seed: 42 policy: "MlpPolicy" -n_timesteps: !!float 5e7 +n_timesteps: !!float 3.2e4 # For 4 minibatches with 4096 envs # batch_size = (n_envs * n_steps) / n_minibatches = 32768 n_minibatches: 4 @@ -13,7 +13,7 @@ clip_range: 0.2 n_epochs: 5 gae_lambda: 0.95 max_grad_norm: 1.0 -vf_coef: 0.5 +vf_coef: 2.0 policy_kwargs: "dict( activation_fn=nn.ELU, net_arch=[400, 200, 100], diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml
b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml index 9c668ca8315..e2d8e588e46 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml @@ -62,14 +62,13 @@ agent: entropy_loss_scale: 0.0 value_loss_scale: 2.0 kl_threshold: 0.0 - rewards_shaper_scale: 0.6 time_limit_bootstrap: False # logging and checkpoint experiment: directory: "humanoid" experiment_name: "" - write_interval: auto - checkpoint_interval: auto + write_interval: 32 + checkpoint_interval: 3200 # Sequential trainer From a270caaac7193a51cd7b14408fd1077281d6785c Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sat, 2 Aug 2025 16:16:20 -0400 Subject: [PATCH 2/8] Print training time --- scripts/reinforcement_learning/rl_games/train.py | 5 +++++ scripts/reinforcement_learning/rsl_rl/train.py | 5 +++++ scripts/reinforcement_learning/sb3/train.py | 5 +++++ scripts/reinforcement_learning/skrl/train.py | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index eb350382979..10bf387262a 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -59,6 +59,7 @@ import math import os import random +import time from datetime import datetime from rl_games.common import env_configurations, vecenv @@ -163,6 +164,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) + start_time = time.time() + # wrap around environment for rl-games env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions) @@ -205,6 +208,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen else: runner.run({"train": True, "play": False, "sigma": train_sigma}) + print(f"Training time: {round(time.time() - start_time, 2)} seconds") + # close the simulator env.close() diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index e534079d052..7e64d7e0eb6 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -71,6 +71,7 @@ import gymnasium as gym import os +import time import torch from datetime import datetime @@ -160,6 +161,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) + start_time = time.time() + # wrap around environment for rsl-rl env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) @@ -182,6 +185,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # run training runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True) + print(f"Training time: {round(time.time() - start_time, 2)} seconds") + # close the simulator env.close() diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py index e12907d6260..4add5223058 100644 --- a/scripts/reinforcement_learning/sb3/train.py +++ b/scripts/reinforcement_learning/sb3/train.py @@ -72,6 +72,7 @@ def cleanup_pbar(*args): import numpy as np import os import random +import time from datetime import datetime from stable_baselines3 import PPO @@ -157,6 +158,8 
@@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) + start_time = time.time() + # wrap around environment for stable baselines env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info) @@ -204,6 +207,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print("Saving normalization") env.save(os.path.join(log_dir, "model_vecnormalize.pkl")) + print(f"Training time: {round(time.time() - start_time, 2)} seconds") + # close the simulator env.close() diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index b76eb80132c..6cd3c8ac1cc 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -65,6 +65,7 @@ import gymnasium as gym import os import random +import time from datetime import datetime import skrl @@ -178,6 +179,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print_dict(video_kwargs, nesting=4) env = gym.wrappers.RecordVideo(env, **video_kwargs) + start_time = time.time() + # wrap around environment for skrl env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")` @@ -193,6 +196,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # run training runner.run() + print(f"Training time: {round(time.time() - start_time, 2)} seconds") + # close the simulator env.close() From 3e59997507f91e68c512df01a6b0ad69a880e1af Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sat, 2 Aug 2025 16:29:31 -0400 Subject: [PATCH 3/8] Include training commands executed to build the Training Performance table --- .../overview/reinforcement-learning/rl_frameworks.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/source/overview/reinforcement-learning/rl_frameworks.rst b/docs/source/overview/reinforcement-learning/rl_frameworks.rst index 34d47c17cdc..ba6fe037135 100644 --- a/docs/source/overview/reinforcement-learning/rl_frameworks.rst +++ b/docs/source/overview/reinforcement-learning/rl_frameworks.rst @@ -74,7 +74,6 @@ We performed training with each RL library on the same ``Isaac-Humanoid-v0`` env with ``--headless`` on a single RTX PRO 6000 GPU using 4096 environments and logged the total training time for 65.5M steps for each RL library. - +--------------------+-----------------+ | RL Library | Time in seconds | +====================+=================+ @@ -86,3 +85,12 @@ and logged the total training time for 65.5M steps for each RL library. +--------------------+-----------------+ | Stable-Baselines3 | 322 | +--------------------+-----------------+ + +Training commands: + +.. 
code:: bash + + python scripts/reinforcement_learning/rl_games/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless + python scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless + python scripts/reinforcement_learning/sb3/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless + python scripts/reinforcement_learning/skrl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless From 9ad22b468a85dba9de4bdf45728933d7e729e9bd Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Mon, 4 Aug 2025 12:10:36 -0400 Subject: [PATCH 4/8] Add note about agent config --- .../classic/humanoid/agents/rl_games_ppo_cfg.yaml | 4 ++++ .../classic/humanoid/agents/rsl_rl_ppo_cfg.py | 7 +++++++ .../manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml | 5 ++++- .../classic/humanoid/agents/skrl_ppo_cfg.yaml | 4 ++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml index 7f4db42e170..6eab2794ff9 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml @@ -1,3 +1,7 @@ +# This file defines the agent configuration used to generate the "Training Performance" table in +# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. +# Ensure that the configurations for the other RL libraries are updated if this one is modified. + params: seed: 42 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py index 56be542e58f..de253e4b72c 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py @@ -3,6 +3,13 @@ # # SPDX-License-Identifier: BSD-3-Clause +""" +This file defines the agent configuration used to generate the "Training Performance" table in +https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. +Ensure that the configurations for the other RL libraries are updated if this one is modified. +""" + + from isaaclab.utils import configclass from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml index 23880465905..7732ae45398 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml @@ -1,4 +1,7 @@ -# Adapted from rsl_rl config +# This file defines the agent configuration used to generate the "Training Performance" table in +# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. +# Ensure that the configurations for the other RL libraries are updated if this one is modified. 
+ seed: 42 policy: "MlpPolicy" n_timesteps: !!float 3.2e4 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml index e2d8e588e46..074a373387d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml @@ -1,3 +1,7 @@ +# This file defines the agent configuration used to generate the "Training Performance" table in +# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. +# Ensure that the configurations for the other RL libraries are updated if this one is modified. + seed: 42 From 7df21d203a64329fdef73b622b74c71024cd6ed3 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Thu, 27 Nov 2025 17:02:38 -0500 Subject: [PATCH 5/8] Update agent config notice --- .../classic/humanoid/agents/rl_games_ppo_cfg.yaml | 6 ++++-- .../manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py | 4 ++++ .../manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml | 6 ++++-- .../manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml | 6 ++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml index 48d693acb03..c756670aef2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rl_games_ppo_cfg.yaml @@ -3,11 +3,13 @@ # # SPDX-License-Identifier: BSD-3-Clause -# ---------------------------------------------------------------------------------------------------- +# ========================================= IMPORTANT NOTICE ========================================= +# # This file defines the agent configuration used to generate the "Training Performance" table in # https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. # Ensure that the configurations for the other RL libraries are updated if this one is modified. -# ---------------------------------------------------------------------------------------------------- +# +# ==================================================================================================== params: seed: 42 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py index 52e4019a97a..c5f77400cf6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py @@ -4,9 +4,13 @@ # SPDX-License-Identifier: BSD-3-Clause """ +========================================= IMPORTANT NOTICE ========================================= + This file defines the agent configuration used to generate the "Training Performance" table in https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. Ensure that the configurations for the other RL libraries are updated if this one is modified. 
+ +==================================================================================================== """ diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml index 646c02382fc..288a82f3a12 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml @@ -3,11 +3,13 @@ # # SPDX-License-Identifier: BSD-3-Clause -# ---------------------------------------------------------------------------------------------------- +# ========================================= IMPORTANT NOTICE ========================================= +# # This file defines the agent configuration used to generate the "Training Performance" table in # https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. # Ensure that the configurations for the other RL libraries are updated if this one is modified. -# ---------------------------------------------------------------------------------------------------- +# +# ==================================================================================================== seed: 42 policy: "MlpPolicy" diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml index 00b2d28d252..ecfa82513d8 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml @@ -3,11 +3,13 @@ # # SPDX-License-Identifier: BSD-3-Clause -# ---------------------------------------------------------------------------------------------------- +# ========================================= IMPORTANT NOTICE ========================================= +# # This file defines the agent configuration used to generate the "Training Performance" table in # https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html. # Ensure that the configurations for the other RL libraries are updated if this one is modified. -# ---------------------------------------------------------------------------------------------------- +# +# ==================================================================================================== seed: 42 From 1fa55cdc5ec0a608e36fcdc56a6d981a1b2bc7a5 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Thu, 27 Nov 2025 17:11:02 -0500 Subject: [PATCH 6/8] Update docs --- docs/source/overview/reinforcement-learning/rl_frameworks.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/overview/reinforcement-learning/rl_frameworks.rst b/docs/source/overview/reinforcement-learning/rl_frameworks.rst index ba6fe037135..babc994a692 100644 --- a/docs/source/overview/reinforcement-learning/rl_frameworks.rst +++ b/docs/source/overview/reinforcement-learning/rl_frameworks.rst @@ -86,11 +86,11 @@ and logged the total training time for 65.5M steps for each RL library. | Stable-Baselines3 | 322 | +--------------------+-----------------+ -Training commands: +Training commands (check for the *'Training time: XXX seconds'* line in the terminal output): .. 
code:: bash python scripts/reinforcement_learning/rl_games/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless + python scripts/reinforcement_learning/skrl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless python scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless python scripts/reinforcement_learning/sb3/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless - python scripts/reinforcement_learning/skrl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless From 4da2b835413e19a1df4aefb4720583d730430545 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Thu, 27 Nov 2025 17:39:05 -0500 Subject: [PATCH 7/8] Update table --- .../reinforcement-learning/rl_frameworks.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/overview/reinforcement-learning/rl_frameworks.rst b/docs/source/overview/reinforcement-learning/rl_frameworks.rst index babc994a692..5f9d25e06e0 100644 --- a/docs/source/overview/reinforcement-learning/rl_frameworks.rst +++ b/docs/source/overview/reinforcement-learning/rl_frameworks.rst @@ -71,19 +71,19 @@ Training Performance -------------------- We performed training with each RL library on the same ``Isaac-Humanoid-v0`` environment -with ``--headless`` on a single RTX PRO 6000 GPU using 4096 environments -and logged the total training time for 65.5M steps for each RL library. +with ``--headless`` on a single NVIDIA GeForce RTX 4090 and logged the total training time +for 65.5M steps (4096 environments x 32 rollout steps x 500 iterations). +--------------------+-----------------+ | RL Library | Time in seconds | +====================+=================+ -| RL-Games | 207 | +| RL-Games | 201 | +--------------------+-----------------+ -| SKRL | 208 | +| SKRL | 201 | +--------------------+-----------------+ -| RSL RL | 199 | +| RSL RL | 198 | +--------------------+-----------------+ -| Stable-Baselines3 | 322 | +| Stable-Baselines3 | 287 | +--------------------+-----------------+ Training commands (check for the *'Training time: XXX seconds'* line in the terminal output): From 020960cd78d78f8f9c60940df0a111c0caf8ba3d Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Fri, 28 Nov 2025 14:19:05 -0500 Subject: [PATCH 8/8] Revert modified value --- .../manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml index 288a82f3a12..6d8f3d98d4e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/sb3_ppo_cfg.yaml @@ -13,7 +13,7 @@ seed: 42 policy: "MlpPolicy" -n_timesteps: !!float 3.2e4 +n_timesteps: !!float 5e7 # For 4 minibatches with 4096 envs # batch_size = (n_envs * n_steps) / n_minibatches = 32768 n_minibatches: 4
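
Note (not part of the patch series): the per-library comments added above and the 65.5M-step figure in the docs all rest on the same batch arithmetic. The following is a minimal sketch of that arithmetic, assuming the benchmark setup described in the docs (4096 environments, 32 rollout steps per environment, 4 minibatches, 500 iterations); the variable names are illustrative and are not config keys from any of the libraries.

    # Sketch of the shared PPO batch arithmetic across rl_games, rsl_rl, sb3 and skrl configs.
    num_envs = 4096          # environments simulated in parallel for the benchmark
    rollout_steps = 32       # horizon_length / num_steps_per_env / n_steps in the respective configs
    num_mini_batches = 4     # n_minibatches
    iterations = 500         # --max_iterations passed to the training commands

    steps_per_iteration = num_envs * rollout_steps              # 131072 transitions collected per update
    minibatch_size = steps_per_iteration // num_mini_batches    # 32768, the value set in the configs
    total_env_steps = steps_per_iteration * iterations          # 65,536,000 ~= 65.5M steps, as in the docs table

    print(minibatch_size, total_env_steps)

Under these assumptions, changing any one of these inputs in a single library's config breaks the equivalence that the IMPORTANT NOTICE headers in each agent config warn about.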