22 changes: 15 additions & 7 deletions docs/source/overview/reinforcement-learning/rl_frameworks.rst
@@ -71,18 +71,26 @@ Training Performance
--------------------

We performed training with each RL library on the same ``Isaac-Humanoid-v0`` environment
-with ``--headless`` on a single RTX PRO 6000 GPU using 4096 environments
-and logged the total training time for 65.5M steps for each RL library.
+with ``--headless`` on a single NVIDIA GeForce RTX 4090 and logged the total training time
+for 65.5M steps (4096 environments x 32 rollout steps x 500 iterations).

+--------------------+-----------------+
| RL Library         | Time in seconds |
+====================+=================+
-| RL-Games           | 207             |
+| RL-Games           | 201             |
+--------------------+-----------------+
-| SKRL               | 208             |
+| SKRL               | 201             |
+--------------------+-----------------+
-| RSL RL             | 199             |
+| RSL RL             | 198             |
+--------------------+-----------------+
-| Stable-Baselines3  | 322             |
+| Stable-Baselines3  | 287             |
+--------------------+-----------------+

+Training commands (check for the *'Training time: XXX seconds'* line in the terminal output):
+
+.. code:: bash
+
+    python scripts/reinforcement_learning/rl_games/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
+    python scripts/reinforcement_learning/skrl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
+    python scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
+    python scripts/reinforcement_learning/sb3/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
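
The 65.5M figure follows directly from the run setup quoted above; a quick sanity check of the arithmetic (a minimal sketch using only those numbers):

.. code:: python

    # Total environment steps for the benchmark runs described above.
    num_envs = 4096      # parallel environments
    rollout_steps = 32   # steps collected per environment per iteration
    iterations = 500     # --max_iterations passed to each train.py

    total_steps = num_envs * rollout_steps * iterations
    print(f"{total_steps:,} steps")  # 65,536,000 -> reported as 65.5M
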
5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/rl_games/train.py
@@ -67,6 +67,7 @@
import math
import os
import random
+import time
from datetime import datetime

from rl_games.common import env_configurations, vecenv
@@ -201,6 +202,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for rl-games
    env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups)

@@ -250,6 +253,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    else:
        runner.run({"train": True, "play": False, "sigma": train_sigma})

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

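The same wall-clock timing pattern is added to all four training scripts: ``time.time()`` is sampled right after the optional video wrapper is applied and again once the runner returns, so the reported figure includes environment wrapping and runner construction as well as the training loop itself. A minimal standalone sketch of the pattern (``run_training`` is a hypothetical stand-in for each library's training call, e.g. ``runner.run(...)`` or ``runner.learn(...)``):

.. code:: python

    import time


    def run_training():
        # hypothetical stand-in for the library-specific training call
        pass


    start_time = time.time()
    run_training()
    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
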
5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/rsl_rl/train.py
@@ -78,6 +78,7 @@
import gymnasium as gym
import logging
import os
+import time
import torch
from datetime import datetime

@@ -187,6 +188,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for rsl-rl
    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)

@@ -212,6 +215,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    # run training
    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/sb3/train.py
@@ -80,6 +80,7 @@ def cleanup_pbar(*args):
import numpy as np
import os
import random
+import time
from datetime import datetime

from stable_baselines3 import PPO
@@ -176,6 +177,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for stable baselines
    env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info)

@@ -223,6 +226,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print("Saving normalization")
        env.save(os.path.join(log_dir, "model_vecnormalize.pkl"))

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/skrl/train.py
@@ -78,6 +78,7 @@
import logging
import os
import random
+import time
from datetime import datetime

import skrl
@@ -214,6 +215,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for skrl
    env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework)  # same as: `wrap_env(env, wrapper="auto")`

@@ -229,6 +232,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    # run training
    runner.run()

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

@@ -3,6 +3,14 @@
#
# SPDX-License-Identifier: BSD-3-Clause

+# ========================================= IMPORTANT NOTICE =========================================
+#
+# This file defines the agent configuration used to generate the "Training Performance" table in
+# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
+# Ensure that the configurations for the other RL libraries are updated if this one is modified.
+#
+# ====================================================================================================
+
params:
  seed: 42

@@ -50,13 +58,13 @@ params:
    device_name: 'cuda:0'
    multi_gpu: False
    ppo: True
-    mixed_precision: True
+    mixed_precision: False
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: -1
    reward_shaper:
-      scale_value: 0.6
+      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
@@ -72,7 +80,7 @@ params:
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 32
-    minibatch_size: 32768
+    minibatch_size: 32768  # num_envs * horizon_length / num_mini_batches
    mini_epochs: 5
    critic_coef: 4
    clip_value: True
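The inline comment added to ``minibatch_size`` in the rl_games configuration above can be checked against the values used for this benchmark; a small sketch (``num_mini_batches = 4`` is inferred from the numbers rather than stated in the config):

.. code:: python

    num_envs = 4096        # environments used in the benchmark
    horizon_length = 32    # rollout steps per environment
    num_mini_batches = 4   # assumed value, inferred from 4096 * 32 / 32768

    minibatch_size = num_envs * horizon_length // num_mini_batches
    assert minibatch_size == 32768
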
@@ -3,6 +3,17 @@
#
# SPDX-License-Identifier: BSD-3-Clause

"""
========================================= IMPORTANT NOTICE =========================================

This file defines the agent configuration used to generate the "Training Performance" table in
https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
Ensure that the configurations for the other RL libraries are updated if this one is modified.

====================================================================================================
"""


from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@@ -12,18 +23,18 @@
class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 32
    max_iterations = 1000
-    save_interval = 50
+    save_interval = 100
    experiment_name = "humanoid"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
-        actor_obs_normalization=False,
-        critic_obs_normalization=False,
+        actor_obs_normalization=True,
+        critic_obs_normalization=True,
        actor_hidden_dims=[400, 200, 100],
        critic_hidden_dims=[400, 200, 100],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
-        value_loss_coef=1.0,
+        value_loss_coef=2.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.0,
@@ -3,7 +3,14 @@
#
# SPDX-License-Identifier: BSD-3-Clause

-# Adapted from rsl_rl config
+# ========================================= IMPORTANT NOTICE =========================================
+#
+# This file defines the agent configuration used to generate the "Training Performance" table in
+# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
+# Ensure that the configurations for the other RL libraries are updated if this one is modified.
+#
+# ====================================================================================================
+
seed: 42
policy: "MlpPolicy"
n_timesteps: !!float 5e7
@@ -18,7 +25,7 @@ clip_range: 0.2
n_epochs: 5
gae_lambda: 0.95
max_grad_norm: 1.0
-vf_coef: 0.5
+vf_coef: 2.0
policy_kwargs:
  activation_fn: 'nn.ELU'
  net_arch: [400, 200, 100]
@@ -3,6 +3,14 @@
#
# SPDX-License-Identifier: BSD-3-Clause

+# ========================================= IMPORTANT NOTICE =========================================
+#
+# This file defines the agent configuration used to generate the "Training Performance" table in
+# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
+# Ensure that the configurations for the other RL libraries are updated if this one is modified.
+#
+# ====================================================================================================
+
seed: 42


@@ -67,14 +75,13 @@ agent:
  entropy_loss_scale: 0.0
  value_loss_scale: 2.0
  kl_threshold: 0.0
-  rewards_shaper_scale: 0.6
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "humanoid"
    experiment_name: ""
-    write_interval: auto
-    checkpoint_interval: auto
+    write_interval: 32
+    checkpoint_interval: 3200


# Sequential trainer
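Assuming skrl counts ``write_interval`` and ``checkpoint_interval`` in trainer timesteps, the new values in the skrl configuration above line up with the 32-step rollout and with the ``save_interval = 100`` set for RSL-RL; a small sketch of that arithmetic:

.. code:: python

    rollout_steps = 32          # horizon per environment per iteration
    write_interval = 32         # assumed unit: trainer timesteps -> one log write per rollout
    checkpoint_interval = 3200  # 3200 / 32 = 100 rollouts between checkpoints

    assert write_interval // rollout_steps == 1
    assert checkpoint_interval // rollout_steps == 100  # matches RSL-RL's save_interval = 100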