22 changes: 15 additions & 7 deletions docs/source/overview/reinforcement-learning/rl_frameworks.rst
@@ -71,18 +71,26 @@ Training Performance
--------------------

We performed training with each RL library on the same ``Isaac-Humanoid-v0`` environment
-with ``--headless`` on a single RTX PRO 6000 GPU using 4096 environments
-and logged the total training time for 65.5M steps for each RL library.
+with ``--headless`` on a single NVIDIA GeForce RTX 4090 and logged the total training time
+for 65.5M steps (4096 environments x 32 rollout steps x 500 iterations).

+--------------------+-----------------+
| RL Library         | Time in seconds |
+====================+=================+
-| RL-Games           | 207             |
+| RL-Games           | 201             |
+--------------------+-----------------+
-| SKRL               | 208             |
+| SKRL               | 201             |
+--------------------+-----------------+
-| RSL RL             | 199             |
+| RSL RL             | 198             |
+--------------------+-----------------+
-| Stable-Baselines3  | 322             |
+| Stable-Baselines3  | 287             |
+--------------------+-----------------+

+Training commands (check for the *'Training time: XXX seconds'* line in the terminal output):
+
+.. code:: bash
+
+    python scripts/reinforcement_learning/rl_games/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
+    python scripts/reinforcement_learning/skrl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
+    python scripts/reinforcement_learning/rsl_rl/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
+    python scripts/reinforcement_learning/sb3/train.py --task Isaac-Humanoid-v0 --max_iterations 500 --headless
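
The 65.5M figure follows directly from the run setup quoted above; a quick sanity check of the arithmetic (a minimal sketch using only those numbers):

.. code:: python

    # Total environment steps for the benchmark runs described above.
    num_envs = 4096      # parallel environments
    rollout_steps = 32   # steps collected per environment per iteration
    iterations = 500     # --max_iterations passed to each train.py

    total_steps = num_envs * rollout_steps * iterations
    print(f"{total_steps:,} steps")  # 65,536,000 -> reported as 65.5M
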
5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/rl_games/train.py
@@ -67,6 +67,7 @@
import math
import os
import random
+import time
from datetime import datetime

from rl_games.common import env_configurations, vecenv
@@ -201,6 +202,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for rl-games
    env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions, obs_groups, concate_obs_groups)

@@ -250,6 +253,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    else:
        runner.run({"train": True, "play": False, "sigma": train_sigma})

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

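The same wall-clock timing pattern is added to all four training scripts: ``time.time()`` is sampled right after the optional video wrapper is applied and again once the runner returns, so the reported figure includes environment wrapping and runner construction as well as the training loop itself. A minimal standalone sketch of the pattern (``run_training`` is a hypothetical stand-in for each library's training call, e.g. ``runner.run(...)`` or ``runner.learn(...)``):

.. code:: python

    import time


    def run_training():
        # hypothetical stand-in for the library-specific training call
        pass


    start_time = time.time()
    run_training()
    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
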
5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/rsl_rl/train.py
@@ -78,6 +78,7 @@
import gymnasium as gym
import logging
import os
+import time
import torch
from datetime import datetime

@@ -187,6 +188,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for rsl-rl
    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)

@@ -212,6 +215,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    # run training
    runner.learn(num_learning_iterations=agent_cfg.max_iterations, init_at_random_ep_len=True)

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/sb3/train.py
@@ -80,6 +80,7 @@ def cleanup_pbar(*args):
import numpy as np
import os
import random
+import time
from datetime import datetime

from stable_baselines3 import PPO
@@ -176,6 +177,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for stable baselines
    env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info)

@@ -223,6 +226,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print("Saving normalization")
        env.save(os.path.join(log_dir, "model_vecnormalize.pkl"))

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

5 changes: 5 additions & 0 deletions scripts/reinforcement_learning/skrl/train.py
@@ -78,6 +78,7 @@
import logging
import os
import random
+import time
from datetime import datetime

import skrl
@@ -214,6 +215,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

+    start_time = time.time()
+
    # wrap around environment for skrl
    env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework)  # same as: `wrap_env(env, wrapper="auto")`

@@ -229,6 +232,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
    # run training
    runner.run()

+    print(f"Training time: {round(time.time() - start_time, 2)} seconds")
+
    # close the simulator
    env.close()

@@ -3,6 +3,14 @@
#
# SPDX-License-Identifier: BSD-3-Clause

+# ========================================= IMPORTANT NOTICE =========================================
+#
+# This file defines the agent configuration used to generate the "Training Performance" table in
+# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
+# Ensure that the configurations for the other RL libraries are updated if this one is modified.
+#
+# ====================================================================================================
+
params:
  seed: 42

@@ -50,13 +58,13 @@ params:
    device_name: 'cuda:0'
    multi_gpu: False
    ppo: True
-    mixed_precision: True
+    mixed_precision: False
    normalize_input: True
    normalize_value: True
    value_bootstrap: True
    num_actors: -1
    reward_shaper:
-      scale_value: 0.6
+      scale_value: 1.0
    normalize_advantage: True
    gamma: 0.99
    tau: 0.95
@@ -72,7 +80,7 @@ params:
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 32
-    minibatch_size: 32768
+    minibatch_size: 32768  # num_envs * horizon_length / num_mini_batches
    mini_epochs: 5
    critic_coef: 4
    clip_value: True
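The inline comment added to ``minibatch_size`` in the rl_games configuration above can be checked against the values used for this benchmark; a small sketch (``num_mini_batches = 4`` is inferred from the numbers rather than stated in the config):

.. code:: python

    num_envs = 4096        # environments used in the benchmark
    horizon_length = 32    # rollout steps per environment
    num_mini_batches = 4   # assumed value, inferred from 4096 * 32 / 32768

    minibatch_size = num_envs * horizon_length // num_mini_batches
    assert minibatch_size == 32768
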
@@ -3,6 +3,17 @@
#
# SPDX-License-Identifier: BSD-3-Clause

"""
========================================= IMPORTANT NOTICE =========================================

This file defines the agent configuration used to generate the "Training Performance" table in
https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
Ensure that the configurations for the other RL libraries are updated if this one is modified.

====================================================================================================
"""


from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
@@ -12,18 +23,18 @@
class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg):
    num_steps_per_env = 32
    max_iterations = 1000
-    save_interval = 50
+    save_interval = 100
    experiment_name = "humanoid"
    policy = RslRlPpoActorCriticCfg(
        init_noise_std=1.0,
-        actor_obs_normalization=False,
-        critic_obs_normalization=False,
+        actor_obs_normalization=True,
+        critic_obs_normalization=True,
        actor_hidden_dims=[400, 200, 100],
        critic_hidden_dims=[400, 200, 100],
        activation="elu",
    )
    algorithm = RslRlPpoAlgorithmCfg(
-        value_loss_coef=1.0,
+        value_loss_coef=2.0,
        use_clipped_value_loss=True,
        clip_param=0.2,
        entropy_coef=0.0,
@@ -3,7 +3,14 @@
#
# SPDX-License-Identifier: BSD-3-Clause

-# Adapted from rsl_rl config
+# ========================================= IMPORTANT NOTICE =========================================
+#
+# This file defines the agent configuration used to generate the "Training Performance" table in
+# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
+# Ensure that the configurations for the other RL libraries are updated if this one is modified.
+#
+# ====================================================================================================
+
seed: 42
policy: "MlpPolicy"
n_timesteps: !!float 5e7
@@ -18,7 +25,7 @@ clip_range: 0.2
n_epochs: 5
gae_lambda: 0.95
max_grad_norm: 1.0
-vf_coef: 0.5
+vf_coef: 2.0
policy_kwargs:
  activation_fn: 'nn.ELU'
  net_arch: [400, 200, 100]
@@ -3,6 +3,14 @@
#
# SPDX-License-Identifier: BSD-3-Clause

+# ========================================= IMPORTANT NOTICE =========================================
+#
+# This file defines the agent configuration used to generate the "Training Performance" table in
+# https://isaac-sim.github.io/IsaacLab/main/source/overview/reinforcement-learning/rl_frameworks.html.
+# Ensure that the configurations for the other RL libraries are updated if this one is modified.
+#
+# ====================================================================================================
+
seed: 42


@@ -67,14 +75,13 @@ agent:
  entropy_loss_scale: 0.0
  value_loss_scale: 2.0
  kl_threshold: 0.0
-  rewards_shaper_scale: 0.6
  time_limit_bootstrap: False
  # logging and checkpoint
  experiment:
    directory: "humanoid"
    experiment_name: ""
-    write_interval: auto
-    checkpoint_interval: auto
+    write_interval: 32
+    checkpoint_interval: 3200


# Sequential trainer
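Assuming skrl counts ``write_interval`` and ``checkpoint_interval`` in trainer timesteps, the new values in the skrl configuration above line up with the 32-step rollout and with the ``save_interval = 100`` set for RSL-RL; a small sketch of that arithmetic:

.. code:: python

    rollout_steps = 32          # horizon per environment per iteration
    write_interval = 32         # assumed unit: trainer timesteps -> one log write per rollout
    checkpoint_interval = 3200  # 3200 / 32 = 100 rollouts between checkpoints

    assert write_interval // rollout_steps == 1
    assert checkpoint_interval // rollout_steps == 100  # matches RSL-RL's save_interval = 100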