From a4118d7f9be827f8755ee71ea922928bb6c4a314 Mon Sep 17 00:00:00 2001
From: Toni-SM <aserranomuno@nvidia.com>
Date: Wed, 16 Oct 2024 21:59:05 -0400
Subject: [PATCH 1/2] Support other gymnasium spaces in Direct workflow (#1117)

# Description

This PR adds support for different Gymnasium spaces (`Box`, `Discrete`,
`MultiDiscrete`, `Tuple` and `Dict`) to define observation, action and
state spaces in the direct workflow.

See
https://github.com/isaac-sim/IsaacLab/issues/864#issuecomment-2351819930

## Type of change

<!-- As you go through the list, delete the ones that are not
applicable. -->

- New feature (non-breaking change which adds functionality)
- This change requires a documentation update

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there

<!--
As you go through the checklist above, you can mark something as done by
putting an x character in it

For example,
- [x] I have done this task
- [ ] I have not done this task
-->
---
 docs/source/features/hydra.rst                |   2 +-
 .../migration/migrating_from_isaacgymenvs.rst |  18 +--
 .../migrating_from_omniisaacgymenvs.rst       |  18 +--
 .../snippets/tutorial_modify_direct_rl_env.py |   4 +-
 .../03_envs/create_direct_rl_env.rst          |   6 +-
 .../omni.isaac.lab/config/extension.toml      |   2 +-
 .../omni.isaac.lab/docs/CHANGELOG.rst         |  19 +++
 .../omni/isaac/lab/envs/__init__.py           |   2 +-
 .../omni/isaac/lab/envs/common.py             |   4 +
 .../omni/isaac/lab/envs/direct_marl_env.py    |  61 ++++++---
 .../isaac/lab/envs/direct_marl_env_cfg.py     |  97 +++++++++++++-
 .../omni/isaac/lab/envs/direct_rl_env.py      |  38 ++++--
 .../omni/isaac/lab/envs/direct_rl_env_cfg.py  |  97 +++++++++++++-
 .../omni/isaac/lab/envs/utils/__init__.py     |   6 +
 .../lab/envs/{utils.py => utils/marl.py}      |  73 +++++++----
 .../omni/isaac/lab/envs/utils/spaces.py       |  92 +++++++++++++
 .../test/envs/test_spaces_utils.py            | 122 ++++++++++++++++++
 .../config/extension.toml                     |   2 +-
 .../omni.isaac.lab_tasks/docs/CHANGELOG.rst   |  13 ++
 .../allegro_hand/allegro_hand_env_cfg.py      |   6 +-
 .../isaac/lab_tasks/direct/ant/ant_env.py     |   6 +-
 .../lab_tasks/direct/anymal_c/anymal_c_env.py |  15 ++-
 .../cart_double_pendulum_env.py               |   6 +-
 .../direct/cartpole/cartpole_camera_env.py    |  45 +------
 .../lab_tasks/direct/cartpole/cartpole_env.py |   6 +-
 .../franka_cabinet/franka_cabinet_env.py      |   6 +-
 .../lab_tasks/direct/humanoid/humanoid_env.py |   6 +-
 .../direct/quadcopter/quadcopter_env.py       |   9 +-
 .../direct/shadow_hand/shadow_hand_env_cfg.py |  12 +-
 .../shadow_hand/shadow_hand_vision_env.py     |   4 +-
 .../shadow_hand_over_env_cfg.py               |   6 +-
 .../utils/wrappers/rsl_rl/vecenv_wrapper.py   |   8 +-
 .../test/test_environments.py                 |  17 +--
 .../test/test_multi_agent_environments.py     |  15 +--
 34 files changed, 645 insertions(+), 198 deletions(-)
 create mode 100644 source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py
 rename source/extensions/omni.isaac.lab/omni/isaac/lab/envs/{utils.py => utils/marl.py} (76%)
 create mode 100644 source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py
 create mode 100644 source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py

diff --git a/docs/source/features/hydra.rst b/docs/source/features/hydra.rst
index 2fa5d2d706..89673dbe35 100644
--- a/docs/source/features/hydra.rst
+++ b/docs/source/features/hydra.rst
@@ -115,7 +115,7 @@ For example, for the configuration of the Cartpole camera depth environment:
     :emphasize-lines: 16
 
 If the user were to modify the width of the camera, i.e. ``env.tiled_camera.width=128``, then the parameter
-``env.num_observations=10240`` (1*80*128) must be updated and given as input as well.
+``env.observation_space=[80,128,1]`` must be updated and given as input as well.
 
 Similarly, the ``__post_init__`` method is not updated with the command line inputs. In the ``LocomotionVelocityRoughEnvCfg``, for example,
 the post init update is as follows:
diff --git a/docs/source/migration/migrating_from_isaacgymenvs.rst b/docs/source/migration/migrating_from_isaacgymenvs.rst
index c903d83c3d..2073854d32 100644
--- a/docs/source/migration/migrating_from_isaacgymenvs.rst
+++ b/docs/source/migration/migrating_from_isaacgymenvs.rst
@@ -45,9 +45,9 @@ Below is an example skeleton of a task config class:
       # env
       decimation = 2
       episode_length_s = 5.0
-      num_actions = 1
-      num_observations = 4
-      num_states = 0
+      action_space = 1
+      observation_space = 4
+      state_space = 0
       # task-specific parameters
       ...
 
@@ -135,9 +135,9 @@ The following parameters must be set for each environment config:
 
    decimation = 2
    episode_length_s = 5.0
-   num_actions = 1
-   num_observations = 4
-   num_states = 0
+   action_space = 1
+   observation_space = 4
+   state_space = 0
 
 Note that the maximum episode length parameter (now ``episode_length_s``) is in seconds instead of steps as it was
 in IsaacGymEnvs. To convert between step count to seconds, use the equation:
@@ -569,9 +569,9 @@ Task Config
 |                                                        |     decimation = 2                                                  |
 |   asset:                                               |     episode_length_s = 5.0                                          |
 |     assetRoot: "../../assets"                          |     action_scale = 100.0  # [N]                                     |
-|     assetFileName: "urdf/cartpole.urdf"                |     num_actions = 1                                                 |
-|                                                        |     num_observations = 4                                            |
-|   enableCameraSensors: False                           |     num_states = 0                                                  |
+|     assetFileName: "urdf/cartpole.urdf"                |     action_space = 1                                                |
+|                                                        |     observation_space = 4                                           |
+|   enableCameraSensors: False                           |     state_space = 0                                                 |
 |                                                        |     # reset                                                         |
 | sim:                                                   |     max_cart_pos = 3.0                                              |
 |   dt: 0.0166 # 1/60 s                                  |     initial_pole_angle_range = [-0.25, 0.25]                        |
diff --git a/docs/source/migration/migrating_from_omniisaacgymenvs.rst b/docs/source/migration/migrating_from_omniisaacgymenvs.rst
index 50f9d5b9d6..cbda1e8d45 100644
--- a/docs/source/migration/migrating_from_omniisaacgymenvs.rst
+++ b/docs/source/migration/migrating_from_omniisaacgymenvs.rst
@@ -46,9 +46,9 @@ Below is an example skeleton of a task config class:
       # env
       decimation = 2
       episode_length_s = 5.0
-      num_actions = 1
-      num_observations = 4
-      num_states = 0
+      action_space = 1
+      observation_space = 4
+      state_space = 0
       # task-specific parameters
       ...
 
@@ -158,9 +158,9 @@ The following parameters must be set for each environment config:
 
    decimation = 2
    episode_length_s = 5.0
-   num_actions = 1
-   num_observations = 4
-   num_states = 0
+   action_space = 1
+   observation_space = 4
+   state_space = 0
 
 
 RL Config Setup
@@ -501,9 +501,9 @@ Task config in Isaac Lab can be split into the main task configuration class and
 |   clipObservations: 5.0                                         |     decimation = 2                                              |
 |   clipActions: 1.0                                              |     episode_length_s = 5.0                                      |
 |   controlFrequencyInv: 2 # 60 Hz                                |     action_scale = 100.0  # [N]                                 |
-|                                                                 |     num_actions = 1                                             |
-| sim:                                                            |     num_observations = 4                                        |
-|                                                                 |     num_states = 0                                              |
+|                                                                 |     action_space = 1                                            |
+| sim:                                                            |     observation_space = 4                                       |
+|                                                                 |     state_space = 0                                             |
 |   dt: 0.0083 # 1/120 s                                          |     # reset                                                     |
 |   use_gpu_pipeline: ${eq:${...pipeline},"gpu"}                  |     max_cart_pos = 3.0                                          |
 |   gravity: [0.0, 0.0, -9.81]                                    |     initial_pole_angle_range = [-0.25, 0.25]                    |
diff --git a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
index 8073886840..215ef3bd4f 100644
--- a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
+++ b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
@@ -28,8 +28,8 @@
 # [end-h1_env-import]
 
 # [start-h1_env-spaces]
-num_actions = 19
-num_observations = 69
+action_space = 19
+observation_space = 69
 # [end-h1_env-spaces]
 
 # [start-h1_env-robot]
diff --git a/docs/source/tutorials/03_envs/create_direct_rl_env.rst b/docs/source/tutorials/03_envs/create_direct_rl_env.rst
index ab5136106a..a4b945be9d 100644
--- a/docs/source/tutorials/03_envs/create_direct_rl_env.rst
+++ b/docs/source/tutorials/03_envs/create_direct_rl_env.rst
@@ -48,9 +48,9 @@ config should define the number of actions and observations for the environment.
    @configclass
    class CartpoleEnvCfg(DirectRLEnvCfg):
       ...
-      num_actions = 1
-      num_observations = 4
-      num_states = 0
+      action_space = 1
+      observation_space = 4
+      state_space = 0
 
 The config class can also be used to define task-specific attributes, such as scaling for reward terms
 and thresholds for reset conditions.
diff --git a/source/extensions/omni.isaac.lab/config/extension.toml b/source/extensions/omni.isaac.lab/config/extension.toml
index 517ac00610..a633e64fb8 100644
--- a/source/extensions/omni.isaac.lab/config/extension.toml
+++ b/source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.25.1"
+version = "0.25.2"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"
diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
index e78abdb068..12e4b5b45a 100644
--- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,6 +1,25 @@
 Changelog
 ---------
 
+0.25.2 (2024-10-16)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added support for different Gymnasium spaces (``Box``, ``Discrete``, ``MultiDiscrete``, ``Tuple`` and ``Dict``)
+  to define observation, action and state spaces in the direct workflow.
+* Added :meth:`sample_space` to environment utils to sample supported spaces where data containers are torch tensors.
+
+Changed
+^^^^^^^
+
+* Marked the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectRLEnvCfg` as deprecated
+  in favor of :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively.
+* Marked the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectMARLEnvCfg` as deprecated
+  in favor of :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively.
+
+
 0.25.1 (2024-10-10)
 ~~~~~~~~~~~~~~~~~~~
 
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py
index 97701e50cc..667c0b5049 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py
@@ -52,4 +52,4 @@
 from .manager_based_env_cfg import ManagerBasedEnvCfg
 from .manager_based_rl_env import ManagerBasedRLEnv
 from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg
-from .utils import multi_agent_to_single_agent, multi_agent_with_one_agent
+from .utils.marl import multi_agent_to_single_agent, multi_agent_with_one_agent
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py
index d0c99f8ad8..7d6b02d309 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import gymnasium as gym
 import torch
 from typing import Dict, Literal, TypeVar
 
@@ -62,6 +63,9 @@ class ViewerCfg:
 # Types.
 ##
 
+SpaceType = TypeVar("SpaceType", gym.spaces.Space, int, set, tuple, list, dict)
+"""A type variable constraining the valid types for specifying state, observation and action spaces."""
+
 VecEnvObs = Dict[str, torch.Tensor | Dict[str, torch.Tensor]]
 """Observation returned by the environment.
 
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
index f58a80dd47..5bcedb0591 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
@@ -14,6 +14,7 @@
 import weakref
 from abc import abstractmethod
 from collections.abc import Sequence
+from dataclasses import MISSING
 from typing import Any, ClassVar
 
 import omni.isaac.core.utils.torch as torch_utils
@@ -30,6 +31,7 @@
 from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType
 from .direct_marl_env_cfg import DirectMARLEnvCfg
 from .ui import ViewportCameraController
+from .utils.spaces import sample_space, spec_to_gym_space
 
 
 class DirectMARLEnv:
@@ -164,10 +166,6 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
         # -- init buffers
         self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
         self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
-        self.actions = {
-            agent: torch.zeros(self.num_envs, self.cfg.num_actions[agent], device=self.sim.device)
-            for agent in self.cfg.possible_agents
-        }
 
         # setup the observation, state and action spaces
         self._configure_env_spaces()
@@ -406,16 +404,19 @@ def state(self) -> StateType | None:
         """Returns the state for the environment.
 
         The state-space is used for centralized training or asymmetric actor-critic architectures. It is configured
-        using the :attr:`DirectMARLEnvCfg.num_states` parameter.
+        using the :attr:`DirectMARLEnvCfg.state_space` parameter.
 
         Returns:
-            The states for the environment, or None if :attr:`DirectMARLEnvCfg.num_states` parameter is zero.
+            The states for the environment, or None if :attr:`DirectMARLEnvCfg.state_space` parameter is zero.
         """
-        if not self.cfg.num_states:
+        if not self.cfg.state_space:
             return None
         # concatenate and return the observations as state
-        if self.cfg.num_states < 0:
-            self.state_buf = torch.cat([self.obs_dict[agent] for agent in self.cfg.possible_agents], dim=-1)
+        # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
+        if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0:
+            self.state_buf = torch.cat(
+                [self.obs_dict[agent].reshape(self.num_envs, -1) for agent in self.cfg.possible_agents], dim=-1
+            )
         # compute and return custom environment state
         else:
             self.state_buf = self._get_states()
@@ -568,25 +569,45 @@ def _configure_env_spaces(self):
         self.agents = self.cfg.possible_agents
         self.possible_agents = self.cfg.possible_agents
 
+        # show deprecation message and overwrite configuration
+        if self.cfg.num_actions is not None:
+            omni.log.warn("DirectMARLEnvCfg.num_actions is deprecated. Use DirectMARLEnvCfg.action_spaces instead.")
+            if isinstance(self.cfg.action_spaces, type(MISSING)):
+                self.cfg.action_spaces = self.cfg.num_actions
+        if self.cfg.num_observations is not None:
+            omni.log.warn(
+                "DirectMARLEnvCfg.num_observations is deprecated. Use DirectMARLEnvCfg.observation_spaces instead."
+            )
+            if isinstance(self.cfg.observation_spaces, type(MISSING)):
+                self.cfg.observation_spaces = self.cfg.num_observations
+        if self.cfg.num_states is not None:
+            omni.log.warn("DirectMARLEnvCfg.num_states is deprecated. Use DirectMARLEnvCfg.state_space instead.")
+            if isinstance(self.cfg.state_space, type(MISSING)):
+                self.cfg.state_space = self.cfg.num_states
+
         # set up observation and action spaces
         self.observation_spaces = {
-            agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_observations[agent],))
-            for agent in self.cfg.possible_agents
+            agent: spec_to_gym_space(self.cfg.observation_spaces[agent]) for agent in self.cfg.possible_agents
         }
         self.action_spaces = {
-            agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_actions[agent],))
-            for agent in self.cfg.possible_agents
+            agent: spec_to_gym_space(self.cfg.action_spaces[agent]) for agent in self.cfg.possible_agents
         }
 
         # set up state space
-        if not self.cfg.num_states:
+        if not self.cfg.state_space:
             self.state_space = None
-        if self.cfg.num_states < 0:
-            self.state_space = gym.spaces.Box(
-                low=-np.inf, high=np.inf, shape=(sum(self.cfg.num_observations.values()),)
+        if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0:
+            self.state_space = gym.spaces.flatten_space(
+                gym.spaces.Tuple([self.observation_spaces[agent] for agent in self.cfg.possible_agents])
             )
         else:
-            self.state_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_states,))
+            self.state_space = spec_to_gym_space(self.cfg.state_space)
+
+        # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+        self.actions = {
+            agent: sample_space(self.action_spaces[agent], self.sim.device, batch_size=self.num_envs, fill_value=0)
+            for agent in self.cfg.possible_agents
+        }
 
     def _reset_idx(self, env_ids: Sequence[int]):
         """Reset environments based on specified indices.
@@ -664,8 +685,8 @@ def _get_observations(self) -> dict[AgentID, ObsType]:
     def _get_states(self) -> StateType:
         """Compute and return the states for the environment.
 
-        This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.num_states`
-        parameter is greater than zero.
+        This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.state_space`
+        parameter is not a number less than or equal to zero.
 
         Returns:
             The states for the environment.
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
index 3dcf364f5c..40ecb64297 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
@@ -10,7 +10,7 @@
 from omni.isaac.lab.utils import configclass
 from omni.isaac.lab.utils.noise import NoiseModelCfg
 
-from .common import AgentID, ViewerCfg
+from .common import AgentID, SpaceType, ViewerCfg
 from .ui import BaseEnvWindow
 
 
@@ -104,11 +104,39 @@ class DirectMARLEnvCfg:
     Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details.
     """
 
-    num_observations: dict[AgentID, int] = MISSING
-    """The dimension of the observation space from each agent."""
+    observation_spaces: dict[AgentID, SpaceType] = MISSING
+    """Observation space definition for each agent.
+
+    The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+    specification of the space is desired) or basic Python data types (for simplicity).
+
+    .. list-table::
+        :header-rows: 1
+
+        * - Gymnasium space
+          - Python data type
+        * - :class:`~gymnasium.spaces.Box`
+          - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+        * - :class:`~gymnasium.spaces.Discrete`
+          - Single-element set (e.g.: ``{2}``)
+        * - :class:`~gymnasium.spaces.MultiDiscrete`
+          - List of single-element sets (e.g.: ``[{2}, {5}]``)
+        * - :class:`~gymnasium.spaces.Dict`
+          - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+        * - :class:`~gymnasium.spaces.Tuple`
+          - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+    """
 
-    num_states: int = MISSING
-    """The dimension of the state space from each environment instance.
+    num_observations: dict[AgentID, int] | None = None
+    """The dimension of the observation space for each agent.
+
+    .. warning::
+
+        This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.observation_spaces` instead.
+    """
+
+    state_space: SpaceType = MISSING
+    """State space definition.
 
     The following values are supported:
 
@@ -116,6 +144,33 @@ class DirectMARLEnvCfg:
     * 0: No state-space will be constructed (`state_space` is None).
       This is useful to save computational resources when the algorithm to be trained does not need it.
     * greater than 0: Custom state-space dimension to be provided by the task implementation.
+
+    The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+    specification of the space is desired) or basic Python data types (for simplicity).
+
+    .. list-table::
+        :header-rows: 1
+
+        * - Gymnasium space
+          - Python data type
+        * - :class:`~gymnasium.spaces.Box`
+          - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+        * - :class:`~gymnasium.spaces.Discrete`
+          - Single-element set (e.g.: ``{2}``)
+        * - :class:`~gymnasium.spaces.MultiDiscrete`
+          - List of single-element sets (e.g.: ``[{2}, {5}]``)
+        * - :class:`~gymnasium.spaces.Dict`
+          - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+        * - :class:`~gymnasium.spaces.Tuple`
+          - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+    """
+
+    num_states: int | None = None
+    """The dimension of the state space from each environment instance.
+
+    .. warning::
+
+        This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.state_space` instead.
     """
 
     observation_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None
@@ -124,8 +179,36 @@ class DirectMARLEnvCfg:
     Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details.
     """
 
-    num_actions: dict[AgentID, int] = MISSING
-    """The dimension of the action space for each agent."""
+    action_spaces: dict[AgentID, SpaceType] = MISSING
+    """Action space definition for each agent.
+
+    The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+    specification of the space is desired) or basic Python data types (for simplicity).
+
+    .. list-table::
+        :header-rows: 1
+
+        * - Gymnasium space
+          - Python data type
+        * - :class:`~gymnasium.spaces.Box`
+          - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+        * - :class:`~gymnasium.spaces.Discrete`
+          - Single-element set (e.g.: ``{2}``)
+        * - :class:`~gymnasium.spaces.MultiDiscrete`
+          - List of single-element sets (e.g.: ``[{2}, {5}]``)
+        * - :class:`~gymnasium.spaces.Dict`
+          - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+        * - :class:`~gymnasium.spaces.Tuple`
+          - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+    """
+
+    num_actions: dict[AgentID, int] | None = None
+    """The dimension of the action space for each agent.
+
+    .. warning::
+
+        This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.action_spaces` instead.
+    """
 
     action_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None
     """The noise model applied to the actions provided to the environment. Default is None, which means no noise is added.
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py
index 5663977fda..2a0e88cb63 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py
@@ -14,6 +14,7 @@
 import weakref
 from abc import abstractmethod
 from collections.abc import Sequence
+from dataclasses import MISSING
 from typing import Any, ClassVar
 
 import omni.isaac.core.utils.torch as torch_utils
@@ -30,6 +31,7 @@
 from .common import VecEnvObs, VecEnvStepReturn
 from .direct_rl_env_cfg import DirectRLEnvCfg
 from .ui import ViewportCameraController
+from .utils.spaces import sample_space, spec_to_gym_space
 
 
 class DirectRLEnv(gym.Env):
@@ -171,7 +173,6 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs
         self.reset_terminated = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
         self.reset_time_outs = torch.zeros_like(self.reset_terminated)
         self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
-        self.actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.sim.device)
 
         # setup the action and observation spaces for Gym
         self._configure_gym_env_spaces()
@@ -507,27 +508,40 @@ def set_debug_vis(self, debug_vis: bool) -> bool:
 
     def _configure_gym_env_spaces(self):
         """Configure the action and observation spaces for the Gym environment."""
-        # observation space (unbounded since we don't impose any limits)
-        self.num_actions = self.cfg.num_actions
-        self.num_observations = self.cfg.num_observations
-        self.num_states = self.cfg.num_states
+        # show deprecation message and overwrite configuration
+        if self.cfg.num_actions is not None:
+            omni.log.warn("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.")
+            if isinstance(self.cfg.action_space, type(MISSING)):
+                self.cfg.action_space = self.cfg.num_actions
+        if self.cfg.num_observations is not None:
+            omni.log.warn(
+                "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead."
+            )
+            if isinstance(self.cfg.observation_space, type(MISSING)):
+                self.cfg.observation_space = self.cfg.num_observations
+        if self.cfg.num_states is not None:
+            omni.log.warn("DirectRLEnvCfg.num_states is deprecated. Use DirectRLEnvCfg.state_space instead.")
+            if isinstance(self.cfg.state_space, type(MISSING)):
+                self.cfg.state_space = self.cfg.num_states
 
         # set up spaces
         self.single_observation_space = gym.spaces.Dict()
-        self.single_observation_space["policy"] = gym.spaces.Box(
-            low=-np.inf, high=np.inf, shape=(self.num_observations,)
-        )
-        self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,))
+        self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space)
+        self.single_action_space = spec_to_gym_space(self.cfg.action_space)
 
         # batch the spaces for vectorized environments
         self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs)
         self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
 
         # optional state space for asymmetric actor-critic architectures
-        if self.num_states > 0:
-            self.single_observation_space["critic"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_states,))
+        self.state_space = None
+        if self.cfg.state_space > 0:
+            self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space)
             self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs)
 
+        # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+        self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0)
+
     def _reset_idx(self, env_ids: Sequence[int]):
         """Reset environments based on specified indices.
 
@@ -601,7 +615,7 @@ def _get_states(self) -> VecEnvObs | None:
         """Compute and return the states for the environment.
 
         The state-space is used for asymmetric actor-critic architectures. It is configured
-        using the :attr:`DirectRLEnvCfg.num_states` parameter.
+        using the :attr:`DirectRLEnvCfg.state_space` parameter.
 
         Returns:
             The states for the environment. If the environment does not have a state-space, the function
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py
index ad8c6c18c8..e86b366cc2 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py
@@ -10,7 +10,7 @@
 from omni.isaac.lab.utils import configclass
 from omni.isaac.lab.utils.noise import NoiseModelCfg
 
-from .common import ViewerCfg
+from .common import SpaceType, ViewerCfg
 from .ui import BaseEnvWindow
 
 
@@ -104,13 +104,68 @@ class DirectRLEnvCfg:
     Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details.
     """
 
-    num_observations: int = MISSING
-    """The dimension of the observation space from each environment instance."""
+    observation_space: SpaceType = MISSING
+    """Observation space definition.
+
+    The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+    specification of the space is desired) or basic Python data types (for simplicity).
+
+    .. list-table::
+        :header-rows: 1
+
+        * - Gymnasium space
+          - Python data type
+        * - :class:`~gymnasium.spaces.Box`
+          - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+        * - :class:`~gymnasium.spaces.Discrete`
+          - Single-element set (e.g.: ``{2}``)
+        * - :class:`~gymnasium.spaces.MultiDiscrete`
+          - List of single-element sets (e.g.: ``[{2}, {5}]``)
+        * - :class:`~gymnasium.spaces.Dict`
+          - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+        * - :class:`~gymnasium.spaces.Tuple`
+          - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+    """
+
+    num_observations: int | None = None
+    """The dimension of the observation space from each environment instance.
+
+    .. warning::
+
+        This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.observation_space` instead.
+    """
 
-    num_states: int = 0
-    """The dimension of the state-space from each environment instance. Default is 0, which means no state-space is defined.
+    state_space: SpaceType = MISSING
+    """State space definition.
 
     This is useful for asymmetric actor-critic and defines the observation space for the critic.
+
+    The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+    specification of the space is desired) or basic Python data types (for simplicity).
+
+    .. list-table::
+        :header-rows: 1
+
+        * - Gymnasium space
+          - Python data type
+        * - :class:`~gymnasium.spaces.Box`
+          - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+        * - :class:`~gymnasium.spaces.Discrete`
+          - Single-element set (e.g.: ``{2}``)
+        * - :class:`~gymnasium.spaces.MultiDiscrete`
+          - List of single-element sets (e.g.: ``[{2}, {5}]``)
+        * - :class:`~gymnasium.spaces.Dict`
+          - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+        * - :class:`~gymnasium.spaces.Tuple`
+          - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+    """
+
+    num_states: int | None = None
+    """The dimension of the state-space from each environment instance.
+
+    .. warning::
+
+        This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.state_space` instead.
     """
 
     observation_noise_model: NoiseModelCfg | None = None
@@ -119,8 +174,36 @@ class DirectRLEnvCfg:
     Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details.
     """
 
-    num_actions: int = MISSING
-    """The dimension of the action space for each environment."""
+    action_space: SpaceType = MISSING
+    """Action space definition.
+
+    The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+    specification of the space is desired) or basic Python data types (for simplicity).
+
+    .. list-table::
+        :header-rows: 1
+
+        * - Gymnasium space
+          - Python data type
+        * - :class:`~gymnasium.spaces.Box`
+          - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+        * - :class:`~gymnasium.spaces.Discrete`
+          - Single-element set (e.g.: ``{2}``)
+        * - :class:`~gymnasium.spaces.MultiDiscrete`
+          - List of single-element sets (e.g.: ``[{2}, {5}]``)
+        * - :class:`~gymnasium.spaces.Dict`
+          - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+        * - :class:`~gymnasium.spaces.Tuple`
+          - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+    """
+
+    num_actions: int | None = None
+    """The dimension of the action space for each environment.
+
+    .. warning::
+
+        This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.action_space` instead.
+    """
 
     action_noise_model: NoiseModelCfg | None = None
     """The noise model applied to the actions provided to the environment. Default is None, which means no noise is added.
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py
new file mode 100644
index 0000000000..913e1edb90
--- /dev/null
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-package for environment utils."""
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
similarity index 76%
rename from source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py
rename to source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
index cacbdeaf81..46519048ae 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
@@ -9,9 +9,9 @@
 import torch
 from typing import Any
 
-from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn
-from .direct_marl_env import DirectMARLEnv
-from .direct_rl_env import DirectRLEnv
+from ..common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn
+from ..direct_marl_env import DirectMARLEnv
+from ..direct_rl_env import DirectRLEnv
 
 
 def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool = False) -> DirectRLEnv:
@@ -39,7 +39,7 @@ def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool =
 
     Raises:
         AssertionError: If the environment state cannot be used as observation since it was explicitly defined
-            as unconstructed (:attr:`DirectMARLEnvCfg.num_states`).
+            as unconstructed (:attr:`DirectMARLEnvCfg.state_space`).
     """
 
     class Env(DirectRLEnv):
@@ -49,7 +49,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
             # check if it is possible to use the multi-agent environment state as single-agent observation
             self._state_as_observation = state_as_observation
             if self._state_as_observation:
-                assert self.env.cfg.num_states != 0, (
+                assert self.env.cfg.state_space != 0, (
                     "The environment state cannot be used as observation since it was explicitly defined as"
                     " unconstructed"
                 )
@@ -58,18 +58,17 @@ def __init__(self, env: DirectMARLEnv) -> None:
             self.cfg = self.env.cfg
             self.sim = self.env.sim
             self.scene = self.env.scene
-            self.num_actions = sum(self.env.cfg.num_actions.values())
-            self.num_observations = sum(self.env.cfg.num_observations.values())
-            self.num_states = self.env.cfg.num_states
 
             self.single_observation_space = gym.spaces.Dict()
             if self._state_as_observation:
                 self.single_observation_space["policy"] = self.env.state_space
             else:
-                self.single_observation_space["policy"] = gym.spaces.Box(
-                    low=-np.inf, high=np.inf, shape=(self.num_observations,)
+                self.single_observation_space["policy"] = gym.spaces.flatten_space(
+                    gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents])
                 )
-            self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,))
+            self.single_action_space = gym.spaces.flatten_space(
+                gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents])
+            )
 
             # batch the spaces for vectorized environments
             self.observation_space = gym.vector.utils.batch_space(
@@ -84,18 +83,25 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None)
             if self._state_as_observation:
                 obs = {"policy": self.env.state()}
             # concatenate agents' observations
+            # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             else:
-                obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+                obs = {
+                    "policy": torch.cat(
+                        [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+                    )
+                }
 
             return obs, extras
 
         def step(self, action: torch.Tensor) -> VecEnvStepReturn:
             # split single-agent actions to build the multi-agent ones
+            # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             index = 0
             _actions = {}
             for agent in self.env.possible_agents:
-                _actions[agent] = action[:, index : index + self.env.cfg.num_actions[agent]]
-                index += self.env.cfg.num_actions[agent]
+                delta = gym.spaces.flatdim(self.env.action_spaces[agent])
+                _actions[agent] = action[:, index : index + delta]
+                index += delta
 
             # step the environment
             obs, rewards, terminated, time_outs, extras = self.env.step(_actions)
@@ -104,8 +110,13 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
             if self._state_as_observation:
                 obs = {"policy": self.env.state()}
             # concatenate agents' observations
+            # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             else:
-                obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+                obs = {
+                    "policy": torch.cat(
+                        [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+                    )
+                }
 
             # process environment outputs to return single-agent data
             rewards = sum(rewards.values())
@@ -147,7 +158,7 @@ def multi_agent_with_one_agent(env: DirectMARLEnv, state_as_observation: bool =
 
     Raises:
         AssertionError: If the environment state cannot be used as observation since it was explicitly defined
-            as unconstructed (:attr:`DirectMARLEnvCfg.num_states`).
+            as unconstructed (:attr:`DirectMARLEnvCfg.state_space`).
     """
 
     class Env(DirectMARLEnv):
@@ -157,7 +168,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
             # check if it is possible to use the multi-agent environment state as agent observation
             self._state_as_observation = state_as_observation
             if self._state_as_observation:
-                assert self.env.cfg.num_states != 0, (
+                assert self.env.cfg.state_space != 0, (
                     "The environment state cannot be used as observation since it was explicitly defined as"
                     " unconstructed"
                 )
@@ -170,13 +181,13 @@ def __init__(self, env: DirectMARLEnv) -> None:
                 self._exported_observation_spaces = {self._agent_id: self.env.state_space}
             else:
                 self._exported_observation_spaces = {
-                    self._agent_id: gym.spaces.Box(
-                        low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_observations.values()),)
+                    self._agent_id: gym.spaces.flatten_space(
+                        gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents])
                     )
                 }
             self._exported_action_spaces = {
-                self._agent_id: gym.spaces.Box(
-                    low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_actions.values()),)
+                self._agent_id: gym.spaces.flatten_space(
+                    gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents])
                 )
             }
 
@@ -208,18 +219,25 @@ def reset(
             if self._state_as_observation:
                 obs = {self._agent_id: self.env.state()}
             # concatenate agents' observations
+            # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             else:
-                obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+                obs = {
+                    self._agent_id: torch.cat(
+                        [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+                    )
+                }
 
             return obs, extras
 
         def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
             # split agent actions to build the multi-agent ones
+            # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             index = 0
             _actions = {}
             for agent in self.env.possible_agents:
-                _actions[agent] = actions[self._agent_id][:, index : index + self.env.cfg.num_actions[agent]]
-                index += self.env.cfg.num_actions[agent]
+                delta = gym.spaces.flatdim(self.env.action_spaces[agent])
+                _actions[agent] = actions[self._agent_id][:, index : index + delta]
+                index += delta
 
             # step the environment
             obs, rewards, terminated, time_outs, extras = self.env.step(_actions)
@@ -228,8 +246,13 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
             if self._state_as_observation:
                 obs = {self._agent_id: self.env.state()}
             # concatenate agents' observations
+            # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
             else:
-                obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+                obs = {
+                    self._agent_id: torch.cat(
+                        [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+                    )
+                }
 
             # process environment outputs to return agent data
             rewards = {self._agent_id: sum(rewards.values())}
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py
new file mode 100644
index 0000000000..8604392ec6
--- /dev/null
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+import numpy as np
+import torch
+from typing import Any
+
+from ..common import SpaceType
+
+
+def spec_to_gym_space(spec: SpaceType) -> gym.spaces.Space:
+    """Convert a space specification into the equivalent Gymnasium space.
+
+    Args:
+        spec: Gymnasium space (returned as-is) or its shorthand: int / list of ints (Box), single-element
+            set (Discrete), list of single-element sets (MultiDiscrete), tuple (Tuple) or dict (Dict).
+
+    Returns:
+        The Gymnasium space described by ``spec``.
+
+    Raises:
+        ValueError: If ``spec`` matches none of the supported specification forms.
+    """
+    # already a Gymnasium space: nothing to convert
+    if isinstance(spec, gym.spaces.Space):
+        return spec
+    # Box: unbounded, with the shape given by an integer or by a list of integers
+    if isinstance(spec, int):
+        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,))
+    if isinstance(spec, list):
+        if all(isinstance(dim, int) for dim in spec):
+            return gym.spaces.Box(low=-np.inf, high=np.inf, shape=spec)
+        # MultiDiscrete: one single-element set per dimension
+        if all(isinstance(item, set) and len(item) == 1 for item in spec):
+            return gym.spaces.MultiDiscrete(nvec=[next(iter(item)) for item in spec])
+    # Discrete: single-element set whose only element is passed as ``n``
+    if isinstance(spec, set) and len(spec) == 1:
+        return gym.spaces.Discrete(n=next(iter(spec)))
+    # composite spaces (Tuple / Dict): convert each member recursively
+    if isinstance(spec, tuple):
+        return gym.spaces.Tuple([spec_to_gym_space(member) for member in spec])
+    if isinstance(spec, dict):
+        return gym.spaces.Dict({key: spec_to_gym_space(value) for key, value in spec.items()})
+    raise ValueError(f"Unsupported space specification: {spec}")
+
+
+def sample_space(space: gym.spaces.Space, device: str, batch_size: int = -1, fill_value: float | None = None) -> Any:
+    """Sample a Gymnasium space where the data containers are PyTorch tensors.
+
+    Args:
+        space: Gymnasium space.
+        device: The device where the tensor should be created.
+        batch_size: Batch size. If the specified value is greater than zero, a batched space will be created and sampled from it.
+        fill_value: The value to fill the created tensors with. If None (default value), tensors will keep their random values.
+
+    Returns:
+        Tensorized sampled space.
+
+    Raises:
+        ValueError: If the space (or one of its members) or the type of its sample is not supported.
+    """
+
+    def to_tensor(x, dtype, shape):
+        # add the leading batch dimension (-1 lets reshape infer it) and overwrite the values if requested
+        tensor = torch.tensor(x, device=device, dtype=dtype).reshape(batch_size, *shape)
+        if fill_value is not None:
+            tensor.fill_(fill_value if dtype == torch.float32 else int(fill_value))
+        return tensor
+
+    def tensorize(s, x):
+        if isinstance(s, gym.spaces.Box):
+            return to_tensor(x, torch.float32, s.shape)
+        elif isinstance(s, gym.spaces.Discrete):
+            if isinstance(x, np.ndarray):
+                return to_tensor(x, torch.int64, (1,))
+            elif isinstance(x, np.number) or type(x) in [int, float]:
+                return to_tensor([x], torch.int64, (1,))
+        elif isinstance(s, gym.spaces.MultiDiscrete):
+            if isinstance(x, np.ndarray):
+                return to_tensor(x, torch.int64, s.shape)
+        elif isinstance(s, gym.spaces.Dict):
+            return {k: tensorize(_s, x[k]) for k, _s in s.items()}
+        elif isinstance(s, gym.spaces.Tuple):
+            return tuple(tensorize(_s, v) for _s, v in zip(s, x))
+        # no branch produced a tensor: fail loudly instead of implicitly returning None
+        raise ValueError(f"Unsupported Gymnasium space ({s}) or sample type ({type(x)})")
+
+    sample = (gym.vector.utils.batch_space(space, batch_size) if batch_size > 0 else space).sample()
+    return tensorize(space, sample)
diff --git a/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py
new file mode 100644
index 0000000000..274f0de650
--- /dev/null
+++ b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ignore private usage of variables warning
+# pyright: reportPrivateUsage=none
+
+from __future__ import annotations
+
+"""Launch Isaac Sim Simulator first."""
+
+from omni.isaac.lab.app import AppLauncher, run_tests
+
+# Can set this to False to see the GUI for debugging
+HEADLESS = True
+
+# launch omniverse app
+app_launcher = AppLauncher(headless=HEADLESS)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+import numpy as np
+import torch
+import unittest
+from gymnasium.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
+
+from omni.isaac.lab.envs.utils.spaces import sample_space, spec_to_gym_space
+
+
+class TestSpacesUtils(unittest.TestCase):
+    """Tests for the space utilities (:func:`spec_to_gym_space` and :func:`sample_space`)."""
+
+    """
+    Tests
+    """
+
+    def test_spec_to_gym_space(self):
+        """Check the space generated for each supported specification and the rejection of unsupported ones."""
+        # fundamental spaces: Box
+        space = spec_to_gym_space(1)
+        self.assertIsInstance(space, Box)
+        self.assertEqual(space.shape, (1,))
+        space = spec_to_gym_space([1, 2, 3, 4, 5])
+        self.assertIsInstance(space, Box)
+        self.assertEqual(space.shape, (1, 2, 3, 4, 5))
+        self.assertIsInstance(spec_to_gym_space(Box(low=-1.0, high=1.0, shape=(1, 2))), Box)
+        # Discrete
+        space = spec_to_gym_space({2})
+        self.assertIsInstance(space, Discrete)
+        self.assertEqual(space.n, 2)
+        self.assertIsInstance(spec_to_gym_space(Discrete(2)), Discrete)
+        # MultiDiscrete
+        space = spec_to_gym_space([{1}, {2}, {3}])
+        self.assertIsInstance(space, MultiDiscrete)
+        self.assertEqual(space.nvec.shape, (3,))
+        self.assertIsInstance(spec_to_gym_space(MultiDiscrete(np.array([1, 2, 3]))), MultiDiscrete)
+        # composite spaces
+        # Tuple
+        space = spec_to_gym_space(([1, 2, 3, 4, 5], {2}, [{1}, {2}, {3}]))
+        self.assertIsInstance(space, Tuple)
+        self.assertEqual(len(space), 3)
+        self.assertIsInstance(space[0], Box)
+        self.assertIsInstance(space[1], Discrete)
+        self.assertIsInstance(space[2], MultiDiscrete)
+        self.assertIsInstance(spec_to_gym_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2)))), Tuple)
+        # Dict
+        space = spec_to_gym_space({"box": [1, 2, 3, 4, 5], "discrete": {2}, "multi_discrete": [{1}, {2}, {3}]})
+        self.assertIsInstance(space, Dict)
+        self.assertEqual(len(space), 3)
+        self.assertIsInstance(space["box"], Box)
+        self.assertIsInstance(space["discrete"], Discrete)
+        self.assertIsInstance(space["multi_discrete"], MultiDiscrete)
+        self.assertIsInstance(spec_to_gym_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)})), Dict)
+        # unsupported specifications (string, float, multi-element set, list mixing integers and sets)
+        for spec in ("invalid", 1.5, {1, 2}, [1, {2}]):
+            self.assertRaises(ValueError, spec_to_gym_space, spec)
+
+    def test_sample_space(self):
+        """Check the container type, batch dimension and (optional) fill value of the tensorized samples."""
+        device = "cpu"
+        # fundamental spaces: Box
+        sample = sample_space(Box(low=-1.0, high=1.0, shape=(1, 2)), device, batch_size=1)
+        self._check_tensorized(sample, batch_size=1)
+        self.assertEqual(sample.shape, (1, 1, 2))
+        # Discrete
+        sample = sample_space(Discrete(2), device, batch_size=2)
+        self._check_tensorized(sample, batch_size=2)
+        self.assertEqual(sample.shape, (2, 1))
+        # MultiDiscrete
+        sample = sample_space(MultiDiscrete(np.array([1, 2, 3])), device, batch_size=3)
+        self._check_tensorized(sample, batch_size=3)
+        self.assertEqual(sample.shape, (3, 3))
+        # composite spaces
+        # Tuple
+        sample = sample_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2))), device, batch_size=4)
+        self.assertIsInstance(sample, (tuple, list))
+        self._check_tensorized(sample, batch_size=4)
+        # Dict
+        sample = sample_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)}), device, batch_size=5)
+        self.assertIsInstance(sample, dict)
+        self._check_tensorized(sample, batch_size=5)
+        filled = sample_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2))), device, batch_size=4, fill_value=1)
+        self.assertTrue(all(torch.all(x == 1) for x in filled))  # fill_value overrides the sampled values
+
+    """
+    Helper functions.
+    """
+
+    def _check_tensorized(self, sample, batch_size):
+        """Recursively check that every leaf of the sample is a tensor whose first dimension is the batch size."""
+        if isinstance(sample, (tuple, list, dict)):
+            for child in sample.values() if isinstance(sample, dict) else sample:
+                self._check_tensorized(child, batch_size)
+        else:
+            self.assertIsInstance(sample, torch.Tensor)
+            self.assertEqual(sample.shape[0], batch_size)
+
+
+if __name__ == "__main__":
+    run_tests()
diff --git a/source/extensions/omni.isaac.lab_tasks/config/extension.toml b/source/extensions/omni.isaac.lab_tasks/config/extension.toml
index 89ca646936..a6ecb7a56c 100644
--- a/source/extensions/omni.isaac.lab_tasks/config/extension.toml
+++ b/source/extensions/omni.isaac.lab_tasks/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.10.5"
+version = "0.10.7"
 
 # Description
 title = "Isaac Lab Environments"
diff --git a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst
index 2614630bd7..b3ba0a77fd 100644
--- a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst
@@ -1,11 +1,24 @@
 Changelog
 ---------
 
+0.10.7 (2024-10-02)
+~~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Replaced the deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in single-agent direct
+  tasks with :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively.
+* Replaced the deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in multi-agent direct
+  tasks with :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively.
+
+
 0.10.6 (2024-09-25)
 ~~~~~~~~~~~~~~~~~~~
 
 Added
 ^^^^^
+
 * Added ``Isaac-Cartpole-RGB-Camera-v0`` and ``Isaac-Cartpole-Depth-Camera-v0``
   manager based camera cartpole environments.
 
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py
index b83b6782a6..b5c53a91d3 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py
@@ -22,9 +22,9 @@ class AllegroHandEnvCfg(DirectRLEnvCfg):
     # env
     decimation = 4
     episode_length_s = 10.0
-    num_actions = 16
-    num_observations = 124  # (full)
-    num_states = 0
+    action_space = 16
+    observation_space = 124  # (full)
+    state_space = 0
     asymmetric_obs = False
     obs_type = "full"
     # simulation
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py
index 8bf6d6bcc9..42f57127ee 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py
@@ -24,9 +24,9 @@ class AntEnvCfg(DirectRLEnvCfg):
     episode_length_s = 15.0
     decimation = 2
     action_scale = 0.5
-    num_actions = 8
-    num_observations = 36
-    num_states = 0
+    action_space = 8
+    observation_space = 36
+    state_space = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py
index 5490bb0dd3..ca1f61c54a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import gymnasium as gym
 import torch
 
 import omni.isaac.lab.envs.mdp as mdp
@@ -59,9 +60,9 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg):
     episode_length_s = 20.0
     decimation = 4
     action_scale = 0.5
-    num_actions = 12
-    num_observations = 48
-    num_states = 0
+    action_space = 12
+    observation_space = 48
+    state_space = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(
@@ -118,7 +119,7 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg):
 @configclass
 class AnymalCRoughEnvCfg(AnymalCFlatEnvCfg):
     # env
-    num_observations = 235
+    observation_space = 235
 
     terrain = TerrainImporterCfg(
         prim_path="/World/ground",
@@ -160,8 +161,10 @@ def __init__(self, cfg: AnymalCFlatEnvCfg | AnymalCRoughEnvCfg, render_mode: str
         super().__init__(cfg, render_mode, **kwargs)
 
         # Joint position command (deviation from default joint positions)
-        self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
-        self._previous_actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
+        self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device)
+        self._previous_actions = torch.zeros(
+            self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device
+        )
 
         # X/Y linear velocity and yaw angular velocity commands
         self._commands = torch.zeros(self.num_envs, 3, device=self.device)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py
index 0b606fe899..ad8c616940 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py
@@ -27,9 +27,9 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg):
     decimation = 2
     episode_length_s = 5.0
     possible_agents = ["cart", "pendulum"]
-    num_actions = {"cart": 1, "pendulum": 1}
-    num_observations = {"cart": 4, "pendulum": 3}
-    num_states = -1
+    action_spaces = {"cart": 1, "pendulum": 1}
+    observation_spaces = {"cart": 4, "pendulum": 3}
+    state_space = -1
 
     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py
index b2a1b1e303..dc7db07030 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py
@@ -5,9 +5,7 @@
 
 from __future__ import annotations
 
-import gymnasium as gym
 import math
-import numpy as np
 import torch
 from collections.abc import Sequence
 
@@ -29,9 +27,6 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg):
     decimation = 2
     episode_length_s = 5.0
     action_scale = 100.0  # [N]
-    num_actions = 1
-    num_channels = 3
-    num_states = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
@@ -52,9 +47,13 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg):
         width=80,
         height=80,
     )
-    num_observations = num_channels * tiled_camera.height * tiled_camera.width
     write_image_to_file = False
 
+    # spaces
+    action_space = 1
+    state_space = 0
+    observation_space = [tiled_camera.height, tiled_camera.width, 3]
+
     # change viewer settings
     viewer = ViewerCfg(eye=(20.0, 20.0, 20.0))
 
@@ -87,9 +86,8 @@ class CartpoleDepthCameraEnvCfg(CartpoleRGBCameraEnvCfg):
         height=80,
     )
 
-    # env
-    num_channels = 1
-    num_observations = num_channels * tiled_camera.height * tiled_camera.width
+    # spaces
+    observation_space = [tiled_camera.height, tiled_camera.width, 1]
 
 
 class CartpoleCameraEnv(DirectRLEnv):
@@ -118,35 +116,6 @@ def close(self):
         """Cleanup for the environment."""
         super().close()
 
-    def _configure_gym_env_spaces(self):
-        """Configure the action and observation spaces for the Gym environment."""
-        # observation space (unbounded since we don't impose any limits)
-        self.num_actions = self.cfg.num_actions
-        self.num_observations = self.cfg.num_observations
-        self.num_states = self.cfg.num_states
-
-        # set up spaces
-        self.single_observation_space = gym.spaces.Dict()
-        self.single_observation_space["policy"] = gym.spaces.Box(
-            low=-np.inf,
-            high=np.inf,
-            shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels),
-        )
-        if self.num_states > 0:
-            self.single_observation_space["critic"] = gym.spaces.Box(
-                low=-np.inf,
-                high=np.inf,
-                shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels),
-            )
-        self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,))
-
-        # batch the spaces for vectorized environments
-        self.observation_space = gym.vector.utils.batch_space(self.single_observation_space, self.num_envs)
-        self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
-
-        # RL specifics
-        self.actions = torch.zeros(self.num_envs, self.num_actions, device=self.sim.device)
-
     def _setup_scene(self):
         """Setup the scene with the cartpole and camera."""
         self._cartpole = Articulation(self.cfg.robot_cfg)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
index 44926e95f9..534fb26443 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
@@ -27,9 +27,9 @@ class CartpoleEnvCfg(DirectRLEnvCfg):
     decimation = 2
     episode_length_s = 5.0
     action_scale = 100.0  # [N]
-    num_actions = 1
-    num_observations = 4
-    num_states = 0
+    action_space = 1
+    observation_space = 4
+    state_space = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
index 4eb01953fe..3a6a480ed0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
@@ -28,9 +28,9 @@ class FrankaCabinetEnvCfg(DirectRLEnvCfg):
     # env
     episode_length_s = 8.3333  # 500 timesteps
     decimation = 2
-    num_actions = 9
-    num_observations = 23
-    num_states = 0
+    action_space = 9
+    observation_space = 23
+    state_space = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
index bfaf8f8190..2a4d330e6a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
@@ -24,9 +24,9 @@ class HumanoidEnvCfg(DirectRLEnvCfg):
     episode_length_s = 15.0
     decimation = 2
     action_scale = 1.0
-    num_actions = 21
-    num_observations = 75
-    num_states = 0
+    action_space = 21
+    observation_space = 75
+    state_space = 0
 
     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
index c6df659ec6..97156618f1 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import gymnasium as gym
 import torch
 
 import omni.isaac.lab.sim as sim_utils
@@ -50,9 +51,9 @@ class QuadcopterEnvCfg(DirectRLEnvCfg):
     # env
     episode_length_s = 10.0
     decimation = 2
-    num_actions = 4
-    num_observations = 12
-    num_states = 0
+    action_space = 4
+    observation_space = 12
+    state_space = 0
     debug_vis = True
 
     ui_window_class_type = QuadcopterEnvWindow
@@ -105,7 +106,7 @@ def __init__(self, cfg: QuadcopterEnvCfg, render_mode: str | None = None, **kwar
         super().__init__(cfg, render_mode, **kwargs)
 
         # Total thrust and moment applied to the base of the quadcopter
-        self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
+        self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device)
         self._thrust = torch.zeros(self.num_envs, 1, 3, device=self.device)
         self._moment = torch.zeros(self.num_envs, 1, 3, device=self.device)
         # Goal position
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
index f4b8407296..af88124792 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
@@ -119,9 +119,9 @@ class ShadowHandEnvCfg(DirectRLEnvCfg):
     # env
     decimation = 2
     episode_length_s = 10.0
-    num_actions = 20
-    num_observations = 157  # (full)
-    num_states = 0
+    action_space = 20
+    observation_space = 157  # (full)
+    state_space = 0
     asymmetric_obs = False
     obs_type = "full"
 
@@ -232,9 +232,9 @@ class ShadowHandOpenAIEnvCfg(ShadowHandEnvCfg):
     # env
     decimation = 3
     episode_length_s = 8.0
-    num_actions = 20
-    num_observations = 42
-    num_states = 187
+    action_space = 20
+    observation_space = 42
+    state_space = 187
     asymmetric_obs = True
     obs_type = "openai"
     # simulation
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
index b025bfb052..492074d8a9 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
@@ -48,8 +48,8 @@ class ShadowHandVisionEnvCfg(ShadowHandEnvCfg):
     feature_extractor = FeatureExtractorCfg()
 
     # env
-    num_observations = 164 + 27  # state observation + vision CNN embedding
-    num_states = 187 + 27  # asymettric states + vision CNN embedding
+    observation_space = 164 + 27  # state observation + vision CNN embedding
+    state_space = 187 + 27  # asymmetric states + vision CNN embedding
 
 
 @configclass
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
index d6dbb3d6a2..d3a7c33b3f 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
@@ -118,9 +118,9 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
     decimation = 2
     episode_length_s = 7.5
     possible_agents = ["right_hand", "left_hand"]
-    num_actions = {"right_hand": 20, "left_hand": 20}
-    num_observations = {"right_hand": 157, "left_hand": 157}
-    num_states = 290
+    action_spaces = {"right_hand": 20, "left_hand": 20}
+    observation_spaces = {"right_hand": 157, "left_hand": 157}
+    state_space = 290
 
     # simulation
     sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
index 0badd08c31..0dedef9ef0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
@@ -70,19 +70,19 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
         if hasattr(self.unwrapped, "action_manager"):
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
-            self.num_actions = self.unwrapped.num_actions
+            self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space)
         if hasattr(self.unwrapped, "observation_manager"):
             self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0]
         else:
-            self.num_obs = self.unwrapped.num_observations
+            self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"])
         # -- privileged observations
         if (
             hasattr(self.unwrapped, "observation_manager")
             and "critic" in self.unwrapped.observation_manager.group_obs_dim
         ):
             self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0]
-        elif hasattr(self.unwrapped, "num_states"):
-            self.num_privileged_obs = self.unwrapped.num_states
+        elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space:
+            self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
         else:
             self.num_privileged_obs = 0
         # reset at the start since the RSL-RL runner does not call reset
diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
index 9e92e26156..993b776a81 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
@@ -22,6 +22,7 @@
 import omni.usd
 
 from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space
 
 import omni.isaac.lab_tasks  # noqa: F401
 from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -108,12 +109,12 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
         # simulate environment for num_steps steps
         with torch.inference_mode():
             for _ in range(num_steps):
-                # sample actions from -1 to 1
-                actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1
+                # sample actions according to the defined space
+                actions = sample_space(env.single_action_space, device=env.unwrapped.device, batch_size=num_envs)
                 # apply actions
                 transition = env.step(actions)
                 # check signals
-                for data in transition:
+                for data in transition[:-1]:  # exclude info
                     self.assertTrue(self._check_valid_tensor(data), msg=f"Invalid data: {data}")
 
         # close the environment
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
         """
         if isinstance(data, torch.Tensor):
             return not torch.any(torch.isnan(data))
+        elif isinstance(data, (tuple, list)):
+            return all(TestEnvironments._check_valid_tensor(value) for value in data)
         elif isinstance(data, dict):
-            valid_tensor = True
-            for value in data.values():
-                if isinstance(value, dict):
-                    valid_tensor &= TestEnvironments._check_valid_tensor(value)
-                elif isinstance(value, torch.Tensor):
-                    valid_tensor &= not torch.any(torch.isnan(value))
-            return valid_tensor
+            return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
         else:
             raise ValueError(f"Input data of invalid type: {type(data)}.")
 
diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
index 19fcd88936..2f543a84e3 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
@@ -21,6 +21,7 @@
 import omni.usd
 
 from omni.isaac.lab.envs import DirectMARLEnv, DirectMARLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space
 
 import omni.isaac.lab_tasks  # noqa: F401
 from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -104,9 +105,9 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
         # simulate environment for num_steps steps
         with torch.inference_mode():
             for _ in range(num_steps):
-                # sample actions from -1 to 1
+                # sample actions according to the defined space
                 actions = {
-                    agent: 2 * torch.rand(env.action_space(agent).shape, device=env.unwrapped.device) - 1
+                    agent: sample_space(env.action_spaces[agent], device=env.unwrapped.device)
                     for agent in env.unwrapped.possible_agents
                 }
                 # apply actions
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
         """
         if isinstance(data, torch.Tensor):
             return not torch.any(torch.isnan(data))
+        elif isinstance(data, (tuple, list)):
+            return all(TestEnvironments._check_valid_tensor(value) for value in data)
         elif isinstance(data, dict):
-            valid_tensor = True
-            for value in data.values():
-                if isinstance(value, dict):
-                    valid_tensor &= TestEnvironments._check_valid_tensor(value)
-                elif isinstance(value, torch.Tensor):
-                    valid_tensor &= not torch.any(torch.isnan(value))
-            return valid_tensor
+            return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
         else:
             raise ValueError(f"Input data of invalid type: {type(data)}.")
 

From 0bccd886c19a9891c0b6bdd37e13f338eacc0bba Mon Sep 17 00:00:00 2001
From: Sheikh Dawood <sabdulajees@nvidia.com>
Date: Fri, 18 Oct 2024 05:15:12 -0500
Subject: [PATCH 2/2] Adds versioning to the docs (#1247)

# Description

Adds versioning to the docs. Users can now choose a specific tag from
version v1.0.0 onwards.

## Type of change

- New feature (non-breaking change which adds functionality)

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [x] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [x] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there
---
 .github/workflows/docs.yaml                   | 79 +++++++++--------
 CONTRIBUTORS.md                               |  1 +
 docs/Makefile                                 | 16 ++--
 docs/README.md                                | 85 ++++++++++++++-----
 docs/_redirect/index.html                     |  8 ++
 docs/_templates/versioning.html               | 21 +++++
 docs/conf.py                                  | 22 ++++-
 .../sphinx-multiversion-license.txt           | 25 ++++++
 docs/make.bat                                 | 71 +++++++++++-----
 docs/requirements.txt                         |  2 +
 isaaclab.bat                                  |  4 +-
 isaaclab.sh                                   |  4 +-
 12 files changed, 245 insertions(+), 93 deletions(-)
 create mode 100644 docs/_redirect/index.html
 create mode 100644 docs/_templates/versioning.html
 create mode 100644 docs/licenses/dependencies/sphinx-multiversion-license.txt

diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index b0ceb6bff7..92f854ac96 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -1,6 +1,8 @@
 name: Build & deploy docs
 
-on: [push]
+on:
+  push:
+  pull_request:
 
 jobs:
   build-docs:
@@ -8,28 +10,35 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
+    - name: Checkout code
+      uses: actions/checkout@v2
 
-      - name: Setup python
-        uses: actions/setup-python@v2
-        with:
-          python-version: "3.10"
-          architecture: x64
+    - name: Setup python
+      uses: actions/setup-python@v2
+      with:
+        python-version: "3.10"
+        architecture: x64
 
-      - name: Install dev requirements
-        working-directory: ./docs
-        run: pip install -r requirements.txt
+    - name: Install dev requirements
+      working-directory: ./docs
+      run: pip install -r requirements.txt
 
-      - name: Generate docs
-        working-directory: ./docs
-        run: make html
+    - name: Check branch docs building
+      if: ${{ github.event_name == 'pull_request' }}
+      working-directory: ./docs
+      run: make current-docs
 
-      - name: Upload docs artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: docs-html
-          path: ./docs/_build/html
+    - name: Generate multi-version docs
+      working-directory: ./docs
+      run: |
+        git fetch --prune --unshallow --tags
+        make multi-docs
+
+    - name: Upload docs artifact
+      uses: actions/upload-artifact@v4
+      with:
+        name: docs-html
+        path: ./docs/_build
 
   check-secrets:
     name: Check secrets
@@ -37,12 +46,12 @@ jobs:
     outputs:
       trigger-deploy: ${{ steps.trigger-deploy.outputs.defined }}
     steps:
-      - id: trigger-deploy
-        env:
-          REPO_NAME: ${{ secrets.REPO_NAME }}
-          BRANCH_REF: ${{ secrets.BRANCH_REF }}
-        if: "${{ github.repository == env.REPO_NAME && github.ref == env.BRANCH_REF }}"
-        run: echo "defined=true" >> "$GITHUB_OUTPUT"
+    - id: trigger-deploy
+      env:
+        REPO_NAME: ${{ secrets.REPO_NAME }}
+        BRANCH_REF: ${{ secrets.BRANCH_REF }}
+      if: "${{ github.repository == env.REPO_NAME && github.ref == env.BRANCH_REF }}"
+      run: echo "defined=true" >> "$GITHUB_OUTPUT"
 
   deploy-docs:
     name: Deploy Docs
@@ -51,14 +60,14 @@ jobs:
     if: needs.check-secrets.outputs.trigger-deploy == 'true'
 
     steps:
-      - name: Download docs artifact
-        uses: actions/download-artifact@v4
-        with:
-          name: docs-html
-          path: ./docs/_build/html
+    - name: Download docs artifact
+      uses: actions/download-artifact@v4
+      with:
+        name: docs-html
+        path: ./docs/_build
 
-      - name: Deploy to gh-pages
-        uses: peaceiris/actions-gh-pages@v3
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: ./docs/_build/html
+    - name: Deploy to gh-pages
+      uses: peaceiris/actions-gh-pages@v3
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        publish_dir: ./docs/_build
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 244b910786..3c345a9d31 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -28,6 +28,7 @@ Guidelines for modifications:
 * Mayank Mittal
 * Nikita Rudin
 * Pascal Roth
+* Sheikh Dawood
 
 ## Contributors
 
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cbb9e..ce33dad503 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -8,13 +8,11 @@ SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
 BUILDDIR      = _build
 
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+.PHONY: multi-docs
+multi-docs:
+	@sphinx-multiversion "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	@cp _redirect/index.html "$(BUILDDIR)/index.html"
 
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+.PHONY: current-docs
+current-docs:
+	@$(SPHINXBUILD) "$(SOURCEDIR)" "$(BUILDDIR)/current" $(SPHINXOPTS) $(O)
diff --git a/docs/README.md b/docs/README.md
index c154e0ad0f..69a77a48d9 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,30 +1,75 @@
 # Building Documentation
 
-We use [Sphinx](https://www.sphinx-doc.org/en/master/) with the [Book Theme](https://sphinx-book-theme.readthedocs.io/en/stable/) for maintaining the documentation.
+We use [Sphinx](https://www.sphinx-doc.org/en/master/) with the [Book Theme](https://sphinx-book-theme.readthedocs.io/en/stable/) for maintaining and generating our documentation.
 
-> **Note:** To build the documentation, we recommend creating a virtual environment to avoid any conflicts with system installed dependencies.
+> **Note:** To avoid dependency conflicts, we strongly recommend using a Python virtual environment to isolate the required dependencies from your system's global Python environment.
 
-Execute the following instructions to build the documentation (assumed from the top of the repository):
+## Current-Version Documentation
 
-1. Install the dependencies for [Sphinx](https://www.sphinx-doc.org/en/master/):
+This section describes how to build the documentation for the current version of the project.
 
-    ```bash
-    # enter the location where this readme exists
-    cd docs
-    # install dependencies
-    pip install -r requirements.txt
-    ```
+<details open>
+<summary><strong>Linux</strong></summary>
 
-2. Generate the documentation file via:
+```bash
+# 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
 
-    ```bash
-    # make the html version
-    make html
-    ```
+# 2. Build the current documentation
+make current-docs
 
-3. The documentation is now available at `docs/_build/html/index.html`:
+# 3. Open the current docs
+xdg-open _build/current/index.html
+```
+</details>
 
-    ```bash
-    # open on default browser
-    xdg-open _build/html/index.html
-    ```
+<details> <summary><strong>Windows</strong></summary>
+
+```batch
+:: 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
+
+:: 2. Build the current documentation
+make current-docs
+
+:: 3. Open the current docs
+start _build\current\index.html
+```
+</details>
+
+
+## Multi-Version Documentation
+
+This section describes how to build the multi-version documentation, which includes previous tags and the main branch.
+
+<details open> <summary><strong>Linux</strong></summary>
+
+```bash
+# 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
+
+# 2. Build the multi-version documentation
+make multi-docs
+
+# 3. Open the multi-version docs
+xdg-open _build/index.html
+```
+</details>
+
+<details> <summary><strong>Windows</strong></summary>
+
+```batch
+:: 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
+
+:: 2. Build the multi-version documentation
+make multi-docs
+
+:: 3. Open the multi-version docs
+start _build\index.html
+```
+</details>
diff --git a/docs/_redirect/index.html b/docs/_redirect/index.html
new file mode 100644
index 0000000000..5208597ed1
--- /dev/null
+++ b/docs/_redirect/index.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Redirecting to the latest Isaac Lab documentation</title>
+    <meta charset="utf-8">
+    <meta http-equiv="refresh" content="0; url=./main/index.html">
+  </head>
+</html>
diff --git a/docs/_templates/versioning.html b/docs/_templates/versioning.html
new file mode 100644
index 0000000000..eb67be60e1
--- /dev/null
+++ b/docs/_templates/versioning.html
@@ -0,0 +1,21 @@
+{% if versions %}
+<nav class="bd-links bd-docs-nav">
+    <div class="bd-toc-item navbar-nav">
+      <ul class="nav bd-sidenav">
+        <li class="toctree-l1 has-children" style="display: flex; justify-content: center; align-items: center; flex-direction: column;">
+          <div  style ="text-align:center;">
+            <label for="version-select" style="font-weight: bold; display: block;">Version</label>
+          </div>
+          <select id="version-select" class="version-dropdown" style="margin: 0 auto; display: block;" onchange="location = this.value;">
+            {%- for item in versions.branches %}
+            <option value="{{ item.url }}" {% if item == current_version %}selected{% endif %}>{{ item.name }}</option>
+            {%- endfor %}
+            {%- for item in versions.tags|reverse %}
+            <option value="{{ item.url }}" {% if item == current_version %}selected{% endif %}>{{ item.name }}</option>
+            {%- endfor %}
+          </select>
+        </li>
+      </ul>
+    </div>
+</nav>
+{% endif %}
diff --git a/docs/conf.py b/docs/conf.py
index a30e673280..4c7a777559 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -54,6 +54,8 @@
     "sphinxcontrib.icon",
     "sphinx_copybutton",
     "sphinx_design",
+    "sphinx_tabs.tabs",  # backwards compatibility for building docs on v1.0.0
+    "sphinx_multiversion",
 ]
 
 # mathjax hacks
@@ -115,7 +117,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md", "licenses/*"]
+exclude_patterns = ["_build", "_redirect", "_templates", "Thumbs.db", ".DS_Store", "README.md", "licenses/*"]
 
 # Mock out modules that are not available on RTD
 autodoc_mock_imports = [
@@ -190,7 +192,7 @@
 
 import sphinx_book_theme
 
-html_title = "Isaac Lab documentation"
+html_title = "Isaac Lab Documentation"
 html_theme_path = [sphinx_book_theme.get_html_theme_path()]
 html_theme = "sphinx_book_theme"
 html_favicon = "source/_static/favicon.ico"
@@ -213,7 +215,7 @@
     "show_toc_level": 1,
     "use_sidenotes": True,
     "logo": {
-        "text": "Isaac Lab documentation",
+        "text": "Isaac Lab Documentation",
         "image_light": "source/_static/NVIDIA-logo-white.png",
         "image_dark": "source/_static/NVIDIA-logo-black.png",
     },
@@ -240,7 +242,19 @@
     "icon_links_label": "Quick Links",
 }
 
-html_sidebars = {"**": ["navbar-logo.html", "icon-links.html", "search-field.html", "sbt-sidebar-nav.html"]}
+templates_path = [
+    "_templates",
+]
+
+# Whitelist pattern for remotes
+smv_remote_whitelist = r"^.*$"
+# Whitelist pattern for branches (set to None to ignore all branches)
+smv_branch_whitelist = os.getenv("SMV_BRANCH_WHITELIST", r"^(main|devel)$")
+# Whitelist pattern for tags (set to None to ignore all tags)
+smv_tag_whitelist = os.getenv("SMV_TAG_WHITELIST", r"^v[1-9]\d*\.\d+\.\d+$")
+html_sidebars = {
+    "**": ["navbar-logo.html", "versioning.html", "icon-links.html", "search-field.html", "sbt-sidebar-nav.html"]
+}
 
 
 # -- Advanced configuration -------------------------------------------------
diff --git a/docs/licenses/dependencies/sphinx-multiversion-license.txt b/docs/licenses/dependencies/sphinx-multiversion-license.txt
new file mode 100644
index 0000000000..172d6b3f5d
--- /dev/null
+++ b/docs/licenses/dependencies/sphinx-multiversion-license.txt
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2020, Jan Holthuis <jan.holthuis@ruhr-uni-bochum.de>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/docs/make.bat b/docs/make.bat
index 2119f51099..cdaf22f257 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -2,34 +2,63 @@
 
 pushd %~dp0
 
-REM Command file for Sphinx documentation
+REM Command file to build Sphinx documentation
 
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
 set SOURCEDIR=.
 set BUILDDIR=_build
 
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
+REM Resolve the build command at top level, before any parenthesized block:
+REM cmd expands percent-variables when a whole (...) block is parsed, so a default
+REM set inside a block is not visible to the commands later in that same block.
+REM A private variable keeps one target's default from leaking into the next run.
+set SPHINXCMD=%SPHINXBUILD%
+if "%1" == "multi-docs" if not defined SPHINXCMD set SPHINXCMD=sphinx-multiversion
+if "%1" == "current-docs" if not defined SPHINXCMD set SPHINXCMD=sphinx-build
+
+REM Check if a specific target was passed
+if "%1" == "multi-docs" (
+	%SPHINXCMD% >NUL 2>NUL
+	if errorlevel 9009 (
+		echo.
+		echo.The 'sphinx-multiversion' command was not found. Make sure you have Sphinx
+		echo.installed, then set the SPHINXBUILD environment variable to point
+		echo.to the full path of the 'sphinx-multiversion' executable. Alternatively you
+		echo.may add the Sphinx directory to PATH.
+		echo.
+		echo.If you don't have Sphinx installed, grab it from
+		echo.http://sphinx-doc.org/
+		exit /b 1
+	)
+	%SPHINXCMD% %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+	REM Copy the redirect index.html to the build directory
+	copy _redirect\index.html %BUILDDIR%\index.html
+	goto end
 )
 
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
+if "%1" == "current-docs" (
+	%SPHINXCMD% >NUL 2>NUL
+	if errorlevel 9009 (
+		echo.
+		echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+		echo.installed, then set the SPHINXBUILD environment variable to point
+		echo.to the full path of the 'sphinx-build' executable. Alternatively you
+		echo.may add the Sphinx directory to PATH.
+		echo.
+		echo.If you don't have Sphinx installed, grab it from
+		echo.http://sphinx-doc.org/
+		exit /b 1
+	)
+	%SPHINXCMD% %SOURCEDIR% %BUILDDIR%\current %SPHINXOPTS% %O%
+	goto end
+)
 
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+REM If no valid target is passed, show usage instructions
+echo.
+echo.Usage:
+echo.  make.bat multi-docs    - To build the multi-version documentation.
+echo.  make.bat current-docs  - To build the current documentation.
+echo.
 
 :end
 popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 33917bd712..13b2bfe9d6 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,6 +7,8 @@ sphinx-copybutton
 sphinx-icon
 sphinx_design
 sphinxemoji
+sphinx-tabs # backwards compatibility for building docs on v1.0.0
+sphinx-multiversion==0.2.4
 
 # basic python
 numpy
diff --git a/isaaclab.bat b/isaaclab.bat
index b415ef1a13..09c6818e37 100644
--- a/isaaclab.bat
+++ b/isaaclab.bat
@@ -519,9 +519,9 @@ if "%arg%"=="-i" (
     call :extract_python_exe
     pushd %ISAACLAB_PATH%\docs
     call !python_exe! -m pip install -r requirements.txt >nul
-    call !python_exe! -m sphinx -b html -d _build\doctrees . _build\html
+    call !python_exe! -m sphinx -b html -d _build\doctrees . _build\current
     echo [INFO] To open documentation on default browser, run:
-    echo xdg-open "%ISAACLAB_PATH%\docs\_build\html\index.html"
+    echo start "" "%ISAACLAB_PATH%\docs\_build\current\index.html"
     popd >nul
     shift
     goto :end
diff --git a/isaaclab.sh b/isaaclab.sh
index a604706e70..22374fe0ff 100755
--- a/isaaclab.sh
+++ b/isaaclab.sh
@@ -396,10 +396,10 @@ while [[ $# -gt 0 ]]; do
             cd ${ISAACLAB_PATH}/docs
             ${python_exe} -m pip install -r requirements.txt > /dev/null
             # build the documentation
-            ${python_exe} -m sphinx -b html -d _build/doctrees . _build/html
+            ${python_exe} -m sphinx -b html -d _build/doctrees . _build/current
             # open the documentation
             echo -e "[INFO] To open documentation on default browser, run:"
-            echo -e "\n\t\txdg-open $(pwd)/_build/html/index.html\n"
+            echo -e "\n\t\txdg-open $(pwd)/_build/current/index.html\n"
             # exit neatly
             cd - > /dev/null
             shift # past argument