diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index b0ceb6bff7..92f854ac96 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -1,6 +1,8 @@
name: Build & deploy docs
-on: [push]
+on:
+ push:
+ pull_request:
jobs:
build-docs:
@@ -8,28 +10,35 @@ jobs:
runs-on: ubuntu-latest
steps:
- - name: Checkout code
- uses: actions/checkout@v2
+ - name: Checkout code
+ uses: actions/checkout@v2
- - name: Setup python
- uses: actions/setup-python@v2
- with:
- python-version: "3.10"
- architecture: x64
+ - name: Setup python
+ uses: actions/setup-python@v2
+ with:
+ python-version: "3.10"
+ architecture: x64
- - name: Install dev requirements
- working-directory: ./docs
- run: pip install -r requirements.txt
+ - name: Install dev requirements
+ working-directory: ./docs
+ run: pip install -r requirements.txt
- - name: Generate docs
- working-directory: ./docs
- run: make html
+ - name: Check docs build for current branch
+ if: ${{ github.event_name == 'pull_request' }}
+ working-directory: ./docs
+ run: make current-docs
- - name: Upload docs artifact
- uses: actions/upload-artifact@v4
- with:
- name: docs-html
- path: ./docs/_build/html
+ - name: Generate multi-version docs
+ working-directory: ./docs
+ run: |
+ git fetch --prune --unshallow --tags
+ make multi-docs
+
+ - name: Upload docs artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: docs-html
+ path: ./docs/_build
check-secrets:
name: Check secrets
@@ -37,12 +46,12 @@ jobs:
outputs:
trigger-deploy: ${{ steps.trigger-deploy.outputs.defined }}
steps:
- - id: trigger-deploy
- env:
- REPO_NAME: ${{ secrets.REPO_NAME }}
- BRANCH_REF: ${{ secrets.BRANCH_REF }}
- if: "${{ github.repository == env.REPO_NAME && github.ref == env.BRANCH_REF }}"
- run: echo "defined=true" >> "$GITHUB_OUTPUT"
+ - id: trigger-deploy
+ env:
+ REPO_NAME: ${{ secrets.REPO_NAME }}
+ BRANCH_REF: ${{ secrets.BRANCH_REF }}
+ if: "${{ github.repository == env.REPO_NAME && github.ref == env.BRANCH_REF }}"
+ run: echo "defined=true" >> "$GITHUB_OUTPUT"
deploy-docs:
name: Deploy Docs
@@ -51,14 +60,14 @@ jobs:
if: needs.check-secrets.outputs.trigger-deploy == 'true'
steps:
- - name: Download docs artifact
- uses: actions/download-artifact@v4
- with:
- name: docs-html
- path: ./docs/_build/html
+ - name: Download docs artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: docs-html
+ path: ./docs/_build
- - name: Deploy to gh-pages
- uses: peaceiris/actions-gh-pages@v3
- with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
- publish_dir: ./docs/_build/html
+ - name: Deploy to gh-pages
+ uses: peaceiris/actions-gh-pages@v3
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: ./docs/_build
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 244b910786..3c345a9d31 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -28,6 +28,7 @@ Guidelines for modifications:
* Mayank Mittal
* Nikita Rudin
* Pascal Roth
+* Sheikh Dawood
## Contributors
diff --git a/docs/Makefile b/docs/Makefile
index d4bb2cbb9e..ce33dad503 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -8,13 +8,11 @@ SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+.PHONY: multi-docs
+multi-docs:
+ @sphinx-multiversion "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS)
+ @cp _redirect/index.html $(BUILDDIR)/index.html
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+.PHONY: current-docs
+current-docs:
+ @$(SPHINXBUILD) "$(SOURCEDIR)" "$(BUILDDIR)/current" $(SPHINXOPTS)
diff --git a/docs/README.md b/docs/README.md
index c154e0ad0f..69a77a48d9 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,30 +1,75 @@
# Building Documentation
-We use [Sphinx](https://www.sphinx-doc.org/en/master/) with the [Book Theme](https://sphinx-book-theme.readthedocs.io/en/stable/) for maintaining the documentation.
+We use [Sphinx](https://www.sphinx-doc.org/en/master/) with the [Book Theme](https://sphinx-book-theme.readthedocs.io/en/stable/) for maintaining and generating our documentation.
-> **Note:** To build the documentation, we recommend creating a virtual environment to avoid any conflicts with system installed dependencies.
+> **Note:** To avoid dependency conflicts, we strongly recommend using a Python virtual environment to isolate the required dependencies from your system's global Python environment.
-Execute the following instructions to build the documentation (assumed from the top of the repository):
+## Current-Version Documentation
-1. Install the dependencies for [Sphinx](https://www.sphinx-doc.org/en/master/):
+This section describes how to build the documentation for the current version of the project.
- ```bash
- # enter the location where this readme exists
- cd docs
- # install dependencies
- pip install -r requirements.txt
- ```
+
+**Linux**
-2. Generate the documentation file via:
+```bash
+# 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
- ```bash
- # make the html version
- make html
- ```
+# 2. Build the current documentation
+make current-docs
-3. The documentation is now available at `docs/_build/html/index.html`:
+# 3. Open the current docs
+xdg-open _build/current/index.html
+```
+
- ```bash
- # open on default browser
- xdg-open _build/html/index.html
- ```
+**Windows**
+
+```batch
+:: 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
+
+:: 2. Build the current documentation
+make current-docs
+
+:: 3. Open the current docs
+start _build\current\index.html
+```
+
+## Multi-Version Documentation
+
+This section describes how to build the multi-version documentation, which includes previous tags and the main branch.
+
+**Linux**
+
+```bash
+# 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
+
+# 2. Build the multi-version documentation
+make multi-docs
+
+# 3. Open the multi-version docs
+xdg-open _build/index.html
+```
+
+**Windows**
+
+```batch
+:: 1. Navigate to the docs directory and install dependencies
+cd docs
+pip install -r requirements.txt
+
+:: 2. Build the multi-version documentation
+make multi-docs
+
+:: 3. Open the multi-version docs
+start _build\index.html
+```
+
diff --git a/docs/_redirect/index.html b/docs/_redirect/index.html
new file mode 100644
index 0000000000..5208597ed1
--- /dev/null
+++ b/docs/_redirect/index.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <!-- redirect target assumed here; point it at the default docs version -->
+    <meta http-equiv="refresh" content="0; url=main/index.html" />
+    <title>Redirecting to the latest Isaac Lab documentation</title>
+  </head>
+</html>
diff --git a/docs/_templates/versioning.html b/docs/_templates/versioning.html
new file mode 100644
index 0000000000..eb67be60e1
--- /dev/null
+++ b/docs/_templates/versioning.html
@@ -0,0 +1,21 @@
+{% if versions %}
+<div class="sidebar-versioning">
+  <p class="caption" role="heading"><span class="caption-text">Versions</span></p>
+  <ul>
+    {%- for item in versions.branches %}
+    <li><a href="{{ item.url }}">{{ item.name }}</a></li>
+    {%- endfor %}
+    {%- for item in versions.tags %}
+    <li><a href="{{ item.url }}">{{ item.name }}</a></li>
+    {%- endfor %}
+  </ul>
+</div>
+{% endif %}
diff --git a/docs/conf.py b/docs/conf.py
index a30e673280..4c7a777559 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -54,6 +54,8 @@
"sphinxcontrib.icon",
"sphinx_copybutton",
"sphinx_design",
+ "sphinx_tabs.tabs", # backwards compatibility for building docs on v1.0.0
+ "sphinx_multiversion",
]
# mathjax hacks
@@ -115,7 +117,7 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md", "licenses/*"]
+exclude_patterns = ["_build", "_redirect", "_templates", "Thumbs.db", ".DS_Store", "README.md", "licenses/*"]
# Mock out modules that are not available on RTD
autodoc_mock_imports = [
@@ -190,7 +192,7 @@
import sphinx_book_theme
-html_title = "Isaac Lab documentation"
+html_title = "Isaac Lab Documentation"
html_theme_path = [sphinx_book_theme.get_html_theme_path()]
html_theme = "sphinx_book_theme"
html_favicon = "source/_static/favicon.ico"
@@ -213,7 +215,7 @@
"show_toc_level": 1,
"use_sidenotes": True,
"logo": {
- "text": "Isaac Lab documentation",
+ "text": "Isaac Lab Documentation",
"image_light": "source/_static/NVIDIA-logo-white.png",
"image_dark": "source/_static/NVIDIA-logo-black.png",
},
@@ -240,7 +242,19 @@
"icon_links_label": "Quick Links",
}
-html_sidebars = {"**": ["navbar-logo.html", "icon-links.html", "search-field.html", "sbt-sidebar-nav.html"]}
+templates_path = [
+ "_templates",
+]
+
+# Whitelist pattern for remotes
+smv_remote_whitelist = r"^.*$"
+# Whitelist pattern for branches (set to None to ignore all branches)
+smv_branch_whitelist = os.getenv("SMV_BRANCH_WHITELIST", r"^(main|devel)$")
+# Whitelist pattern for tags (set to None to ignore all tags)
+smv_tag_whitelist = os.getenv("SMV_TAG_WHITELIST", r"^v[1-9]\d*\.\d+\.\d+$")
+html_sidebars = {
+ "**": ["navbar-logo.html", "versioning.html", "icon-links.html", "search-field.html", "sbt-sidebar-nav.html"]
+}
# -- Advanced configuration -------------------------------------------------
diff --git a/docs/licenses/dependencies/sphinx-multiversion-license.txt b/docs/licenses/dependencies/sphinx-multiversion-license.txt
new file mode 100644
index 0000000000..172d6b3f5d
--- /dev/null
+++ b/docs/licenses/dependencies/sphinx-multiversion-license.txt
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2020, Jan Holthuis
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/docs/make.bat b/docs/make.bat
index 2119f51099..cdaf22f257 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -2,34 +2,63 @@
pushd %~dp0
-REM Command file for Sphinx documentation
+REM Command file to build Sphinx documentation
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
set SOURCEDIR=.
set BUILDDIR=_build
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
- exit /b 1
+REM Select the default builder for the requested target.
+REM Note: this must happen outside the parenthesized blocks below, since
+REM %SPHINXBUILD% inside a block is expanded when the block is parsed.
+if "%1" == "multi-docs" if "%SPHINXBUILD%" == "" set SPHINXBUILD=sphinx-multiversion
+if "%1" == "current-docs" if "%SPHINXBUILD%" == "" set SPHINXBUILD=sphinx-build
+
+REM Check if a specific target was passed
+if "%1" == "multi-docs" (
+ %SPHINXBUILD% >NUL 2>NUL
+ if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-multiversion' command was not found. Make sure you have
+ echo.sphinx-multiversion installed, then set the SPHINXBUILD environment
+ echo.variable to point to the full path of the 'sphinx-multiversion'
+ echo.executable. Alternatively you may add its directory to PATH.
+ echo.
+ echo.If you don't have sphinx-multiversion installed, grab it from
+ echo.https://pypi.org/project/sphinx-multiversion/
+ exit /b 1
+ )
+ %SPHINXBUILD% %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+ REM Copy the redirect index.html to the build directory
+ copy _redirect\index.html %BUILDDIR%\index.html
+ goto end
)
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
+if "%1" == "current-docs" (
+ REM Check if SPHINXBUILD is set, if not default to sphinx-build
+ if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+ )
+ %SPHINXBUILD% >NUL 2>NUL
+ if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+ )
+ %SPHINXBUILD% %SOURCEDIR% %BUILDDIR%\current %SPHINXOPTS% %O%
+ goto end
+)
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+REM If no valid target is passed, show usage instructions
+echo.
+echo.Usage:
+echo. make.bat multi-docs - To build the multi-version documentation.
+echo. make.bat current-docs - To build the current documentation.
+echo.
:end
popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 33917bd712..13b2bfe9d6 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,6 +7,8 @@ sphinx-copybutton
sphinx-icon
sphinx_design
sphinxemoji
+sphinx-tabs # backwards compatibility for building docs on v1.0.0
+sphinx-multiversion==0.2.4
# basic python
numpy
diff --git a/docs/source/features/hydra.rst b/docs/source/features/hydra.rst
index 2fa5d2d706..89673dbe35 100644
--- a/docs/source/features/hydra.rst
+++ b/docs/source/features/hydra.rst
@@ -115,7 +115,7 @@ For example, for the configuration of the Cartpole camera depth environment:
:emphasize-lines: 16
If the user were to modify the width of the camera, i.e. ``env.tiled_camera.width=128``, then the parameter
-``env.num_observations=10240`` (1*80*128) must be updated and given as input as well.
+``env.observation_space=[80,128,1]`` must be updated and given as input as well.
Similarly, the ``__post_init__`` method is not updated with the command line inputs. In the ``LocomotionVelocityRoughEnvCfg``, for example,
the post init update is as follows:
diff --git a/docs/source/migration/migrating_from_isaacgymenvs.rst b/docs/source/migration/migrating_from_isaacgymenvs.rst
index c903d83c3d..2073854d32 100644
--- a/docs/source/migration/migrating_from_isaacgymenvs.rst
+++ b/docs/source/migration/migrating_from_isaacgymenvs.rst
@@ -45,9 +45,9 @@ Below is an example skeleton of a task config class:
# env
decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
# task-specific parameters
...
@@ -135,9 +135,9 @@ The following parameters must be set for each environment config:
decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
Note that the maximum episode length parameter (now ``episode_length_s``) is in seconds instead of steps as it was
in IsaacGymEnvs. To convert between step count to seconds, use the equation:
@@ -569,9 +569,9 @@ Task Config
| | decimation = 2 |
| asset: | episode_length_s = 5.0 |
| assetRoot: "../../assets" | action_scale = 100.0 # [N] |
-| assetFileName: "urdf/cartpole.urdf" | num_actions = 1 |
-| | num_observations = 4 |
-| enableCameraSensors: False | num_states = 0 |
+| assetFileName: "urdf/cartpole.urdf" | action_space = 1 |
+| | observation_space = 4 |
+| enableCameraSensors: False | state_space = 0 |
| | # reset |
| sim: | max_cart_pos = 3.0 |
| dt: 0.0166 # 1/60 s | initial_pole_angle_range = [-0.25, 0.25] |
diff --git a/docs/source/migration/migrating_from_omniisaacgymenvs.rst b/docs/source/migration/migrating_from_omniisaacgymenvs.rst
index 50f9d5b9d6..cbda1e8d45 100644
--- a/docs/source/migration/migrating_from_omniisaacgymenvs.rst
+++ b/docs/source/migration/migrating_from_omniisaacgymenvs.rst
@@ -46,9 +46,9 @@ Below is an example skeleton of a task config class:
# env
decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
# task-specific parameters
...
@@ -158,9 +158,9 @@ The following parameters must be set for each environment config:
decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
RL Config Setup
@@ -501,9 +501,9 @@ Task config in Isaac Lab can be split into the main task configuration class and
| clipObservations: 5.0 | decimation = 2 |
| clipActions: 1.0 | episode_length_s = 5.0 |
| controlFrequencyInv: 2 # 60 Hz | action_scale = 100.0 # [N] |
-| | num_actions = 1 |
-| sim: | num_observations = 4 |
-| | num_states = 0 |
+| | action_space = 1 |
+| sim: | observation_space = 4 |
+| | state_space = 0 |
| dt: 0.0083 # 1/120 s | # reset |
| use_gpu_pipeline: ${eq:${...pipeline},"gpu"} | max_cart_pos = 3.0 |
| gravity: [0.0, 0.0, -9.81] | initial_pole_angle_range = [-0.25, 0.25] |
diff --git a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
index 8073886840..215ef3bd4f 100644
--- a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
+++ b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
@@ -28,8 +28,8 @@
# [end-h1_env-import]
# [start-h1_env-spaces]
-num_actions = 19
-num_observations = 69
+action_space = 19
+observation_space = 69
# [end-h1_env-spaces]
# [start-h1_env-robot]
diff --git a/docs/source/tutorials/03_envs/create_direct_rl_env.rst b/docs/source/tutorials/03_envs/create_direct_rl_env.rst
index ab5136106a..a4b945be9d 100644
--- a/docs/source/tutorials/03_envs/create_direct_rl_env.rst
+++ b/docs/source/tutorials/03_envs/create_direct_rl_env.rst
@@ -48,9 +48,9 @@ config should define the number of actions and observations for the environment.
@configclass
class CartpoleEnvCfg(DirectRLEnvCfg):
...
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
The config class can also be used to define task-specific attributes, such as scaling for reward terms
and thresholds for reset conditions.
diff --git a/isaaclab.bat b/isaaclab.bat
index b415ef1a13..09c6818e37 100644
--- a/isaaclab.bat
+++ b/isaaclab.bat
@@ -519,9 +519,9 @@ if "%arg%"=="-i" (
call :extract_python_exe
pushd %ISAACLAB_PATH%\docs
call !python_exe! -m pip install -r requirements.txt >nul
- call !python_exe! -m sphinx -b html -d _build\doctrees . _build\html
+ call !python_exe! -m sphinx -b html -d _build\doctrees . _build\current
echo [INFO] To open documentation on default browser, run:
- echo xdg-open "%ISAACLAB_PATH%\docs\_build\html\index.html"
+ echo xdg-open "%ISAACLAB_PATH%\docs\_build\current\index.html"
popd >nul
shift
goto :end
diff --git a/isaaclab.sh b/isaaclab.sh
index a604706e70..22374fe0ff 100755
--- a/isaaclab.sh
+++ b/isaaclab.sh
@@ -396,10 +396,10 @@ while [[ $# -gt 0 ]]; do
cd ${ISAACLAB_PATH}/docs
${python_exe} -m pip install -r requirements.txt > /dev/null
# build the documentation
- ${python_exe} -m sphinx -b html -d _build/doctrees . _build/html
+ ${python_exe} -m sphinx -b html -d _build/doctrees . _build/current
# open the documentation
echo -e "[INFO] To open documentation on default browser, run:"
- echo -e "\n\t\txdg-open $(pwd)/_build/html/index.html\n"
+ echo -e "\n\t\txdg-open $(pwd)/_build/current/index.html\n"
# exit neatly
cd - > /dev/null
shift # past argument
diff --git a/source/extensions/omni.isaac.lab/config/extension.toml b/source/extensions/omni.isaac.lab/config/extension.toml
index 517ac00610..a633e64fb8 100644
--- a/source/extensions/omni.isaac.lab/config/extension.toml
+++ b/source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
[package]
# Note: Semantic Versioning is used: https://semver.org/
-version = "0.25.1"
+version = "0.25.2"
# Description
title = "Isaac Lab framework for Robot Learning"
diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
index e78abdb068..12e4b5b45a 100644
--- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,6 +1,25 @@
Changelog
---------
+0.25.2 (2024-10-16)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added support for different Gymnasium spaces (``Box``, ``Discrete``, ``MultiDiscrete``, ``Tuple`` and ``Dict``)
+ to define observation, action and state spaces in the direct workflow.
+* Added :func:`sample_space` to environment utils to sample supported spaces where data containers are torch tensors.
+
+Changed
+^^^^^^^
+
+* Mark the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectRLEnvCfg` as deprecated
+ in favor of :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively.
+* Mark the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectMARLEnvCfg` as deprecated
+ in favor of :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively.
+
+
0.25.1 (2024-10-10)
~~~~~~~~~~~~~~~~~~~
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py
index 97701e50cc..667c0b5049 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py
@@ -52,4 +52,4 @@
from .manager_based_env_cfg import ManagerBasedEnvCfg
from .manager_based_rl_env import ManagerBasedRLEnv
from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg
-from .utils import multi_agent_to_single_agent, multi_agent_with_one_agent
+from .utils.marl import multi_agent_to_single_agent, multi_agent_with_one_agent
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py
index d0c99f8ad8..7d6b02d309 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py
@@ -5,6 +5,7 @@
from __future__ import annotations
+import gymnasium as gym
import torch
from typing import Dict, Literal, TypeVar
@@ -62,6 +63,9 @@ class ViewerCfg:
# Types.
##
+SpaceType = TypeVar("SpaceType", gym.spaces.Space, int, set, tuple, list, dict)
+"""A sentinel object to indicate a valid space type to specify states, observations and actions."""
+
VecEnvObs = Dict[str, torch.Tensor | Dict[str, torch.Tensor]]
"""Observation returned by the environment.
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
index f58a80dd47..5bcedb0591 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py
@@ -14,6 +14,7 @@
import weakref
from abc import abstractmethod
from collections.abc import Sequence
+from dataclasses import MISSING
from typing import Any, ClassVar
import omni.isaac.core.utils.torch as torch_utils
@@ -30,6 +31,7 @@
from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType
from .direct_marl_env_cfg import DirectMARLEnvCfg
from .ui import ViewportCameraController
+from .utils.spaces import sample_space, spec_to_gym_space
class DirectMARLEnv:
@@ -164,10 +166,6 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
# -- init buffers
self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
- self.actions = {
- agent: torch.zeros(self.num_envs, self.cfg.num_actions[agent], device=self.sim.device)
- for agent in self.cfg.possible_agents
- }
# setup the observation, state and action spaces
self._configure_env_spaces()
@@ -406,16 +404,19 @@ def state(self) -> StateType | None:
"""Returns the state for the environment.
The state-space is used for centralized training or asymmetric actor-critic architectures. It is configured
- using the :attr:`DirectMARLEnvCfg.num_states` parameter.
+ using the :attr:`DirectMARLEnvCfg.state_space` parameter.
Returns:
- The states for the environment, or None if :attr:`DirectMARLEnvCfg.num_states` parameter is zero.
+ The states for the environment, or None if :attr:`DirectMARLEnvCfg.state_space` parameter is zero.
"""
- if not self.cfg.num_states:
+ if not self.cfg.state_space:
return None
# concatenate and return the observations as state
- if self.cfg.num_states < 0:
- self.state_buf = torch.cat([self.obs_dict[agent] for agent in self.cfg.possible_agents], dim=-1)
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
+ if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0:
+ self.state_buf = torch.cat(
+ [self.obs_dict[agent].reshape(self.num_envs, -1) for agent in self.cfg.possible_agents], dim=-1
+ )
# compute and return custom environment state
else:
self.state_buf = self._get_states()
@@ -568,25 +569,45 @@ def _configure_env_spaces(self):
self.agents = self.cfg.possible_agents
self.possible_agents = self.cfg.possible_agents
+ # show deprecation message and overwrite configuration
+ if self.cfg.num_actions is not None:
+ omni.log.warn("DirectMARLEnvCfg.num_actions is deprecated. Use DirectMARLEnvCfg.action_spaces instead.")
+ if isinstance(self.cfg.action_spaces, type(MISSING)):
+ self.cfg.action_spaces = self.cfg.num_actions
+ if self.cfg.num_observations is not None:
+ omni.log.warn(
+ "DirectMARLEnvCfg.num_observations is deprecated. Use DirectMARLEnvCfg.observation_spaces instead."
+ )
+ if isinstance(self.cfg.observation_spaces, type(MISSING)):
+ self.cfg.observation_spaces = self.cfg.num_observations
+ if self.cfg.num_states is not None:
+ omni.log.warn("DirectMARLEnvCfg.num_states is deprecated. Use DirectMARLEnvCfg.state_space instead.")
+ if isinstance(self.cfg.state_space, type(MISSING)):
+ self.cfg.state_space = self.cfg.num_states
+
# set up observation and action spaces
self.observation_spaces = {
- agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_observations[agent],))
- for agent in self.cfg.possible_agents
+ agent: spec_to_gym_space(self.cfg.observation_spaces[agent]) for agent in self.cfg.possible_agents
}
self.action_spaces = {
- agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_actions[agent],))
- for agent in self.cfg.possible_agents
+ agent: spec_to_gym_space(self.cfg.action_spaces[agent]) for agent in self.cfg.possible_agents
}
# set up state space
- if not self.cfg.num_states:
+ if not self.cfg.state_space:
self.state_space = None
- if self.cfg.num_states < 0:
- self.state_space = gym.spaces.Box(
- low=-np.inf, high=np.inf, shape=(sum(self.cfg.num_observations.values()),)
+ if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0:
+ self.state_space = gym.spaces.flatten_space(
+ gym.spaces.Tuple([self.observation_spaces[agent] for agent in self.cfg.possible_agents])
)
else:
- self.state_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_states,))
+ self.state_space = spec_to_gym_space(self.cfg.state_space)
+
+ # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+ self.actions = {
+ agent: sample_space(self.action_spaces[agent], self.sim.device, batch_size=self.num_envs, fill_value=0)
+ for agent in self.cfg.possible_agents
+ }
def _reset_idx(self, env_ids: Sequence[int]):
"""Reset environments based on specified indices.
@@ -664,8 +685,8 @@ def _get_observations(self) -> dict[AgentID, ObsType]:
def _get_states(self) -> StateType:
"""Compute and return the states for the environment.
- This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.num_states`
- parameter is greater than zero.
+ This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.state_space`
+ parameter defines a custom state space (i.e., it is neither zero nor a negative number).
Returns:
The states for the environment.
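For reference, when ``state_space`` is a negative integer the state is now derived by flattening the tuple of all agents' observation spaces instead of summing hand-maintained counts. A minimal sketch of that space arithmetic (the agent spaces are illustrative):

```python
import gymnasium as gym

# illustrative per-agent observation spaces
observation_spaces = {
    "cart": gym.spaces.Box(-1.0, 1.0, shape=(4,)),
    "pole": gym.spaces.Box(-1.0, 1.0, shape=(3,)),
}

# equivalent of the cfg.state_space < 0 branch above
state_space = gym.spaces.flatten_space(gym.spaces.Tuple(observation_spaces.values()))
assert state_space.shape == (7,)  # 4 + 3 flattened observation dimensions
```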
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
index 3dcf364f5c..40ecb64297 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py
@@ -10,7 +10,7 @@
from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.noise import NoiseModelCfg
-from .common import AgentID, ViewerCfg
+from .common import AgentID, SpaceType, ViewerCfg
from .ui import BaseEnvWindow
@@ -104,11 +104,39 @@ class DirectMARLEnvCfg:
Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details.
"""
- num_observations: dict[AgentID, int] = MISSING
- """The dimension of the observation space from each agent."""
+ observation_spaces: dict[AgentID, SpaceType] = MISSING
+ """Observation space definition for each agent.
+
+ The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+ specification of the space is desired) or basic Python data types (for simplicity).
+
+ .. list-table::
+ :header-rows: 1
+
+ * - Gymnasium space
+ - Python data type
+ * - :class:`~gymnasium.spaces.Box`
+ - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+ * - :class:`~gymnasium.spaces.Discrete`
+ - Single-element set (e.g.: ``{2}``)
+ * - :class:`~gymnasium.spaces.MultiDiscrete`
+ - List of single-element sets (e.g.: ``[{2}, {5}]``)
+ * - :class:`~gymnasium.spaces.Dict`
+ - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+ * - :class:`~gymnasium.spaces.Tuple`
+ - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+ """
- num_states: int = MISSING
- """The dimension of the state space from each environment instance.
+ num_observations: dict[AgentID, int] | None = None
+ """The dimension of the observation space for each agent.
+
+ .. warning::
+
+ This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.observation_spaces` instead.
+ """
+
+ state_space: SpaceType = MISSING
+ """State space definition.
The following values are supported:
@@ -116,6 +144,33 @@ class DirectMARLEnvCfg:
* 0: No state-space will be constructed (`state_space` is None).
This is useful to save computational resources when the algorithm to be trained does not need it.
* greater than 0: Custom state-space dimension to be provided by the task implementation.
+
+ The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+ specification of the space is desired) or basic Python data types (for simplicity).
+
+ .. list-table::
+ :header-rows: 1
+
+ * - Gymnasium space
+ - Python data type
+ * - :class:`~gymnasium.spaces.Box`
+ - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+ * - :class:`~gymnasium.spaces.Discrete`
+ - Single-element set (e.g.: ``{2}``)
+ * - :class:`~gymnasium.spaces.MultiDiscrete`
+ - List of single-element sets (e.g.: ``[{2}, {5}]``)
+ * - :class:`~gymnasium.spaces.Dict`
+ - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+ * - :class:`~gymnasium.spaces.Tuple`
+ - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+ """
+
+ num_states: int | None = None
+ """The dimension of the state space from each environment instance.
+
+ .. warning::
+
+ This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.state_space` instead.
"""
observation_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None
@@ -124,8 +179,36 @@ class DirectMARLEnvCfg:
Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details.
"""
- num_actions: dict[AgentID, int] = MISSING
- """The dimension of the action space for each agent."""
+ action_spaces: dict[AgentID, SpaceType] = MISSING
+ """Action space definition for each agent.
+
+ The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+ specification of the space is desired) or basic Python data types (for simplicity).
+
+ .. list-table::
+ :header-rows: 1
+
+ * - Gymnasium space
+ - Python data type
+ * - :class:`~gymnasium.spaces.Box`
+ - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+ * - :class:`~gymnasium.spaces.Discrete`
+ - Single-element set (e.g.: ``{2}``)
+ * - :class:`~gymnasium.spaces.MultiDiscrete`
+ - List of single-element sets (e.g.: ``[{2}, {5}]``)
+ * - :class:`~gymnasium.spaces.Dict`
+ - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+ * - :class:`~gymnasium.spaces.Tuple`
+ - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+ """
+
+ num_actions: dict[AgentID, int] | None = None
+ """The dimension of the action space for each agent.
+
+ .. warning::
+
+ This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.action_spaces` instead.
+ """
action_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None
"""The noise model applied to the actions provided to the environment. Default is None, which means no noise is added.
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py
index 5663977fda..2a0e88cb63 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py
@@ -14,6 +14,7 @@
import weakref
from abc import abstractmethod
from collections.abc import Sequence
+from dataclasses import MISSING
from typing import Any, ClassVar
import omni.isaac.core.utils.torch as torch_utils
@@ -30,6 +31,7 @@
from .common import VecEnvObs, VecEnvStepReturn
from .direct_rl_env_cfg import DirectRLEnvCfg
from .ui import ViewportCameraController
+from .utils.spaces import sample_space, spec_to_gym_space
class DirectRLEnv(gym.Env):
@@ -171,7 +173,6 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs
self.reset_terminated = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool)
self.reset_time_outs = torch.zeros_like(self.reset_terminated)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
- self.actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.sim.device)
# setup the action and observation spaces for Gym
self._configure_gym_env_spaces()
@@ -507,27 +508,40 @@ def set_debug_vis(self, debug_vis: bool) -> bool:
def _configure_gym_env_spaces(self):
"""Configure the action and observation spaces for the Gym environment."""
- # observation space (unbounded since we don't impose any limits)
- self.num_actions = self.cfg.num_actions
- self.num_observations = self.cfg.num_observations
- self.num_states = self.cfg.num_states
+ # show deprecation message and overwrite configuration
+ if self.cfg.num_actions is not None:
+ omni.log.warn("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.")
+ if isinstance(self.cfg.action_space, type(MISSING)):
+ self.cfg.action_space = self.cfg.num_actions
+ if self.cfg.num_observations is not None:
+ omni.log.warn(
+ "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead."
+ )
+ if isinstance(self.cfg.observation_space, type(MISSING)):
+ self.cfg.observation_space = self.cfg.num_observations
+ if self.cfg.num_states is not None:
+ omni.log.warn("DirectRLEnvCfg.num_states is deprecated. Use DirectRLEnvCfg.state_space instead.")
+ if isinstance(self.cfg.state_space, type(MISSING)):
+ self.cfg.state_space = self.cfg.num_states
# set up spaces
self.single_observation_space = gym.spaces.Dict()
- self.single_observation_space["policy"] = gym.spaces.Box(
- low=-np.inf, high=np.inf, shape=(self.num_observations,)
- )
- self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,))
+ self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space)
+ self.single_action_space = spec_to_gym_space(self.cfg.action_space)
# batch the spaces for vectorized environments
self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs)
self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
# optional state space for asymmetric actor-critic architectures
- if self.num_states > 0:
- self.single_observation_space["critic"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_states,))
+ self.state_space = None
+ # note: the isinstance check keeps non-integer specs (lists, sets, dicts, spaces) valid here
+ if not (isinstance(self.cfg.state_space, int) and self.cfg.state_space <= 0):
+ self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space)
self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs)
+ # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+ self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0)
+
def _reset_idx(self, env_ids: Sequence[int]):
"""Reset environments based on specified indices.
@@ -601,7 +615,7 @@ def _get_states(self) -> VecEnvObs | None:
"""Compute and return the states for the environment.
The state-space is used for asymmetric actor-critic architectures. It is configured
- using the :attr:`DirectRLEnvCfg.num_states` parameter.
+ using the :attr:`DirectRLEnvCfg.state_space` parameter.
Returns:
The states for the environment. If the environment does not have a state-space, the function
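The vectorization step is unchanged: the single-environment space built by `spec_to_gym_space` is still batched with `gym.vector.utils.batch_space`. A small sketch of the new path, assuming a discrete action spec (`{2}`):

```python
import gymnasium as gym

from omni.isaac.lab.envs.utils.spaces import spec_to_gym_space

num_envs = 16
single_action_space = spec_to_gym_space({2})  # shorthand for Discrete(2)
action_space = gym.vector.utils.batch_space(single_action_space, num_envs)

assert isinstance(single_action_space, gym.spaces.Discrete)
assert isinstance(action_space, gym.spaces.MultiDiscrete)  # one Discrete per env
```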
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py
index ad8c6c18c8..e86b366cc2 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py
@@ -10,7 +10,7 @@
from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.noise import NoiseModelCfg
-from .common import ViewerCfg
+from .common import SpaceType, ViewerCfg
from .ui import BaseEnvWindow
@@ -104,13 +104,68 @@ class DirectRLEnvCfg:
Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details.
"""
- num_observations: int = MISSING
- """The dimension of the observation space from each environment instance."""
+ observation_space: SpaceType = MISSING
+ """Observation space definition.
+
+ The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+ specification of the space is desired) or basic Python data types (for simplicity).
+
+ .. list-table::
+ :header-rows: 1
+
+ * - Gymnasium space
+ - Python data type
+ * - :class:`~gymnasium.spaces.Box`
+ - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+ * - :class:`~gymnasium.spaces.Discrete`
+ - Single-element set (e.g.: ``{2}``)
+ * - :class:`~gymnasium.spaces.MultiDiscrete`
+ - List of single-element sets (e.g.: ``[{2}, {5}]``)
+ * - :class:`~gymnasium.spaces.Dict`
+ - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+ * - :class:`~gymnasium.spaces.Tuple`
+ - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+ """
+
+ num_observations: int | None = None
+ """The dimension of the observation space from each environment instance.
+
+ .. warning::
+
+ This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.observation_space` instead.
+ """
- num_states: int = 0
- """The dimension of the state-space from each environment instance. Default is 0, which means no state-space is defined.
+ state_space: SpaceType = MISSING
+ """State space definition.
This is useful for asymmetric actor-critic and defines the observation space for the critic.
+
+ The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+ specification of the space is desired) or basic Python data types (for simplicity).
+
+ .. list-table::
+ :header-rows: 1
+
+ * - Gymnasium space
+ - Python data type
+ * - :class:`~gymnasium.spaces.Box`
+ - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+ * - :class:`~gymnasium.spaces.Discrete`
+ - Single-element set (e.g.: ``{2}``)
+ * - :class:`~gymnasium.spaces.MultiDiscrete`
+ - List of single-element sets (e.g.: ``[{2}, {5}]``)
+ * - :class:`~gymnasium.spaces.Dict`
+ - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+ * - :class:`~gymnasium.spaces.Tuple`
+ - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+ """
+
+ num_states: int | None = None
+ """The dimension of the state-space from each environment instance.
+
+ .. warning::
+
+ This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.state_space` instead.
"""
observation_noise_model: NoiseModelCfg | None = None
@@ -119,8 +174,36 @@ class DirectRLEnvCfg:
Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details.
"""
- num_actions: int = MISSING
- """The dimension of the action space for each environment."""
+ action_space: SpaceType = MISSING
+ """Action space definition.
+
+ The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed
+ specification of the space is desired) or basic Python data types (for simplicity).
+
+ .. list-table::
+ :header-rows: 1
+
+ * - Gymnasium space
+ - Python data type
+ * - :class:`~gymnasium.spaces.Box`
+ - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``)
+ * - :class:`~gymnasium.spaces.Discrete`
+ - Single-element set (e.g.: ``{2}``)
+ * - :class:`~gymnasium.spaces.MultiDiscrete`
+ - List of single-element sets (e.g.: ``[{2}, {5}]``)
+ * - :class:`~gymnasium.spaces.Dict`
+ - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``)
+ * - :class:`~gymnasium.spaces.Tuple`
+ - Tuple (e.g.: ``(7, [64, 64, 3], {2})``)
+ """
+
+ num_actions: int | None = None
+ """The dimension of the action space for each environment.
+
+ .. warning::
+
+ This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.action_space` instead.
+ """
action_noise_model: NoiseModelCfg | None = None
"""The noise model applied to the actions provided to the environment. Default is None, which means no noise is added.
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py
new file mode 100644
index 0000000000..913e1edb90
--- /dev/null
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-package for environment utils."""
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
similarity index 76%
rename from source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py
rename to source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
index cacbdeaf81..46519048ae 100644
--- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py
@@ -9,9 +9,9 @@
import torch
from typing import Any
-from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn
-from .direct_marl_env import DirectMARLEnv
-from .direct_rl_env import DirectRLEnv
+from ..common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn
+from ..direct_marl_env import DirectMARLEnv
+from ..direct_rl_env import DirectRLEnv
def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool = False) -> DirectRLEnv:
@@ -39,7 +39,7 @@ def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool =
Raises:
AssertionError: If the environment state cannot be used as observation since it was explicitly defined
- as unconstructed (:attr:`DirectMARLEnvCfg.num_states`).
+ as unconstructed (:attr:`DirectMARLEnvCfg.state_space`).
"""
class Env(DirectRLEnv):
@@ -49,7 +49,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
# check if it is possible to use the multi-agent environment state as single-agent observation
self._state_as_observation = state_as_observation
if self._state_as_observation:
- assert self.env.cfg.num_states != 0, (
+ assert self.env.cfg.state_space != 0, (
"The environment state cannot be used as observation since it was explicitly defined as"
" unconstructed"
)
@@ -58,18 +58,17 @@ def __init__(self, env: DirectMARLEnv) -> None:
self.cfg = self.env.cfg
self.sim = self.env.sim
self.scene = self.env.scene
- self.num_actions = sum(self.env.cfg.num_actions.values())
- self.num_observations = sum(self.env.cfg.num_observations.values())
- self.num_states = self.env.cfg.num_states
self.single_observation_space = gym.spaces.Dict()
if self._state_as_observation:
self.single_observation_space["policy"] = self.env.state_space
else:
- self.single_observation_space["policy"] = gym.spaces.Box(
- low=-np.inf, high=np.inf, shape=(self.num_observations,)
+ self.single_observation_space["policy"] = gym.spaces.flatten_space(
+ gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents])
)
- self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,))
+ self.single_action_space = gym.spaces.flatten_space(
+ gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents])
+ )
# batch the spaces for vectorized environments
self.observation_space = gym.vector.utils.batch_space(
@@ -84,18 +83,25 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None)
if self._state_as_observation:
obs = {"policy": self.env.state()}
# concatenate agents' observations
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
else:
- obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+ obs = {
+ "policy": torch.cat(
+ [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+ )
+ }
return obs, extras
def step(self, action: torch.Tensor) -> VecEnvStepReturn:
# split single-agent actions to build the multi-agent ones
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
index = 0
_actions = {}
for agent in self.env.possible_agents:
- _actions[agent] = action[:, index : index + self.env.cfg.num_actions[agent]]
- index += self.env.cfg.num_actions[agent]
+ delta = gym.spaces.flatdim(self.env.action_spaces[agent])
+ _actions[agent] = action[:, index : index + delta]
+ index += delta
# step the environment
obs, rewards, terminated, time_outs, extras = self.env.step(_actions)
@@ -104,8 +110,13 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
if self._state_as_observation:
obs = {"policy": self.env.state()}
# concatenate agents' observations
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
else:
- obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+ obs = {
+ "policy": torch.cat(
+ [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+ )
+ }
# process environment outputs to return single-agent data
rewards = sum(rewards.values())
@@ -147,7 +158,7 @@ def multi_agent_with_one_agent(env: DirectMARLEnv, state_as_observation: bool =
Raises:
AssertionError: If the environment state cannot be used as observation since it was explicitly defined
- as unconstructed (:attr:`DirectMARLEnvCfg.num_states`).
+ as unconstructed (:attr:`DirectMARLEnvCfg.state_space`).
"""
class Env(DirectMARLEnv):
@@ -157,7 +168,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
# check if it is possible to use the multi-agent environment state as agent observation
self._state_as_observation = state_as_observation
if self._state_as_observation:
- assert self.env.cfg.num_states != 0, (
+ assert self.env.cfg.state_space != 0, (
"The environment state cannot be used as observation since it was explicitly defined as"
" unconstructed"
)
@@ -170,13 +181,13 @@ def __init__(self, env: DirectMARLEnv) -> None:
self._exported_observation_spaces = {self._agent_id: self.env.state_space}
else:
self._exported_observation_spaces = {
- self._agent_id: gym.spaces.Box(
- low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_observations.values()),)
+ self._agent_id: gym.spaces.flatten_space(
+ gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents])
)
}
self._exported_action_spaces = {
- self._agent_id: gym.spaces.Box(
- low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_actions.values()),)
+ self._agent_id: gym.spaces.flatten_space(
+ gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents])
)
}
@@ -208,18 +219,25 @@ def reset(
if self._state_as_observation:
obs = {self._agent_id: self.env.state()}
# concatenate agents' observations
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
else:
- obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+ obs = {
+ self._agent_id: torch.cat(
+ [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+ )
+ }
return obs, extras
def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
# split agent actions to build the multi-agent ones
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
index = 0
_actions = {}
for agent in self.env.possible_agents:
- _actions[agent] = actions[self._agent_id][:, index : index + self.env.cfg.num_actions[agent]]
- index += self.env.cfg.num_actions[agent]
+ delta = gym.spaces.flatdim(self.env.action_spaces[agent])
+ _actions[agent] = actions[self._agent_id][:, index : index + delta]
+ index += delta
# step the environment
obs, rewards, terminated, time_outs, extras = self.env.step(_actions)
@@ -228,8 +246,13 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn:
if self._state_as_observation:
obs = {self._agent_id: self.env.state()}
# concatenate agents' observations
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
else:
- obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)}
+ obs = {
+ self._agent_id: torch.cat(
+ [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1
+ )
+ }
# process environment outputs to return agent data
rewards = {self._agent_id: sum(rewards.values())}
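The action-splitting bookkeeping above now sizes each agent's slice with `gym.spaces.flatdim` rather than the deprecated per-agent counts. The same logic in isolation (spaces and names are illustrative):

```python
import gymnasium as gym
import torch

# illustrative per-agent action spaces
action_spaces = {
    "cart": gym.spaces.Box(-1.0, 1.0, shape=(1,)),
    "arm": gym.spaces.Box(-1.0, 1.0, shape=(7,)),
}

# split one flat action tensor (4 envs) back into per-agent tensors
flat = torch.zeros(4, sum(gym.spaces.flatdim(s) for s in action_spaces.values()))
index, actions = 0, {}
for agent, space in action_spaces.items():
    delta = gym.spaces.flatdim(space)  # flattened action dimensions for this agent
    actions[agent] = flat[:, index : index + delta]
    index += delta

assert actions["cart"].shape == (4, 1) and actions["arm"].shape == (4, 7)
```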
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py
new file mode 100644
index 0000000000..8604392ec6
--- /dev/null
+++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+import numpy as np
+import torch
+from typing import Any
+
+from ..common import SpaceType
+
+
+def spec_to_gym_space(spec: SpaceType) -> gym.spaces.Space:
+ """Generate an appropriate Gymnasium space according to the given space specification.
+
+ Args:
+ spec: Space specification.
+
+ Returns:
+ Gymnasium space.
+
+ Raises:
+ ValueError: If the given space specification is not valid/supported.
+ """
+ if isinstance(spec, gym.spaces.Space):
+ return spec
+ # fundamental spaces
+ # Box
+ elif isinstance(spec, int):
+ return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,))
+ elif isinstance(spec, list) and all(isinstance(x, int) for x in spec):
+ return gym.spaces.Box(low=-np.inf, high=np.inf, shape=spec)
+ # Discrete
+ elif isinstance(spec, set) and len(spec) == 1:
+ return gym.spaces.Discrete(n=next(iter(spec)))
+ # MultiDiscrete
+ elif isinstance(spec, list) and all(isinstance(x, set) and len(x) == 1 for x in spec):
+ return gym.spaces.MultiDiscrete(nvec=[next(iter(x)) for x in spec])
+ # composite spaces
+ # Tuple
+ elif isinstance(spec, tuple):
+ return gym.spaces.Tuple([spec_to_gym_space(x) for x in spec])
+ # Dict
+ elif isinstance(spec, dict):
+ return gym.spaces.Dict({k: spec_to_gym_space(v) for k, v in spec.items()})
+ raise ValueError(f"Unsupported space specification: {spec}")
+
+
+def sample_space(space: gym.spaces.Space, device: str, batch_size: int = -1, fill_value: float | None = None) -> Any:
+ """Sample a Gymnasium space where the data container are PyTorch tensors.
+
+ Args:
+ space: Gymnasium space.
+ device: The device where the tensor should be created.
+ batch_size: Batch size. If the specified value is greater than zero, a batched space will be created and sampled from it.
+ fill_value: The value to fill the created tensors with. If None (default value), tensors will keep their random values.
+
+ Returns:
+ Tensorized sampled space.
+ """
+
+ def tensorize(s, x):
+ if isinstance(s, gym.spaces.Box):
+ tensor = torch.tensor(x, device=device, dtype=torch.float32).reshape(batch_size, *s.shape)
+ if fill_value is not None:
+ tensor.fill_(fill_value)
+ return tensor
+ elif isinstance(s, gym.spaces.Discrete):
+ if isinstance(x, np.ndarray):
+ tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, 1)
+ if fill_value is not None:
+ tensor.fill_(int(fill_value))
+ return tensor
+ elif isinstance(x, np.number) or type(x) in [int, float]:
+ tensor = torch.tensor([x], device=device, dtype=torch.int64).reshape(batch_size, 1)
+ if fill_value is not None:
+ tensor.fill_(int(fill_value))
+ return tensor
+ elif isinstance(s, gym.spaces.MultiDiscrete):
+ if isinstance(x, np.ndarray):
+ tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, *s.shape)
+ if fill_value is not None:
+ tensor.fill_(int(fill_value))
+ return tensor
+ elif isinstance(s, gym.spaces.Dict):
+ return {k: tensorize(_s, x[k]) for k, _s in s.items()}
+ elif isinstance(s, gym.spaces.Tuple):
+ return tuple([tensorize(_s, v) for _s, v in zip(s, x)])
+
+ sample = (gym.vector.utils.batch_space(space, batch_size) if batch_size > 0 else space).sample()
+ return tensorize(space, sample)
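The two helpers compose: `spec_to_gym_space` turns a plain-Python spec into a Gymnasium space, and `sample_space` draws a batched, tensorized sample from it. A round-trip sketch (specs are illustrative):

```python
import gymnasium as gym

from omni.isaac.lab.envs.utils.spaces import sample_space, spec_to_gym_space

space = spec_to_gym_space({"joints": 7, "rgb": [64, 64, 3], "gripper": {2}})
assert isinstance(space, gym.spaces.Dict)

# zero-filled batched sample: one row per environment
sample = sample_space(space, device="cpu", batch_size=4, fill_value=0)
assert sample["joints"].shape == (4, 7)       # Box -> float32 tensor
assert sample["rgb"].shape == (4, 64, 64, 3)  # Box -> float32 tensor
assert sample["gripper"].shape == (4, 1)      # Discrete -> int64 tensor
```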
diff --git a/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py
new file mode 100644
index 0000000000..274f0de650
--- /dev/null
+++ b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2022-2024, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+# ignore private usage of variables warning
+# pyright: reportPrivateUsage=none
+
+from __future__ import annotations
+
+"""Launch Isaac Sim Simulator first."""
+
+from omni.isaac.lab.app import AppLauncher, run_tests
+
+# Can set this to False to see the GUI for debugging
+HEADLESS = True
+
+# launch omniverse app
+app_launcher = AppLauncher(headless=HEADLESS)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+import numpy as np
+import torch
+import unittest
+from gymnasium.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
+
+from omni.isaac.lab.envs.utils.spaces import sample_space, spec_to_gym_space
+
+
+class TestSpacesUtils(unittest.TestCase):
+ """Test for spaces utils' functions"""
+
+ """
+ Tests
+ """
+
+ def test_spec_to_gym_space(self):
+ # fundamental spaces
+ # Box
+ space = spec_to_gym_space(1)
+ self.assertIsInstance(space, Box)
+ self.assertEqual(space.shape, (1,))
+ space = spec_to_gym_space([1, 2, 3, 4, 5])
+ self.assertIsInstance(space, Box)
+ self.assertEqual(space.shape, (1, 2, 3, 4, 5))
+ space = spec_to_gym_space(Box(low=-1.0, high=1.0, shape=(1, 2)))
+ self.assertIsInstance(space, Box)
+ # Discrete
+ space = spec_to_gym_space({2})
+ self.assertIsInstance(space, Discrete)
+ self.assertEqual(space.n, 2)
+ space = spec_to_gym_space(Discrete(2))
+ self.assertIsInstance(space, Discrete)
+ # MultiDiscrete
+ space = spec_to_gym_space([{1}, {2}, {3}])
+ self.assertIsInstance(space, MultiDiscrete)
+ self.assertEqual(space.nvec.shape, (3,))
+ space = spec_to_gym_space(MultiDiscrete(np.array([1, 2, 3])))
+ self.assertIsInstance(space, MultiDiscrete)
+ # composite spaces
+ # Tuple
+ space = spec_to_gym_space(([1, 2, 3, 4, 5], {2}, [{1}, {2}, {3}]))
+ self.assertIsInstance(space, Tuple)
+ self.assertEqual(len(space), 3)
+ self.assertIsInstance(space[0], Box)
+ self.assertIsInstance(space[1], Discrete)
+ self.assertIsInstance(space[2], MultiDiscrete)
+ space = spec_to_gym_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2))))
+ self.assertIsInstance(space, Tuple)
+ # Dict
+ space = spec_to_gym_space({"box": [1, 2, 3, 4, 5], "discrete": {2}, "multi_discrete": [{1}, {2}, {3}]})
+ self.assertIsInstance(space, Dict)
+ self.assertEqual(len(space), 3)
+ self.assertIsInstance(space["box"], Box)
+ self.assertIsInstance(space["discrete"], Discrete)
+ self.assertIsInstance(space["multi_discrete"], MultiDiscrete)
+ space = spec_to_gym_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)}))
+ self.assertIsInstance(space, Dict)
+
+ def test_sample_space(self):
+ device = "cpu"
+ # fundamental spaces
+ # Box
+ sample = sample_space(Box(low=-1.0, high=1.0, shape=(1, 2)), device, batch_size=1)
+ self.assertIsInstance(sample, torch.Tensor)
+ self._check_tensorized(sample, batch_size=1)
+ # Discrete
+ sample = sample_space(Discrete(2), device, batch_size=2)
+ self.assertIsInstance(sample, torch.Tensor)
+ self._check_tensorized(sample, batch_size=2)
+ # MultiDiscrete
+ sample = sample_space(MultiDiscrete(np.array([1, 2, 3])), device, batch_size=3)
+ self.assertIsInstance(sample, torch.Tensor)
+ self._check_tensorized(sample, batch_size=3)
+ # composite spaces
+ # Tuple
+ sample = sample_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2))), device, batch_size=4)
+ self.assertIsInstance(sample, (tuple, list))
+ self._check_tensorized(sample, batch_size=4)
+ # Dict
+ sample = sample_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)}), device, batch_size=5)
+ self.assertIsInstance(sample, dict)
+ self._check_tensorized(sample, batch_size=5)
+
+ """
+ Helper functions.
+ """
+
+ def _check_tensorized(self, sample, batch_size):
+ if isinstance(sample, (tuple, list)):
+ list(map(self._check_tensorized, sample, [batch_size] * len(sample)))
+ elif isinstance(sample, dict):
+ list(map(self._check_tensorized, sample.values(), [batch_size] * len(sample)))
+ else:
+ self.assertIsInstance(sample, torch.Tensor)
+ self.assertEqual(sample.shape[0], batch_size)
+
+
+if __name__ == "__main__":
+ run_tests()
diff --git a/source/extensions/omni.isaac.lab_tasks/config/extension.toml b/source/extensions/omni.isaac.lab_tasks/config/extension.toml
index 89ca646936..a6ecb7a56c 100644
--- a/source/extensions/omni.isaac.lab_tasks/config/extension.toml
+++ b/source/extensions/omni.isaac.lab_tasks/config/extension.toml
@@ -1,7 +1,7 @@
[package]
# Note: Semantic Versioning is used: https://semver.org/
-version = "0.10.5"
+version = "0.10.7"
# Description
title = "Isaac Lab Environments"
diff --git a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst
index 2614630bd7..b3ba0a77fd 100644
--- a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst
+++ b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst
@@ -1,11 +1,24 @@
Changelog
---------
+0.10.7 (2024-10-02)
+~~~~~~~~~~~~~~~~~~~
+
+Changed
+^^^^^^^
+
+* Replaced the deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in single-agent direct tasks
+ with :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively.
+* Replaced the deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in multi-agent direct tasks
+ with :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively.
+
+
0.10.6 (2024-09-25)
~~~~~~~~~~~~~~~~~~~
Added
^^^^^
+
* Added ``Isaac-Cartpole-RGB-Camera-v0`` and ``Isaac-Cartpole-Depth-Camera-v0``
manager based camera cartpole environments.
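
Note (not part of the patch): the shorthand the new config fields accept, as
exercised by test_spaces_utils.py above — an int or a list of ints maps to a
Box of that shape, a single-element set to a Discrete, a list of sets to a
MultiDiscrete, and dicts/tuples recurse:

    spec_to_gym_space(16)                 # -> Box(shape=(16,))
    spec_to_gym_space([80, 80, 3])        # -> Box(shape=(80, 80, 3))
    spec_to_gym_space({2})                # -> Discrete(2)
    spec_to_gym_space([{1}, {2}, {3}])    # -> MultiDiscrete([1, 2, 3])
    spec_to_gym_space(([3], {2}))         # -> Tuple(Box, Discrete)
    spec_to_gym_space({"obs": [4]})       # -> Dict(obs=Box(shape=(4,)))
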
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py
index b83b6782a6..b5c53a91d3 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py
@@ -22,9 +22,9 @@ class AllegroHandEnvCfg(DirectRLEnvCfg):
# env
decimation = 4
episode_length_s = 10.0
- num_actions = 16
- num_observations = 124 # (full)
- num_states = 0
+ action_space = 16
+ observation_space = 124 # (full)
+ state_space = 0
asymmetric_obs = False
obs_type = "full"
# simulation
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py
index 8bf6d6bcc9..42f57127ee 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py
@@ -24,9 +24,9 @@ class AntEnvCfg(DirectRLEnvCfg):
episode_length_s = 15.0
decimation = 2
action_scale = 0.5
- num_actions = 8
- num_observations = 36
- num_states = 0
+ action_space = 8
+ observation_space = 36
+ state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py
index 5490bb0dd3..ca1f61c54a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py
@@ -5,6 +5,7 @@
from __future__ import annotations
+import gymnasium as gym
import torch
import omni.isaac.lab.envs.mdp as mdp
@@ -59,9 +60,9 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg):
episode_length_s = 20.0
decimation = 4
action_scale = 0.5
- num_actions = 12
- num_observations = 48
- num_states = 0
+ action_space = 12
+ observation_space = 48
+ state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(
@@ -118,7 +119,7 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg):
@configclass
class AnymalCRoughEnvCfg(AnymalCFlatEnvCfg):
# env
- num_observations = 235
+ observation_space = 235
terrain = TerrainImporterCfg(
prim_path="/World/ground",
@@ -160,8 +161,10 @@ def __init__(self, cfg: AnymalCFlatEnvCfg | AnymalCRoughEnvCfg, render_mode: str
super().__init__(cfg, render_mode, **kwargs)
# Joint position command (deviation from default joint positions)
- self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
- self._previous_actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
+ self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device)
+ self._previous_actions = torch.zeros(
+ self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device
+ )
# X/Y linear velocity and yaw angular velocity commands
self._commands = torch.zeros(self.num_envs, 3, device=self.device)
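
Note (not part of the patch): gym.spaces.flatdim returns the length of the
flattened space, which is why the buffer allocation above no longer needs a
separate num_actions field. Illustrative values:

    import gymnasium as gym

    gym.spaces.flatdim(gym.spaces.Box(low=-1, high=1, shape=(12,)))   # 12
    gym.spaces.flatdim(gym.spaces.Box(low=-1, high=1, shape=(3, 4)))  # 12
    gym.spaces.flatdim(gym.spaces.Discrete(5))                        # 5 (one-hot length)
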
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py
index 0b606fe899..ad8c616940 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py
@@ -27,9 +27,9 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg):
decimation = 2
episode_length_s = 5.0
possible_agents = ["cart", "pendulum"]
- num_actions = {"cart": 1, "pendulum": 1}
- num_observations = {"cart": 4, "pendulum": 3}
- num_states = -1
+ action_spaces = {"cart": 1, "pendulum": 1}
+ observation_spaces = {"cart": 4, "pendulum": 3}
+ state_space = -1
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py
index b2a1b1e303..dc7db07030 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py
@@ -5,9 +5,7 @@
from __future__ import annotations
-import gymnasium as gym
import math
-import numpy as np
import torch
from collections.abc import Sequence
@@ -29,9 +27,6 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg):
decimation = 2
episode_length_s = 5.0
action_scale = 100.0 # [N]
- num_actions = 1
- num_channels = 3
- num_states = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
@@ -52,9 +47,13 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg):
width=80,
height=80,
)
- num_observations = num_channels * tiled_camera.height * tiled_camera.width
write_image_to_file = False
+ # spaces
+ action_space = 1
+ state_space = 0
+ observation_space = [tiled_camera.height, tiled_camera.width, 3]
+
# change viewer settings
viewer = ViewerCfg(eye=(20.0, 20.0, 20.0))
@@ -87,9 +86,8 @@ class CartpoleDepthCameraEnvCfg(CartpoleRGBCameraEnvCfg):
height=80,
)
- # env
- num_channels = 1
- num_observations = num_channels * tiled_camera.height * tiled_camera.width
+ # spaces
+ observation_space = [tiled_camera.height, tiled_camera.width, 1]
class CartpoleCameraEnv(DirectRLEnv):
@@ -118,35 +116,6 @@ def close(self):
"""Cleanup for the environment."""
super().close()
- def _configure_gym_env_spaces(self):
- """Configure the action and observation spaces for the Gym environment."""
- # observation space (unbounded since we don't impose any limits)
- self.num_actions = self.cfg.num_actions
- self.num_observations = self.cfg.num_observations
- self.num_states = self.cfg.num_states
-
- # set up spaces
- self.single_observation_space = gym.spaces.Dict()
- self.single_observation_space["policy"] = gym.spaces.Box(
- low=-np.inf,
- high=np.inf,
- shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels),
- )
- if self.num_states > 0:
- self.single_observation_space["critic"] = gym.spaces.Box(
- low=-np.inf,
- high=np.inf,
- shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels),
- )
- self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,))
-
- # batch the spaces for vectorized environments
- self.observation_space = gym.vector.utils.batch_space(self.single_observation_space, self.num_envs)
- self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
-
- # RL specifics
- self.actions = torch.zeros(self.num_envs, self.num_actions, device=self.sim.device)
-
def _setup_scene(self):
"""Setup the scene with the cartpole and camera."""
self._cartpole = Articulation(self.cfg.robot_cfg)
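
Note (not part of the patch): the removed override is superseded by the list
shorthand in the config above; per the spec_to_gym_space tests, a list of ints
yields a Box of that shape, so the image-shaped observation space the deleted
code built by hand is recovered as:

    spec_to_gym_space([80, 80, 3])  # -> Box(shape=(80, 80, 3)), i.e. the
                                    #    (height, width, channels) layout
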
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
index 44926e95f9..534fb26443 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
@@ -27,9 +27,9 @@ class CartpoleEnvCfg(DirectRLEnvCfg):
decimation = 2
episode_length_s = 5.0
action_scale = 100.0 # [N]
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
index 4eb01953fe..3a6a480ed0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
@@ -28,9 +28,9 @@ class FrankaCabinetEnvCfg(DirectRLEnvCfg):
# env
episode_length_s = 8.3333 # 500 timesteps
decimation = 2
- num_actions = 9
- num_observations = 23
- num_states = 0
+ action_space = 9
+ observation_space = 23
+ state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
index bfaf8f8190..2a4d330e6a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
@@ -24,9 +24,9 @@ class HumanoidEnvCfg(DirectRLEnvCfg):
episode_length_s = 15.0
decimation = 2
action_scale = 1.0
- num_actions = 21
- num_observations = 75
- num_states = 0
+ action_space = 21
+ observation_space = 75
+ state_space = 0
# simulation
sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
index c6df659ec6..97156618f1 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
@@ -5,6 +5,7 @@
from __future__ import annotations
+import gymnasium as gym
import torch
import omni.isaac.lab.sim as sim_utils
@@ -50,9 +51,9 @@ class QuadcopterEnvCfg(DirectRLEnvCfg):
# env
episode_length_s = 10.0
decimation = 2
- num_actions = 4
- num_observations = 12
- num_states = 0
+ action_space = 4
+ observation_space = 12
+ state_space = 0
debug_vis = True
ui_window_class_type = QuadcopterEnvWindow
@@ -105,7 +106,7 @@ def __init__(self, cfg: QuadcopterEnvCfg, render_mode: str | None = None, **kwar
super().__init__(cfg, render_mode, **kwargs)
# Total thrust and moment applied to the base of the quadcopter
- self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
+ self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device)
self._thrust = torch.zeros(self.num_envs, 1, 3, device=self.device)
self._moment = torch.zeros(self.num_envs, 1, 3, device=self.device)
# Goal position
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
index f4b8407296..af88124792 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
@@ -119,9 +119,9 @@ class ShadowHandEnvCfg(DirectRLEnvCfg):
# env
decimation = 2
episode_length_s = 10.0
- num_actions = 20
- num_observations = 157 # (full)
- num_states = 0
+ action_space = 20
+ observation_space = 157 # (full)
+ state_space = 0
asymmetric_obs = False
obs_type = "full"
@@ -232,9 +232,9 @@ class ShadowHandOpenAIEnvCfg(ShadowHandEnvCfg):
# env
decimation = 3
episode_length_s = 8.0
- num_actions = 20
- num_observations = 42
- num_states = 187
+ action_space = 20
+ observation_space = 42
+ state_space = 187
asymmetric_obs = True
obs_type = "openai"
# simulation
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
index b025bfb052..492074d8a9 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
@@ -48,8 +48,8 @@ class ShadowHandVisionEnvCfg(ShadowHandEnvCfg):
feature_extractor = FeatureExtractorCfg()
# env
- num_observations = 164 + 27 # state observation + vision CNN embedding
- num_states = 187 + 27 # asymettric states + vision CNN embedding
+ observation_space = 164 + 27 # state observation + vision CNN embedding
+ state_space = 187 + 27 # asymmetric states + vision CNN embedding
@configclass
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
index d6dbb3d6a2..d3a7c33b3f 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
@@ -118,9 +118,9 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
decimation = 2
episode_length_s = 7.5
possible_agents = ["right_hand", "left_hand"]
- num_actions = {"right_hand": 20, "left_hand": 20}
- num_observations = {"right_hand": 157, "left_hand": 157}
- num_states = 290
+ action_spaces = {"right_hand": 20, "left_hand": 20}
+ observation_spaces = {"right_hand": 157, "left_hand": 157}
+ state_space = 290
# simulation
sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
index 0badd08c31..0dedef9ef0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
@@ -70,19 +70,19 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
if hasattr(self.unwrapped, "action_manager"):
self.num_actions = self.unwrapped.action_manager.total_action_dim
else:
- self.num_actions = self.unwrapped.num_actions
+ self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space)
if hasattr(self.unwrapped, "observation_manager"):
self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0]
else:
- self.num_obs = self.unwrapped.num_observations
+ self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"])
# -- privileged observations
if (
hasattr(self.unwrapped, "observation_manager")
and "critic" in self.unwrapped.observation_manager.group_obs_dim
):
self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0]
- elif hasattr(self.unwrapped, "num_states"):
- self.num_privileged_obs = self.unwrapped.num_states
+ elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space:
+ self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
else:
self.num_privileged_obs = 0
# reset at the start since the RSL-RL runner does not call reset
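
Note (not part of the patch): a sketch of the dimension extraction the wrapper
now performs for direct-workflow envs, assuming a Dict observation space with
a "policy" group and an optional "critic" group:

    import gymnasium as gym

    single_obs_space = gym.spaces.Dict({
        "policy": gym.spaces.Box(low=-1, high=1, shape=(48,)),
        "critic": gym.spaces.Box(low=-1, high=1, shape=(235,)),
    })
    num_obs = gym.spaces.flatdim(single_obs_space["policy"])                 # 48
    if "critic" in single_obs_space:
        num_privileged_obs = gym.spaces.flatdim(single_obs_space["critic"])  # 235
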
diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
index 9e92e26156..993b776a81 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
@@ -22,6 +22,7 @@
import omni.usd
from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space
import omni.isaac.lab_tasks # noqa: F401
from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -108,12 +109,12 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
# simulate environment for num_steps steps
with torch.inference_mode():
for _ in range(num_steps):
- # sample actions from -1 to 1
- actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1
+ # sample actions according to the defined space
+ actions = sample_space(env.single_action_space, device=env.unwrapped.device, batch_size=num_envs)
# apply actions
transition = env.step(actions)
# check signals
- for data in transition:
+ for data in transition[:-1]: # exclude info
self.assertTrue(self._check_valid_tensor(data), msg=f"Invalid data: {data}")
# close the environment
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
"""
if isinstance(data, torch.Tensor):
return not torch.any(torch.isnan(data))
+ elif isinstance(data, (tuple, list)):
+ return all(TestEnvironments._check_valid_tensor(value) for value in data)
elif isinstance(data, dict):
- valid_tensor = True
- for value in data.values():
- if isinstance(value, dict):
- valid_tensor &= TestEnvironments._check_valid_tensor(value)
- elif isinstance(value, torch.Tensor):
- valid_tensor &= not torch.any(torch.isnan(value))
- return valid_tensor
+ return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
else:
raise ValueError(f"Input data of invalid type: {type(data)}.")
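
Note (not part of the patch): illustrative behavior of the simplified
recursive check — it now descends through tuples, lists, and dicts uniformly:

    import torch

    good = {"obs": {"policy": torch.zeros(4, 3)}, "reward": torch.zeros(4)}
    bad = ({"policy": torch.tensor([float("nan")])},)

    TestEnvironments._check_valid_tensor(good)  # True
    TestEnvironments._check_valid_tensor(bad)   # False
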
diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
index 19fcd88936..2f543a84e3 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
@@ -21,6 +21,7 @@
import omni.usd
from omni.isaac.lab.envs import DirectMARLEnv, DirectMARLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space
import omni.isaac.lab_tasks # noqa: F401
from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -104,9 +105,9 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
# simulate environment for num_steps steps
with torch.inference_mode():
for _ in range(num_steps):
- # sample actions from -1 to 1
+ # sample actions according to the defined space
actions = {
- agent: 2 * torch.rand(env.action_space(agent).shape, device=env.unwrapped.device) - 1
+ agent: sample_space(env.action_spaces[agent], device=env.unwrapped.device)
for agent in env.unwrapped.possible_agents
}
# apply actions
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
"""
if isinstance(data, torch.Tensor):
return not torch.any(torch.isnan(data))
+ elif isinstance(data, (tuple, list)):
+ return all(TestEnvironments._check_valid_tensor(value) for value in data)
elif isinstance(data, dict):
- valid_tensor = True
- for value in data.values():
- if isinstance(value, dict):
- valid_tensor &= TestEnvironments._check_valid_tensor(value)
- elif isinstance(value, torch.Tensor):
- valid_tensor &= not torch.any(torch.isnan(value))
- return valid_tensor
+ return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
else:
raise ValueError(f"Input data of invalid type: {type(data)}.")
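
Note (not part of the patch): a per-agent sampling sketch matching the loop
above; batch_size is passed explicitly here so the leaf shapes are
deterministic, whereas the test relies on the helper's default.

    import gymnasium as gym

    from omni.isaac.lab.envs.utils import sample_space

    action_spaces = {
        "cart": gym.spaces.Box(low=-1.0, high=1.0, shape=(1,)),
        "pendulum": gym.spaces.Box(low=-1.0, high=1.0, shape=(1,)),
    }
    actions = {
        agent: sample_space(space, device="cpu", batch_size=4)
        for agent, space in action_spaces.items()
    }
    # actions["cart"].shape == (4, 1); actions["pendulum"].shape == (4, 1)
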