diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 98720f8c521..ca34b816ba1 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -87,6 +87,7 @@ Guidelines for modifications: * Jinghuan Shang * Jingzhou Liu * Jinqi Wei +* Jinyeob Kim * Johnson Sun * Kaixi Bao * Kris Wilson diff --git a/docs/source/_static/tasks/manipulation/openarm_bi_reach.jpg b/docs/source/_static/tasks/manipulation/openarm_bi_reach.jpg new file mode 100644 index 00000000000..12e6620027d Binary files /dev/null and b/docs/source/_static/tasks/manipulation/openarm_bi_reach.jpg differ diff --git a/docs/source/_static/tasks/manipulation/openarm_uni_lift.jpg b/docs/source/_static/tasks/manipulation/openarm_uni_lift.jpg new file mode 100644 index 00000000000..b7bb7c3cecf Binary files /dev/null and b/docs/source/_static/tasks/manipulation/openarm_uni_lift.jpg differ diff --git a/docs/source/_static/tasks/manipulation/openarm_uni_open_drawer.jpg b/docs/source/_static/tasks/manipulation/openarm_uni_open_drawer.jpg new file mode 100644 index 00000000000..0bec48a4019 Binary files /dev/null and b/docs/source/_static/tasks/manipulation/openarm_uni_open_drawer.jpg differ diff --git a/docs/source/_static/tasks/manipulation/openarm_uni_reach.jpg b/docs/source/_static/tasks/manipulation/openarm_uni_reach.jpg new file mode 100644 index 00000000000..46128f7607b Binary files /dev/null and b/docs/source/_static/tasks/manipulation/openarm_uni_reach.jpg differ diff --git a/docs/source/overview/environments.rst b/docs/source/overview/environments.rst index 050dcd22353..9f2d2ff28b1 100644 --- a/docs/source/overview/environments.rst +++ b/docs/source/overview/environments.rst @@ -163,6 +163,14 @@ for the lift-cube environment: +-------------------------+------------------------------+-----------------------------------------------------------------------------+ | |agibot_place_toy| | |agibot_place_toy-link| | Pick up and place an object in a box with a Agibot A2D humanoid robot | +-------------------------+------------------------------+-----------------------------------------------------------------------------+ + | |reach_openarm_bi| | |reach_openarm_bi-link| | Move the end-effector to sampled target poses with the OpenArm robot | + +-------------------------+------------------------------+-----------------------------------------------------------------------------+ + | |reach_openarm_uni| | |reach_openarm_uni-link| | Move the end-effector to a sampled target pose with the OpenArm robot | + +-------------------------+------------------------------+-----------------------------------------------------------------------------+ + | |lift_openarm_uni| | |lift_openarm_uni-link| | Pick a cube and bring it to a sampled target position with the OpenArm robot| + +-------------------------+------------------------------+-----------------------------------------------------------------------------+ + | |cabi_openarm_uni| | |cabi_openarm_uni-link| | Grasp the handle of a cabinet's drawer and open it with the OpenArm robot | + +-------------------------+------------------------------+-----------------------------------------------------------------------------+ .. |reach-franka| image:: ../_static/tasks/manipulation/franka_reach.jpg .. |reach-ur10| image:: ../_static/tasks/manipulation/ur10_reach.jpg @@ -183,6 +191,10 @@ for the lift-cube environment: .. |agibot_place_toy| image:: ../_static/tasks/manipulation/agibot_place_toy.jpg .. |kuka-allegro-lift| image:: ../_static/tasks/manipulation/kuka_allegro_lift.jpg .. |kuka-allegro-reorient| image:: ../_static/tasks/manipulation/kuka_allegro_reorient.jpg +.. |reach_openarm_bi| image:: ../_static/tasks/manipulation/openarm_bi_reach.jpg +.. |reach_openarm_uni| image:: ../_static/tasks/manipulation/openarm_uni_reach.jpg +.. |lift_openarm_uni| image:: ../_static/tasks/manipulation/openarm_uni_lift.jpg +.. |cabi_openarm_uni| image:: ../_static/tasks/manipulation/openarm_uni_open_drawer.jpg .. |reach-franka-link| replace:: `Isaac-Reach-Franka-v0 `__ .. |reach-ur10-link| replace:: `Isaac-Reach-UR10-v0 `__ @@ -212,6 +224,10 @@ for the lift-cube environment: .. |cube-shadow-vis-link| replace:: `Isaac-Repose-Cube-Shadow-Vision-Direct-v0 `__ .. |agibot_place_mug-link| replace:: `Isaac-Place-Mug-Agibot-Left-Arm-RmpFlow-v0 `__ .. |agibot_place_toy-link| replace:: `Isaac-Place-Toy2Box-Agibot-Right-Arm-RmpFlow-v0 `__ +.. |reach_openarm_bi-link| replace:: `Isaac-Reach-OpenArm-Bi-v0 `__ +.. |reach_openarm_uni-link| replace:: `Isaac-Reach-OpenArm-v0 `__ +.. |lift_openarm_uni-link| replace:: `Isaac-Lift-Cube-OpenArm-v0 `__ +.. |cabi_openarm_uni-link| replace:: `Isaac-Open-Drawer-OpenArm-v0 `__ Contact-rich Manipulation @@ -1135,3 +1151,19 @@ inferencing, including reading from an already trained checkpoint and disabling - Isaac-Velocity-Rough-Unitree-Go2-Play-v0 - Manager Based - **rsl_rl** (PPO), **skrl** (PPO) + * - Isaac-Reach-OpenArm-Bi-v0 + - Isaac-Reach-OpenArm-Bi-Play-v0 + - Manager Based + - **rsl_rl** (PPO), **rl_games** (PPO) + * - Isaac-Reach-OpenArm-v0 + - Isaac-Reach-OpenArm-Play-v0 + - Manager Based + - **rsl_rl** (PPO), **skrl** (PPO), **rl_games** (PPO) + * - Isaac-Lift-Cube-OpenArm-v0 + - Isaac-Lift-Cube-OpenArm-Play-v0 + - Manager Based + - **rsl_rl** (PPO), **rl_games** (PPO) + * - Isaac-Open-Drawer-OpenArm-v0 + - Isaac-Open-Drawer-OpenArm-Play-v0 + - Manager Based + - **rsl_rl** (PPO), **rl_games** (PPO) diff --git a/source/isaaclab_assets/config/extension.toml b/source/isaaclab_assets/config/extension.toml index dac5494087e..3f682d93335 100644 --- a/source/isaaclab_assets/config/extension.toml +++ b/source/isaaclab_assets/config/extension.toml @@ -1,6 +1,6 @@ [package] # Semantic Versioning is used: https://semver.org/ -version = "0.2.3" +version = "0.2.4" # Description title = "Isaac Lab Assets" diff --git a/source/isaaclab_assets/docs/CHANGELOG.rst b/source/isaaclab_assets/docs/CHANGELOG.rst index b6582e77e8a..3456213b3e8 100644 --- a/source/isaaclab_assets/docs/CHANGELOG.rst +++ b/source/isaaclab_assets/docs/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog --------- +0.2.4 (2025-11-26) +~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Configuration for OpenArm robots used for manipulation tasks. + 0.2.3 (2025-08-11) ~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_assets/isaaclab_assets/robots/openarm.py b/source/isaaclab_assets/isaaclab_assets/robots/openarm.py new file mode 100644 index 00000000000..e57658b32ca --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/openarm.py @@ -0,0 +1,176 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration of OpenArm robots. + +The following configurations are available: + +* :obj:`OPENARM_BI_CFG`: OpenArm robot with two arms. +* :obj:`OPENARM_BI_HIGH_PD_CFG`: OpenArm robot with two arms and stiffer PD control. +* :obj:`OPENARM_UNI_CFG`: OpenArm robot with one arm. +* :obj:`OPENARM_UNI_HIGH_PD_CFG`: OpenArm robot with one arm and stiffer PD control. + +References: +OpenArm repositories: +* https://github.com/enactic/openarm +* https://github.com/enactic/openarm_isaac_lab + +Motor spec sheets: +* Joint 1–2 (DM-J8009P-2EC): + https://cdn.shopify.com/s/files/1/0673/6848/5000/files/DM-J8009P-2EC_User_Manual.pdf?v=1755481750 +* Joint 3–4 (DM-J4340P-2EC / DM-J4340-2EC): + https://cdn.shopify.com/s/files/1/0673/6848/5000/files/DM-J4340-2EC_User_Manual.pdf?v=1756883905 +* Joint 5–8 (DM-J4310-2EC V1.1): + https://files.seeedstudio.com/products/Damiao/DM-J4310-en.pdf + +Please `git clone https://github.com/enactic/openarm_isaac_lab` and `export PYTHONPATH=$PYTHONPATH:~/openarm_isaac_lab` to use these configurations. +""" + +from source.openarm.openarm.tasks.manager_based.openarm_manipulation import OPENARM_ROOT_DIR + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets.articulation import ArticulationCfg + +OPENARM_BI_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"{OPENARM_ROOT_DIR}/usds/openarm_bimanual/openarm_bimanual.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=8, + solver_velocity_iteration_count=0, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "openarm_left_joint.*": 0.0, + "openarm_right_joint.*": 0.0, + "openarm_left_finger_joint.*": 0.0, + "openarm_right_finger_joint.*": 0.0, + }, + ), + # spec sheet for reference + # DM-J8009P-2EC (Joint 1, 2): + # https://cdn.shopify.com/s/files/1/0673/6848/5000/files/DM-J8009P-2EC_User_Manual.pdf?v=1755481750 + # DM-J4340P-2EC, DM-J4340-2EC (Joint 3, 4): + # https://cdn.shopify.com/s/files/1/0673/6848/5000/files/DM-J4340-2EC_User_Manual.pdf?v=1756883905 + # DM-J4310-2EC V1.1 (Joint 5, 6, 7, 8): + # https://files.seeedstudio.com/products/Damiao/DM-J4310-en.pdf + actuators={ + "openarm_arm": ImplicitActuatorCfg( + joint_names_expr=[ + "openarm_left_joint[1-7]", + "openarm_right_joint[1-7]", + ], + velocity_limit_sim={ + "openarm_left_joint[1-2]": 2.175, + "openarm_right_joint[1-2]": 2.175, + "openarm_left_joint[3-4]": 2.175, + "openarm_right_joint[3-4]": 2.175, + "openarm_left_joint[5-7]": 2.61, + "openarm_right_joint[5-7]": 2.61, + }, + effort_limit_sim={ + "openarm_left_joint[1-2]": 40.0, + "openarm_right_joint[1-2]": 40.0, + "openarm_left_joint[3-4]": 27.0, + "openarm_right_joint[3-4]": 27.0, + "openarm_left_joint[5-7]": 7.0, + "openarm_right_joint[5-7]": 7.0, + }, + stiffness=80.0, + damping=4.0, + ), + "openarm_gripper": ImplicitActuatorCfg( + joint_names_expr=[ + "openarm_left_finger_joint.*", + "openarm_right_finger_joint.*", + ], + velocity_limit_sim=0.2, + effort_limit_sim=333.33, + stiffness=2e3, + damping=1e2, + ), + }, + soft_joint_pos_limit_factor=1.0, +) +"""Configuration of OpenArm Bimanual robot.""" + +OPENARM_UNI_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"{OPENARM_ROOT_DIR}/usds/openarm_unimanual/openarm_unimanual.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + max_depenetration_velocity=5.0, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=8, + solver_velocity_iteration_count=0, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "openarm_joint1": 1.57, + "openarm_joint2": 0.0, + "openarm_joint3": -1.57, + "openarm_joint4": 1.57, + "openarm_joint5": 0.0, + "openarm_joint6": 0.0, + "openarm_joint7": 0.0, + "openarm_finger_joint.*": 0.044, + }, + ), + actuators={ + "openarm_arm": ImplicitActuatorCfg( + joint_names_expr=["openarm_joint[1-7]"], + velocity_limit_sim={ + "openarm_joint[1-2]": 2.175, + "openarm_joint[3-4]": 2.175, + "openarm_joint[5-7]": 2.61, + }, + effort_limit_sim={ + "openarm_joint[1-2]": 40.0, + "openarm_joint[3-4]": 27.0, + "openarm_joint[5-7]": 7.0, + }, + stiffness=80.0, + damping=4.0, + ), + "openarm_gripper": ImplicitActuatorCfg( + joint_names_expr=["openarm_finger_joint.*"], + velocity_limit_sim=0.2, + effort_limit_sim=333.33, + stiffness=2e3, + damping=1e2, + ), + }, + soft_joint_pos_limit_factor=1.0, +) +"""Configuration of OpenArm Unimanual robot.""" + +OPENARM_BI_HIGH_PD_CFG = OPENARM_BI_CFG.copy() +OPENARM_BI_HIGH_PD_CFG.spawn.rigid_props.disable_gravity = True +OPENARM_BI_HIGH_PD_CFG.actuators["openarm_arm"].stiffness = 400.0 +OPENARM_BI_HIGH_PD_CFG.actuators["openarm_arm"].damping = 80.0 +OPENARM_BI_HIGH_PD_CFG.actuators["openarm_gripper"].stiffness = 2e3 +OPENARM_BI_HIGH_PD_CFG.actuators["openarm_gripper"].damping = 1e2 +"""Configuration of OpenArm Bimanual robot with stiffer PD control. + +This configuration is useful for task-space control using differential IK. +""" + +OPENARM_UNI_HIGH_PD_CFG = OPENARM_UNI_CFG.copy() +OPENARM_UNI_HIGH_PD_CFG.spawn.rigid_props.disable_gravity = True +OPENARM_UNI_HIGH_PD_CFG.actuators["openarm_arm"].stiffness = 400.0 +OPENARM_UNI_HIGH_PD_CFG.actuators["openarm_arm"].damping = 80.0 +"""Configuration of OpenArm Unimanual robot with stiffer PD control. + +This configuration is useful for task-space control using differential IK. +""" diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml index ef079ce9112..5d1e4c5ef27 100644 --- a/source/isaaclab_tasks/config/extension.toml +++ b/source/isaaclab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.11.9" +version = "0.11.10" # Description title = "Isaac Lab Environments" diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst index 518c3a02541..56d8e13ff89 100644 --- a/source/isaaclab_tasks/docs/CHANGELOG.rst +++ b/source/isaaclab_tasks/docs/CHANGELOG.rst @@ -1,6 +1,25 @@ Changelog --------- +0.11.10 (2025-11-26) +~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added reaching task environments for OpenArm unimanual robot: + * :class:`OpenArmReachEnvCfg`; Gym ID ``Isaac-Reach-OpenArm-v0``. + * :class:`OpenArmReachEnvCfg_PLAY`; Gym ID ``Isaac-Reach-OpenArm-Play-v0``. +* Added lifting a cube task environments for OpenArm unimanual robot: + * :class:`OpenArmCubeLiftEnvCfg`; Gym ID ``Isaac-Lift-Cube-OpenArm-v0``. + * :class:`OpenArmCubeLiftEnvCfg_PLAY`; Gym ID ``Isaac-Lift-Cube-OpenArm-Play-v0``. +* Added opening a drawer task environments for OpenArm unimanual robot: + * :class:`OpenArmCabinetEnvCfg`; Gym ID ``Isaac-Open-Drawer-OpenArm-v0``. + * :class:`OpenArmCabinetEnvCfg_PLAY`; Gym ID ``Isaac-Open-Drawer-OpenArm-Play-v0``. +* Added reaching task environments for OpenArm bimanual robot: + * :class:`OpenArmReachEnvCfg`; Gym ID ``Isaac-Reach-OpenArm-Bi-v0``. + * :class:`OpenArmReachEnvCfg_PLAY`; Gym ID ``Isaac-Reach-OpenArm-Bi-Play-v0``. + 0.11.9 (2025-11-10) ~~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/__init__.py new file mode 100644 index 00000000000..be1eae32f25 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + +## +# Joint Position Control +## + +gym.register( + id="Isaac-Open-Drawer-OpenArm-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmCabinetEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmCabinetPPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, + disable_env_checker=True, +) + +gym.register( + id="Isaac-Open-Drawer-OpenArm-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmCabinetEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmCabinetPPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, + disable_env_checker=True, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 00000000000..85b8a40d5be --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,81 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + clip_observations: 5.0 + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [256, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False + load_path: '' + + config: + name: openarm_open_drawer + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: False + normalize_value: False + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 1 + normalize_advantage: False + gamma: 0.99 + tau: 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + kl_threshold: 0.008 + score_to_win: 200 + max_epochs: 400 + save_best_after: 50 + save_frequency: 50 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.001 + truncate_grads: True + e_clip: 0.2 + horizon_length: 96 + minibatch_size: 4096 + mini_epochs: 5 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..b6d7a5ce6d5 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class OpenArmCabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 96 + max_iterations = 600 + save_interval = 50 + experiment_name = "openarm_open_drawer" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=1e-3, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.02, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/cabinet_openarm_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/cabinet_openarm_env_cfg.py new file mode 100644 index 00000000000..d93459fabab --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/cabinet_openarm_env_cfg.py @@ -0,0 +1,281 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +We modified parts of the environment—such as the target’s position and orientation, as well as certain object properties—to better suit the smaller robot. +""" + +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.actuators.actuator_cfg import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import FrameTransformerCfg +from isaaclab.sensors.frame_transformer import OffsetCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +## +# Pre-defined configs +## +from isaaclab.markers.config import FRAME_MARKER_CFG # isort: skip + +FRAME_MARKER_SMALL_CFG = FRAME_MARKER_CFG.copy() +FRAME_MARKER_SMALL_CFG.markers["frame"].scale = (0.10, 0.10, 0.10) + +from ... import mdp + +## +# Scene definition +## + + +@configclass +class CabinetSceneCfg(InteractiveSceneCfg): + """Configuration for the cabinet scene with a robot and a cabinet. + + This is the abstract base implementation, the exact scene is defined in the derived classes + which need to set the robot and end-effector frames + """ + + # robots, Will be populated by agent env cfg + robot: ArticulationCfg = MISSING + # End-effector, Will be populated by agent env cfg + ee_frame: FrameTransformerCfg = MISSING + + cabinet = ArticulationCfg( + prim_path="{ENV_REGEX_NS}/Cabinet", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Sektion_Cabinet/sektion_cabinet_instanceable.usd", + activate_contact_sensors=False, + scale=(0.75, 0.75, 0.75), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.7, 0, 0.3), + rot=(0.0, 0.0, 0.0, 1.0), + joint_pos={ + "door_left_joint": 0.0, + "door_right_joint": 0.0, + "drawer_bottom_joint": 0.0, + "drawer_top_joint": 0.0, + }, + ), + actuators={ + "drawers": ImplicitActuatorCfg( + joint_names_expr=["drawer_top_joint", "drawer_bottom_joint"], + effort_limit=87.0, + velocity_limit=100.0, + stiffness=10.0, + damping=1.0, + ), + "doors": ImplicitActuatorCfg( + joint_names_expr=["door_left_joint", "door_right_joint"], + effort_limit=87.0, + velocity_limit=100.0, + stiffness=10.0, + damping=2.5, + ), + }, + ) + + # Frame definitions for the cabinet. + cabinet_frame = FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Cabinet/sektion", + debug_vis=True, + visualizer_cfg=FRAME_MARKER_SMALL_CFG.replace(prim_path="/Visuals/CabinetFrameTransformer"), + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Cabinet/drawer_handle_bottom", + name="drawer_handle_bottom", + offset=OffsetCfg( + pos=(0.222, 0.0, 0.005), + rot=(0.5, 0.5, -0.5, -0.5), # align with end-effector frame + ), + ), + ], + ) + + # plane + plane = AssetBaseCfg( + prim_path="/World/GroundPlane", + init_state=AssetBaseCfg.InitialStateCfg(), + spawn=sim_utils.GroundPlaneCfg(), + collision_group=-1, + ) + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0), + ) + + +## +# MDP settings +## + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: mdp.JointPositionActionCfg = MISSING + gripper_action: mdp.BinaryJointPositionActionCfg = MISSING + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + joint_pos = ObsTerm(func=mdp.joint_pos_rel) + joint_vel = ObsTerm(func=mdp.joint_vel_rel) + cabinet_joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={"asset_cfg": SceneEntityCfg("cabinet", joint_names=["drawer_bottom_joint"])}, + ) + cabinet_joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={"asset_cfg": SceneEntityCfg("cabinet", joint_names=["drawer_bottom_joint"])}, + ) + rel_ee_drawer_distance = ObsTerm(func=mdp.rel_ee_drawer_distance) + + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + robot_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=".*"), + "static_friction_range": (0.8, 1.25), + "dynamic_friction_range": (0.8, 1.25), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + + cabinet_physics_material = EventTerm( + func=mdp.randomize_rigid_body_material, + mode="startup", + params={ + "asset_cfg": SceneEntityCfg("cabinet", body_names="drawer_handle_bottom"), + "static_friction_range": (2.25, 2.5), + "dynamic_friction_range": (2.0, 2.25), + "restitution_range": (0.0, 0.0), + "num_buckets": 16, + }, + ) + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "position_range": (-0.1, 0.1), + "velocity_range": (0.0, 0.0), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # 1. Approach the handle + approach_ee_handle = RewTerm(func=mdp.approach_ee_handle, weight=2.0, params={"threshold": 0.2}) + align_ee_handle = RewTerm(func=mdp.align_ee_handle, weight=0.5) + + # 2. Grasp the handle + approach_gripper_handle = RewTerm(func=mdp.approach_gripper_handle, weight=5.0, params={"offset": MISSING}) + align_grasp_around_handle = RewTerm(func=mdp.align_grasp_around_handle, weight=0.125) + grasp_handle = RewTerm( + func=mdp.grasp_handle, + weight=0.5, + params={ + "threshold": 0.03, + "open_joint_pos": MISSING, + "asset_cfg": SceneEntityCfg("robot", joint_names=MISSING), + }, + ) + + # 3. Open the drawer + open_drawer_bonus = RewTerm( + func=mdp.open_drawer_bonus, + weight=7.5, + params={"asset_cfg": SceneEntityCfg("cabinet", joint_names=["drawer_bottom_joint"])}, + ) + multi_stage_open_drawer = RewTerm( + func=mdp.multi_stage_open_drawer, + weight=1.0, + params={"asset_cfg": SceneEntityCfg("cabinet", joint_names=["drawer_bottom_joint"])}, + ) + + # 4. Penalize actions for cosmetic reasons + action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-1e-2) + joint_vel = RewTerm(func=mdp.joint_vel_l2, weight=-0.0001) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +## +# Environment configuration +## + + +@configclass +class CabinetEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the cabinet environment.""" + + # Scene settings + scene: CabinetSceneCfg = CabinetSceneCfg(num_envs=4096, env_spacing=2.0) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 1 + self.episode_length_s = 8.0 + self.viewer.eye = (-2.0, 2.0, 2.0) + self.viewer.lookat = (0.8, 0.0, 0.5) + # simulation settings + self.sim.dt = 1 / 60 # 60Hz + self.sim.render_interval = self.decimation + self.sim.physx.bounce_threshold_velocity = 0.01 + self.sim.physx.friction_correlation_distance = 0.00625 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/joint_pos_env_cfg.py new file mode 100644 index 00000000000..123ea047e63 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/openarm/joint_pos_env_cfg.py @@ -0,0 +1,93 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +## +# Pre-defined configs +## +from isaaclab_assets.robots.openarm import OPENARM_UNI_CFG + +from isaaclab.sensors import FrameTransformerCfg +from isaaclab.sensors.frame_transformer.frame_transformer_cfg import OffsetCfg +from isaaclab.utils import configclass + +from isaaclab_tasks.manager_based.manipulation.cabinet import mdp + +from isaaclab_tasks.manager_based.manipulation.cabinet.config.openarm.cabinet_openarm_env_cfg import ( # isort: skip + FRAME_MARKER_SMALL_CFG, + CabinetEnvCfg, +) + + +@configclass +class OpenArmCabinetEnvCfg(CabinetEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # Set OpenArm as robot + self.scene.robot = OPENARM_UNI_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # Set Actions for the specific robot type (OpenArm) + self.actions.arm_action = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=["openarm_joint.*"], + scale=1.0, + use_default_offset=True, + ) + self.actions.gripper_action = mdp.BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["openarm_finger_joint.*"], + open_command_expr={"openarm_finger_joint.*": 0.044}, + close_command_expr={"openarm_finger_joint.*": 0.0}, + ) + + # Listens to the required transforms + # IMPORTANT: The order of the frames in the list is important. The first frame is the tool center point (TCP) + # the other frames are the fingers + self.scene.ee_frame = FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Robot/openarm_link0", + visualizer_cfg=FRAME_MARKER_SMALL_CFG.replace(prim_path="/Visuals/EndEffectorFrameTransformer"), + debug_vis=False, + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/openarm_ee_tcp", + name="ee_tcp", + offset=OffsetCfg( + pos=(0.0, 0.0, -0.003), + ), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/openarm_left_finger", + name="tool_leftfinger", + offset=OffsetCfg( + pos=(0.0, -0.005, 0.075), + ), + ), + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/openarm_right_finger", + name="tool_rightfinger", + offset=OffsetCfg( + pos=(0.0, 0.005, 0.075), + ), + ), + ], + ) + + # override rewards + self.rewards.approach_gripper_handle.params["offset"] = 0.04 + self.rewards.grasp_handle.params["open_joint_pos"] = 0.044 + self.rewards.grasp_handle.params["asset_cfg"].joint_names = ["openarm_finger_joint.*"] + + +@configclass +class OpenArmCabinetEnvCfg_PLAY(OpenArmCabinetEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/__init__.py new file mode 100644 index 00000000000..ffb058ad1a8 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + +## +# Joint Position Control +## + +gym.register( + id="Isaac-Lift-Cube-OpenArm-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmCubeLiftEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmLiftCubePPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, + disable_env_checker=True, +) + +gym.register( + id="Isaac-Lift-Cube-OpenArm-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmCubeLiftEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmLiftCubePPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + }, + disable_env_checker=True, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 00000000000..70548b9d299 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,85 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +params: + seed: 42 + + # environment wrapper clipping + env: + clip_observations: 100.0 + clip_actions: 100.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [256, 128, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: openarm_lift + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + value_bootstrap: False + num_actors: -1 + reward_shaper: + scale_value: 0.01 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 1e-4 + lr_schedule: adaptive + schedule_type: legacy + kl_threshold: 0.01 + score_to_win: 100000000 + max_epochs: 1500 + save_best_after: 100 + save_frequency: 50 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.001 + truncate_grads: True + e_clip: 0.2 + horizon_length: 24 + minibatch_size: 24576 + mini_epochs: 8 + critic_coef: 4 + clip_value: True + clip_actions: False + seq_len: 4 + bounds_loss_coef: 0.0001 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..23a16dc3df8 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class OpenArmLiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 2000 + save_interval = 50 + experiment_name = "openarm_lift" + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[256, 128, 64], + critic_hidden_dims=[256, 128, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.006, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=1.0e-4, + schedule="adaptive", + gamma=0.98, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/joint_pos_env_cfg.py new file mode 100644 index 00000000000..16e54b396ef --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/joint_pos_env_cfg.py @@ -0,0 +1,101 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +import math + +from isaaclab_assets.robots.openarm import OPENARM_UNI_CFG + +from isaaclab.assets import RigidObjectCfg +from isaaclab.sensors import FrameTransformerCfg +from isaaclab.sim.schemas.schemas_cfg import RigidBodyPropertiesCfg +from isaaclab.sim.spawners.from_files.from_files_cfg import UsdFileCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +from isaaclab_tasks.manager_based.manipulation.lift import mdp +from isaaclab_tasks.manager_based.manipulation.lift.config.openarm.lift_openarm_env_cfg import LiftEnvCfg + +## +# Pre-defined configs +## +from isaaclab.markers.config import FRAME_MARKER_CFG # isort: skip + + +@configclass +class OpenArmCubeLiftEnvCfg(LiftEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # Set OpenArm as robot + self.scene.robot = OPENARM_UNI_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # Set actions for the specific robot type (OpenArm) + self.actions.arm_action = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=[ + "openarm_joint.*", + ], + scale=0.5, + use_default_offset=True, + ) + + self.actions.gripper_action = mdp.BinaryJointPositionActionCfg( + asset_name="robot", + joint_names=["openarm_finger_joint.*"], + open_command_expr={"openarm_finger_joint.*": 0.044}, + close_command_expr={"openarm_finger_joint.*": 0.0}, + ) + + # Set the body name for the end effector + self.commands.object_pose.body_name = "openarm_hand" + self.commands.object_pose.ranges.pitch = (math.pi / 2, math.pi / 2) + + # Set Cube as object + self.scene.object = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/Object", + init_state=RigidObjectCfg.InitialStateCfg(pos=[0.4, 0, 0.055], rot=[1, 0, 0, 0]), + spawn=UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd", + scale=(0.8, 0.8, 0.8), + rigid_props=RigidBodyPropertiesCfg( + solver_position_iteration_count=16, + solver_velocity_iteration_count=1, + max_angular_velocity=1000.0, + max_linear_velocity=1000.0, + max_depenetration_velocity=5.0, + disable_gravity=False, + ), + ), + ) + + # Listens to the required transforms + marker_cfg = FRAME_MARKER_CFG.copy() + marker_cfg.markers["frame"].scale = (0.1, 0.1, 0.1) + marker_cfg.prim_path = "/Visuals/FrameTransformer" + self.scene.ee_frame = FrameTransformerCfg( + prim_path="{ENV_REGEX_NS}/Robot/openarm_link0", + debug_vis=False, + visualizer_cfg=marker_cfg, + target_frames=[ + FrameTransformerCfg.FrameCfg( + prim_path="{ENV_REGEX_NS}/Robot/openarm_ee_tcp", + name="end_effector", + ), + ], + ) + + +@configclass +class OpenArmCubeLiftEnvCfg_PLAY(OpenArmCubeLiftEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/lift_openarm_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/lift_openarm_env_cfg.py new file mode 100644 index 00000000000..1a0943db851 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/openarm/lift_openarm_env_cfg.py @@ -0,0 +1,239 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +We modified parts of the environment—such as the target’s position and orientation, as well as certain object properties—to better suit the smaller robot. +""" + +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg, DeformableObjectCfg, RigidObjectCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg +from isaaclab.sim.spawners.from_files.from_files_cfg import GroundPlaneCfg, UsdFileCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +from ... import mdp + +## +# Scene definition +## + + +@configclass +class ObjectTableSceneCfg(InteractiveSceneCfg): + """Configuration for the lift scene with a robot and a object. + This is the abstract base implementation, the exact scene is defined in the derived classes + which need to set the target object, robot and end-effector frames + """ + + # robots: will be populated by agent env cfg + robot: ArticulationCfg = MISSING + # end-effector sensor: will be populated by agent env cfg + ee_frame: FrameTransformerCfg = MISSING + # target object: will be populated by agent env cfg + object: RigidObjectCfg | DeformableObjectCfg = MISSING + + # Table + table = AssetBaseCfg( + prim_path="{ENV_REGEX_NS}/Table", + init_state=AssetBaseCfg.InitialStateCfg(pos=[0.5, 0, 0], rot=[0.707, 0, 0, 0.707]), + spawn=UsdFileCfg(usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"), + ) + + # plane + plane = AssetBaseCfg( + prim_path="/World/GroundPlane", + init_state=AssetBaseCfg.InitialStateCfg(pos=[0, 0, -1.05]), + spawn=GroundPlaneCfg(), + ) + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0), + ) + + +## +# MDP settings +## + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + object_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, # will be set by agent env cfg + resampling_time_range=(5.0, 5.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.2, 0.4), + pos_y=(-0.2, 0.2), + pos_z=(0.15, 0.4), + roll=(0.0, 0.0), + pitch=(0.0, 0.0), + yaw=(0.0, 0.0), + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + # will be set by agent env cfg + arm_action: mdp.JointPositionActionCfg | mdp.DifferentialInverseKinematicsActionCfg = MISSING + gripper_action: mdp.BinaryJointPositionActionCfg = MISSING + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["openarm_joint.*", "openarm_finger_joint.*"])}, + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["openarm_joint.*", "openarm_finger_joint.*"])}, + ) + object_position = ObsTerm(func=mdp.object_position_in_robot_root_frame) + target_object_position = ObsTerm(func=mdp.generated_commands, params={"command_name": "object_pose"}) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_object_position = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": {"x": (-0.1, 0.1), "y": (-0.25, 0.25), "z": (0.0, 0.0)}, + "velocity_range": {}, + "asset_cfg": SceneEntityCfg("object", body_names="Object"), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + reaching_object = RewTerm(func=mdp.object_ee_distance, params={"std": 0.1}, weight=1.1) + + lifting_object = RewTerm(func=mdp.object_is_lifted, params={"minimal_height": 0.04}, weight=15.0) + + object_goal_tracking = RewTerm( + func=mdp.object_goal_distance, + params={"std": 0.3, "minimal_height": 0.04, "command_name": "object_pose"}, + weight=16.0, + ) + + object_goal_tracking_fine_grained = RewTerm( + func=mdp.object_goal_distance, + params={"std": 0.05, "minimal_height": 0.04, "command_name": "object_pose"}, + weight=5.0, + ) + + # action penalty + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1e-4) + + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-1e-4, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["openarm_joint.*", "openarm_finger_joint.*"])}, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + object_dropping = DoneTerm( + func=mdp.root_height_below_minimum, + params={"minimum_height": -0.05, "asset_cfg": SceneEntityCfg("object")}, + ) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000}, + ) + + joint_vel = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000}, + ) + + +## +# Environment configuration +## + + +@configclass +class LiftEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the lifting environment.""" + + # Scene settings + scene: ObjectTableSceneCfg = ObjectTableSceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.episode_length_s = 5.0 + # simulation settings + self.sim.dt = 0.01 # 100Hz + self.sim.render_interval = self.decimation + + self.sim.physx.bounce_threshold_velocity = 0.01 + self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 1024 * 1024 * 4 + self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 + self.sim.physx.friction_correlation_distance = 0.00625 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/__init__.py new file mode 100644 index 00000000000..829aa7fee6a --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + +## +# Joint Position Control +## + +gym.register( + id="Isaac-Reach-OpenArm-Bi-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmReachEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmReachPPORunnerCfg", + }, +) + +gym.register( + id="Isaac-Reach-OpenArm-Bi-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmReachEnvCfg_PLAY", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmReachPPORunnerCfg", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 00000000000..01a594e9687 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,83 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + clip_observations: 100.0 + clip_actions: 100.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [64, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: openarm_bi_reach + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + value_bootstrap: True + num_actors: -1 + reward_shaper: + scale_value: 1.0 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 1e-3 + lr_schedule: adaptive + schedule_type: legacy + kl_threshold: 0.01 + score_to_win: 10000 + max_epochs: 1000 + save_best_after: 200 + save_frequency: 100 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.01 + truncate_grads: True + e_clip: 0.2 + horizon_length: 24 + minibatch_size: 24576 + mini_epochs: 5 + critic_coef: 2 + clip_value: True + clip_actions: False + bounds_loss_coef: 0.0001 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..f5dbea6ff22 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,39 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class OpenArmReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 550 + save_interval = 50 + experiment_name = "openarm_bi_reach" + run_name = "" + resume = False + empirical_normalization = False + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[64, 64], + critic_hidden_dims=[64, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.001, + num_learning_epochs=8, + num_mini_batches=4, + learning_rate=1.0e-2, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/joint_pos_env_cfg.py new file mode 100644 index 00000000000..e36e3ae7fbb --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/joint_pos_env_cfg.py @@ -0,0 +1,78 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +## +# Pre-defined configs +## +from isaaclab_assets.robots.openarm import OPENARM_BI_HIGH_PD_CFG + +from isaaclab.utils import configclass + +import isaaclab_tasks.manager_based.manipulation.reach.mdp as mdp +from isaaclab_tasks.manager_based.manipulation.reach.config.openarm.bimanual.reach_openarm_bi_env_cfg import ReachEnvCfg + +## +# Environment configuration +## + + +@configclass +class OpenArmReachEnvCfg(ReachEnvCfg): + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # switch robot to OpenArm + self.scene.robot = OPENARM_BI_HIGH_PD_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # override rewards + self.rewards.left_end_effector_position_tracking.params["asset_cfg"].body_names = ["openarm_left_hand"] + self.rewards.left_end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = [ + "openarm_left_hand" + ] + self.rewards.left_end_effector_orientation_tracking.params["asset_cfg"].body_names = ["openarm_left_hand"] + + self.rewards.right_end_effector_position_tracking.params["asset_cfg"].body_names = ["openarm_right_hand"] + self.rewards.right_end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = [ + "openarm_right_hand" + ] + self.rewards.right_end_effector_orientation_tracking.params["asset_cfg"].body_names = ["openarm_right_hand"] + + # override actions + self.actions.left_arm_action = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=[ + "openarm_left_joint.*", + ], + scale=0.5, + use_default_offset=True, + ) + + self.actions.right_arm_action = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=[ + "openarm_right_joint.*", + ], + scale=0.5, + use_default_offset=True, + ) + + # override command generator body + # end-effector is along z-direction + self.commands.left_ee_pose.body_name = "openarm_left_hand" + self.commands.right_ee_pose.body_name = "openarm_right_hand" + + +@configclass +class OpenArmReachEnvCfg_PLAY(OpenArmReachEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/reach_openarm_bi_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/reach_openarm_bi_env_cfg.py new file mode 100644 index 00000000000..2375ad02b17 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/bimanual/reach_openarm_bi_env_cfg.py @@ -0,0 +1,335 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +We modified parts of the environment—such as the target’s position and orientation—to better suit the smaller robot. +""" + +import math +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import ActionTermCfg as ActionTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab_tasks.manager_based.manipulation.reach.mdp as mdp + +## +# Scene definition +## + + +@configclass +class ReachSceneCfg(InteractiveSceneCfg): + """Configuration for the scene with a robotic arm.""" + + # world + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, 0)), + ) + + # robots + robot: ArticulationCfg = MISSING + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), + ) + + +## +# MDP settings +## + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + left_ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, + resampling_time_range=(4.0, 4.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.15, 0.3), + pos_y=(0.15, 0.25), + pos_z=(0.3, 0.5), + roll=(-math.pi / 6, math.pi / 6), + pitch=(3 * math.pi / 2, 3 * math.pi / 2), + yaw=(8 * math.pi / 9, 10 * math.pi / 9), + ), + ) + + right_ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, + resampling_time_range=(4.0, 4.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.15, 0.3), + pos_y=(-0.25, -0.15), + pos_z=(0.3, 0.5), + roll=(-math.pi / 6, math.pi / 6), + pitch=(3 * math.pi / 2, 3 * math.pi / 2), + yaw=(8 * math.pi / 9, 10 * math.pi / 9), + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + left_arm_action: ActionTerm = MISSING + right_arm_action: ActionTerm = MISSING + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + left_joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_left_joint.*", + ], + ) + }, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + + right_joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_right_joint.*", + ], + ) + }, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + + left_joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_left_joint.*", + ], + ) + }, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + right_joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_right_joint.*", + ], + ) + }, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + left_pose_command = ObsTerm(func=mdp.generated_commands, params={"command_name": "left_ee_pose"}) + right_pose_command = ObsTerm(func=mdp.generated_commands, params={"command_name": "right_ee_pose"}) + left_actions = ObsTerm(func=mdp.last_action, params={"action_name": "left_arm_action"}) + right_actions = ObsTerm(func=mdp.last_action, params={"action_name": "right_arm_action"}) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_scale, + mode="reset", + params={ + "position_range": (0.5, 1.5), + "velocity_range": (0.0, 0.0), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # task terms + left_end_effector_position_tracking = RewTerm( + func=mdp.position_command_error, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "command_name": "left_ee_pose", + }, + ) + + right_end_effector_position_tracking = RewTerm( + func=mdp.position_command_error, + weight=-0.25, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "command_name": "right_ee_pose", + }, + ) + + left_end_effector_position_tracking_fine_grained = RewTerm( + func=mdp.position_command_error_tanh, + weight=0.1, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "std": 0.1, + "command_name": "left_ee_pose", + }, + ) + + right_end_effector_position_tracking_fine_grained = RewTerm( + func=mdp.position_command_error_tanh, + weight=0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "std": 0.1, + "command_name": "right_ee_pose", + }, + ) + + left_end_effector_orientation_tracking = RewTerm( + func=mdp.orientation_command_error, + weight=-0.1, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "command_name": "left_ee_pose", + }, + ) + + right_end_effector_orientation_tracking = RewTerm( + func=mdp.orientation_command_error, + weight=-0.1, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "command_name": "right_ee_pose", + }, + ) + + # action penalty + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.0001) + left_joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-0.0001, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_left_joint.*", + ], + ) + }, + ) + right_joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-0.0001, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_right_joint.*", + ], + ) + }, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "action_rate", "weight": -0.005, "num_steps": 4500}, + ) + + left_joint_vel = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "left_joint_vel", "weight": -0.001, "num_steps": 4500}, + ) + + right_joint_vel = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "right_joint_vel", "weight": -0.001, "num_steps": 4500}, + ) + + +## +# Environment configuration +## + + +@configclass +class ReachEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the reach end-effector pose tracking environment.""" + + # Scene settings + scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.sim.render_interval = self.decimation + self.episode_length_s = 24.0 + self.viewer.eye = (3.5, 3.5, 3.5) + # simulation settings + self.sim.dt = 1.0 / 60.0 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/__init__.py new file mode 100644 index 00000000000..149847c9828 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/__init__.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. +## + +## +# Joint Position Control +## + +gym.register( + id="Isaac-Reach-OpenArm-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmReachEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmReachPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Reach-OpenArm-Play-v0", + entry_point="isaaclab.envs:ManagerBasedRLEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:OpenArmReachEnvCfg_PLAY", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:OpenArmReachPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/__init__.py new file mode 100644 index 00000000000..2e924fbf1b1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 00000000000..749310c3e02 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,84 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +params: + seed: 42 + + # environment wrapper clipping + env: + clip_observations: 100.0 + clip_actions: 100.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [64, 64] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: openarm_reach + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + value_bootstrap: True + num_actors: -1 + reward_shaper: + scale_value: 1.0 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 1e-3 + lr_schedule: adaptive + schedule_type: legacy + kl_threshold: 0.01 + score_to_win: 10000 + max_epochs: 1000 + save_best_after: 200 + save_frequency: 100 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.01 + truncate_grads: True + e_clip: 0.2 + horizon_length: 24 + minibatch_size: 24576 + mini_epochs: 5 + critic_coef: 2 + clip_value: True + clip_actions: False + bounds_loss_coef: 0.0001 diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 00000000000..356642892a1 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class OpenArmReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 24 + max_iterations = 1000 + save_interval = 50 + experiment_name = "openarm_reach" + run_name = "" + resume = False + empirical_normalization = True + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_hidden_dims=[64, 64], + critic_hidden_dims=[64, 64], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.001, + num_learning_epochs=8, + num_mini_batches=4, + learning_rate=1.0e-2, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/skrl_ppo_cfg.yaml new file mode 100644 index 00000000000..5cebf2eba2d --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,85 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: STATES + layers: [64, 64] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: STATES + layers: [64, 64] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 24 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 1.0e-03 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.01 + value_loss_scale: 1.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "openarm_reach" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 24000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/joint_pos_env_cfg.py new file mode 100644 index 00000000000..8e4564077f9 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/joint_pos_env_cfg.py @@ -0,0 +1,75 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +## +# Pre-defined configs +## +from isaaclab_assets.robots.openarm import OPENARM_UNI_CFG + +from isaaclab.assets.articulation import ArticulationCfg +from isaaclab.utils import configclass + +import isaaclab_tasks.manager_based.manipulation.reach.mdp as mdp +from isaaclab_tasks.manager_based.manipulation.reach.config.openarm.unimanual.reach_openarm_uni_env_cfg import ReachEnvCfg + +## +# Environment configuration +## + + +@configclass +class OpenArmReachEnvCfg(ReachEnvCfg): + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # switch robot to OpenArm + self.scene.robot = OPENARM_UNI_CFG.replace( + prim_path="{ENV_REGEX_NS}/Robot", + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + "openarm_joint1": 1.57, + "openarm_joint2": 0.0, + "openarm_joint3": -1.57, + "openarm_joint4": 1.57, + "openarm_joint5": 0.0, + "openarm_joint6": 0.0, + "openarm_joint7": 0.0, + "openarm_finger_joint.*": 0.0, + }, # Close the gripper + ), + ) + + # override rewards + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["openarm_hand"] + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = ["openarm_hand"] + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["openarm_hand"] + + # override actions + self.actions.arm_action = mdp.JointPositionActionCfg( + asset_name="robot", + joint_names=[ + "openarm_joint.*", + ], + scale=0.5, + use_default_offset=True, + ) + + # override command generator body + # end-effector is along z-direction + self.commands.ee_pose.body_name = "openarm_hand" + + +@configclass +class OpenArmReachEnvCfg_PLAY(OpenArmReachEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/reach_openarm_uni_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/reach_openarm_uni_env_cfg.py new file mode 100644 index 00000000000..732fcbe6e7b --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/openarm/unimanual/reach_openarm_uni_env_cfg.py @@ -0,0 +1,254 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +We modified parts of the environment—such as the target’s position and orientation—to better suit the smaller robot. +""" + +import math +from dataclasses import MISSING + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import ActionTermCfg as ActionTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.managers import ObservationTermCfg as ObsTerm +from isaaclab.managers import RewardTermCfg as RewTerm +from isaaclab.managers import SceneEntityCfg +from isaaclab.managers import TerminationTermCfg as DoneTerm +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR +from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise + +import isaaclab_tasks.manager_based.manipulation.reach.mdp as mdp + +## +# Scene definition +## + + +@configclass +class ReachSceneCfg(InteractiveSceneCfg): + """Configuration for the scene with a robotic arm.""" + + # world + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)), + ) + + table = AssetBaseCfg( + prim_path="{ENV_REGEX_NS}/Table", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd", + ), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.55, 0.0, 0.0), rot=(0.70711, 0.0, 0.0, 0.70711)), + ) + + # robots + robot: ArticulationCfg = MISSING + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), + ) + + +## +# MDP settings +## + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, + resampling_time_range=(4.0, 4.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.25, 0.35), + pos_y=(-0.2, 0.2), + pos_z=(0.3, 0.4), + roll=(-math.pi / 6, math.pi / 6), + pitch=(math.pi / 2, math.pi / 2), + yaw=(-math.pi / 9, math.pi / 9), + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: ActionTerm = MISSING + gripper_action: ActionTerm | None = None + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos = ObsTerm( + func=mdp.joint_pos_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_joint.*" + ], + ) + }, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + joint_vel = ObsTerm( + func=mdp.joint_vel_rel, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_joint.*" + ], + ) + }, + noise=Unoise(n_min=-0.01, n_max=0.01), + ) + pose_command = ObsTerm(func=mdp.generated_commands, params={"command_name": "ee_pose"}) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_scale, + mode="reset", + params={ + "position_range": (0.5, 1.5), + "velocity_range": (0.0, 0.0), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # task terms + end_effector_position_tracking = RewTerm( + func=mdp.position_command_error, + weight=-0.2, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "command_name": "ee_pose", + }, + ) + end_effector_position_tracking_fine_grained = RewTerm( + func=mdp.position_command_error_tanh, + weight=0.1, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "std": 0.1, + "command_name": "ee_pose", + }, + ) + end_effector_orientation_tracking = RewTerm( + func=mdp.orientation_command_error, + weight=-0.1, + params={ + "asset_cfg": SceneEntityCfg("robot", body_names=MISSING), + "command_name": "ee_pose", + }, + ) + + # action penalty + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.0001) + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-0.0001, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + "openarm_joint.*" + ], + ) + }, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "action_rate", "weight": -0.005, "num_steps": 4500}, + ) + + joint_vel = CurrTerm( + func=mdp.modify_reward_weight, + params={"term_name": "joint_vel", "weight": -0.001, "num_steps": 4500}, + ) + + +## +# Environment configuration +## + + +@configclass +class ReachEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the reach end-effector pose tracking environment.""" + + # Scene settings + scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.sim.render_interval = self.decimation + self.episode_length_s = 12.0 + self.viewer.eye = (3.5, 3.5, 3.5) + # simulation settings + self.sim.dt = 1.0 / 60.0