Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Peg insertion side batched dense reward, simplify utilities code and refactoring #259

Merged
merged 9 commits (branch names lost in page extraction) on
Apr 3, 2024
2 changes: 1 addition & 1 deletion docs/source/user_guide/tutorials/custom_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ All tasks are defined by their own class and must inherit `BaseEnv`, similar to

```python
import sapien
from mani_skill.utils import sapien_utils
from mani_skill.utils import sapien_utils, common
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.utils.registration import register_env

Expand Down
3 changes: 3 additions & 0 deletions examples/baselines/ppo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ python ppo.py --env_id="StackCube-v1" \
python ppo.py --env_id="PickSingleYCB-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=25_000_000
python ppo.py --env_id="PegInsertionSide-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=150_000_000 --num-steps=100 --num-eval-steps=100
python ppo.py --env_id="TwoRobotStackCube-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=40_000_000 --num-steps=100 --num-eval-steps=100
Expand Down
13 changes: 6 additions & 7 deletions mani_skill/agents/controllers/base_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
get_active_joint_indices,
get_joints_by_names,
)
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import clip_and_scale_action, normalize_action_space
from mani_skill.utils import common, gym_utils
from mani_skill.utils.structs import Articulation, ArticulationJoint
from mani_skill.utils.structs.types import Array

Expand Down Expand Up @@ -158,18 +157,18 @@ def set_state(self, state: dict):
# -------------------------------------------------------------------------- #
def _clip_and_scale_action_space(self):
self._original_single_action_space = self.single_action_space
self.single_action_space = normalize_action_space(
self.single_action_space = gym_utils.normalize_action_space(
self._original_single_action_space
)
low, high = (
self._original_single_action_space.low,
self._original_single_action_space.high,
)
self.action_space_low = sapien_utils.to_tensor(low)
self.action_space_high = sapien_utils.to_tensor(high)
self.action_space_low = common.to_tensor(low)
self.action_space_high = common.to_tensor(high)

def _clip_and_scale_action(self, action):
return clip_and_scale_action(
return gym_utils.clip_and_scale_action(
action, self.action_space_low, self.action_space_high
)

Expand Down Expand Up @@ -241,7 +240,7 @@ def reset(self):

def set_action(self, action: Dict[str, np.ndarray]):
for uid, controller in self.controllers.items():
controller.set_action(sapien_utils.to_tensor(action[uid]))
controller.set_action(common.to_tensor(action[uid]))

def before_simulation_step(self):
if physx.is_gpu_enabled():
Expand Down
11 changes: 4 additions & 7 deletions mani_skill/agents/controllers/pd_ee_pose.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
from gymnasium import spaces

from mani_skill import logger
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import clip_and_scale_action
from mani_skill.utils import common, gym_utils, sapien_utils
from mani_skill.utils.geometry.rotation_conversions import (
euler_angles_to_matrix,
matrix_to_quaternion,
Expand Down Expand Up @@ -128,14 +127,12 @@ def compute_ik(
result, success, error = self.pmodel.compute_inverse_kinematics(
self.ee_link_idx,
target_pose.sp,
initial_qpos=sapien_utils.to_numpy(
self.articulation.get_qpos()
).squeeze(0),
initial_qpos=common.to_numpy(self.articulation.get_qpos()).squeeze(0),
active_qmask=self.qmask,
max_iterations=max_iterations,
)
if success:
return sapien_utils.to_tensor([result[self.active_joint_indices]])
return common.to_tensor([result[self.active_joint_indices]])
else:
return None

Expand Down Expand Up @@ -230,7 +227,7 @@ def _initialize_action_space(self):

def _clip_and_scale_action(self, action):
# NOTE(xiqiang): rotation should be clipped by norm.
pos_action = clip_and_scale_action(
pos_action = gym_utils.clip_and_scale_action(
action[:, :3], self.action_space_low[:3], self.action_space_high[:3]
)
rot_action = action[:, 3:]
Expand Down
4 changes: 2 additions & 2 deletions mani_skill/agents/controllers/pd_joint_pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
from gymnasium import spaces

from mani_skill.utils import sapien_utils
from mani_skill.utils import common
from mani_skill.utils.structs.types import Array, DriveMode

from .base_controller import BaseController, ControllerConfig
Expand Down Expand Up @@ -72,7 +72,7 @@ def set_drive_targets(self, targets):

def set_action(self, action: Array):
action = self._preprocess_action(action)
action = sapien_utils.to_tensor(action)
action = common.to_tensor(action)
self._step = 0
self._start_qpos = self.qpos
if self.config.use_delta:
Expand Down
11 changes: 5 additions & 6 deletions mani_skill/agents/robots/fetch/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import compute_angle_between, np_compute_angle_between
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs import Pose
from mani_skill.utils.structs.actor import Actor
from mani_skill.utils.structs.link import Link
Expand Down Expand Up @@ -388,8 +387,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):
# direction to open the gripper
ldirection = -self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
rdirection = self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
langle = compute_angle_between(ldirection, contacts[0])
rangle = compute_angle_between(rdirection, contacts[1])
langle = common.compute_angle_between(ldirection, contacts[0])
rangle = common.compute_angle_between(rdirection, contacts[1])
lflag = torch.logical_and(
lforce >= min_force, torch.rad2deg(langle) <= max_angle
)
Expand Down Expand Up @@ -438,8 +437,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):

# TODO Convert this to batched code
# angle between impulse and open direction
langle = np_compute_angle_between(ldirection[0], limpulse)
rangle = np_compute_angle_between(rdirection[0], rimpulse)
langle = common.np_compute_angle_between(ldirection[0], limpulse)
rangle = common.np_compute_angle_between(rdirection[0], rimpulse)

lflag = (
np.linalg.norm(limpulse) >= min_impulse
Expand Down
11 changes: 5 additions & 6 deletions mani_skill/agents/robots/panda/panda.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import compute_angle_between, np_compute_angle_between
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs.actor import Actor


Expand Down Expand Up @@ -240,8 +239,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):
# direction to open the gripper
ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
langle = compute_angle_between(ldirection, contacts[0])
rangle = compute_angle_between(rdirection, contacts[1])
langle = common.compute_angle_between(ldirection, contacts[0])
rangle = common.compute_angle_between(rdirection, contacts[1])
lflag = torch.logical_and(
lforce >= min_force, torch.rad2deg(langle) <= max_angle
)
Expand Down Expand Up @@ -290,8 +289,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):

# TODO Convert this to batched code
# angle between impulse and open direction
langle = np_compute_angle_between(ldirection[0], limpulse)
rangle = np_compute_angle_between(rdirection[0], rimpulse)
langle = common.np_compute_angle_between(ldirection[0], limpulse)
rangle = common.np_compute_angle_between(rdirection[0], rimpulse)

lflag = (
np.linalg.norm(limpulse) >= min_impulse
Expand Down
11 changes: 5 additions & 6 deletions mani_skill/agents/robots/xmate3/xmate3.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import compute_angle_between, np_compute_angle_between
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs import Pose
from mani_skill.utils.structs.actor import Actor

Expand Down Expand Up @@ -204,8 +203,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):
# direction to open the gripper
ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
langle = compute_angle_between(ldirection, contacts[0])
rangle = compute_angle_between(rdirection, contacts[1])
langle = common.compute_angle_between(ldirection, contacts[0])
rangle = common.compute_angle_between(rdirection, contacts[1])
lflag = torch.logical_and(
lforce >= min_force, torch.rad2deg(langle) <= max_angle
)
Expand Down Expand Up @@ -254,8 +253,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):

# TODO Convert this to batched code
# angle between impulse and open direction
langle = np_compute_angle_between(ldirection[0], limpulse)
rangle = np_compute_angle_between(rdirection[0], rimpulse)
langle = common.np_compute_angle_between(ldirection[0], limpulse)
rangle = common.np_compute_angle_between(rdirection[0], rimpulse)

lflag = (
np.linalg.norm(limpulse) >= min_impulse
Expand Down
4 changes: 2 additions & 2 deletions mani_skill/agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
import sapien.physx as physx
from gymnasium import spaces

from mani_skill.utils import sapien_utils
from mani_skill.utils import common
from mani_skill.utils.structs.articulation import Articulation


def get_active_joint_indices(articulation: Articulation, joint_names: Sequence[str]):
"""get the indices of the provided joint names from the Articulation's list of active joints"""
all_joint_names = [x.name for x in articulation.get_active_joints()]
joint_indices = [all_joint_names.index(x) for x in joint_names]
return sapien_utils.to_tensor(joint_indices).int()
return common.to_tensor(joint_indices).int()


def get_joints_by_names(articulation: Articulation, joint_names: Sequence[str]):
Expand Down
2 changes: 1 addition & 1 deletion mani_skill/envs/minimal_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from mani_skill.agents.robots import Fetch, Panda
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.types import SimConfig

Expand Down
36 changes: 18 additions & 18 deletions mani_skill/envs/sapien_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
update_camera_cfgs_from_dict,
)
from mani_skill.sensors.depth_camera import StereoDepthCamera, StereoDepthCameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils import common, gym_utils, sapien_utils
from mani_skill.utils.structs import Actor, Articulation
from mani_skill.utils.structs.types import Array, SimConfig
from mani_skill.utils.visualization.misc import observations_to_images, tile_images
Expand Down Expand Up @@ -97,7 +97,7 @@ class BaseEnv(gym.Env):
"""Override this to enforce which robots or tuples of robots together are supported in the task. During env creation,
setting robot_uids auto loads all desired robots into the scene, but not all tasks are designed to support some robot setups"""
SUPPORTED_OBS_MODES = ("state", "state_dict", "none", "sensor_data", "rgb", "rgbd", "pointcloud")
SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "sparse")
SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "sparse", "none")
SUPPORTED_RENDER_MODES = ("human", "rgb_array", "sensors")
"""The supported render modes. Human opens up a GUI viewer. rgb_array returns an rgb array showing the current environment state.
sensors returns an rgb array but only showing all data collected by sensors as images put together"""
Expand Down Expand Up @@ -254,10 +254,10 @@ def __init__(
)
obs, _ = self.reset(seed=2022, options=dict(reconfigure=True))
if physx.is_gpu_enabled():
obs = sapien_utils.to_numpy(obs)
obs = common.to_numpy(obs)
self._init_raw_obs = obs.copy()
"""the raw observation returned by the env.reset. Useful for future observation wrappers to use to auto generate observation spaces"""
self._init_raw_state = sapien_utils.to_numpy(self.get_state_dict())
self._init_raw_state = common.to_numpy(self.get_state_dict())
"""the initial raw state returned by env.get_state. Useful for reconstructing state dictionaries from flattened state vectors"""

self.action_space = self.agent.action_space
Expand All @@ -278,9 +278,9 @@ def _update_obs_space(self, obs: Any):
@cached_property
def single_observation_space(self):
if self.num_envs > 1:
return common.convert_observation_to_space(self._init_raw_obs, unbatched=True)
return gym_utils.convert_observation_to_space(self._init_raw_obs, unbatched=True)
else:
return common.convert_observation_to_space(self._init_raw_obs)
return gym_utils.convert_observation_to_space(self._init_raw_obs)

@cached_property
def observation_space(self):
Expand Down Expand Up @@ -704,7 +704,7 @@ def reset(self, seed=None, options=None):
self._scene._gpu_fetch_all()
obs = self.get_obs()
if not physx.is_gpu_enabled():
obs = sapien_utils.to_numpy(sapien_utils.unbatch(obs))
obs = common.to_numpy(common.unbatch(obs))
self._elapsed_steps = 0
return obs, dict(reconfigure=reconfigure)

Expand Down Expand Up @@ -783,12 +783,12 @@ def step(self, action: Union[None, np.ndarray, torch.Tensor, Dict]):
)
else:
# On CPU sim mode, we always return numpy / python primitives without any batching.
return sapien_utils.unbatch(
sapien_utils.to_numpy(obs),
sapien_utils.to_numpy(reward),
sapien_utils.to_numpy(terminated),
return common.unbatch(
common.to_numpy(obs),
common.to_numpy(reward),
common.to_numpy(terminated),
False,
sapien_utils.to_numpy(info),
common.to_numpy(info),
)

def step_action(
Expand All @@ -799,7 +799,7 @@ def step_action(
if action is None: # simulation without action
pass
elif isinstance(action, np.ndarray) or isinstance(action, torch.Tensor):
action = sapien_utils.to_tensor(action)
action = common.to_tensor(action)
if action.shape == self._orig_single_action_space.shape:
action_is_unbatched = True
set_action = True
Expand All @@ -808,13 +808,13 @@ def step_action(
if action["control_mode"] != self.agent.control_mode:
self.agent.set_control_mode(action["control_mode"])
self.agent.controller.reset()
action = sapien_utils.to_tensor(action["action"])
action = common.to_tensor(action["action"])
else:
assert isinstance(
self.agent, MultiAgent
), "Received a dictionary for an action but there are not multiple robots in the environment"
# assume this is a multi-agent action
action = sapien_utils.to_tensor(action)
action = common.to_tensor(action)
for k, a in action.items():
if a.shape == self._orig_single_action_space[k].shape:
action_is_unbatched = True
Expand All @@ -825,7 +825,7 @@ def step_action(

if set_action:
if self.num_envs == 1 and action_is_unbatched:
action = sapien_utils.batch(action)
action = common.batch(action)
self.agent.set_action(action)
if physx.is_gpu_enabled():
self._scene.px.gpu_apply_articulation_target_position()
Expand Down Expand Up @@ -1109,12 +1109,12 @@ def render(self):
elif self.render_mode == "rgb_array":
res = self.render_rgb_array()
if self.num_envs == 1:
res = sapien_utils.to_numpy(sapien_utils.unbatch(res))
res = common.to_numpy(common.unbatch(res))
return res
elif self.render_mode == "sensors":
res = self.render_sensors()
if self.num_envs == 1:
res = sapien_utils.to_numpy(sapien_utils.unbatch(res))
res = common.to_numpy(common.unbatch(res))
return res
else:
raise NotImplementedError(f"Unsupported render mode {self.render_mode}.")
Expand Down
Loading