Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Peg insertion side batched dense reward, simplify utilities code and refactoring #259

Merged
merged 9 commits (branch names lost in page extraction) on
Apr 3, 2024
2 changes: 1 addition & 1 deletion docs/source/user_guide/tutorials/custom_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ All tasks are defined by their own class and must inherit `BaseEnv`, similar to

```python
import sapien
from mani_skill.utils import sapien_utils
from mani_skill.utils import sapien_utils, common
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.utils.registration import register_env

Expand Down
3 changes: 3 additions & 0 deletions examples/baselines/ppo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ python ppo.py --env_id="StackCube-v1" \
python ppo.py --env_id="PickSingleYCB-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=25_000_000
python ppo.py --env_id="PegInsertionSide-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=150_000_000 --num-steps=100 --num-eval-steps=100
python ppo.py --env_id="TwoRobotStackCube-v1" \
--num_envs=1024 --update_epochs=8 --num_minibatches=32 \
--total_timesteps=40_000_000 --num-steps=100 --num-eval-steps=100
Expand Down
13 changes: 6 additions & 7 deletions mani_skill/agents/controllers/base_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
get_active_joint_indices,
get_joints_by_names,
)
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import clip_and_scale_action, normalize_action_space
from mani_skill.utils import common, gym_utils
from mani_skill.utils.structs import Articulation, ArticulationJoint
from mani_skill.utils.structs.types import Array

Expand Down Expand Up @@ -158,18 +157,18 @@ def set_state(self, state: dict):
# -------------------------------------------------------------------------- #
def _clip_and_scale_action_space(self):
self._original_single_action_space = self.single_action_space
self.single_action_space = normalize_action_space(
self.single_action_space = gym_utils.normalize_action_space(
self._original_single_action_space
)
low, high = (
self._original_single_action_space.low,
self._original_single_action_space.high,
)
self.action_space_low = sapien_utils.to_tensor(low)
self.action_space_high = sapien_utils.to_tensor(high)
self.action_space_low = common.to_tensor(low)
self.action_space_high = common.to_tensor(high)

def _clip_and_scale_action(self, action):
return clip_and_scale_action(
return gym_utils.clip_and_scale_action(
action, self.action_space_low, self.action_space_high
)

Expand Down Expand Up @@ -241,7 +240,7 @@ def reset(self):

def set_action(self, action: Dict[str, np.ndarray]):
for uid, controller in self.controllers.items():
controller.set_action(sapien_utils.to_tensor(action[uid]))
controller.set_action(common.to_tensor(action[uid]))

def before_simulation_step(self):
if physx.is_gpu_enabled():
Expand Down
11 changes: 4 additions & 7 deletions mani_skill/agents/controllers/pd_ee_pose.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
from gymnasium import spaces

from mani_skill import logger
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import clip_and_scale_action
from mani_skill.utils import common, gym_utils, sapien_utils
from mani_skill.utils.geometry.rotation_conversions import (
euler_angles_to_matrix,
matrix_to_quaternion,
Expand Down Expand Up @@ -128,14 +127,12 @@ def compute_ik(
result, success, error = self.pmodel.compute_inverse_kinematics(
self.ee_link_idx,
target_pose.sp,
initial_qpos=sapien_utils.to_numpy(
self.articulation.get_qpos()
).squeeze(0),
initial_qpos=common.to_numpy(self.articulation.get_qpos()).squeeze(0),
active_qmask=self.qmask,
max_iterations=max_iterations,
)
if success:
return sapien_utils.to_tensor([result[self.active_joint_indices]])
return common.to_tensor([result[self.active_joint_indices]])
else:
return None

Expand Down Expand Up @@ -230,7 +227,7 @@ def _initialize_action_space(self):

def _clip_and_scale_action(self, action):
# NOTE(xiqiang): rotation should be clipped by norm.
pos_action = clip_and_scale_action(
pos_action = gym_utils.clip_and_scale_action(
action[:, :3], self.action_space_low[:3], self.action_space_high[:3]
)
rot_action = action[:, 3:]
Expand Down
4 changes: 2 additions & 2 deletions mani_skill/agents/controllers/pd_joint_pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch
from gymnasium import spaces

from mani_skill.utils import sapien_utils
from mani_skill.utils import common
from mani_skill.utils.structs.types import Array, DriveMode

from .base_controller import BaseController, ControllerConfig
Expand Down Expand Up @@ -72,7 +72,7 @@ def set_drive_targets(self, targets):

def set_action(self, action: Array):
action = self._preprocess_action(action)
action = sapien_utils.to_tensor(action)
action = common.to_tensor(action)
self._step = 0
self._start_qpos = self.qpos
if self.config.use_delta:
Expand Down
11 changes: 5 additions & 6 deletions mani_skill/agents/robots/fetch/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import compute_angle_between, np_compute_angle_between
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs import Pose
from mani_skill.utils.structs.actor import Actor
from mani_skill.utils.structs.link import Link
Expand Down Expand Up @@ -388,8 +387,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):
# direction to open the gripper
ldirection = -self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
rdirection = self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
langle = compute_angle_between(ldirection, contacts[0])
rangle = compute_angle_between(rdirection, contacts[1])
langle = common.compute_angle_between(ldirection, contacts[0])
rangle = common.compute_angle_between(rdirection, contacts[1])
lflag = torch.logical_and(
lforce >= min_force, torch.rad2deg(langle) <= max_angle
)
Expand Down Expand Up @@ -438,8 +437,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):

# TODO Convert this to batched code
# angle between impulse and open direction
langle = np_compute_angle_between(ldirection[0], limpulse)
rangle = np_compute_angle_between(rdirection[0], rimpulse)
langle = common.np_compute_angle_between(ldirection[0], limpulse)
rangle = common.np_compute_angle_between(rdirection[0], rimpulse)

lflag = (
np.linalg.norm(limpulse) >= min_impulse
Expand Down
11 changes: 5 additions & 6 deletions mani_skill/agents/robots/panda/panda.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import compute_angle_between, np_compute_angle_between
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs.actor import Actor


Expand Down Expand Up @@ -240,8 +239,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):
# direction to open the gripper
ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
langle = compute_angle_between(ldirection, contacts[0])
rangle = compute_angle_between(rdirection, contacts[1])
langle = common.compute_angle_between(ldirection, contacts[0])
rangle = common.compute_angle_between(rdirection, contacts[1])
lflag = torch.logical_and(
lforce >= min_force, torch.rad2deg(langle) <= max_angle
)
Expand Down Expand Up @@ -290,8 +289,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):

# TODO Convert this to batched code
# angle between impulse and open direction
langle = np_compute_angle_between(ldirection[0], limpulse)
rangle = np_compute_angle_between(rdirection[0], rimpulse)
langle = common.np_compute_angle_between(ldirection[0], limpulse)
rangle = common.np_compute_angle_between(rdirection[0], rimpulse)

lflag = (
np.linalg.norm(limpulse) >= min_impulse
Expand Down
11 changes: 5 additions & 6 deletions mani_skill/agents/robots/xmate3/xmate3.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from mani_skill.agents.controllers import *
from mani_skill.agents.registration import register_agent
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.common import compute_angle_between, np_compute_angle_between
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.structs import Pose
from mani_skill.utils.structs.actor import Actor

Expand Down Expand Up @@ -204,8 +203,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):
# direction to open the gripper
ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
langle = compute_angle_between(ldirection, contacts[0])
rangle = compute_angle_between(rdirection, contacts[1])
langle = common.compute_angle_between(ldirection, contacts[0])
rangle = common.compute_angle_between(rdirection, contacts[1])
lflag = torch.logical_and(
lforce >= min_force, torch.rad2deg(langle) <= max_angle
)
Expand Down Expand Up @@ -254,8 +253,8 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85):

# TODO Convert this to batched code
# angle between impulse and open direction
langle = np_compute_angle_between(ldirection[0], limpulse)
rangle = np_compute_angle_between(rdirection[0], rimpulse)
langle = common.np_compute_angle_between(ldirection[0], limpulse)
rangle = common.np_compute_angle_between(rdirection[0], rimpulse)

lflag = (
np.linalg.norm(limpulse) >= min_impulse
Expand Down
4 changes: 2 additions & 2 deletions mani_skill/agents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
import sapien.physx as physx
from gymnasium import spaces

from mani_skill.utils import sapien_utils
from mani_skill.utils import common
from mani_skill.utils.structs.articulation import Articulation


def get_active_joint_indices(articulation: Articulation, joint_names: Sequence[str]):
"""get the indices of the provided joint names from the Articulation's list of active joints"""
all_joint_names = [x.name for x in articulation.get_active_joints()]
joint_indices = [all_joint_names.index(x) for x in joint_names]
return sapien_utils.to_tensor(joint_indices).int()
return common.to_tensor(joint_indices).int()


def get_joints_by_names(articulation: Articulation, joint_names: Sequence[str]):
Expand Down
2 changes: 1 addition & 1 deletion mani_skill/envs/minimal_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from mani_skill.agents.robots import Fetch, Panda
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.registration import register_env
from mani_skill.utils.structs.types import SimConfig

Expand Down
36 changes: 18 additions & 18 deletions mani_skill/envs/sapien_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
update_camera_cfgs_from_dict,
)
from mani_skill.sensors.depth_camera import StereoDepthCamera, StereoDepthCameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils import common, gym_utils, sapien_utils
from mani_skill.utils.structs import Actor, Articulation
from mani_skill.utils.structs.types import Array, SimConfig
from mani_skill.utils.visualization.misc import observations_to_images, tile_images
Expand Down Expand Up @@ -97,7 +97,7 @@ class BaseEnv(gym.Env):
"""Override this to enforce which robots or tuples of robots together are supported in the task. During env creation,
setting robot_uids auto loads all desired robots into the scene, but not all tasks are designed to support some robot setups"""
SUPPORTED_OBS_MODES = ("state", "state_dict", "none", "sensor_data", "rgb", "rgbd", "pointcloud")
SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "sparse")
SUPPORTED_REWARD_MODES = ("normalized_dense", "dense", "sparse", "none")
SUPPORTED_RENDER_MODES = ("human", "rgb_array", "sensors")
"""The supported render modes. Human opens up a GUI viewer. rgb_array returns an rgb array showing the current environment state.
sensors returns an rgb array but only showing all data collected by sensors as images put together"""
Expand Down Expand Up @@ -254,10 +254,10 @@ def __init__(
)
obs, _ = self.reset(seed=2022, options=dict(reconfigure=True))
if physx.is_gpu_enabled():
obs = sapien_utils.to_numpy(obs)
obs = common.to_numpy(obs)
self._init_raw_obs = obs.copy()
"""the raw observation returned by the env.reset. Useful for future observation wrappers to use to auto generate observation spaces"""
self._init_raw_state = sapien_utils.to_numpy(self.get_state_dict())
self._init_raw_state = common.to_numpy(self.get_state_dict())
"""the initial raw state returned by env.get_state. Useful for reconstructing state dictionaries from flattened state vectors"""

self.action_space = self.agent.action_space
Expand All @@ -278,9 +278,9 @@ def _update_obs_space(self, obs: Any):
@cached_property
def single_observation_space(self):
if self.num_envs > 1:
return common.convert_observation_to_space(self._init_raw_obs, unbatched=True)
return gym_utils.convert_observation_to_space(self._init_raw_obs, unbatched=True)
else:
return common.convert_observation_to_space(self._init_raw_obs)
return gym_utils.convert_observation_to_space(self._init_raw_obs)

@cached_property
def observation_space(self):
Expand Down Expand Up @@ -704,7 +704,7 @@ def reset(self, seed=None, options=None):
self._scene._gpu_fetch_all()
obs = self.get_obs()
if not physx.is_gpu_enabled():
obs = sapien_utils.to_numpy(sapien_utils.unbatch(obs))
obs = common.to_numpy(common.unbatch(obs))
self._elapsed_steps = 0
return obs, dict(reconfigure=reconfigure)

Expand Down Expand Up @@ -783,12 +783,12 @@ def step(self, action: Union[None, np.ndarray, torch.Tensor, Dict]):
)
else:
# On CPU sim mode, we always return numpy / python primitives without any batching.
return sapien_utils.unbatch(
sapien_utils.to_numpy(obs),
sapien_utils.to_numpy(reward),
sapien_utils.to_numpy(terminated),
return common.unbatch(
common.to_numpy(obs),
common.to_numpy(reward),
common.to_numpy(terminated),
False,
sapien_utils.to_numpy(info),
common.to_numpy(info),
)

def step_action(
Expand All @@ -799,7 +799,7 @@ def step_action(
if action is None: # simulation without action
pass
elif isinstance(action, np.ndarray) or isinstance(action, torch.Tensor):
action = sapien_utils.to_tensor(action)
action = common.to_tensor(action)
if action.shape == self._orig_single_action_space.shape:
action_is_unbatched = True
set_action = True
Expand All @@ -808,13 +808,13 @@ def step_action(
if action["control_mode"] != self.agent.control_mode:
self.agent.set_control_mode(action["control_mode"])
self.agent.controller.reset()
action = sapien_utils.to_tensor(action["action"])
action = common.to_tensor(action["action"])
else:
assert isinstance(
self.agent, MultiAgent
), "Received a dictionary for an action but there are not multiple robots in the environment"
# assume this is a multi-agent action
action = sapien_utils.to_tensor(action)
action = common.to_tensor(action)
for k, a in action.items():
if a.shape == self._orig_single_action_space[k].shape:
action_is_unbatched = True
Expand All @@ -825,7 +825,7 @@ def step_action(

if set_action:
if self.num_envs == 1 and action_is_unbatched:
action = sapien_utils.batch(action)
action = common.batch(action)
self.agent.set_action(action)
if physx.is_gpu_enabled():
self._scene.px.gpu_apply_articulation_target_position()
Expand Down Expand Up @@ -1109,12 +1109,12 @@ def render(self):
elif self.render_mode == "rgb_array":
res = self.render_rgb_array()
if self.num_envs == 1:
res = sapien_utils.to_numpy(sapien_utils.unbatch(res))
res = common.to_numpy(common.unbatch(res))
return res
elif self.render_mode == "sensors":
res = self.render_sensors()
if self.num_envs == 1:
res = sapien_utils.to_numpy(sapien_utils.unbatch(res))
res = common.to_numpy(common.unbatch(res))
return res
else:
raise NotImplementedError(f"Unsupported render mode {self.render_mode}.")
Expand Down
Loading