From 7cf8c3c7faf500bd9a9d1022e632697876bdee96 Mon Sep 17 00:00:00 2001 From: Xander Date: Fri, 12 Jul 2024 21:19:22 -0700 Subject: [PATCH 01/11] hopper added --- .../envs/tasks/control/assets/hopper.xml | 70 ++++++ mani_skill/envs/tasks/control/hopper.py | 212 ++++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 mani_skill/envs/tasks/control/assets/hopper.xml create mode 100644 mani_skill/envs/tasks/control/hopper.py diff --git a/mani_skill/envs/tasks/control/assets/hopper.xml b/mani_skill/envs/tasks/control/assets/hopper.xml new file mode 100644 index 000000000..c4e860beb --- /dev/null +++ b/mani_skill/envs/tasks/control/assets/hopper.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/mani_skill/envs/tasks/control/hopper.py b/mani_skill/envs/tasks/control/hopper.py new file mode 100644 index 000000000..ad69c0abf --- /dev/null +++ b/mani_skill/envs/tasks/control/hopper.py @@ -0,0 +1,212 @@ +"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/hopper.py""" +import os +from typing import Any, Dict, Union + +import numpy as np +import torch + +from mani_skill.agents.base_agent import BaseAgent +from mani_skill.agents.controllers import * +from mani_skill.envs.sapien_env import BaseEnv +from mani_skill.envs.utils import randomization, rewards +from mani_skill.sensors.camera import CameraConfig +from mani_skill.utils import common, sapien_utils +from mani_skill.utils.registration import register_env +from mani_skill.utils.structs.pose import Pose +from mani_skill.utils.structs.types import ( + Array, + GPUMemoryConfig, + SceneConfig, + SimConfig, +) + +# fix new imports later +from mani_skill.utils.building.ground import build_ground +from transforms3d.euler import euler2quat + +MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}" + +# params from dm_control +# Minimal height of torso over foot above which stand reward is 1. +_STAND_HEIGHT = 0.6 + +# Hopping speed above which hop reward is 1. 
+_HOP_SPEED = 2 + +class HopperRobot(BaseAgent): + uid = "hopper" + mjcf_path = MJCF_FILE + + @property + def _sensor_configs(self): + return [ + # replicated from xml file + CameraConfig( + uid="cam0", + pose=Pose.create_from_pq([0, -2.8, 0], euler2quat(0,0,np.pi/2)), + width=512, + height=512, + fov=70*(np.pi/180), + near=0.01, + far=100, + entity_uid="torso_dummy_1", + ), + # CameraConfig( + # uid="back", + # pose=Pose.create_from_pq([-2, -0.2, -1.2], [1, 0, 0, 0]), + # width=512, + # height=512, + # fov=np.pi/2, + # near=0.01, + # far=100, + # entity_uid="torso_dummy_0", + # ), + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @property + def _controller_configs(self): + # NOTE joints in [rootx,rooty,rooz] are for planar tracking, not control joints + pd_joint_delta_pos = PDJointPosControllerConfig( + [j.name for j in self.robot.active_joints if 'root' not in j.name], + -1, + 1, + damping=5, + stiffness=20, + force_limit=100, + use_delta=True, + ) + rest = PassiveControllerConfig([j.name for j in self.robot.active_joints if 'root' in j.name], damping=0, friction=0) + return deepcopy_dict( + dict( + pd_joint_delta_pos=dict( + body=pd_joint_delta_pos, rest=rest, balance_passive_force=False + ), + ) + ) + + def _load_articulation(self): + """ + Load the robot articulation + """ + loader = self.scene.create_mjcf_loader() + asset_path = str(self.mjcf_path) + + loader.name = self.uid + + # only need the robot + self.robot = loader.parse(asset_path)[0][0].build() + assert self.robot is not None, f"Fail to load URDF/MJCF from {asset_path}" + + # Cache robot link ids + self.robot_link_ids = [link.name for link in self.robot.get_links()] + +class HopperEnv(BaseEnv): + agent: Union[HopperRobot] + + def __init__(self, *args, robot_uids=HopperRobot, **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + @property + def _default_sim_config(self): + return SimConfig( + gpu_memory_cfg=GPUMemoryConfig( + found_lost_pairs_capacity=2**25, max_rigid_patch_count=2**18 + ) + ) + + @property + def _default_sensor_configs(self): + return self.agent._sensor_configs + + @property + def _default_human_render_camera_configs(self): + pose = sapien_utils.look_at(eye=[0, -2, 0.5], target=[0, 0, 0.5]) + return [CameraConfig("render_camera", pose, 512, 512, (70/180)*np.pi, 0.01, 100)] + # return self.agent._sensor_configs + + def _load_scene(self, options: dict): + loader = self.scene.create_mjcf_loader() + articulation_builders, actor_builders, sensor_configs = loader.parse(MJCF_FILE) + for a in actor_builders: + a.build(a.name) + + # load in the ground + self.ground = build_ground( + self.scene, floor_width=2, floor_length=50, altitude=-0.075, floor_origin=(25-2,0) + ) + + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): + with torch.device(self.device): + b = len(env_idx) + # qpos sampled same as dm_control, but ensure no self intersection explicitly here + random_qpos = torch.rand(b, self.agent.robot.dof[0]) + q_lims = self.agent.robot.get_qlimits() + q_ranges = q_lims[..., 1] - q_lims[..., 0] + random_qpos *= q_ranges + random_qpos += q_lims[..., 0] + + # overwrite planar joint qpos - these are special for planar robots + # first two joints are dummy rootx and rootz + random_qpos[:,:2] = 0 + # y is axis of rotation of our planar robot (xz plane), so we randomize around it + random_qpos[:,2] = torch.pi*(2*torch.rand(b) - 1) # (-pi,pi) + self.agent.robot.set_qpos(random_qpos) + + def evaluate(self): + return dict() + + #dm_control function + 
def touch(self, link_name): + """Returns function of sensor force values""" + force_vec = self.agent.robot.get_net_contact_forces([link_name]) + force_mag = torch.linalg.norm(force_vec, dim=-1) + return torch.log1p(force_mag) + + #dm_control function + @property + def height(self): + """Returns relative height of the robot""" + return (self.agent.robot.links_map['torso'].pose.p[:,-1] - + self.agent.robot.links_map['foot_heel'].pose.p[:,-1]).view(-1,1) + + # overwrite basic obs state dict + # same as dm_control does, need to remove inclusion of rootx joint qpos + # obs is a 15 dim vec of qpos, qvel, and touch info + def _get_obs_state_dict(self, info: Dict): + return dict( + qpos=self.agent.robot.get_qpos()[:,1:], + qvel=self.agent.robot.get_qvel(), + toe_touch=self.touch("foot_toe"), + heel_touch=self.touch("foot_heel"), + ) + +@register_env("MS-HopperStand-v1", max_episode_steps=100) +class HopperStandEnv(HopperEnv): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def compute_dense_reward(self, obs: Any, action: Array, info: Dict): + rew = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) + #print("rewards", rew.shape) + return rew.view(-1) + + def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): + # this should be equal to compute_dense_reward / max possible reward + max_reward = 1.0 + return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward + +# @register_env("MS-HopperHop-v1", max_episode_steps=1000) +# class HopperStandEnv(HopperEnv): +# def __init__(self, *args, **kwargs): +# super().__init__(*args, **kwargs) + +# def compute_dense_reward(self, obs: Any, action: Array, info: Dict): +# return 0 + +# def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): +# # this should be equal to compute_dense_reward / max possible reward +# max_reward = 1.0 +# return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward \ No newline at end of file From b582deba7a3d819c3337fe5a45cd118bed2a89cd Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 18 Jul 2024 03:43:05 -0700 Subject: [PATCH 02/11] hoppper-v1 --- docs/source/tasks/control/index.md | 42 +++++++ mani_skill/envs/tasks/control/__init__.py | 1 + mani_skill/envs/tasks/control/hopper.py | 139 +++++++++++++--------- mani_skill/envs/utils/rewards/common.py | 31 ++++- mani_skill/utils/building/ground.py | 19 +-- 5 files changed, 165 insertions(+), 67 deletions(-) diff --git a/docs/source/tasks/control/index.md b/docs/source/tasks/control/index.md index c2f15e8f5..8bf67b5ef 100644 --- a/docs/source/tasks/control/index.md +++ b/docs/source/tasks/control/index.md @@ -45,4 +45,46 @@ Use the Cartpole robot to swing up a pole on a cart. **Success Conditions:** - No specific success conditions. The task is considered successful if the pole is upright for the whole episode. We can threshold the episode accumulated reward to determine success. +::: + +## MS-HopperHop-v1 +![dense-reward][reward-badge] + +:::{dropdown} Task Card +:icon: note +:color: primary + +**Task Description:** +Hopper robot stays upright and moves in positive x direction with hopping motion + + +**Supported Robots: Hopper** + +**Randomizations:** +- Hopper robot is randomly rotated [-pi, pi] radians about y axis. +- Hopper qpos are uniformly sampled within their allowed ranges + +**Success Conditions:** +- No specific success conditions. The task is considered successful if the pole is upright for the whole episode. 
We can threshold the episode accumulated reward to determine success. +::: + +## MS-HopperStand-v1 +![dense-reward][reward-badge] + +:::{dropdown} Task Card +:icon: note +:color: primary + +**Task Description:** +Hopper robot stands upright + + +**Supported Robots: Hopper** + +**Randomizations:** +- Hopper robot is randomly rotated [-pi, pi] radians about y axis. +- Hopper qpos are uniformly sampled within their allowed ranges + +**Success Conditions:** +- No specific success conditions. We can threshold the episode accumulated reward to determine success. ::: \ No newline at end of file diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index 920f2d3f6..d6912de32 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -1 +1,2 @@ from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv +from .hopper import HopperStandEnv, HopperHopEnv \ No newline at end of file diff --git a/mani_skill/envs/tasks/control/hopper.py b/mani_skill/envs/tasks/control/hopper.py index ad69c0abf..47671f831 100644 --- a/mani_skill/envs/tasks/control/hopper.py +++ b/mani_skill/envs/tasks/control/hopper.py @@ -44,45 +44,57 @@ def _sensor_configs(self): CameraConfig( uid="cam0", pose=Pose.create_from_pq([0, -2.8, 0], euler2quat(0,0,np.pi/2)), - width=512, - height=512, + width=128, + height=128, fov=70*(np.pi/180), near=0.01, far=100, entity_uid="torso_dummy_1", ), - # CameraConfig( - # uid="back", - # pose=Pose.create_from_pq([-2, -0.2, -1.2], [1, 0, 0, 0]), - # width=512, - # height=512, - # fov=np.pi/2, - # near=0.01, - # far=100, - # entity_uid="torso_dummy_0", - # ), ] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.real_links = [link for link in self.robot.links_map.keys() if 'dummy' not in link] + self.real_links_mass = torch.tensor([link.mass[0].item() for link in self.robot.links if 'dummy' not in link.name], device=self.device) + self.real_mass = self.real_links_mass.sum() @property def _controller_configs(self): # NOTE joints in [rootx,rooty,rooz] are for planar tracking, not control joints - pd_joint_delta_pos = PDJointPosControllerConfig( - [j.name for j in self.robot.active_joints if 'root' not in j.name], - -1, - 1, - damping=5, - stiffness=20, - force_limit=100, + # TODO (xhin): further tune controller params + max_delta = 2.25 #best + stiffness= 100 + friction = 0.1 + force_limit = 200 + damping = 7.5 #end best + pd_joint_delta_pos_body = PDJointPosControllerConfig( + ["hip", "knee", "waist"], + lower=-max_delta, + upper=max_delta, + damping=damping, + stiffness=stiffness, + force_limit=force_limit, + friction=friction, + use_delta=True, + ) + pd_joint_delta_pos_ankle = PDJointPosControllerConfig( + ["ankle"], + lower=-max_delta/3, + upper=max_delta/3, + damping=damping, + stiffness=stiffness, + force_limit=force_limit, + friction=friction, use_delta=True, ) rest = PassiveControllerConfig([j.name for j in self.robot.active_joints if 'root' in j.name], damping=0, friction=0) return deepcopy_dict( dict( pd_joint_delta_pos=dict( - body=pd_joint_delta_pos, rest=rest, balance_passive_force=False + body=pd_joint_delta_pos_body, + ankle=pd_joint_delta_pos_ankle, + rest=rest, balance_passive_force=False ), ) ) @@ -99,10 +111,16 @@ def _load_articulation(self): # only need the robot self.robot = loader.parse(asset_path)[0][0].build() assert self.robot is not None, f"Fail to load URDF/MJCF from {asset_path}" - - # Cache robot link ids self.robot_link_ids = [link.name for link in 
self.robot.get_links()] + # planar agent has root joints in range [-inf, inf], not ideal in obs space + def get_proprioception(self): + return dict( + #don't include xslider qpos, for x trans invariance + qpos=self.robot.get_qpos()[:,1:], + qvel=self.robot.get_qvel(), + ) + class HopperEnv(BaseEnv): agent: Union[HopperRobot] @@ -114,7 +132,12 @@ def _default_sim_config(self): return SimConfig( gpu_memory_cfg=GPUMemoryConfig( found_lost_pairs_capacity=2**25, max_rigid_patch_count=2**18 - ) + ), + scene_cfg=SceneConfig( + solver_position_iterations=15, solver_velocity_iterations=15 + ), + # sim_freq=50, + control_freq=25 ) @property @@ -123,9 +146,7 @@ def _default_sensor_configs(self): @property def _default_human_render_camera_configs(self): - pose = sapien_utils.look_at(eye=[0, -2, 0.5], target=[0, 0, 0.5]) - return [CameraConfig("render_camera", pose, 512, 512, (70/180)*np.pi, 0.01, 100)] - # return self.agent._sensor_configs + return self.agent._sensor_configs def _load_scene(self, options: dict): loader = self.scene.create_mjcf_loader() @@ -135,7 +156,7 @@ def _load_scene(self, options: dict): # load in the ground self.ground = build_ground( - self.scene, floor_width=2, floor_length=50, altitude=-0.075, floor_origin=(25-2,0) + self.scene, floor_width=2, floor_length=100, altitude=-0.075, xy_origin=(50-2,0) ) def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): @@ -158,55 +179,61 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): def evaluate(self): return dict() - #dm_control function + @property # dm_control mjc function adapted for maniskill + def height(self): + """Returns relative height of the robot""" + return (self.agent.robot.links_map['torso'].pose.p[:,-1] - + self.agent.robot.links_map['foot_heel'].pose.p[:,-1]).view(-1,1) + + @property # dm_control mjc function adapted for maniskill + def subtreelinvelx(self): + """Returns linvel x component of robot""" + return self.agent.robot.get_qvel()[:,0].view(-1,1) + + # dm_control mjc function adapted for maniskill def touch(self, link_name): """Returns function of sensor force values""" force_vec = self.agent.robot.get_net_contact_forces([link_name]) force_mag = torch.linalg.norm(force_vec, dim=-1) return torch.log1p(force_mag) - - #dm_control function - @property - def height(self): - """Returns relative height of the robot""" - return (self.agent.robot.links_map['torso'].pose.p[:,-1] - - self.agent.robot.links_map['foot_heel'].pose.p[:,-1]).view(-1,1) - # overwrite basic obs state dict - # same as dm_control does, need to remove inclusion of rootx joint qpos - # obs is a 15 dim vec of qpos, qvel, and touch info - def _get_obs_state_dict(self, info: Dict): - return dict( - qpos=self.agent.robot.get_qpos()[:,1:], - qvel=self.agent.robot.get_qvel(), + # dm_control also includes foot pressures as state obs space + def _get_obs_extra(self, info: Dict): + obs = dict( toe_touch=self.touch("foot_toe"), heel_touch=self.touch("foot_heel"), ) - + return obs + @register_env("MS-HopperStand-v1", max_episode_steps=100) class HopperStandEnv(HopperEnv): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def compute_dense_reward(self, obs: Any, action: Array, info: Dict): - rew = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) - #print("rewards", rew.shape) - return rew.view(-1) + return rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0).view(-1) def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): # this should be equal to 
compute_dense_reward / max possible reward max_reward = 1.0 return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward -# @register_env("MS-HopperHop-v1", max_episode_steps=1000) -# class HopperStandEnv(HopperEnv): -# def __init__(self, *args, **kwargs): -# super().__init__(*args, **kwargs) +@register_env("MS-HopperHop-v1", max_episode_steps=100) +class HopperHopEnv(HopperEnv): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) -# def compute_dense_reward(self, obs: Any, action: Array, info: Dict): -# return 0 + def compute_dense_reward(self, obs: Any, action: Array, info: Dict): + standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) + hopping = rewards.tolerance(self.subtreelinvelx, + lower=_HOP_SPEED, + upper=float('inf'), + margin=_HOP_SPEED/2, + value_at_margin=0.5, + sigmoid='linear') + return (standing.view(-1) * hopping.view(-1)) -# def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): -# # this should be equal to compute_dense_reward / max possible reward -# max_reward = 1.0 -# return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward \ No newline at end of file + def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): + # this should be equal to compute_dense_reward / max possible reward + max_reward = 1.0 + return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward \ No newline at end of file diff --git a/mani_skill/envs/utils/rewards/common.py b/mani_skill/envs/utils/rewards/common.py index 23f9ee127..2ba6ec228 100644 --- a/mani_skill/envs/utils/rewards/common.py +++ b/mani_skill/envs/utils/rewards/common.py @@ -1,5 +1,5 @@ import torch - +import numpy as np def tolerance( x, lower=0.0, upper=0.0, margin=0.0, sigmoid="gaussian", value_at_margin=0.1 @@ -31,7 +31,20 @@ def tolerance( Raises: ValueError: If `bounds[0] > bounds[1]`. ValueError: If `margin` is negative. + ValueError: If not 0 < `value_at_margin` < 1, + except for `linear`, `cosine` and `quadratic` sigmoids, which allow `value_at_margin` == 0. + ValueError: If `sigmoid` is of an unknown type. 
""" + + if sigmoid in ('cosine', 'linear', 'quadratic'): + if not 0 <= value_at_margin < 1: + raise ValueError('`value_at_margin` must be nonnegative and smaller than 1, ' + 'got {}.'.format(value_at_margin)) + else: + if not 0 < value_at_margin < 1: + raise ValueError('`value_at_margin` must be strictly between 0 and 1, ' + 'got {}.'.format(value_at_margin)) + if lower > upper: raise ValueError("Lower bound must be <= upper bound.") @@ -45,13 +58,23 @@ def tolerance( else: d = torch.where(x < lower, lower - x, x - upper) / margin if sigmoid == "gaussian": + scale = np.sqrt(-2 * np.log(value_at_margin)) value = torch.where( - in_bounds, torch.tensor(1.0), torch.exp(-0.5 * (d**2)) + in_bounds, torch.tensor(1.0), torch.exp(-0.5 * (d*scale)**2) ) elif sigmoid == "hyperbolic": - value = torch.where(in_bounds, torch.tensor(1.0), 1 / (1 + torch.exp(d))) + scale = np.arccosh(1/value_at_margin) + value = torch.where(in_bounds, torch.tensor(1.0), 1 / (1 + torch.exp(d*scale))) elif sigmoid == "quadratic": - value = torch.where(in_bounds, torch.tensor(1.0), 1 - d**2) + scale = np.sqrt(1-value_at_margin) + scaled_d = d*scale + x = torch.where(scaled_d.abs() < 1, 1 - scaled_d**2, torch.tensor(0.0)) + value = torch.where(in_bounds, torch.tensor(1.0), x) + elif sigmoid == "linear": + scale = 1-value_at_margin + scaled_d = d*scale + x = torch.where(scaled_d.abs() < 1, 1 - scaled_d, torch.tensor(0.0)) + value = torch.where(in_bounds, torch.tensor(1.0), x) else: raise ValueError(f"Unknown sigmoid type {sigmoid!r}.") diff --git a/mani_skill/utils/building/ground.py b/mani_skill/utils/building/ground.py index 550a6597d..b839a8433 100644 --- a/mani_skill/utils/building/ground.py +++ b/mani_skill/utils/building/ground.py @@ -17,6 +17,8 @@ def build_ground( scene: ManiSkillScene, floor_width: int = 100, + floor_length: int = None, + xy_origin: tuple = (0,0), altitude=0, name="ground", ): @@ -35,14 +37,17 @@ def build_ground( actor = ground.build_static(name=name) # generate a grid of right triangles that form 1x1 meter squares centered at (0, 0, 0) - num_verts = (floor_width + 1) ** 2 + floor_length = floor_width if floor_length is None else floor_length + num_verts = (floor_width + 1) * (floor_length + 1) vertices = np.zeros((num_verts, 3)) floor_half_width = floor_width / 2 - ranges = np.arange(start=-floor_half_width, stop=floor_half_width + 1) - xx, yy = np.meshgrid(ranges, ranges) + floor_half_length = floor_length / 2 + xrange = np.arange(start=-floor_half_width, stop=floor_half_width + 1) + yrange = np.arange(start=-floor_half_length, stop=floor_half_length + 1) + xx, yy = np.meshgrid(xrange, yrange) xys = np.stack((yy, xx), axis=2).reshape(-1, 2) - vertices[:, 0] = xys[:, 0] - vertices[:, 1] = xys[:, 1] + vertices[:, 0] = xys[:, 0] + xy_origin[0] + vertices[:, 1] = xys[:, 1] + xy_origin[1] vertices[:, 2] = altitude normals = np.zeros((len(vertices), 3)) normals[:, 2] = 1 @@ -61,7 +66,7 @@ def build_ground( # TODO: This is fast but still two for loops which is a little annoying triangles = [] - for i in range(floor_width): + for i in range(floor_length): triangles.append( np.stack( [ @@ -72,7 +77,7 @@ def build_ground( axis=1, ) ) - for i in range(floor_width): + for i in range(floor_length): triangles.append( np.stack( [ From 814c1b9a6b0cc4a5900ba46fb94f55c3aed00b32 Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 18 Jul 2024 03:44:45 -0700 Subject: [PATCH 03/11] camera mount --- mani_skill/envs/sapien_env.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mani_skill/envs/sapien_env.py 
b/mani_skill/envs/sapien_env.py index 813f429a1..d33082ccc 100644 --- a/mani_skill/envs/sapien_env.py +++ b/mani_skill/envs/sapien_env.py @@ -620,9 +620,14 @@ def _setup_sensors(self, options: dict): # Cameras for rendering only self._human_render_cameras = dict() for uid, camera_cfg in self._human_render_camera_configs.items(): + if uid in self._agent_sensor_configs: + articulation = self.agent.robot + else: + articulation = None self._human_render_cameras[uid] = Camera( camera_cfg, self.scene, + articulation=articulation, ) self.scene.sensors = self._sensors From d2a1f171213208f28bc2b2a711f8e30d0d29ad9d Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 18 Jul 2024 03:52:47 -0700 Subject: [PATCH 04/11] formatting --- mani_skill/envs/tasks/control/hopper.py | 119 ++++++++++++++---------- 1 file changed, 70 insertions(+), 49 deletions(-) diff --git a/mani_skill/envs/tasks/control/hopper.py b/mani_skill/envs/tasks/control/hopper.py index 47671f831..88c037d59 100644 --- a/mani_skill/envs/tasks/control/hopper.py +++ b/mani_skill/envs/tasks/control/hopper.py @@ -1,27 +1,22 @@ """Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/hopper.py""" + import os from typing import Any, Dict, Union import numpy as np import torch - from mani_skill.agents.base_agent import BaseAgent from mani_skill.agents.controllers import * from mani_skill.envs.sapien_env import BaseEnv from mani_skill.envs.utils import randomization, rewards from mani_skill.sensors.camera import CameraConfig from mani_skill.utils import common, sapien_utils -from mani_skill.utils.registration import register_env -from mani_skill.utils.structs.pose import Pose -from mani_skill.utils.structs.types import ( - Array, - GPUMemoryConfig, - SceneConfig, - SimConfig, -) - # fix new imports later from mani_skill.utils.building.ground import build_ground +from mani_skill.utils.registration import register_env +from mani_skill.utils.structs.pose import Pose +from mani_skill.utils.structs.types import (Array, GPUMemoryConfig, + SceneConfig, SimConfig) from transforms3d.euler import euler2quat MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}" @@ -33,6 +28,7 @@ # Hopping speed above which hop reward is 1. 
_HOP_SPEED = 2 + class HopperRobot(BaseAgent): uid = "hopper" mjcf_path = MJCF_FILE @@ -43,10 +39,10 @@ def _sensor_configs(self): # replicated from xml file CameraConfig( uid="cam0", - pose=Pose.create_from_pq([0, -2.8, 0], euler2quat(0,0,np.pi/2)), + pose=Pose.create_from_pq([0, -2.8, 0], euler2quat(0, 0, np.pi / 2)), width=128, height=128, - fov=70*(np.pi/180), + fov=70 * (np.pi / 180), near=0.01, far=100, entity_uid="torso_dummy_1", @@ -55,19 +51,28 @@ def _sensor_configs(self): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.real_links = [link for link in self.robot.links_map.keys() if 'dummy' not in link] - self.real_links_mass = torch.tensor([link.mass[0].item() for link in self.robot.links if 'dummy' not in link.name], device=self.device) + self.real_links = [ + link for link in self.robot.links_map.keys() if "dummy" not in link + ] + self.real_links_mass = torch.tensor( + [ + link.mass[0].item() + for link in self.robot.links + if "dummy" not in link.name + ], + device=self.device, + ) self.real_mass = self.real_links_mass.sum() @property def _controller_configs(self): # NOTE joints in [rootx,rooty,rooz] are for planar tracking, not control joints # TODO (xhin): further tune controller params - max_delta = 2.25 #best - stiffness= 100 + max_delta = 2.25 + stiffness = 100 friction = 0.1 force_limit = 200 - damping = 7.5 #end best + damping = 7.5 pd_joint_delta_pos_body = PDJointPosControllerConfig( ["hip", "knee", "waist"], lower=-max_delta, @@ -80,21 +85,26 @@ def _controller_configs(self): ) pd_joint_delta_pos_ankle = PDJointPosControllerConfig( ["ankle"], - lower=-max_delta/3, - upper=max_delta/3, + lower=-max_delta / 3, + upper=max_delta / 3, damping=damping, stiffness=stiffness, force_limit=force_limit, friction=friction, use_delta=True, ) - rest = PassiveControllerConfig([j.name for j in self.robot.active_joints if 'root' in j.name], damping=0, friction=0) + rest = PassiveControllerConfig( + [j.name for j in self.robot.active_joints if "root" in j.name], + damping=0, + friction=0, + ) return deepcopy_dict( dict( pd_joint_delta_pos=dict( - body=pd_joint_delta_pos_body, + body=pd_joint_delta_pos_body, ankle=pd_joint_delta_pos_ankle, - rest=rest, balance_passive_force=False + rest=rest, + balance_passive_force=False, ), ) ) @@ -116,11 +126,12 @@ def _load_articulation(self): # planar agent has root joints in range [-inf, inf], not ideal in obs space def get_proprioception(self): return dict( - #don't include xslider qpos, for x trans invariance - qpos=self.robot.get_qpos()[:,1:], + # don't include xslider qpos, for x trans invariance + qpos=self.robot.get_qpos()[:, 1:], qvel=self.robot.get_qvel(), ) + class HopperEnv(BaseEnv): agent: Union[HopperRobot] @@ -137,7 +148,7 @@ def _default_sim_config(self): solver_position_iterations=15, solver_velocity_iterations=15 ), # sim_freq=50, - control_freq=25 + control_freq=25, ) @property @@ -153,12 +164,16 @@ def _load_scene(self, options: dict): articulation_builders, actor_builders, sensor_configs = loader.parse(MJCF_FILE) for a in actor_builders: a.build(a.name) - + # load in the ground self.ground = build_ground( - self.scene, floor_width=2, floor_length=100, altitude=-0.075, xy_origin=(50-2,0) + self.scene, + floor_width=2, + floor_length=100, + altitude=-0.075, + xy_origin=(50 - 2, 0), ) - + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): with torch.device(self.device): b = len(env_idx) @@ -171,24 +186,26 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): # 
overwrite planar joint qpos - these are special for planar robots # first two joints are dummy rootx and rootz - random_qpos[:,:2] = 0 + random_qpos[:, :2] = 0 # y is axis of rotation of our planar robot (xz plane), so we randomize around it - random_qpos[:,2] = torch.pi*(2*torch.rand(b) - 1) # (-pi,pi) + random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) self.agent.robot.set_qpos(random_qpos) def evaluate(self): return dict() - - @property # dm_control mjc function adapted for maniskill + + @property # dm_control mjc function adapted for maniskill def height(self): """Returns relative height of the robot""" - return (self.agent.robot.links_map['torso'].pose.p[:,-1] - - self.agent.robot.links_map['foot_heel'].pose.p[:,-1]).view(-1,1) - - @property # dm_control mjc function adapted for maniskill + return ( + self.agent.robot.links_map["torso"].pose.p[:, -1] + - self.agent.robot.links_map["foot_heel"].pose.p[:, -1] + ).view(-1, 1) + + @property # dm_control mjc function adapted for maniskill def subtreelinvelx(self): """Returns linvel x component of robot""" - return self.agent.robot.get_qvel()[:,0].view(-1,1) + return self.agent.robot.get_qvel()[:, 0].view(-1, 1) # dm_control mjc function adapted for maniskill def touch(self, link_name): @@ -204,12 +221,13 @@ def _get_obs_extra(self, info: Dict): heel_touch=self.touch("foot_heel"), ) return obs - + + @register_env("MS-HopperStand-v1", max_episode_steps=100) class HopperStandEnv(HopperEnv): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - + def compute_dense_reward(self, obs: Any, action: Array, info: Dict): return rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0).view(-1) @@ -217,23 +235,26 @@ def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): # this should be equal to compute_dense_reward / max possible reward max_reward = 1.0 return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward - + + @register_env("MS-HopperHop-v1", max_episode_steps=100) class HopperHopEnv(HopperEnv): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - + def compute_dense_reward(self, obs: Any, action: Array, info: Dict): standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) - hopping = rewards.tolerance(self.subtreelinvelx, - lower=_HOP_SPEED, - upper=float('inf'), - margin=_HOP_SPEED/2, - value_at_margin=0.5, - sigmoid='linear') - return (standing.view(-1) * hopping.view(-1)) + hopping = rewards.tolerance( + self.subtreelinvelx, + lower=_HOP_SPEED, + upper=float("inf"), + margin=_HOP_SPEED / 2, + value_at_margin=0.5, + sigmoid="linear", + ) + return standing.view(-1) * hopping.view(-1) def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): # this should be equal to compute_dense_reward / max possible reward max_reward = 1.0 - return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward \ No newline at end of file + return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward From 4ce2293b39fe7c9194b32f13d2b0028f5789214c Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 18 Jul 2024 03:57:08 -0700 Subject: [PATCH 05/11] formatting --- docs/source/tasks/control/index.md | 2 +- mani_skill/envs/tasks/control/__init__.py | 2 +- mani_skill/envs/tasks/control/hopper.py | 12 +++++++++--- mani_skill/utils/building/ground.py | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/source/tasks/control/index.md b/docs/source/tasks/control/index.md index 
8bf67b5ef..2a7136610 100644 --- a/docs/source/tasks/control/index.md +++ b/docs/source/tasks/control/index.md @@ -59,7 +59,7 @@ Hopper robot stays upright and moves in positive x direction with hopping motion **Supported Robots: Hopper** - + **Randomizations:** - Hopper robot is randomly rotated [-pi, pi] radians about y axis. - Hopper qpos are uniformly sampled within their allowed ranges diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index d6912de32..59e766758 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -1,2 +1,2 @@ from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv -from .hopper import HopperStandEnv, HopperHopEnv \ No newline at end of file +from .hopper import HopperHopEnv, HopperStandEnv diff --git a/mani_skill/envs/tasks/control/hopper.py b/mani_skill/envs/tasks/control/hopper.py index 88c037d59..2b5fcc2e2 100644 --- a/mani_skill/envs/tasks/control/hopper.py +++ b/mani_skill/envs/tasks/control/hopper.py @@ -5,19 +5,25 @@ import numpy as np import torch +from transforms3d.euler import euler2quat + from mani_skill.agents.base_agent import BaseAgent from mani_skill.agents.controllers import * from mani_skill.envs.sapien_env import BaseEnv from mani_skill.envs.utils import randomization, rewards from mani_skill.sensors.camera import CameraConfig from mani_skill.utils import common, sapien_utils + # fix new imports later from mani_skill.utils.building.ground import build_ground from mani_skill.utils.registration import register_env from mani_skill.utils.structs.pose import Pose -from mani_skill.utils.structs.types import (Array, GPUMemoryConfig, - SceneConfig, SimConfig) -from transforms3d.euler import euler2quat +from mani_skill.utils.structs.types import ( + Array, + GPUMemoryConfig, + SceneConfig, + SimConfig, +) MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}" diff --git a/mani_skill/utils/building/ground.py b/mani_skill/utils/building/ground.py index b839a8433..f1543c841 100644 --- a/mani_skill/utils/building/ground.py +++ b/mani_skill/utils/building/ground.py @@ -18,7 +18,7 @@ def build_ground( scene: ManiSkillScene, floor_width: int = 100, floor_length: int = None, - xy_origin: tuple = (0,0), + xy_origin: tuple = (0, 0), altitude=0, name="ground", ): From 03060c7e10168a34b6e46fd5bbfbf70bf1d11d43 Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 18 Jul 2024 04:02:18 -0700 Subject: [PATCH 06/11] sigmoidal rewards comment removed --- mani_skill/envs/utils/rewards/common.py | 43 ++++++++++++++----------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/mani_skill/envs/utils/rewards/common.py b/mani_skill/envs/utils/rewards/common.py index 2ba6ec228..26cd78d04 100644 --- a/mani_skill/envs/utils/rewards/common.py +++ b/mani_skill/envs/utils/rewards/common.py @@ -1,5 +1,6 @@ -import torch import numpy as np +import torch + def tolerance( x, lower=0.0, upper=0.0, margin=0.0, sigmoid="gaussian", value_at_margin=0.1 @@ -23,7 +24,7 @@ def tolerance( 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'. value_at_margin: A float between 0 and 1 specifying the output value when the distance from `x` to the nearest bound is equal to `margin`. Ignored - if `margin == 0`. todo: not implemented yet + if `margin == 0`. Returns: A torch array with values between 0.0 and 1.0. @@ -31,20 +32,24 @@ def tolerance( Raises: ValueError: If `bounds[0] > bounds[1]`. ValueError: If `margin` is negative. 
- ValueError: If not 0 < `value_at_margin` < 1, + ValueError: If not 0 < `value_at_margin` < 1, except for `linear`, `cosine` and `quadratic` sigmoids, which allow `value_at_margin` == 0. ValueError: If `sigmoid` is of an unknown type. """ - if sigmoid in ('cosine', 'linear', 'quadratic'): - if not 0 <= value_at_margin < 1: - raise ValueError('`value_at_margin` must be nonnegative and smaller than 1, ' - 'got {}.'.format(value_at_margin)) + if sigmoid in ("cosine", "linear", "quadratic"): + if not 0 <= value_at_margin < 1: + raise ValueError( + "`value_at_margin` must be nonnegative and smaller than 1, " + "got {}.".format(value_at_margin) + ) else: - if not 0 < value_at_margin < 1: - raise ValueError('`value_at_margin` must be strictly between 0 and 1, ' - 'got {}.'.format(value_at_margin)) - + if not 0 < value_at_margin < 1: + raise ValueError( + "`value_at_margin` must be strictly between 0 and 1, " + "got {}.".format(value_at_margin) + ) + if lower > upper: raise ValueError("Lower bound must be <= upper bound.") @@ -60,19 +65,21 @@ def tolerance( if sigmoid == "gaussian": scale = np.sqrt(-2 * np.log(value_at_margin)) value = torch.where( - in_bounds, torch.tensor(1.0), torch.exp(-0.5 * (d*scale)**2) + in_bounds, torch.tensor(1.0), torch.exp(-0.5 * (d * scale) ** 2) ) elif sigmoid == "hyperbolic": - scale = np.arccosh(1/value_at_margin) - value = torch.where(in_bounds, torch.tensor(1.0), 1 / (1 + torch.exp(d*scale))) + scale = np.arccosh(1 / value_at_margin) + value = torch.where( + in_bounds, torch.tensor(1.0), 1 / (1 + torch.exp(d * scale)) + ) elif sigmoid == "quadratic": - scale = np.sqrt(1-value_at_margin) - scaled_d = d*scale + scale = np.sqrt(1 - value_at_margin) + scaled_d = d * scale x = torch.where(scaled_d.abs() < 1, 1 - scaled_d**2, torch.tensor(0.0)) value = torch.where(in_bounds, torch.tensor(1.0), x) elif sigmoid == "linear": - scale = 1-value_at_margin - scaled_d = d*scale + scale = 1 - value_at_margin + scaled_d = d * scale x = torch.where(scaled_d.abs() < 1, 1 - scaled_d, torch.tensor(0.0)) value = torch.where(in_bounds, torch.tensor(1.0), x) else: From 2da3604fe65283a0c7d2e3f0c32402aa443714d2 Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 23 Jul 2024 12:55:19 -0700 Subject: [PATCH 07/11] design fixes, visuals and convention --- mani_skill/envs/sapien_env.py | 5 - .../envs/tasks/control/assets/hopper.xml | 2 +- mani_skill/envs/tasks/control/hopper.py | 102 +++++++----------- .../control/planar/scene_builder.py | 56 ++++++++++ 4 files changed, 98 insertions(+), 67 deletions(-) create mode 100644 mani_skill/utils/scene_builder/control/planar/scene_builder.py diff --git a/mani_skill/envs/sapien_env.py b/mani_skill/envs/sapien_env.py index d33082ccc..813f429a1 100644 --- a/mani_skill/envs/sapien_env.py +++ b/mani_skill/envs/sapien_env.py @@ -620,14 +620,9 @@ def _setup_sensors(self, options: dict): # Cameras for rendering only self._human_render_cameras = dict() for uid, camera_cfg in self._human_render_camera_configs.items(): - if uid in self._agent_sensor_configs: - articulation = self.agent.robot - else: - articulation = None self._human_render_cameras[uid] = Camera( camera_cfg, self.scene, - articulation=articulation, ) self.scene.sensors = self._sensors diff --git a/mani_skill/envs/tasks/control/assets/hopper.xml b/mani_skill/envs/tasks/control/assets/hopper.xml index c4e860beb..0aa3be492 100644 --- a/mani_skill/envs/tasks/control/assets/hopper.xml +++ b/mani_skill/envs/tasks/control/assets/hopper.xml @@ -34,7 +34,7 @@ - + diff --git 
a/mani_skill/envs/tasks/control/hopper.py b/mani_skill/envs/tasks/control/hopper.py index 2b5fcc2e2..9ff8bf7ed 100644 --- a/mani_skill/envs/tasks/control/hopper.py +++ b/mani_skill/envs/tasks/control/hopper.py @@ -4,8 +4,8 @@ from typing import Any, Dict, Union import numpy as np +import sapien import torch -from transforms3d.euler import euler2quat from mani_skill.agents.base_agent import BaseAgent from mani_skill.agents.controllers import * @@ -13,21 +13,15 @@ from mani_skill.envs.utils import randomization, rewards from mani_skill.sensors.camera import CameraConfig from mani_skill.utils import common, sapien_utils - -# fix new imports later -from mani_skill.utils.building.ground import build_ground from mani_skill.utils.registration import register_env -from mani_skill.utils.structs.pose import Pose -from mani_skill.utils.structs.types import ( - Array, - GPUMemoryConfig, - SceneConfig, - SimConfig, +from mani_skill.utils.scene_builder.control.planar.scene_builder import ( + PlanarSceneBuilder, ) +from mani_skill.utils.structs.pose import Pose +from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}" -# params from dm_control # Minimal height of torso over foot above which stand reward is 1. _STAND_HEIGHT = 0.6 @@ -39,22 +33,6 @@ class HopperRobot(BaseAgent): uid = "hopper" mjcf_path = MJCF_FILE - @property - def _sensor_configs(self): - return [ - # replicated from xml file - CameraConfig( - uid="cam0", - pose=Pose.create_from_pq([0, -2.8, 0], euler2quat(0, 0, np.pi / 2)), - width=128, - height=128, - fov=70 * (np.pi / 180), - near=0.01, - far=100, - entity_uid="torso_dummy_1", - ), - ] - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.real_links = [ @@ -147,23 +125,44 @@ def __init__(self, *args, robot_uids=HopperRobot, **kwargs): @property def _default_sim_config(self): return SimConfig( - gpu_memory_cfg=GPUMemoryConfig( - found_lost_pairs_capacity=2**25, max_rigid_patch_count=2**18 - ), scene_cfg=SceneConfig( - solver_position_iterations=15, solver_velocity_iterations=15 + solver_position_iterations=4, solver_velocity_iterations=1 ), - # sim_freq=50, + sim_freq=100, control_freq=25, ) @property def _default_sensor_configs(self): - return self.agent._sensor_configs + return [ + # replicated from xml file + CameraConfig( + uid="cam0", + pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), + width=128, + height=128, + fov=45 * (np.pi / 180), + near=0.01, + far=100, + mount=self.agent.robot.links_map["torso_dummy_1"], + ), + ] @property def _default_human_render_camera_configs(self): - return self.agent._sensor_configs + return [ + # replicated from xml file + CameraConfig( + uid="render_cam", + pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), + width=512, + height=512, + fov=45 * (np.pi / 180), + near=0.01, + far=100, + mount=self.agent.robot.links_map["torso_dummy_1"], + ), + ] def _load_scene(self, options: dict): loader = self.scene.create_mjcf_loader() @@ -171,31 +170,12 @@ def _load_scene(self, options: dict): for a in actor_builders: a.build(a.name) - # load in the ground - self.ground = build_ground( - self.scene, - floor_width=2, - floor_length=100, - altitude=-0.075, - xy_origin=(50 - 2, 0), - ) + self.planar_scene = PlanarSceneBuilder(env=self) + self.planar_scene.build() def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): with torch.device(self.device): - b = len(env_idx) - # qpos sampled same as dm_control, 
but ensure no self intersection explicitly here - random_qpos = torch.rand(b, self.agent.robot.dof[0]) - q_lims = self.agent.robot.get_qlimits() - q_ranges = q_lims[..., 1] - q_lims[..., 0] - random_qpos *= q_ranges - random_qpos += q_lims[..., 0] - - # overwrite planar joint qpos - these are special for planar robots - # first two joints are dummy rootx and rootz - random_qpos[:, :2] = 0 - # y is axis of rotation of our planar robot (xz plane), so we randomize around it - random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) - self.agent.robot.set_qpos(random_qpos) + self.planar_scene.initialize(env_idx) def evaluate(self): return dict() @@ -221,15 +201,15 @@ def touch(self, link_name): return torch.log1p(force_mag) # dm_control also includes foot pressures as state obs space - def _get_obs_extra(self, info: Dict): - obs = dict( + def _get_obs_state_dict(self, info: Dict): + return dict( + agent=self._get_obs_agent(), toe_touch=self.touch("foot_toe"), heel_touch=self.touch("foot_heel"), ) - return obs -@register_env("MS-HopperStand-v1", max_episode_steps=100) +@register_env("MS-HopperStand-v1", max_episode_steps=600) class HopperStandEnv(HopperEnv): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -243,7 +223,7 @@ def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward -@register_env("MS-HopperHop-v1", max_episode_steps=100) +@register_env("MS-HopperHop-v1", max_episode_steps=600) class HopperHopEnv(HopperEnv): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/mani_skill/utils/scene_builder/control/planar/scene_builder.py b/mani_skill/utils/scene_builder/control/planar/scene_builder.py new file mode 100644 index 000000000..75bd7c5cf --- /dev/null +++ b/mani_skill/utils/scene_builder/control/planar/scene_builder.py @@ -0,0 +1,56 @@ +from typing import List + +import numpy as np +import sapien +import sapien.render +import torch +from transforms3d.euler import euler2quat + +from mani_skill.utils.building.ground import build_ground +from mani_skill.utils.scene_builder import SceneBuilder + + +class PlanarSceneBuilder(SceneBuilder): + def build(self, build_config_idxs: List[int] = None): + # ground - a strip with length along +x + self.ground = build_ground( + self.scene, + floor_width=2, + floor_length=100, + altitude=0, + xy_origin=(50 - 2, 0), + ) + + # background visual wall + self.wall = self.scene.create_actor_builder() + self.wall.add_box_visual( + half_size=(1e-3, 65, 10), + pose=sapien.Pose(p=[(50 - 2), 2, 0], q=euler2quat(0, 0, np.pi / 2)), + material=sapien.render.RenderMaterial( + base_color=np.array([0.3, 0.3, 0.3, 1]) + ), + ) + self.wall.build_static(name="wall") + self.scene_objects: List[sapien.Entity] = [self.ground, self.wall] + + def initialize(self, env_idx: torch.Tensor): + b = len(env_idx) + robot = self.env.agent.uid + if robot == "hopper" or robot == "walker": + # qpos sampled same as dm_control, but ensure no self intersection explicitly here + random_qpos = torch.rand(b, self.env.agent.robot.dof[0]) + q_lims = self.env.agent.robot.get_qlimits() + q_ranges = q_lims[..., 1] - q_lims[..., 0] + random_qpos *= q_ranges + random_qpos += q_lims[..., 0] + + # overwrite planar joint qpos - these are special for planar robots + # first two joints are dummy rootx and rootz + random_qpos[:, :2] = 0 + # y is axis of rotation of our planar robot (xz plane), so we randomize around it + random_qpos[:, 2] = 
torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) + self.env.agent.reset(random_qpos) + elif robot == "cheetah": + raise NotImplementedError() + else: + raise NotImplementedError() From d4f06687db53eb82c426c0cfe16487787e1374ad Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 23 Jul 2024 13:35:04 -0700 Subject: [PATCH 08/11] added cartpole wall --- mani_skill/envs/tasks/control/cartpole.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mani_skill/envs/tasks/control/cartpole.py b/mani_skill/envs/tasks/control/cartpole.py index dd5f265bd..71b0d1e27 100644 --- a/mani_skill/envs/tasks/control/cartpole.py +++ b/mani_skill/envs/tasks/control/cartpole.py @@ -3,7 +3,9 @@ from typing import Any, Dict, Union import numpy as np +import sapien import torch +from transforms3d.euler import euler2quat from mani_skill.agents.base_agent import BaseAgent from mani_skill.agents.controllers import * @@ -110,6 +112,17 @@ def _load_scene(self, options: dict): for a in actor_builders: a.build(a.name) + # background visual wall + self.wall = self.scene.create_actor_builder() + self.wall.add_box_visual( + half_size=(1e-3, 20, 10), + pose=sapien.Pose(p=[0, 1, 1], q=euler2quat(0, 0, np.pi / 2)), + material=sapien.render.RenderMaterial( + base_color=np.array([0.3, 0.3, 0.3, 1]) + ), + ) + self.wall.build_static(name="wall") + def evaluate(self): return dict() From 763ce6b8d19aa1ccc44357d0b3edbef0d092b01e Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 23 Jul 2024 16:46:29 -0700 Subject: [PATCH 09/11] allow no info success ppo_rgb.py --- examples/baselines/ppo/ppo_rgb.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/baselines/ppo/ppo_rgb.py b/examples/baselines/ppo/ppo_rgb.py index 5d20727db..16142c46e 100644 --- a/examples/baselines/ppo/ppo_rgb.py +++ b/examples/baselines/ppo/ppo_rgb.py @@ -416,7 +416,8 @@ def get_action_and_value(self, x, action=None): final_info = infos["final_info"] done_mask = infos["_final_info"] episodic_return = final_info['episode']['r'][done_mask].mean().cpu().numpy() - writer.add_scalar("charts/success_rate", final_info["success"][done_mask].float().mean().cpu().numpy(), global_step) + if "success" in final_info: + writer.add_scalar("charts/success_rate", final_info["success"][done_mask].float().mean().cpu().numpy(), global_step) writer.add_scalar("charts/episodic_return", episodic_return, global_step) writer.add_scalar("charts/episodic_length", final_info["elapsed_steps"][done_mask].float().mean().cpu().numpy(), global_step) for k in infos["final_observation"]: From 2ae6a474f32c70db1085cd8f951e3d8cc04a4101 Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 25 Jul 2024 16:59:46 -0700 Subject: [PATCH 10/11] hopping reward corrected --- mani_skill/envs/tasks/control/hopper.py | 85 +++++++++++-------- .../control/planar/scene_builder.py | 22 ----- 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/mani_skill/envs/tasks/control/hopper.py b/mani_skill/envs/tasks/control/hopper.py index 9ff8bf7ed..d301ae44c 100644 --- a/mani_skill/envs/tasks/control/hopper.py +++ b/mani_skill/envs/tasks/control/hopper.py @@ -13,6 +13,7 @@ from mani_skill.envs.utils import randomization, rewards from mani_skill.sensors.camera import CameraConfig from mani_skill.utils import common, sapien_utils +from mani_skill.utils.geometry import rotation_conversions from mani_skill.utils.registration import register_env from mani_skill.utils.scene_builder.control.planar.scene_builder import ( PlanarSceneBuilder, @@ -35,46 +36,27 @@ class HopperRobot(BaseAgent): def 
__init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.real_links = [ - link for link in self.robot.links_map.keys() if "dummy" not in link - ] - self.real_links_mass = torch.tensor( - [ - link.mass[0].item() - for link in self.robot.links - if "dummy" not in link.name - ], - device=self.device, - ) - self.real_mass = self.real_links_mass.sum() @property def _controller_configs(self): # NOTE joints in [rootx,rooty,rooz] are for planar tracking, not control joints - # TODO (xhin): further tune controller params - max_delta = 2.25 + max_delta = 2 # best by far stiffness = 100 - friction = 0.1 - force_limit = 200 - damping = 7.5 + damping = 10 # end best pd_joint_delta_pos_body = PDJointPosControllerConfig( ["hip", "knee", "waist"], lower=-max_delta, upper=max_delta, damping=damping, stiffness=stiffness, - force_limit=force_limit, - friction=friction, use_delta=True, ) pd_joint_delta_pos_ankle = PDJointPosControllerConfig( ["ankle"], - lower=-max_delta / 3, - upper=max_delta / 3, + lower=-max_delta / 2.5, + upper=max_delta / 2.5, damping=damping, stiffness=stiffness, - force_limit=force_limit, - friction=friction, use_delta=True, ) rest = PassiveControllerConfig( @@ -102,15 +84,19 @@ def _load_articulation(self): loader.name = self.uid - # only need the robot self.robot = loader.parse(asset_path)[0][0].build() assert self.robot is not None, f"Fail to load URDF/MJCF from {asset_path}" self.robot_link_ids = [link.name for link in self.robot.get_links()] + # cache robot mass for com computation + self.robot_links_mass = [link.mass[0].item() for link in self.robot.get_links()] + self.robot_mass = np.sum(self.robot_links_mass[3:]) + # planar agent has root joints in range [-inf, inf], not ideal in obs space def get_proprioception(self): return dict( # don't include xslider qpos, for x trans invariance + # x increases throughout successful episode qpos=self.robot.get_qpos()[:, 1:], qvel=self.robot.get_qvel(), ) @@ -141,7 +127,7 @@ def _default_sensor_configs(self): pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), width=128, height=128, - fov=45 * (np.pi / 180), + fov=np.pi / 4, near=0.01, far=100, mount=self.agent.robot.links_map["torso_dummy_1"], @@ -157,7 +143,7 @@ def _default_human_render_camera_configs(self): pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), width=512, height=512, - fov=45 * (np.pi / 180), + fov=np.pi / 4, near=0.01, far=100, mount=self.agent.robot.links_map["torso_dummy_1"], @@ -175,10 +161,20 @@ def _load_scene(self, options: dict): def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): with torch.device(self.device): - self.planar_scene.initialize(env_idx) - - def evaluate(self): - return dict() + b = len(env_idx) + # qpos sampled same as dm_control, but ensure no self intersection explicitly here + random_qpos = torch.rand(b, self.agent.robot.dof[0]) + q_lims = self.agent.robot.get_qlimits() + q_ranges = q_lims[..., 1] - q_lims[..., 0] + random_qpos *= q_ranges + random_qpos += q_lims[..., 0] + + # overwrite planar joint qpos - these are special for planar robots + # first two joints are dummy rootx and rootz + random_qpos[:, :2] = 0 + # y is axis of rotation of our planar robot (xz plane), so we randomize around it + random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) + self.agent.reset(random_qpos) @property # dm_control mjc function adapted for maniskill def height(self): @@ -190,8 +186,20 @@ def height(self): @property # dm_control mjc function adapted for maniskill def subtreelinvelx(self): 
- """Returns linvel x component of robot""" - return self.agent.robot.get_qvel()[:, 0].view(-1, 1) + # """Returns linvel x component of robot""" + links = self.agent.robot.get_links()[3:] # skip first three dummy links + vels = torch.stack( + [link.get_linear_velocity() * link.mass[0].item() for link in links], dim=0 + ) # (num_links, b, 3) + com_vel = vels.sum(dim=0) / self.agent.robot_mass # (b, 3) + return com_vel[:, 0] + + @property + def yrot(self): + """Returns rotvel y component of robot""" + # view rotation in range [-pi, pi] + rot = self.agent.robot.get_qpos()[:, 2].view(-1, 1) % (2 * np.pi) + return torch.where(rot < np.pi, rot, 2 * np.pi - rot) # dm_control mjc function adapted for maniskill def touch(self, link_name): @@ -215,7 +223,16 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def compute_dense_reward(self, obs: Any, action: Array, info: Dict): - return rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0).view(-1) + standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) + upright = rewards.tolerance( + self.yrot, # yrot in [-pi,pi] + lower=-np.pi / 6, + upper=np.pi / 6, + margin=1, + value_at_margin=0.5, + sigmoid="linear", + ) + return standing.view(-1) * upright.view(-1) def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): # this should be equal to compute_dense_reward / max possible reward @@ -238,9 +255,9 @@ def compute_dense_reward(self, obs: Any, action: Array, info: Dict): value_at_margin=0.5, sigmoid="linear", ) + return standing.view(-1) * hopping.view(-1) def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): - # this should be equal to compute_dense_reward / max possible reward max_reward = 1.0 return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward diff --git a/mani_skill/utils/scene_builder/control/planar/scene_builder.py b/mani_skill/utils/scene_builder/control/planar/scene_builder.py index 75bd7c5cf..9827cd498 100644 --- a/mani_skill/utils/scene_builder/control/planar/scene_builder.py +++ b/mani_skill/utils/scene_builder/control/planar/scene_builder.py @@ -32,25 +32,3 @@ def build(self, build_config_idxs: List[int] = None): ) self.wall.build_static(name="wall") self.scene_objects: List[sapien.Entity] = [self.ground, self.wall] - - def initialize(self, env_idx: torch.Tensor): - b = len(env_idx) - robot = self.env.agent.uid - if robot == "hopper" or robot == "walker": - # qpos sampled same as dm_control, but ensure no self intersection explicitly here - random_qpos = torch.rand(b, self.env.agent.robot.dof[0]) - q_lims = self.env.agent.robot.get_qlimits() - q_ranges = q_lims[..., 1] - q_lims[..., 0] - random_qpos *= q_ranges - random_qpos += q_lims[..., 0] - - # overwrite planar joint qpos - these are special for planar robots - # first two joints are dummy rootx and rootz - random_qpos[:, :2] = 0 - # y is axis of rotation of our planar robot (xz plane), so we randomize around it - random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) - self.env.agent.reset(random_qpos) - elif robot == "cheetah": - raise NotImplementedError() - else: - raise NotImplementedError() From a75855d0a7e02d35b83ff848b0ca1aef25e5bf7f Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 25 Jul 2024 18:24:36 -0700 Subject: [PATCH 11/11] standing reward fixed --- mani_skill/envs/tasks/control/hopper.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/mani_skill/envs/tasks/control/hopper.py 
b/mani_skill/envs/tasks/control/hopper.py index d301ae44c..57cfcadd7 100644 --- a/mani_skill/envs/tasks/control/hopper.py +++ b/mani_skill/envs/tasks/control/hopper.py @@ -194,13 +194,6 @@ def subtreelinvelx(self): com_vel = vels.sum(dim=0) / self.agent.robot_mass # (b, 3) return com_vel[:, 0] - @property - def yrot(self): - """Returns rotvel y component of robot""" - # view rotation in range [-pi, pi] - rot = self.agent.robot.get_qpos()[:, 2].view(-1, 1) % (2 * np.pi) - return torch.where(rot < np.pi, rot, 2 * np.pi - rot) - # dm_control mjc function adapted for maniskill def touch(self, link_name): """Returns function of sensor force values""" @@ -224,15 +217,7 @@ def __init__(self, *args, **kwargs): def compute_dense_reward(self, obs: Any, action: Array, info: Dict): standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) - upright = rewards.tolerance( - self.yrot, # yrot in [-pi,pi] - lower=-np.pi / 6, - upper=np.pi / 6, - margin=1, - value_at_margin=0.5, - sigmoid="linear", - ) - return standing.view(-1) * upright.view(-1) + return standing.view(-1) def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): # this should be equal to compute_dense_reward / max possible reward
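
The HopperHop dense reward above multiplies a standing term by a hopping term, both computed with the generalized rewards.tolerance added to mani_skill/envs/utils/rewards/common.py earlier in this series. The snippet below is only an illustrative restatement of the patched "linear" sigmoid's contract with value_at_margin=0.5 (the settings used by the hop reward), assuming the same import path hopper.py uses; it is not additional code from the patches.

    import torch
    from mani_skill.envs.utils import rewards

    # Values inside [lower, upper] score 1.0; a value one `margin` below the
    # lower bound scores `value_at_margin` (0.5 here); the reward then decays
    # linearly to 0.0 once the scaled distance from the bound reaches 1.
    x = torch.tensor([[0.0], [1.0], [2.0], [3.0]])
    r = rewards.tolerance(
        x, lower=2.0, upper=float("inf"), margin=1.0,
        value_at_margin=0.5, sigmoid="linear",
    )
    # expected: tensor([[0.0], [0.5], [1.0], [1.0]])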
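
With the tasks registered above as MS-HopperStand-v1 and MS-HopperHop-v1, a quick random rollout exercises the observation, controller, and reward paths end to end. This is a sketch assuming the standard ManiSkill gymnasium entry point (gym.make with the num_envs / obs_mode / control_mode keywords used elsewhere in the repository), not something introduced by these patches.

    import gymnasium as gym
    import mani_skill.envs  # noqa: F401  (importing registers the MS-Hopper* tasks)

    env = gym.make(
        "MS-HopperHop-v1",
        num_envs=4,
        obs_mode="state",
        control_mode="pd_joint_delta_pos",
    )
    obs, _ = env.reset(seed=0)
    for _ in range(100):
        action = env.action_space.sample()  # random hip/knee/waist/ankle deltas
        obs, reward, terminated, truncated, info = env.step(action)
    env.close()

Since compute_normalized_dense_reward divides by a per-step maximum of 1.0, the accumulated return over the 600-step horizon is directly comparable between the stand and hop variants, which is what the task cards rely on when thresholding episode return to define success.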