From 5f8eef182356096863274920ccbc850b57442777 Mon Sep 17 00:00:00 2001 From: Xander Date: Mon, 29 Jul 2024 20:00:50 -0700 Subject: [PATCH 01/13] simple stand, camera + qpos_rand todo --- mani_skill/envs/tasks/control/__init__.py | 1 + mani_skill/envs/tasks/control/humanoid.py | 274 ++++++++++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 mani_skill/envs/tasks/control/humanoid.py diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index 59e766758..e74360bc3 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -1,2 +1,3 @@ from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv from .hopper import HopperHopEnv, HopperStandEnv +from .humanoid import HumanoidEnv diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py new file mode 100644 index 000000000..0072ddfa6 --- /dev/null +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -0,0 +1,274 @@ +"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/humanoid.py""" + +from typing import Any, Dict, Union + +import numpy as np +import sapien +import torch + +from mani_skill.agents.robots.humanoid import Humanoid +from mani_skill.envs.sapien_env import BaseEnv +from mani_skill.envs.utils import randomization, rewards +from mani_skill.sensors.camera import CameraConfig +from mani_skill.utils import common, sapien_utils +from mani_skill.utils.building.ground import build_ground +from mani_skill.utils.geometry import rotation_conversions +from mani_skill.utils.registration import register_env +from mani_skill.utils.structs.pose import Pose +from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig + +# Height of head above which stand reward is 1. +_STAND_HEIGHT = 1.4 + +# Horizontal speeds above which move reward is 1. 
+_WALK_SPEED = 1 +_RUN_SPEED = 10 + + +@register_env("MS-HumanoidFake-v1", max_episode_steps=100) +class HumanoidEnv(BaseEnv): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + # active_links_names = [link.name for link in self.active_links] + # print("links", self.agent.robot.links_map.keys()) + # print() + # print("active", active_links_names) + # print() + # print("joints", self.agent.robot.joints_map.keys()) + # print() + # print("active_joints", self.agent.robot.active_joints_map.keys()) + # print() + # print("num", len(self.agent.robot.active_joints)) + # print() + # print("all_joints", len(self.agent.robot.joints)) + + @property + def _default_sim_config(self): + return SimConfig( + scene_cfg=SceneConfig( + solver_position_iterations=4, solver_velocity_iterations=1 + ), + ) + + # @property + # def _default_sensor_configs(self): + # return [ + # # replicated from xml file + # CameraConfig( + # uid="cam0", + # pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), + # width=128, + # height=128, + # fov=np.pi / 4, + # near=0.01, + # far=100, + # mount = self.agent.robot.get_root() + # ), + # ] + + @property + def _default_human_render_camera_configs(self): + return [ + # replicated from xml file + CameraConfig( + uid="render_cam", + pose=sapien_utils.look_at(eye=[0, 2, 2], target=[0, 0, 0]), + width=512, + height=512, + fov=np.pi / 2.5, + near=0.01, + far=100, + ), + ] + + @property + def head_height(self): + """Returns the height of the head.""" + return self.agent.robot.links_map["head"].pose.p[:, -1] + + @property + def torso_upright(self): + # print("rot_mat") + # print(self.agent.robot.links_map["torso"].pose.to_transformation_matrix()) + # print("rot_mat_zz", self.agent.robot.links_map["torso"].pose.to_transformation_matrix()[:,2,2]) + # print("rot_mat.shape", self.agent.robot.links_map["torso"].pose.to_transformation_matrix().shape) + return self.agent.robot.links_map["torso"].pose.to_transformation_matrix()[ + :, 2, 2 + ] + + # not sure this is correct - are our pose rotations the same as mujocos? 
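+    # as far as I can tell, sapien's transformation matrix and mujoco's xmat both store the
+    # body-to-world rotation, so the [2, 2] entry above is the world-z component of the torso's
+    # local z axis in either convention (and that diagonal entry is unchanged by a transpose anyway)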
+ # test out the transpose (taking column instead of row) + # right now, it should represnt the z axis of global in the local frame - hm, no, this should be correct + @property + def torso_vertical_orientation(self): + return ( + self.agent.robot.links_map["torso"] + .pose.to_transformation_matrix()[:, 2, :3] + .view(-1, 3) + ) + + @property + def extremities(self): + torso_frame = ( + self.agent.robot.links_map["torso"] + .pose.to_transformation_matrix()[:, :3, :3] + .view(-1, 3, 3) + ) + torso_pos = self.agent.robot.links_map["torso"].pose.p + positions = [] + for side in ("left_", "right_"): + for limb in ("hand", "foot"): + torso_to_limb = ( + self.agent.robot.links_map[side + limb].pose.p - torso_pos + ).view(-1, 1, 3) + positions.append( + (torso_to_limb @ torso_frame).view(-1, 3) + ) # reverse order mult == extrems in torso frame + return torch.stack(positions, dim=1).view(-1, 12) # (b, 4, 3) -> (b,12) + + @property + def center_of_mass_velocity(self): + # """Returns the center of mass velocity of robot""" + vels = torch.stack( + [ + link.get_linear_velocity() * link.mass[0].item() + for link in self.active_links + ], + dim=0, + ) # (num_links, b, 3) + com_vel = vels.sum(dim=0) / self.robot_mass # (b, 3) + return com_vel + + # # dm_control also includes foot pressures as state obs space + def _get_obs_state_dict(self, info: Dict): + return dict( + agent=self._get_obs_agent(), # dm control blocks out first 7 qpos, happens by default for us, includes qpos & qvel + head_height=self.head_height, # (b,1) + extremities=self.extremities, # (b, 4, 3) + torso_vertical=self.torso_vertical_orientation, # (b, 3) + com_velocity=self.center_of_mass_velocity, # (b, 3) + ) + + def compute_dense_reward(self, obs: Any, action: Array, info: Dict): + standing = rewards.tolerance( + self.head_height, + lower=_STAND_HEIGHT, + upper=float("inf"), + margin=_STAND_HEIGHT / 4, + ) + upright = rewards.tolerance( + self.torso_upright, + lower=0.9, + upper=float("inf"), + sigmoid="linear", + margin=1.9, + value_at_margin=0, + ) + stand_reward = standing.view(-1) * upright.view(-1) + small_control = rewards.tolerance( + action, margin=1, value_at_margin=0, sigmoid="quadratic" + ).mean( + dim=-1 + ) # (b, a) -> (b) + small_control = (4 + small_control) / 5 + horizontal_velocity = self.center_of_mass_velocity[:, :2] + dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean( + dim=-1 + ) # (b,3) -> (b) + + return small_control.view(-1) * stand_reward * dont_move.view(-1) + + def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): + max_reward = 1.0 + return self.compute_dense_reward(obs, action, info) / max_reward + + def _load_scene(self, options: dict): + loader = self.scene.create_mjcf_loader() + articulation_builders, actor_builders, sensor_configs = loader.parse( + self.agent.mjcf_path + ) + for a in actor_builders: + a.build(a.name) + + self.ground = build_ground(self.scene, floor_width=100, altitude=0) + + self.active_links = [ + link for link in self.agent.robot.get_links() if "dummy" not in link.name + ] + self.robot_mass = np.sum([link.mass[0].item() for link in self.active_links]) + + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): + with torch.device(self.device): + b = len(env_idx) + # # qpos sampled same as dm_control, but ensure no self intersection explicitly here + # random_qpos = torch.rand(b, self.agent.robot.dof[0]) + # q_lims = self.agent.robot.get_qlimits() + # q_ranges = q_lims[..., 1] - q_lims[..., 0] + # random_qpos *= q_ranges + # 
random_qpos += q_lims[..., 0] + + # # overwrite planar joint qpos - these are special for planar robots + # # first two joints are dummy rootx and rootz + # random_qpos[:, :2] = 0 + # # y is axis of rotation of our planar robot (xz plane), so we randomize around it + # random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) + # self.agent.reset(self.agent.keyframes["squat"].qpos) + pos = sapien.Pose(p=[0, 0, 0], q=[1, 0, 0, 0]) + self.agent.robot.set_root_pose(pos) + self.agent.reset(torch.zeros(b, self.agent.robot.dof[0])) + # @pass + + # @property # dm_control mjc function adapted for maniskill + # def height(self): + # """Returns relative height of the robot""" + # return ( + # self.agent.robot.links_map["torso"].pose.p[:, -1] + # - self.agent.robot.links_map["foot_heel"].pose.p[:, -1] + # ).view(-1, 1) + + # # dm_control mjc function adapted for maniskill + # def touch(self, link_name): + # """Returns function of sensor force values""" + # force_vec = self.agent.robot.get_net_contact_forces([link_name]) + # force_mag = torch.linalg.norm(force_vec, dim=-1) + # return torch.log1p(force_mag) + + +# @register_env("MS-humanoidStand-v1", max_episode_steps=600) +# class humanoidStandEnv(humanoidEnv): +# def __init__(self, *args, **kwargs): +# super().__init__(*args, **kwargs) + +# def compute_dense_reward(self, obs: Any, action: Array, info: Dict): +# standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) +# return standing.view(-1) + +# def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): +# # this should be equal to compute_dense_reward / max possible reward +# max_reward = 1.0 +# return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward + + +# @register_env("MS-humanoidHop-v1", max_episode_steps=600) +# class humanoidHopEnv(humanoidEnv): +# def __init__(self, *args, **kwargs): +# super().__init__(*args, **kwargs) + +# def compute_dense_reward(self, obs: Any, action: Array, info: Dict): +# standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) +# hopping = rewards.tolerance( +# self.subtreelinvelx, +# lower=_HOP_SPEED, +# upper=float("inf"), +# margin=_HOP_SPEED / 2, +# value_at_margin=0.5, +# sigmoid="linear", +# ) + +# return standing.view(-1) * hopping.view(-1) + +# def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): +# max_reward = 1.0 +# return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward From c28d26f43f6a4a5f172251a1fd115a93d5926ff4 Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 31 Jul 2024 12:56:10 -0700 Subject: [PATCH 02/13] sac works, hard humanoid stand --- mani_skill/agents/robots/humanoid/humanoid.py | 55 +++++++++++++++++-- .../assets/robots/humanoid/humanoid.xml | 3 +- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/mani_skill/agents/robots/humanoid/humanoid.py b/mani_skill/agents/robots/humanoid/humanoid.py index 3dded91f5..370db36f1 100644 --- a/mani_skill/agents/robots/humanoid/humanoid.py +++ b/mani_skill/agents/robots/humanoid/humanoid.py @@ -51,23 +51,68 @@ def __init__(self, *args, **kwargs): @property def _controller_configs(self): + # pd_joint_pos = PDJointPosControllerConfig( + # [x.name for x in self.robot.active_joints], + # lower=None, + # upper=None, + # stiffness=100, + # damping=10, + # normalize_action=False, + # ) pd_joint_pos = PDJointPosControllerConfig( [x.name for x in self.robot.active_joints], lower=None, upper=None, - stiffness=100, + stiffness=1000, damping=10, 
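+            # rough intuition: each drive applies ~ stiffness * (target_qpos - qpos) - damping * qvel,
+            # so bumping stiffness from 100 to 1000 makes this absolute position controller much stiffer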
normalize_action=False, ) - pd_joint_delta_pos = PDJointPosControllerConfig( + # pd_joint_delta_pos = PDJointPosControllerConfig( + # [j.name for j in self.robot.active_joints], + # -1, + # 1, + # damping=5, + # stiffness=20, + # force_limit=100, + # use_delta=True, + # ) + + # pd_joint_delta_pos = PDJointPosControllerConfig( #instantly causes error + # [j.name for j in self.robot.active_joints], + # -2, + # 2, + # damping=50, + # stiffness=500, + # force_limit=20000000, + # use_delta=True, + # ) + # pd_joint_delta_pos = PDJointPosControllerConfig( #best yet, almost there + # [j.name for j in self.robot.active_joints], + # -3, + # 3, + # damping=5, + # stiffness=100, + # force_limit=20000000, + # use_delta=True, + # ) + pd_joint_delta_pos = PDJointPosControllerConfig( # best yet, almost there [j.name for j in self.robot.active_joints], -1, 1, - damping=5, - stiffness=20, - force_limit=100, + damping=10, + stiffness=200, + force_limit=200, use_delta=True, ) + # pd_joint_delta_pos = PDJointPosControllerConfig( #eventually causes error + # [j.name for j in self.robot.active_joints], + # -1, + # 1, + # damping=10, + # stiffness=1000, + # force_limit=200, + # use_delta=True, + # ) return deepcopy_dict( dict( pd_joint_pos=dict(body=pd_joint_pos, balance_passive_force=False), diff --git a/mani_skill/assets/robots/humanoid/humanoid.xml b/mani_skill/assets/robots/humanoid/humanoid.xml index e3c861478..0d4b38509 100644 --- a/mani_skill/assets/robots/humanoid/humanoid.xml +++ b/mani_skill/assets/robots/humanoid/humanoid.xml @@ -30,7 +30,8 @@ - + + From 434f82cf5453328bd265f3132d98fe3fa72029f1 Mon Sep 17 00:00:00 2001 From: Xander Date: Wed, 31 Jul 2024 13:01:24 -0700 Subject: [PATCH 03/13] sac workd, stand hard version --- mani_skill/envs/tasks/control/humanoid.py | 125 ++++++---------------- 1 file changed, 33 insertions(+), 92 deletions(-) diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index 0072ddfa6..c3b7d20ad 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -50,6 +50,8 @@ def _default_sim_config(self): scene_cfg=SceneConfig( solver_position_iterations=4, solver_velocity_iterations=1 ), + sim_freq=200, + control_freq=40, ) # @property @@ -88,34 +90,21 @@ def head_height(self): """Returns the height of the head.""" return self.agent.robot.links_map["head"].pose.p[:, -1] - @property - def torso_upright(self): + def torso_upright(self, info): # print("rot_mat") # print(self.agent.robot.links_map["torso"].pose.to_transformation_matrix()) # print("rot_mat_zz", self.agent.robot.links_map["torso"].pose.to_transformation_matrix()[:,2,2]) # print("rot_mat.shape", self.agent.robot.links_map["torso"].pose.to_transformation_matrix().shape) - return self.agent.robot.links_map["torso"].pose.to_transformation_matrix()[ - :, 2, 2 - ] + return info["torso_xmat"][:, 2, 2] # not sure this is correct - are our pose rotations the same as mujocos? 
# test out the transpose (taking column instead of row) # right now, it should represnt the z axis of global in the local frame - hm, no, this should be correct - @property - def torso_vertical_orientation(self): - return ( - self.agent.robot.links_map["torso"] - .pose.to_transformation_matrix()[:, 2, :3] - .view(-1, 3) - ) + def torso_vertical_orientation(self, info): + return info["torso_xmat"][:, 2, :3].view(-1, 3) - @property - def extremities(self): - torso_frame = ( - self.agent.robot.links_map["torso"] - .pose.to_transformation_matrix()[:, :3, :3] - .view(-1, 3, 3) - ) + def extremities(self, info): + torso_frame = info["torso_xmat"][:, :3, :3].view(-1, 3, 3) torso_pos = self.agent.robot.links_map["torso"].pose.p positions = [] for side in ("left_", "right_"): @@ -146,11 +135,18 @@ def _get_obs_state_dict(self, info: Dict): return dict( agent=self._get_obs_agent(), # dm control blocks out first 7 qpos, happens by default for us, includes qpos & qvel head_height=self.head_height, # (b,1) - extremities=self.extremities, # (b, 4, 3) - torso_vertical=self.torso_vertical_orientation, # (b, 3) + extremities=self.extremities(info), # (b, 4, 3) + torso_vertical=self.torso_vertical_orientation(info), # (b, 3) com_velocity=self.center_of_mass_velocity, # (b, 3) ) + def evaluate(self) -> Dict: + return dict( + torso_xmat=self.agent.robot.links_map[ + "torso" + ].pose.to_transformation_matrix() + ) + def compute_dense_reward(self, obs: Any, action: Array, info: Dict): standing = rewards.tolerance( self.head_height, @@ -159,7 +155,7 @@ def compute_dense_reward(self, obs: Any, action: Array, info: Dict): margin=_STAND_HEIGHT / 4, ) upright = rewards.tolerance( - self.torso_upright, + self.torso_upright(info), lower=0.9, upper=float("inf"), sigmoid="linear", @@ -202,73 +198,18 @@ def _load_scene(self, options: dict): def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): with torch.device(self.device): b = len(env_idx) - # # qpos sampled same as dm_control, but ensure no self intersection explicitly here - # random_qpos = torch.rand(b, self.agent.robot.dof[0]) - # q_lims = self.agent.robot.get_qlimits() - # q_ranges = q_lims[..., 1] - q_lims[..., 0] - # random_qpos *= q_ranges - # random_qpos += q_lims[..., 0] - - # # overwrite planar joint qpos - these are special for planar robots - # # first two joints are dummy rootx and rootz - # random_qpos[:, :2] = 0 - # # y is axis of rotation of our planar robot (xz plane), so we randomize around it - # random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) - # self.agent.reset(self.agent.keyframes["squat"].qpos) - pos = sapien.Pose(p=[0, 0, 0], q=[1, 0, 0, 0]) - self.agent.robot.set_root_pose(pos) - self.agent.reset(torch.zeros(b, self.agent.robot.dof[0])) - # @pass - - # @property # dm_control mjc function adapted for maniskill - # def height(self): - # """Returns relative height of the robot""" - # return ( - # self.agent.robot.links_map["torso"].pose.p[:, -1] - # - self.agent.robot.links_map["foot_heel"].pose.p[:, -1] - # ).view(-1, 1) - - # # dm_control mjc function adapted for maniskill - # def touch(self, link_name): - # """Returns function of sensor force values""" - # force_vec = self.agent.robot.get_net_contact_forces([link_name]) - # force_mag = torch.linalg.norm(force_vec, dim=-1) - # return torch.log1p(force_mag) - - -# @register_env("MS-humanoidStand-v1", max_episode_steps=600) -# class humanoidStandEnv(humanoidEnv): -# def __init__(self, *args, **kwargs): -# super().__init__(*args, **kwargs) - -# def 
compute_dense_reward(self, obs: Any, action: Array, info: Dict): -# standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) -# return standing.view(-1) - -# def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): -# # this should be equal to compute_dense_reward / max possible reward -# max_reward = 1.0 -# return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward - - -# @register_env("MS-humanoidHop-v1", max_episode_steps=600) -# class humanoidHopEnv(humanoidEnv): -# def __init__(self, *args, **kwargs): -# super().__init__(*args, **kwargs) - -# def compute_dense_reward(self, obs: Any, action: Array, info: Dict): -# standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) -# hopping = rewards.tolerance( -# self.subtreelinvelx, -# lower=_HOP_SPEED, -# upper=float("inf"), -# margin=_HOP_SPEED / 2, -# value_at_margin=0.5, -# sigmoid="linear", -# ) - -# return standing.view(-1) * hopping.view(-1) - -# def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): -# max_reward = 1.0 -# return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward + # set agent root pose - torso now centered at dummy root at (0,0,0) + pose = Pose.create_from_pq( + p=[0, 0, 1.5], q=randomization.random_quaternions(b) + ) + self.agent.robot.set_root_pose(pose) + # set randomized qpos + random_qpos = torch.rand(b, self.agent.robot.dof[0]) + q_lims = self.agent.robot.get_qlimits() + q_ranges = q_lims[..., 1] - q_lims[..., 0] + random_qpos *= q_ranges + random_qpos += q_lims[..., 0] + self.agent.reset(random_qpos) + + # print("forces", self.agent.robot.get_net_contact_forces(self.agent.robot.links_map.keys()).sum()) + # self.agent.robot.collision From 81b476c50a502b8a72c4ebf77d87d6a575ecbebb Mon Sep 17 00:00:00 2001 From: Xander Date: Sat, 3 Aug 2024 16:46:48 -0700 Subject: [PATCH 04/13] refactored humanoid env, added correct foot friction (req. 
re-run of hard envs), base for easier versions called 'standard' --- mani_skill/envs/tasks/control/__init__.py | 2 +- mani_skill/envs/tasks/control/humanoid.py | 323 ++++++++++++++++------ mani_skill/utils/building/_mjcf_loader.py | 21 +- 3 files changed, 254 insertions(+), 92 deletions(-) diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index e74360bc3..d8d84d9ae 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -1,3 +1,3 @@ from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv from .hopper import HopperHopEnv, HopperStandEnv -from .humanoid import HumanoidEnv +from .humanoid import HumanoidStandHard,HumanoidWalkHard,HumanoidRunHard,HumanoidStand diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index c3b7d20ad..1c2eb9181 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -9,13 +9,14 @@ from mani_skill.agents.robots.humanoid import Humanoid from mani_skill.envs.sapien_env import BaseEnv from mani_skill.envs.utils import randomization, rewards -from mani_skill.sensors.camera import CameraConfig +from mani_skill.sensors.camera import CameraConfig, update_camera_cfgs_from_dict from mani_skill.utils import common, sapien_utils from mani_skill.utils.building.ground import build_ground from mani_skill.utils.geometry import rotation_conversions from mani_skill.utils.registration import register_env from mani_skill.utils.structs.pose import Pose from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig +from transforms3d.euler import euler2quat # Height of head above which stand reward is 1. _STAND_HEIGHT = 1.4 @@ -24,25 +25,12 @@ _WALK_SPEED = 1 _RUN_SPEED = 10 - -@register_env("MS-HumanoidFake-v1", max_episode_steps=100) -class HumanoidEnv(BaseEnv): +################################################################### +class HumanoidEnvBase(BaseEnv): agent: Union[Humanoid] def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) - # active_links_names = [link.name for link in self.active_links] - # print("links", self.agent.robot.links_map.keys()) - # print() - # print("active", active_links_names) - # print() - # print("joints", self.agent.robot.joints_map.keys()) - # print() - # print("active_joints", self.agent.robot.active_joints_map.keys()) - # print() - # print("num", len(self.agent.robot.active_joints)) - # print() - # print("all_joints", len(self.agent.robot.joints)) @property def _default_sim_config(self): @@ -54,34 +42,42 @@ def _default_sim_config(self): control_freq=40, ) - # @property - # def _default_sensor_configs(self): - # return [ - # # replicated from xml file - # CameraConfig( - # uid="cam0", - # pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), - # width=128, - # height=128, - # fov=np.pi / 4, - # near=0.01, - # far=100, - # mount = self.agent.robot.get_root() - # ), - # ] + @property + def _default_sensor_configs(self): + return [ + # replicated from xml file + CameraConfig( + uid="cam0", + pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), + width=128, + height=128, + fov=np.pi / 4, + near=0.01, + far=100, + mount=self.camera_mount + ), + ] + + def _before_control_step(self): + self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) + if sapien.physx.is_gpu_enabled(): + # we update just actor pose here, no need to call 
apply_all/fetch_all + self.scene.px.gpu_apply_rigid_dynamic_data() + self.scene.px.gpu_fetch_rigid_dynamic_data() @property def _default_human_render_camera_configs(self): return [ # replicated from xml file CameraConfig( - uid="render_cam", - pose=sapien_utils.look_at(eye=[0, 2, 2], target=[0, 0, 0]), + uid="not_render_cam", + pose=sapien_utils.look_at(eye=[0, -3, 1], target=[0, 0, 0]), width=512, height=512, - fov=np.pi / 2.5, + fov= 60 * np.pi/180, near=0.01, far=100, + mount=self.camera_mount ), ] @@ -120,82 +116,198 @@ def extremities(self, info): @property def center_of_mass_velocity(self): # """Returns the center of mass velocity of robot""" - vels = torch.stack( - [ - link.get_linear_velocity() * link.mass[0].item() - for link in self.active_links - ], - dim=0, - ) # (num_links, b, 3) + vels = torch.stack([link.get_linear_velocity() for link in self.active_links],dim=0) # (num_links, b, 3) + vels *= self.robot_links_mass.view(-1,1,1) com_vel = vels.sum(dim=0) / self.robot_mass # (b, 3) - return com_vel - # # dm_control also includes foot pressures as state obs space - def _get_obs_state_dict(self, info: Dict): - return dict( - agent=self._get_obs_agent(), # dm control blocks out first 7 qpos, happens by default for us, includes qpos & qvel - head_height=self.head_height, # (b,1) - extremities=self.extremities(info), # (b, 4, 3) - torso_vertical=self.torso_vertical_orientation(info), # (b, 3) - com_velocity=self.center_of_mass_velocity, # (b, 3) - ) + return com_vel def evaluate(self) -> Dict: return dict( torso_xmat=self.agent.robot.links_map[ "torso" - ].pose.to_transformation_matrix() + ].pose.to_transformation_matrix(), + cmass_linvel=self.center_of_mass_velocity + ) + + def _load_scene(self, options: dict): + loader = self.scene.create_mjcf_loader() + articulation_builders, actor_builders, sensor_configs = loader.parse( + self.agent.mjcf_path ) + for a in actor_builders: + a.build(a.name) - def compute_dense_reward(self, obs: Any, action: Array, info: Dict): - standing = rewards.tolerance( + self.ground = build_ground(self.scene) + + # allow tracking of humanoid + self.camera_mount = self.scene.create_actor_builder().build_kinematic("camera_mount") + + # cache for com velocity calc - doing so gives + 10 fps boost for 1024 envs + self.active_links = [ + link for link in self.agent.robot.get_links() if "dummy" not in link.name + ] + self.robot_links_mass = torch.stack([link.mass[0] for link in self.active_links]).to(self.device) + self.robot_mass = torch.sum(self.robot_links_mass).item() + + def standing_rew(self): + return rewards.tolerance( self.head_height, lower=_STAND_HEIGHT, - upper=float("inf"), + upper=float('inf'), margin=_STAND_HEIGHT / 4, - ) - upright = rewards.tolerance( + ).view(-1) + + def upright_rew(self, info: Dict): + return rewards.tolerance( self.torso_upright(info), lower=0.9, upper=float("inf"), sigmoid="linear", margin=1.9, value_at_margin=0, - ) - stand_reward = standing.view(-1) * upright.view(-1) - small_control = rewards.tolerance( + ).view(-1) + + def control_rew(self, action: Array): + return rewards.tolerance( action, margin=1, value_at_margin=0, sigmoid="quadratic" - ).mean( - dim=-1 - ) # (b, a) -> (b) - small_control = (4 + small_control) / 5 - horizontal_velocity = self.center_of_mass_velocity[:, :2] - dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean( - dim=-1 - ) # (b,3) -> (b) + ).mean(dim=-1).view(-1) # (b, a) -> (b) - return small_control.view(-1) * stand_reward * dont_move.view(-1) + def dont_move_rew(self, info): + return 
rewards.tolerance(info['cmass_linvel'][:, :2], margin=2).mean(dim=-1).view(-1) # (b,3) -> (b) + + def move_rew(self, info, move_speed=10): + com_vel = torch.linalg.norm(info['cmass_linvel'][:, :2], dim=-1) + return rewards.tolerance(com_vel, + lower=move_speed, + upper=np.inf, + margin=move_speed, + value_at_margin=0, + sigmoid="linear").mean(dim=-1).view(-1) # (b,3) -> (b) - def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): - max_reward = 1.0 - return self.compute_dense_reward(obs, action, info) / max_reward - def _load_scene(self, options: dict): - loader = self.scene.create_mjcf_loader() - articulation_builders, actor_builders, sensor_configs = loader.parse( - self.agent.mjcf_path + + # def compute_dense_reward(self, obs: Any, action: Array, info: Dict): + # standing = rewards.tolerance( + # self.head_height, + # lower=_STAND_HEIGHT, + # upper=float("inf"), + # margin=_STAND_HEIGHT / 4, + # ) + # upright = rewards.tolerance( + # self.torso_upright(info), + # lower=0.9, + # upper=float("inf"), + # sigmoid="linear", + # margin=1.9, + # value_at_margin=0, + # ) + # stand_reward = standing.view(-1) * upright.view(-1) + # small_control = rewards.tolerance( + # action, margin=1, value_at_margin=0, sigmoid="quadratic" + # ).mean( + # dim=-1 + # ) # (b, a) -> (b) + # small_control = (4 + small_control) / 5 + # horizontal_velocity = self.center_of_mass_velocity[:, :2] + # dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean( + # dim=-1 + # ) # (b,3) -> (b) + + # return small_control.view(-1) * stand_reward * dont_move.view(-1) + + # def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): + # max_reward = 1.0 + # return self.compute_dense_reward(obs, action, info) / max_reward +### +class HumanoidEnvStandard(HumanoidEnvBase): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + def _get_obs_state_dict(self, info: Dict): + # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 + # global dpos/dt and root (torso) dquaterion/dt lost + # we replace with linear root linvel and root angularvel (equivalent info) + return dict( + agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos + root_vel=self.agent.robot.links_map["dummy_root_0"].get_linear_velocity(), # free joint info, (b, 3) + root_quat_vel = self.agent.robot.links_map["dummy_root_0"].get_angular_velocity(), # free joint info, (b, 3) + head_height=self.head_height, # (b,1) + com_velocity=info['cmass_linvel'], # (b, 3) + extremities=self.extremities(info), + link_linvels=torch.stack([link.get_linear_velocity() for link in self.active_links], dim=1).view(-1,16*3), + link_angvels=torch.stack([link.get_angular_velocity() for link in self.active_links], dim=1).view(-1,16*3), + qfrc=self.agent.robot.get_qf() ) - for a in actor_builders: - a.build(a.name) - self.ground = build_ground(self.scene, floor_width=100, altitude=0) + # our standard humanoid env resets if torso is below a certain point + def _load_scene(self, options: Dict): + super()._load_scene(options) + self.ground.set_collision_group_bit(group=2, bit_idx=30, bit=1) - self.active_links = [ - link for link in self.agent.robot.get_links() if "dummy" not in link.name - ] - self.robot_mass = np.sum([link.mass[0].item() for link in self.active_links]) + for link in self.active_links: + if not "foot" in link.name: + link.set_collision_group_bit(group=2, bit_idx=30, bit=1) + + 
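+    # (assuming group-2 bits behave as usual in maniskill - a shared bit 30 between the ground and
+    #  every non-foot link disables those contact pairs - only the feet ever touch the floor, and a
+    #  fall is instead caught by the torso-height fail check in evaluate below)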
def evaluate(self) -> Dict: + info = super().evaluate() + torso_z = self.agent.robot.links_map["torso"].pose.p[:,-1] + failure = torch.logical_or(torso_z < 1.0, torso_z > 2.0) + info.update(fail=failure) + return info + + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): + #self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) + with torch.device(self.device): + b = len(env_idx) + # set agent root pose - torso now centered at dummy root at (0,0,0) + pose = Pose.create_from_pq(p=torch.tensor([0, 0, 1.3]).unsqueeze(0).repeat(b,1)) + self.agent.robot.set_root_pose(pose) + # set randomized qpos + noise_scale = 1e-2 + qpos_noise = (torch.rand(b, self.agent.robot.dof[0]) * (2*noise_scale)) - noise_scale + qvel_noise = (torch.rand(b, self.agent.robot.dof[0]) * (2*noise_scale)) - noise_scale + self.agent.robot.set_qpos(qpos_noise) + self.agent.robot.set_qvel(qvel_noise) + +@register_env("MS-HumanoidStand-v1", max_episode_steps=100) +class HumanoidStand(HumanoidEnvStandard): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action))/5 + return small_control * self.standing_rew() * self.upright_rew(info) * self.dont_move_rew(info) + + def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + return self.compute_dense_reward(obs, action, info) + +class HumanoidEnvHard(HumanoidEnvBase): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + def _get_obs_state_dict(self, info: Dict): + # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 + # global dpos/dt and root (torso) dquaterion/dt lost + # we replace with linear root linvel and root angularvel (equivalent info) + return dict( + agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos + head_height=self.head_height, # (b,1) + extremities=self.extremities(info), # (b, 12) + torso_vertical=self.torso_vertical_orientation(info), # (b, 3) + com_velocity=info['cmass_linvel'], # (b, 3) + root_vel=self.agent.robot.links_map["dummy_root_0"].get_linear_velocity(), # free joint info, (b, 3) + root_quat_vel = self.agent.robot.links_map["dummy_root_0"].get_angular_velocity() # free joint info, (b, 3) + ) + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): + self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) with torch.device(self.device): b = len(env_idx) # set agent root pose - torso now centered at dummy root at (0,0,0) @@ -211,5 +323,46 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): random_qpos += q_lims[..., 0] self.agent.reset(random_qpos) - # print("forces", self.agent.robot.get_net_contact_forces(self.agent.robot.links_map.keys()).sum()) - # self.agent.robot.collision +### +@register_env("MS-HumanoidStandHard-v1", max_episode_steps=100) +class HumanoidStandHard(HumanoidEnvHard): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action))/5 + return small_control * self.standing_rew() * 
self.upright_rew(info) * self.dont_move_rew(info) + + def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + return self.compute_dense_reward(obs, action, info) + +@register_env("MS-HumanoidWalkHard-v1", max_episode_steps=100) +class HumanoidWalkHard(HumanoidEnvHard): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action))/5 + return small_control * self.standing_rew() * self.upright_rew(info) * self.move_rew(info, _WALK_SPEED) + + def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + return self.compute_dense_reward(obs, action, info) + +@register_env("MS-HumanoidRunHard-v1", max_episode_steps=100) +class HumanoidRunHard(HumanoidEnvHard): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action))/5 + return small_control * self.standing_rew() * self.upright_rew(info) * self.move_rew(info, _RUN_SPEED) + + def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + return self.compute_dense_reward(obs, action, info) + \ No newline at end of file diff --git a/mani_skill/utils/building/_mjcf_loader.py b/mani_skill/utils/building/_mjcf_loader.py index 774d3f273..74d7a6ecd 100644 --- a/mani_skill/utils/building/_mjcf_loader.py +++ b/mani_skill/utils/building/_mjcf_loader.py @@ -247,8 +247,17 @@ def _build_geom( ) geom_density = _parse_float(geom_attrib, "density", 1000.0) - physx_material = None - # TODO handle geometry material properties + + # if condim is 1, we can easily model the material's friction + if _parse_int(geom_attrib, "condim", 0) == 1: + friction = _parse_float(geom_attrib, "friction", 0.3) # maniskill default frection is 0.3 + physx_material = PhysxMaterial( + static_friction=friction, + dynamic_friction=friction, + restitution=0 + ) + else: + physx_material = None geom_group = _parse_int(geom_attrib, "group", 0) # See note at top of file for how we handle geom groups @@ -323,7 +332,7 @@ def _build_geom( t_visual2link, radius=geom_radius, half_length=geom_half_length, - # material=material, + material=physx_material, # name=geom_name, ) elif geom_type == "box": @@ -337,8 +346,8 @@ def _build_geom( if has_collisions: builder.add_box_collision( t_visual2link, - half_size=geom_size - # material=material, + half_size=geom_size, + material=physx_material, # name=geom_name, ) elif geom_type == "cylinder": @@ -355,7 +364,7 @@ def _build_geom( t_visual2link, radius=geom_radius, half_length=geom_half_length, - # material=material, + material=physx_material, # name=geom_name ) From c3221731ab489c3c2d9c083509b413776a8e18a2 Mon Sep 17 00:00:00 2001 From: Xander Date: Sat, 3 Aug 2024 16:54:03 -0700 Subject: [PATCH 05/13] refactored humanoid env, added correct foot friction (req. 
re-run of hard envs), base for easier versions called 'standard' --- mani_skill/envs/tasks/control/__init__.py | 7 +- mani_skill/envs/tasks/control/humanoid.py | 202 +++++++++++++++------- mani_skill/utils/building/_mjcf_loader.py | 10 +- 3 files changed, 148 insertions(+), 71 deletions(-) diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index d8d84d9ae..763828af2 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -1,3 +1,8 @@ from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv from .hopper import HopperHopEnv, HopperStandEnv -from .humanoid import HumanoidStandHard,HumanoidWalkHard,HumanoidRunHard,HumanoidStand +from .humanoid import ( + HumanoidRunHard, + HumanoidStand, + HumanoidStandHard, + HumanoidWalkHard, +) diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index 1c2eb9181..937749ea0 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -5,6 +5,7 @@ import numpy as np import sapien import torch +from transforms3d.euler import euler2quat from mani_skill.agents.robots.humanoid import Humanoid from mani_skill.envs.sapien_env import BaseEnv @@ -16,7 +17,6 @@ from mani_skill.utils.registration import register_env from mani_skill.utils.structs.pose import Pose from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig -from transforms3d.euler import euler2quat # Height of head above which stand reward is 1. _STAND_HEIGHT = 1.4 @@ -54,12 +54,14 @@ def _default_sensor_configs(self): fov=np.pi / 4, near=0.01, far=100, - mount=self.camera_mount + mount=self.camera_mount, ), ] - + def _before_control_step(self): - self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) + self.camera_mount.set_pose( + Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) + ) if sapien.physx.is_gpu_enabled(): # we update just actor pose here, no need to call apply_all/fetch_all self.scene.px.gpu_apply_rigid_dynamic_data() @@ -74,10 +76,10 @@ def _default_human_render_camera_configs(self): pose=sapien_utils.look_at(eye=[0, -3, 1], target=[0, 0, 0]), width=512, height=512, - fov= 60 * np.pi/180, + fov=60 * np.pi / 180, near=0.01, far=100, - mount=self.camera_mount + mount=self.camera_mount, ), ] @@ -116,8 +118,10 @@ def extremities(self, info): @property def center_of_mass_velocity(self): # """Returns the center of mass velocity of robot""" - vels = torch.stack([link.get_linear_velocity() for link in self.active_links],dim=0) # (num_links, b, 3) - vels *= self.robot_links_mass.view(-1,1,1) + vels = torch.stack( + [link.get_linear_velocity() for link in self.active_links], dim=0 + ) # (num_links, b, 3) + vels *= self.robot_links_mass.view(-1, 1, 1) com_vel = vels.sum(dim=0) / self.robot_mass # (b, 3) return com_vel @@ -127,7 +131,7 @@ def evaluate(self) -> Dict: torso_xmat=self.agent.robot.links_map[ "torso" ].pose.to_transformation_matrix(), - cmass_linvel=self.center_of_mass_velocity + cmass_linvel=self.center_of_mass_velocity, ) def _load_scene(self, options: dict): @@ -141,23 +145,27 @@ def _load_scene(self, options: dict): self.ground = build_ground(self.scene) # allow tracking of humanoid - self.camera_mount = self.scene.create_actor_builder().build_kinematic("camera_mount") + self.camera_mount = self.scene.create_actor_builder().build_kinematic( + "camera_mount" + ) # cache for com velocity calc - doing so gives + 10 fps boost for 
1024 envs self.active_links = [ link for link in self.agent.robot.get_links() if "dummy" not in link.name ] - self.robot_links_mass = torch.stack([link.mass[0] for link in self.active_links]).to(self.device) + self.robot_links_mass = torch.stack( + [link.mass[0] for link in self.active_links] + ).to(self.device) self.robot_mass = torch.sum(self.robot_links_mass).item() def standing_rew(self): return rewards.tolerance( self.head_height, lower=_STAND_HEIGHT, - upper=float('inf'), + upper=float("inf"), margin=_STAND_HEIGHT / 4, ).view(-1) - + def upright_rew(self, info: Dict): return rewards.tolerance( self.torso_upright(info), @@ -167,25 +175,35 @@ def upright_rew(self, info: Dict): margin=1.9, value_at_margin=0, ).view(-1) - + def control_rew(self, action: Array): - return rewards.tolerance( - action, margin=1, value_at_margin=0, sigmoid="quadratic" - ).mean(dim=-1).view(-1) # (b, a) -> (b) + return ( + rewards.tolerance(action, margin=1, value_at_margin=0, sigmoid="quadratic") + .mean(dim=-1) + .view(-1) + ) # (b, a) -> (b) def dont_move_rew(self, info): - return rewards.tolerance(info['cmass_linvel'][:, :2], margin=2).mean(dim=-1).view(-1) # (b,3) -> (b) - - def move_rew(self, info, move_speed=10): - com_vel = torch.linalg.norm(info['cmass_linvel'][:, :2], dim=-1) - return rewards.tolerance(com_vel, - lower=move_speed, - upper=np.inf, - margin=move_speed, - value_at_margin=0, - sigmoid="linear").mean(dim=-1).view(-1) # (b,3) -> (b) - + return ( + rewards.tolerance(info["cmass_linvel"][:, :2], margin=2) + .mean(dim=-1) + .view(-1) + ) # (b,3) -> (b) + def move_rew(self, info, move_speed=10): + com_vel = torch.linalg.norm(info["cmass_linvel"][:, :2], dim=-1) + return ( + rewards.tolerance( + com_vel, + lower=move_speed, + upper=np.inf, + margin=move_speed, + value_at_margin=0, + sigmoid="linear", + ) + .mean(dim=-1) + .view(-1) + ) # (b,3) -> (b) # def compute_dense_reward(self, obs: Any, action: Array, info: Dict): # standing = rewards.tolerance( @@ -219,6 +237,8 @@ def move_rew(self, info, move_speed=10): # def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): # max_reward = 1.0 # return self.compute_dense_reward(obs, action, info) / max_reward + + ### class HumanoidEnvStandard(HumanoidEnvBase): agent: Union[Humanoid] @@ -231,15 +251,23 @@ def _get_obs_state_dict(self, info: Dict): # global dpos/dt and root (torso) dquaterion/dt lost # we replace with linear root linvel and root angularvel (equivalent info) return dict( - agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos - root_vel=self.agent.robot.links_map["dummy_root_0"].get_linear_velocity(), # free joint info, (b, 3) - root_quat_vel = self.agent.robot.links_map["dummy_root_0"].get_angular_velocity(), # free joint info, (b, 3) + agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos + root_vel=self.agent.robot.links_map[ + "dummy_root_0" + ].get_linear_velocity(), # free joint info, (b, 3) + root_quat_vel=self.agent.robot.links_map[ + "dummy_root_0" + ].get_angular_velocity(), # free joint info, (b, 3) head_height=self.head_height, # (b,1) - com_velocity=info['cmass_linvel'], # (b, 3) + com_velocity=info["cmass_linvel"], # (b, 3) extremities=self.extremities(info), - link_linvels=torch.stack([link.get_linear_velocity() for link in self.active_links], dim=1).view(-1,16*3), - link_angvels=torch.stack([link.get_angular_velocity() for link in self.active_links], dim=1).view(-1,16*3), - qfrc=self.agent.robot.get_qf() + link_linvels=torch.stack( + 
[link.get_linear_velocity() for link in self.active_links], dim=1 + ).view(-1, 16 * 3), + link_angvels=torch.stack( + [link.get_angular_velocity() for link in self.active_links], dim=1 + ).view(-1, 16 * 3), + qfrc=self.agent.robot.get_qf(), ) # our standard humanoid env resets if torso is below a certain point @@ -253,25 +281,32 @@ def _load_scene(self, options: Dict): def evaluate(self) -> Dict: info = super().evaluate() - torso_z = self.agent.robot.links_map["torso"].pose.p[:,-1] + torso_z = self.agent.robot.links_map["torso"].pose.p[:, -1] failure = torch.logical_or(torso_z < 1.0, torso_z > 2.0) info.update(fail=failure) return info - + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): - #self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) + # self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) with torch.device(self.device): b = len(env_idx) # set agent root pose - torso now centered at dummy root at (0,0,0) - pose = Pose.create_from_pq(p=torch.tensor([0, 0, 1.3]).unsqueeze(0).repeat(b,1)) + pose = Pose.create_from_pq( + p=torch.tensor([0, 0, 1.3]).unsqueeze(0).repeat(b, 1) + ) self.agent.robot.set_root_pose(pose) # set randomized qpos noise_scale = 1e-2 - qpos_noise = (torch.rand(b, self.agent.robot.dof[0]) * (2*noise_scale)) - noise_scale - qvel_noise = (torch.rand(b, self.agent.robot.dof[0]) * (2*noise_scale)) - noise_scale + qpos_noise = ( + torch.rand(b, self.agent.robot.dof[0]) * (2 * noise_scale) + ) - noise_scale + qvel_noise = ( + torch.rand(b, self.agent.robot.dof[0]) * (2 * noise_scale) + ) - noise_scale self.agent.robot.set_qpos(qpos_noise) self.agent.robot.set_qvel(qvel_noise) + @register_env("MS-HumanoidStand-v1", max_episode_steps=100) class HumanoidStand(HumanoidEnvStandard): agent: Union[Humanoid] @@ -280,12 +315,20 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action))/5 - return small_control * self.standing_rew() * self.upright_rew(info) * self.dont_move_rew(info) - - def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action)) / 5 + return ( + small_control + * self.standing_rew() + * self.upright_rew(info) + * self.dont_move_rew(info) + ) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): return self.compute_dense_reward(obs, action, info) + class HumanoidEnvHard(HumanoidEnvBase): agent: Union[Humanoid] @@ -297,17 +340,23 @@ def _get_obs_state_dict(self, info: Dict): # global dpos/dt and root (torso) dquaterion/dt lost # we replace with linear root linvel and root angularvel (equivalent info) return dict( - agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos + agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos head_height=self.head_height, # (b,1) extremities=self.extremities(info), # (b, 12) torso_vertical=self.torso_vertical_orientation(info), # (b, 3) - com_velocity=info['cmass_linvel'], # (b, 3) - root_vel=self.agent.robot.links_map["dummy_root_0"].get_linear_velocity(), # free joint info, (b, 3) - root_quat_vel = self.agent.robot.links_map["dummy_root_0"].get_angular_velocity() # free joint info, (b, 3) + com_velocity=info["cmass_linvel"], # (b, 3) + root_vel=self.agent.robot.links_map[ + "dummy_root_0" 
+ ].get_linear_velocity(), # free joint info, (b, 3) + root_quat_vel=self.agent.robot.links_map[ + "dummy_root_0" + ].get_angular_velocity(), # free joint info, (b, 3) ) - + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): - self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) + self.camera_mount.set_pose( + Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) + ) with torch.device(self.device): b = len(env_idx) # set agent root pose - torso now centered at dummy root at (0,0,0) @@ -323,6 +372,7 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): random_qpos += q_lims[..., 0] self.agent.reset(random_qpos) + ### @register_env("MS-HumanoidStandHard-v1", max_episode_steps=100) class HumanoidStandHard(HumanoidEnvHard): @@ -332,12 +382,20 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action))/5 - return small_control * self.standing_rew() * self.upright_rew(info) * self.dont_move_rew(info) - - def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action)) / 5 + return ( + small_control + * self.standing_rew() + * self.upright_rew(info) + * self.dont_move_rew(info) + ) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): return self.compute_dense_reward(obs, action, info) + @register_env("MS-HumanoidWalkHard-v1", max_episode_steps=100) class HumanoidWalkHard(HumanoidEnvHard): agent: Union[Humanoid] @@ -346,12 +404,20 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action))/5 - return small_control * self.standing_rew() * self.upright_rew(info) * self.move_rew(info, _WALK_SPEED) - - def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action)) / 5 + return ( + small_control + * self.standing_rew() + * self.upright_rew(info) + * self.move_rew(info, _WALK_SPEED) + ) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): return self.compute_dense_reward(obs, action, info) - + + @register_env("MS-HumanoidRunHard-v1", max_episode_steps=100) class HumanoidRunHard(HumanoidEnvHard): agent: Union[Humanoid] @@ -360,9 +426,15 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action))/5 - return small_control * self.standing_rew() * self.upright_rew(info) * self.move_rew(info, _RUN_SPEED) - - def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action)) / 5 + return ( + small_control + * self.standing_rew() + * self.upright_rew(info) + * self.move_rew(info, _RUN_SPEED) + ) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): return self.compute_dense_reward(obs, action, info) - \ No newline at end of file diff --git a/mani_skill/utils/building/_mjcf_loader.py b/mani_skill/utils/building/_mjcf_loader.py index 
74d7a6ecd..da94d9217 100644 --- a/mani_skill/utils/building/_mjcf_loader.py +++ b/mani_skill/utils/building/_mjcf_loader.py @@ -250,12 +250,12 @@ def _build_geom( # if condim is 1, we can easily model the material's friction if _parse_int(geom_attrib, "condim", 0) == 1: - friction = _parse_float(geom_attrib, "friction", 0.3) # maniskill default frection is 0.3 + friction = _parse_float( + geom_attrib, "friction", 0.3 + ) # maniskill default frection is 0.3 physx_material = PhysxMaterial( - static_friction=friction, - dynamic_friction=friction, - restitution=0 - ) + static_friction=friction, dynamic_friction=friction, restitution=0 + ) else: physx_material = None From 1579a324914dddc2240202f237e9931bd69673d4 Mon Sep 17 00:00:00 2001 From: Xander Date: Sun, 4 Aug 2024 21:52:06 -0700 Subject: [PATCH 06/13] it can run, controller tuned --- mani_skill/agents/robots/humanoid/humanoid.py | 491 ++++++++++++++++-- mani_skill/envs/tasks/control/__init__.py | 1 + mani_skill/envs/tasks/control/humanoid.py | 69 +++ 3 files changed, 527 insertions(+), 34 deletions(-) diff --git a/mani_skill/agents/robots/humanoid/humanoid.py b/mani_skill/agents/robots/humanoid/humanoid.py index 370db36f1..36b8906b1 100644 --- a/mani_skill/agents/robots/humanoid/humanoid.py +++ b/mani_skill/agents/robots/humanoid/humanoid.py @@ -67,57 +67,480 @@ def _controller_configs(self): damping=10, normalize_action=False, ) - # pd_joint_delta_pos = PDJointPosControllerConfig( + + # pd_joint_delta_pos = PDJointPosControllerConfig( # og sac good # [j.name for j in self.robot.active_joints], # -1, # 1, - # damping=5, - # stiffness=20, - # force_limit=100, + # damping=10, + # stiffness=200, + # force_limit=200, # use_delta=True, # ) - # pd_joint_delta_pos = PDJointPosControllerConfig( #instantly causes error - # [j.name for j in self.robot.active_joints], - # -2, - # 2, - # damping=50, - # stiffness=500, - # force_limit=20000000, - # use_delta=True, - # ) - # pd_joint_delta_pos = PDJointPosControllerConfig( #best yet, almost there + # pd_joint_delta_pos = PDJointPosControllerConfig( # good for walk # [j.name for j in self.robot.active_joints], - # -3, - # 3, - # damping=5, - # stiffness=100, - # force_limit=20000000, + # -0.5, + # 0.5, + # damping=10, + # stiffness=200, + # #force_limit=200, # use_delta=True, # ) - pd_joint_delta_pos = PDJointPosControllerConfig( # best yet, almost there - [j.name for j in self.robot.active_joints], - -1, - 1, - damping=10, - stiffness=200, - force_limit=200, - use_delta=True, - ) - # pd_joint_delta_pos = PDJointPosControllerConfig( #eventually causes error + + # pd_joint_delta_pos = PDJointPosControllerConfig( # good for walk # [j.name for j in self.robot.active_joints], # -1, # 1, # damping=10, - # stiffness=1000, - # force_limit=200, + # stiffness=200, + # #force_limit=200, # use_delta=True, # ) + + # return deepcopy_dict( + # dict( + # pd_joint_pos=dict(body=pd_joint_pos, balance_passive_force=False), + # pd_joint_delta_pos=dict( + # body=pd_joint_delta_pos, balance_passive_force=False + # ), + # ) + # ) + + pd_joint_delta_pos = dict() + + # pd_joint_delta_pos.update(abdomen_y = PDJointPosControllerConfig( # good for walk + # ["abdomen_y"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(abdomen_z = PDJointPosControllerConfig( # good for walk + # ["abdomen_z"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(abdomen_x = PDJointPosControllerConfig( # 
good for walk + # ["abdomen_x"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + + # pd_joint_delta_pos.update(right_hip_x = PDJointPosControllerConfig( # good for walk + # ["right_hip_x"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_hip_z = PDJointPosControllerConfig( # good for walk + # ["right_hip_z"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_hip_y = PDJointPosControllerConfig( # good for walk + # ["right_hip_y"],-max_delta,max_delta,damping=base_stiff*120/20,stiffness=base_stiff*120,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_knee = PDJointPosControllerConfig( # good for walk + # ["right_knee"],-max_delta,max_delta,damping=base_stiff*80/20,stiffness=base_stiff*80,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_ankle_x = PDJointPosControllerConfig( # good for walk + # ["right_ankle_x"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_ankle_y = PDJointPosControllerConfig( # good for walk + # ["right_ankle_y"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + + # pd_joint_delta_pos.update(left_hip_x = PDJointPosControllerConfig( # good for walk + # ["left_hip_x"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_hip_z = PDJointPosControllerConfig( # good for walk + # ["left_hip_z"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_hip_y = PDJointPosControllerConfig( # good for walk + # ["left_hip_y"],-max_delta,max_delta,damping=base_stiff*120/20,stiffness=base_stiff*120,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_knee = PDJointPosControllerConfig( # good for walk + # ["left_knee"],-max_delta,max_delta,damping=base_stiff*80/20,stiffness=base_stiff*80,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_ankle_x = PDJointPosControllerConfig( # good for walk + # ["left_ankle_x"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_ankle_y = PDJointPosControllerConfig( # good for walk + # ["left_ankle_y"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + + # pd_joint_delta_pos.update(right_shoulder1 = PDJointPosControllerConfig( # good for walk + # ["right_shoulder1"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_shoulder2 = PDJointPosControllerConfig( # good for walk + # ["right_shoulder2"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_elbow = PDJointPosControllerConfig( # good for walk + # ["right_elbow"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + + # pd_joint_delta_pos.update(left_shoulder1 = PDJointPosControllerConfig( # good for walk + # ["left_shoulder1"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_shoulder2 = PDJointPosControllerConfig( # good for walk + # 
["left_shoulder2"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_elbow = PDJointPosControllerConfig( # good for walk + # ["left_elbow"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, + # )) + + ###################################################################################################################### good for walk + # pd_joint_delta_pos.update(abdomen_y = PDJointPosControllerConfig( # good for walk + # ["abdomen_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(abdomen_z = PDJointPosControllerConfig( # good for walk + # ["abdomen_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(abdomen_x = PDJointPosControllerConfig( # good for walk + # ["abdomen_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + + # pd_joint_delta_pos.update(right_hip_x = PDJointPosControllerConfig( # good for walk + # ["right_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_hip_z = PDJointPosControllerConfig( # good for walk + # ["right_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_hip_y = PDJointPosControllerConfig( # good for walk + # ["right_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_knee = PDJointPosControllerConfig( # good for walk + # ["right_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_ankle_x = PDJointPosControllerConfig( # good for walk + # ["right_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_ankle_y = PDJointPosControllerConfig( # good for walk + # ["right_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + + # pd_joint_delta_pos.update(left_hip_x = PDJointPosControllerConfig( # good for walk + # ["left_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_hip_z = PDJointPosControllerConfig( # good for walk + # ["left_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_hip_y = PDJointPosControllerConfig( # good for walk + # ["left_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_knee = PDJointPosControllerConfig( # good for walk + # ["left_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_ankle_x = PDJointPosControllerConfig( # good for walk + # ["left_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_ankle_y = PDJointPosControllerConfig( # good for walk + # ["left_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + + # pd_joint_delta_pos.update(right_shoulder1 = PDJointPosControllerConfig( # good for walk + # ["right_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_shoulder2 = PDJointPosControllerConfig( # good for walk + # 
["right_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_elbow = PDJointPosControllerConfig( # good for walk + # ["right_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, + # )) + + # pd_joint_delta_pos.update(left_shoulder1 = PDJointPosControllerConfig( # good for walk + # ["left_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_shoulder2 = PDJointPosControllerConfig( # good for walk + # ["left_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_elbow = PDJointPosControllerConfig( # good for walk + # ["left_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, + # )) + ######################################################################################################################## dampisstiff + + ########################################################################################################################### otherrew_dampisstif_d2 + # base_stiff = 1 + # max_delta = 2 + # pd_joint_delta_pos.update(abdomen_y = PDJointPosControllerConfig( # good for walk + # ["abdomen_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(abdomen_z = PDJointPosControllerConfig( # good for walk + # ["abdomen_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(abdomen_x = PDJointPosControllerConfig( # good for walk + # ["abdomen_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + + # pd_joint_delta_pos.update(right_hip_x = PDJointPosControllerConfig( # good for walk + # ["right_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_hip_z = PDJointPosControllerConfig( # good for walk + # ["right_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_hip_y = PDJointPosControllerConfig( # good for walk + # ["right_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_knee = PDJointPosControllerConfig( # good for walk + # ["right_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_ankle_x = PDJointPosControllerConfig( # good for walk + # ["right_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_ankle_y = PDJointPosControllerConfig( # good for walk + # ["right_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + + # pd_joint_delta_pos.update(left_hip_x = PDJointPosControllerConfig( # good for walk + # ["left_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_hip_z = PDJointPosControllerConfig( # good for walk + # ["left_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_hip_y = PDJointPosControllerConfig( # good for walk + # ["left_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_knee = PDJointPosControllerConfig( # good for walk + # 
["left_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_ankle_x = PDJointPosControllerConfig( # good for walk + # ["left_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_ankle_y = PDJointPosControllerConfig( # good for walk + # ["left_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, + # )) + + # pd_joint_delta_pos.update(right_shoulder1 = PDJointPosControllerConfig( # good for walk + # ["right_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_shoulder2 = PDJointPosControllerConfig( # good for walk + # ["right_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(right_elbow = PDJointPosControllerConfig( # good for walk + # ["right_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, + # )) + + # pd_joint_delta_pos.update(left_shoulder1 = PDJointPosControllerConfig( # good for walk + # ["left_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_shoulder2 = PDJointPosControllerConfig( # good for walk + # ["left_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, + # )) + # pd_joint_delta_pos.update(left_elbow = PDJointPosControllerConfig( # good for walk + # ["left_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, + # )) + ########################################################################################################################### otherrew_dampisstif_d2 + + base_stiff = 1 + max_delta = 2 + pd_joint_delta_pos.update( + abdomen_y=PDJointPosControllerConfig( # good for walk + ["abdomen_y"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + abdomen_z=PDJointPosControllerConfig( # good for walk + ["abdomen_z"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + abdomen_x=PDJointPosControllerConfig( # good for walk + ["abdomen_x"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + + pd_joint_delta_pos.update( + right_hip_x=PDJointPosControllerConfig( # good for walk + ["right_hip_x"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_hip_z=PDJointPosControllerConfig( # good for walk + ["right_hip_z"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_hip_y=PDJointPosControllerConfig( # good for walk + ["right_hip_y"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 120, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_knee=PDJointPosControllerConfig( # good for walk + ["right_knee"], + -max_delta, + max_delta, + damping=1, + stiffness=base_stiff * 80, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_ankle_x=PDJointPosControllerConfig( # good for walk + ["right_ankle_x"], + -max_delta, + max_delta, + damping=3, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_ankle_y=PDJointPosControllerConfig( # good for walk + ["right_ankle_y"], + -max_delta, + max_delta, + damping=3, + 
stiffness=base_stiff * 20, + use_delta=True, + ) + ) + + pd_joint_delta_pos.update( + left_hip_x=PDJointPosControllerConfig( # good for walk + ["left_hip_x"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_hip_z=PDJointPosControllerConfig( # good for walk + ["left_hip_z"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_hip_y=PDJointPosControllerConfig( # good for walk + ["left_hip_y"], + -max_delta, + max_delta, + damping=5, + stiffness=base_stiff * 120, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_knee=PDJointPosControllerConfig( # good for walk + ["left_knee"], + -max_delta, + max_delta, + damping=1, + stiffness=base_stiff * 80, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_ankle_x=PDJointPosControllerConfig( # good for walk + ["left_ankle_x"], + -max_delta, + max_delta, + damping=3, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_ankle_y=PDJointPosControllerConfig( # good for walk + ["left_ankle_y"], + -max_delta, + max_delta, + damping=3, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + + pd_joint_delta_pos.update( + right_shoulder1=PDJointPosControllerConfig( # good for walk + ["right_shoulder1"], + -max_delta, + max_delta, + damping=1, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_shoulder2=PDJointPosControllerConfig( # good for walk + ["right_shoulder2"], + -max_delta, + max_delta, + damping=1, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + right_elbow=PDJointPosControllerConfig( # good for walk + ["right_elbow"], + -max_delta, + max_delta, + damping=0, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + + pd_joint_delta_pos.update( + left_shoulder1=PDJointPosControllerConfig( # good for walk + ["left_shoulder1"], + -max_delta, + max_delta, + damping=1, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_shoulder2=PDJointPosControllerConfig( # good for walk + ["left_shoulder2"], + -max_delta, + max_delta, + damping=1, + stiffness=base_stiff * 20, + use_delta=True, + ) + ) + pd_joint_delta_pos.update( + left_elbow=PDJointPosControllerConfig( # good for walk + ["left_elbow"], + -max_delta, + max_delta, + damping=0, + stiffness=base_stiff * 40, + use_delta=True, + ) + ) + + pd_joint_delta_pos.update(balance_passive_force=False) + return deepcopy_dict( dict( pd_joint_pos=dict(body=pd_joint_pos, balance_passive_force=False), - pd_joint_delta_pos=dict( - body=pd_joint_delta_pos, balance_passive_force=False - ), + pd_joint_delta_pos=pd_joint_delta_pos, ) ) diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index 763828af2..f6f96b666 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -4,5 +4,6 @@ HumanoidRunHard, HumanoidStand, HumanoidStandHard, + HumanoidWalk, HumanoidWalkHard, ) diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index 937749ea0..a76edc5d4 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -306,6 +306,19 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): self.agent.robot.set_qpos(qpos_noise) self.agent.robot.set_qvel(qvel_noise) + def move_x_rew(self, info, move_speed=10): + 
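# forward-progress shaping: with sigmoid="linear", margin=move_speed and
# value_at_margin=0, the tolerance term is 0 for com x-velocity at or below 0,
# rises linearly in between, and saturates at 1 once the velocity reaches move_speed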
com_vel_x = info["cmass_linvel"][:, 0] + return rewards.tolerance( + com_vel_x, + lower=move_speed, + upper=np.inf, + margin=move_speed, + value_at_margin=0, + sigmoid="linear", + ).view( + -1 + ) # (b,3) -> (b) + @register_env("MS-HumanoidStand-v1", max_episode_steps=100) class HumanoidStand(HumanoidEnvStandard): @@ -329,6 +342,62 @@ def compute_normalized_dense_reward( return self.compute_dense_reward(obs, action, info) +@register_env("MS-HumanoidWalk-v1", max_episode_steps=200) +class HumanoidWalk(HumanoidEnvStandard): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + # small_control = (4 + self.control_rew(action)) / 5 + # return ( + # small_control + # * self.standing_rew() + # * self.upright_rew(info) + # * ((5*self.move_x_rew(info, _RUN_SPEED)+1)/6) + # ) + + # def compute_normalized_dense_reward( + # self, obs: Any, action: torch.Tensor, info: Dict + # ): + # return self.compute_dense_reward(obs, action, info) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): + fail_rew = 5 # * ~info["fail"] + return 0.1 * ( + fail_rew + + ( + (1.25 * info["cmass_linvel"][:, 0]) + - (0.1 * (action.pow(2).sum(dim=-1))) + ) + ) + + +@register_env("MS-HumanoidRun-v1", max_episode_steps=100) +class HumanoidRun(HumanoidEnvStandard): + agent: Union[Humanoid] + + def __init__(self, *args, robot_uids="humanoid", **kwargs): + super().__init__(*args, robot_uids=robot_uids, **kwargs) + + def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + small_control = (4 + self.control_rew(action)) / 5 + return ( + small_control + * self.standing_rew() + * self.upright_rew(info) + * self.move_x_rew(info, _RUN_SPEED) + ) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): + return self.compute_dense_reward(obs, action, info) + + class HumanoidEnvHard(HumanoidEnvBase): agent: Union[Humanoid] From 48e5c617f08cc5ebb7b271a5f3a16263b8d4886a Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 6 Aug 2024 14:51:29 -0700 Subject: [PATCH 07/13] optimized humanoid controller config --- mani_skill/agents/robots/humanoid/humanoid.py | 543 +++--------------- mani_skill/envs/tasks/control/humanoid.py | 65 ++- 2 files changed, 109 insertions(+), 499 deletions(-) diff --git a/mani_skill/agents/robots/humanoid/humanoid.py b/mani_skill/agents/robots/humanoid/humanoid.py index 36b8906b1..1dfa53400 100644 --- a/mani_skill/agents/robots/humanoid/humanoid.py +++ b/mani_skill/agents/robots/humanoid/humanoid.py @@ -51,496 +51,83 @@ def __init__(self, *args, **kwargs): @property def _controller_configs(self): - # pd_joint_pos = PDJointPosControllerConfig( - # [x.name for x in self.robot.active_joints], - # lower=None, - # upper=None, - # stiffness=100, - # damping=10, - # normalize_action=False, - # ) pd_joint_pos = PDJointPosControllerConfig( [x.name for x in self.robot.active_joints], lower=None, upper=None, - stiffness=1000, + stiffness=100, damping=10, normalize_action=False, ) - # pd_joint_delta_pos = PDJointPosControllerConfig( # og sac good - # [j.name for j in self.robot.active_joints], - # -1, - # 1, - # damping=10, - # stiffness=200, - # force_limit=200, - # use_delta=True, - # ) - - # pd_joint_delta_pos = PDJointPosControllerConfig( # good for walk - # [j.name for j in self.robot.active_joints], - # -0.5, - # 0.5, - # damping=10, - # 
stiffness=200, - # #force_limit=200, - # use_delta=True, - # ) - - # pd_joint_delta_pos = PDJointPosControllerConfig( # good for walk - # [j.name for j in self.robot.active_joints], - # -1, - # 1, - # damping=10, - # stiffness=200, - # #force_limit=200, - # use_delta=True, - # ) - - # return deepcopy_dict( - # dict( - # pd_joint_pos=dict(body=pd_joint_pos, balance_passive_force=False), - # pd_joint_delta_pos=dict( - # body=pd_joint_delta_pos, balance_passive_force=False - # ), - # ) - # ) - - pd_joint_delta_pos = dict() - - # pd_joint_delta_pos.update(abdomen_y = PDJointPosControllerConfig( # good for walk - # ["abdomen_y"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(abdomen_z = PDJointPosControllerConfig( # good for walk - # ["abdomen_z"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(abdomen_x = PDJointPosControllerConfig( # good for walk - # ["abdomen_x"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - - # pd_joint_delta_pos.update(right_hip_x = PDJointPosControllerConfig( # good for walk - # ["right_hip_x"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_hip_z = PDJointPosControllerConfig( # good for walk - # ["right_hip_z"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_hip_y = PDJointPosControllerConfig( # good for walk - # ["right_hip_y"],-max_delta,max_delta,damping=base_stiff*120/20,stiffness=base_stiff*120,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_knee = PDJointPosControllerConfig( # good for walk - # ["right_knee"],-max_delta,max_delta,damping=base_stiff*80/20,stiffness=base_stiff*80,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_ankle_x = PDJointPosControllerConfig( # good for walk - # ["right_ankle_x"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_ankle_y = PDJointPosControllerConfig( # good for walk - # ["right_ankle_y"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - - # pd_joint_delta_pos.update(left_hip_x = PDJointPosControllerConfig( # good for walk - # ["left_hip_x"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_hip_z = PDJointPosControllerConfig( # good for walk - # ["left_hip_z"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_hip_y = PDJointPosControllerConfig( # good for walk - # ["left_hip_y"],-max_delta,max_delta,damping=base_stiff*120/20,stiffness=base_stiff*120,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_knee = PDJointPosControllerConfig( # good for walk - # ["left_knee"],-max_delta,max_delta,damping=base_stiff*80/20,stiffness=base_stiff*80,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_ankle_x = PDJointPosControllerConfig( # good for walk - # ["left_ankle_x"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_ankle_y = PDJointPosControllerConfig( # good for walk - # ["left_ankle_y"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - - # 
pd_joint_delta_pos.update(right_shoulder1 = PDJointPosControllerConfig( # good for walk - # ["right_shoulder1"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_shoulder2 = PDJointPosControllerConfig( # good for walk - # ["right_shoulder2"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_elbow = PDJointPosControllerConfig( # good for walk - # ["right_elbow"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - - # pd_joint_delta_pos.update(left_shoulder1 = PDJointPosControllerConfig( # good for walk - # ["left_shoulder1"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_shoulder2 = PDJointPosControllerConfig( # good for walk - # ["left_shoulder2"],-max_delta,max_delta,damping=base_stiff*20/20,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_elbow = PDJointPosControllerConfig( # good for walk - # ["left_elbow"],-max_delta,max_delta,damping=base_stiff*40/20,stiffness=base_stiff*40,use_delta=True, - # )) - - ###################################################################################################################### good for walk - # pd_joint_delta_pos.update(abdomen_y = PDJointPosControllerConfig( # good for walk - # ["abdomen_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(abdomen_z = PDJointPosControllerConfig( # good for walk - # ["abdomen_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(abdomen_x = PDJointPosControllerConfig( # good for walk - # ["abdomen_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - - # pd_joint_delta_pos.update(right_hip_x = PDJointPosControllerConfig( # good for walk - # ["right_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_hip_z = PDJointPosControllerConfig( # good for walk - # ["right_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_hip_y = PDJointPosControllerConfig( # good for walk - # ["right_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_knee = PDJointPosControllerConfig( # good for walk - # ["right_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_ankle_x = PDJointPosControllerConfig( # good for walk - # ["right_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_ankle_y = PDJointPosControllerConfig( # good for walk - # ["right_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - - # pd_joint_delta_pos.update(left_hip_x = PDJointPosControllerConfig( # good for walk - # ["left_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_hip_z = PDJointPosControllerConfig( # good for walk - # ["left_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_hip_y = PDJointPosControllerConfig( # good for walk - # 
["left_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_knee = PDJointPosControllerConfig( # good for walk - # ["left_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_ankle_x = PDJointPosControllerConfig( # good for walk - # ["left_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_ankle_y = PDJointPosControllerConfig( # good for walk - # ["left_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - - # pd_joint_delta_pos.update(right_shoulder1 = PDJointPosControllerConfig( # good for walk - # ["right_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_shoulder2 = PDJointPosControllerConfig( # good for walk - # ["right_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_elbow = PDJointPosControllerConfig( # good for walk - # ["right_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, - # )) - - # pd_joint_delta_pos.update(left_shoulder1 = PDJointPosControllerConfig( # good for walk - # ["left_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_shoulder2 = PDJointPosControllerConfig( # good for walk - # ["left_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_elbow = PDJointPosControllerConfig( # good for walk - # ["left_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, - # )) - ######################################################################################################################## dampisstiff - - ########################################################################################################################### otherrew_dampisstif_d2 - # base_stiff = 1 - # max_delta = 2 - # pd_joint_delta_pos.update(abdomen_y = PDJointPosControllerConfig( # good for walk - # ["abdomen_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(abdomen_z = PDJointPosControllerConfig( # good for walk - # ["abdomen_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(abdomen_x = PDJointPosControllerConfig( # good for walk - # ["abdomen_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - - # pd_joint_delta_pos.update(right_hip_x = PDJointPosControllerConfig( # good for walk - # ["right_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_hip_z = PDJointPosControllerConfig( # good for walk - # ["right_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_hip_y = PDJointPosControllerConfig( # good for walk - # ["right_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_knee = PDJointPosControllerConfig( # good for walk - # ["right_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_ankle_x = PDJointPosControllerConfig( # good for walk - # 
["right_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_ankle_y = PDJointPosControllerConfig( # good for walk - # ["right_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - - # pd_joint_delta_pos.update(left_hip_x = PDJointPosControllerConfig( # good for walk - # ["left_hip_x"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_hip_z = PDJointPosControllerConfig( # good for walk - # ["left_hip_z"],-max_delta,max_delta,damping=5,stiffness=base_stiff*40,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_hip_y = PDJointPosControllerConfig( # good for walk - # ["left_hip_y"],-max_delta,max_delta,damping=5,stiffness=base_stiff*120,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_knee = PDJointPosControllerConfig( # good for walk - # ["left_knee"],-max_delta,max_delta,damping=1,stiffness=base_stiff*80,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_ankle_x = PDJointPosControllerConfig( # good for walk - # ["left_ankle_x"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_ankle_y = PDJointPosControllerConfig( # good for walk - # ["left_ankle_y"],-max_delta,max_delta,damping=3,stiffness=base_stiff*20,use_delta=True, - # )) - - # pd_joint_delta_pos.update(right_shoulder1 = PDJointPosControllerConfig( # good for walk - # ["right_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_shoulder2 = PDJointPosControllerConfig( # good for walk - # ["right_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(right_elbow = PDJointPosControllerConfig( # good for walk - # ["right_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, - # )) - - # pd_joint_delta_pos.update(left_shoulder1 = PDJointPosControllerConfig( # good for walk - # ["left_shoulder1"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_shoulder2 = PDJointPosControllerConfig( # good for walk - # ["left_shoulder2"],-max_delta,max_delta,damping=1,stiffness=base_stiff*20,use_delta=True, - # )) - # pd_joint_delta_pos.update(left_elbow = PDJointPosControllerConfig( # good for walk - # ["left_elbow"],-max_delta,max_delta,damping=0,stiffness=base_stiff*40,use_delta=True, - # )) - ########################################################################################################################### otherrew_dampisstif_d2 - - base_stiff = 1 - max_delta = 2 - pd_joint_delta_pos.update( - abdomen_y=PDJointPosControllerConfig( # good for walk - ["abdomen_y"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - abdomen_z=PDJointPosControllerConfig( # good for walk - ["abdomen_z"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - abdomen_x=PDJointPosControllerConfig( # good for walk - ["abdomen_x"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - - pd_joint_delta_pos.update( - right_hip_x=PDJointPosControllerConfig( # good for walk - ["right_hip_x"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - 
right_hip_z=PDJointPosControllerConfig( # good for walk - ["right_hip_z"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - right_hip_y=PDJointPosControllerConfig( # good for walk - ["right_hip_y"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 120, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - right_knee=PDJointPosControllerConfig( # good for walk - ["right_knee"], - -max_delta, - max_delta, - damping=1, - stiffness=base_stiff * 80, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - right_ankle_x=PDJointPosControllerConfig( # good for walk - ["right_ankle_x"], - -max_delta, - max_delta, - damping=3, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - right_ankle_y=PDJointPosControllerConfig( # good for walk - ["right_ankle_y"], - -max_delta, - max_delta, - damping=3, - stiffness=base_stiff * 20, - use_delta=True, - ) + # joints_dict = { + # "abdomen_y": {"damping":5, "stiffness":40}, + # "abdomen_z": {"damping":5, "stiffness":40}, + # "abdomen_x": {"damping":5, "stiffness":40}, + # "right_hip_x": {"damping":5, "stiffness":40}, + # "right_hip_z": {"damping":5, "stiffness":40}, + # "right_hip_y": {"damping":5, "stiffness":120}, + # "right_knee": {"damping":1, "stiffness":80}, + # "right_ankle_x": {"damping":3, "stiffness":20}, + # "right_ankle_y": {"damping":3, "stiffness":20}, + # "left_hip_x": {"damping":5, "stiffness":40}, + # "left_hip_z": {"damping":5, "stiffness":40}, + # "left_hip_y": {"damping":5, "stiffness":120}, + # "left_knee": {"damping":1, "stiffness":80}, + # "left_ankle_x": {"damping":3, "stiffness":20}, + # "left_ankle_y": {"damping":3, "stiffness":20}, + # "right_shoulder1":{"damping":1, "stiffness":20}, + # "right_shoulder2":{"damping":1, "stiffness":20}, + # "right_elbow": {"damping":0, "stiffness":40}, + # "left_shoulder1": {"damping":1, "stiffness":20}, + # "left_shoulder2": {"damping":1, "stiffness":20}, + # "left_elbow": {"damping":0, "stiffness":40}, + # } + + joints_dict = { + "abdomen_y": {"damping": 5, "stiffness": 40}, + "abdomen_z": {"damping": 5, "stiffness": 40}, + "abdomen_x": {"damping": 5, "stiffness": 40}, + "right_hip_x": {"damping": 5, "stiffness": 40}, + "right_hip_z": {"damping": 5, "stiffness": 40}, + "right_hip_y": {"damping": 5, "stiffness": 120}, + "right_knee": {"damping": 1, "stiffness": 80}, + "right_ankle_x": {"damping": 3, "stiffness": 20}, + "right_ankle_y": {"damping": 3, "stiffness": 40}, + "left_hip_x": {"damping": 5, "stiffness": 40}, + "left_hip_z": {"damping": 5, "stiffness": 40}, + "left_hip_y": {"damping": 5, "stiffness": 120}, + "left_knee": {"damping": 1, "stiffness": 80}, + "left_ankle_x": {"damping": 3, "stiffness": 20}, + "left_ankle_y": {"damping": 3, "stiffness": 40}, + "right_shoulder1": {"damping": 1, "stiffness": 20}, + "right_shoulder2": {"damping": 1, "stiffness": 20}, + "right_elbow": {"damping": 0, "stiffness": 40}, + "left_shoulder1": {"damping": 1, "stiffness": 20}, + "left_shoulder2": {"damping": 1, "stiffness": 20}, + "left_elbow": {"damping": 0, "stiffness": 40}, + } + + joint_names = list(joints_dict.keys()) + assert sorted(joint_names) == sorted([x.name for x in self.robot.active_joints]) + + damping = np.array([joint["damping"] for joint in joints_dict.values()]) + stiffness = np.array([joint["stiffness"] for joint in joints_dict.values()]) + + pd_joint_delta_pos = PDJointPosControllerConfig( + joint_names, + -2, + 2, + damping=damping, + stiffness=stiffness, + 
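# note: damping and stiffness are per-joint numpy arrays ordered like joint_names,
# so each joint gets its own PD gains (stronger hips and knees, lighter ankles and
# arms), a pattern that roughly mirrors the actuator gearing of the dm_control humanoid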
use_delta=True, ) - pd_joint_delta_pos.update( - left_hip_x=PDJointPosControllerConfig( # good for walk - ["left_hip_x"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_hip_z=PDJointPosControllerConfig( # good for walk - ["left_hip_z"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_hip_y=PDJointPosControllerConfig( # good for walk - ["left_hip_y"], - -max_delta, - max_delta, - damping=5, - stiffness=base_stiff * 120, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_knee=PDJointPosControllerConfig( # good for walk - ["left_knee"], - -max_delta, - max_delta, - damping=1, - stiffness=base_stiff * 80, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_ankle_x=PDJointPosControllerConfig( # good for walk - ["left_ankle_x"], - -max_delta, - max_delta, - damping=3, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_ankle_y=PDJointPosControllerConfig( # good for walk - ["left_ankle_y"], - -max_delta, - max_delta, - damping=3, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - - pd_joint_delta_pos.update( - right_shoulder1=PDJointPosControllerConfig( # good for walk - ["right_shoulder1"], - -max_delta, - max_delta, - damping=1, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - right_shoulder2=PDJointPosControllerConfig( # good for walk - ["right_shoulder2"], - -max_delta, - max_delta, - damping=1, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - right_elbow=PDJointPosControllerConfig( # good for walk - ["right_elbow"], - -max_delta, - max_delta, - damping=0, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - - pd_joint_delta_pos.update( - left_shoulder1=PDJointPosControllerConfig( # good for walk - ["left_shoulder1"], - -max_delta, - max_delta, - damping=1, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_shoulder2=PDJointPosControllerConfig( # good for walk - ["left_shoulder2"], - -max_delta, - max_delta, - damping=1, - stiffness=base_stiff * 20, - use_delta=True, - ) - ) - pd_joint_delta_pos.update( - left_elbow=PDJointPosControllerConfig( # good for walk - ["left_elbow"], - -max_delta, - max_delta, - damping=0, - stiffness=base_stiff * 40, - use_delta=True, - ) - ) - - pd_joint_delta_pos.update(balance_passive_force=False) - return deepcopy_dict( dict( pd_joint_pos=dict(body=pd_joint_pos, balance_passive_force=False), - pd_joint_delta_pos=pd_joint_delta_pos, + pd_joint_delta_pos=dict( + body=pd_joint_delta_pos, balance_passive_force=False + ), ) ) diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index a76edc5d4..be365c226 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -268,6 +268,7 @@ def _get_obs_state_dict(self, info: Dict): [link.get_angular_velocity() for link in self.active_links], dim=1 ).view(-1, 16 * 3), qfrc=self.agent.robot.get_qf(), + orient=self.agent.robot.links_map["dummy_root_0"].pose.q, ) # our standard humanoid env resets if torso is below a certain point @@ -320,7 +321,7 @@ def move_x_rew(self, info, move_speed=10): ) # (b,3) -> (b) -@register_env("MS-HumanoidStand-v1", max_episode_steps=100) +@register_env("MS-HumanoidStand-v1", max_episode_steps=200) class HumanoidStand(HumanoidEnvStandard): agent: Union[Humanoid] @@ 
-351,13 +352,15 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): # small_control = (4 + self.control_rew(action)) / 5 - # return ( + # walk_rew = ( # small_control # * self.standing_rew() # * self.upright_rew(info) - # * ((5*self.move_x_rew(info, _RUN_SPEED)+1)/6) + # * self.move_x_rew(info, _WALK_SPEED) # ) + # return (1 + 2*walk_rew) / 3 + # def compute_normalized_dense_reward( # self, obs: Any, action: torch.Tensor, info: Dict # ): @@ -366,36 +369,56 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: Dict ): - fail_rew = 5 # * ~info["fail"] - return 0.1 * ( - fail_rew - + ( - (1.25 * info["cmass_linvel"][:, 0]) - - (0.1 * (action.pow(2).sum(dim=-1))) - ) - ) + small_control = (4 + self.control_rew(action)) / 5 + walk_rew = self.move_x_rew(info, _WALK_SPEED) + return ( + 1 + + (small_control * walk_rew * self.upright_rew(info) * self.standing_rew()) + ) / 2 -@register_env("MS-HumanoidRun-v1", max_episode_steps=100) + # def compute_normalized_dense_reward( + # self, obs: Any, action: torch.Tensor, info: Dict + # ): + # #small_control = (4 + self.control_rew(action).pow(2)) / 5 + # small_control = (3 - action.pow(2).mean(-1)) / 3 + # walk_rew = self.move_x_rew(info, _WALK_SPEED) + + # return (1+(2*small_control*walk_rew*self.standing_rew())) / 3 + + +@register_env("MS-HumanoidRun-v1", max_episode_steps=200) class HumanoidRun(HumanoidEnvStandard): agent: Union[Humanoid] def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) - def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action)) / 5 - return ( - small_control - * self.standing_rew() - * self.upright_rew(info) - * self.move_x_rew(info, _RUN_SPEED) - ) + # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + # small_control = (4 + self.control_rew(action)) / 5 + # return ( + # small_control + # * self.standing_rew() + # * self.upright_rew(info) + # * ((5*self.move_x_rew(info, _WALK_SPEED)+1)/6) + # ) + + # def compute_normalized_dense_reward( + # self, obs: Any, action: torch.Tensor, info: Dict + # ): + # return self.compute_dense_reward(obs, action, info) def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: Dict ): - return self.compute_dense_reward(obs, action, info) + fail_rew = 5 # * ~info["fail"] + return 0.1 * ( + fail_rew + + ( + (1.25 * info["cmass_linvel"][:, 0]) + - (0.1 * (action.pow(2).sum(dim=-1))) + ) + ) class HumanoidEnvHard(HumanoidEnvBase): From bee876bb9e8e7171afb3a6536cbae34a236f6fb9 Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 8 Aug 2024 10:37:51 -0700 Subject: [PATCH 08/13] reformatting --- examples/baselines/ppo/ppo_rgb.py | 4 +- mani_skill/envs/tasks/control/__init__.py | 8 +- mani_skill/envs/tasks/control/humanoid.py | 424 +++++++++------------- 3 files changed, 178 insertions(+), 258 deletions(-) diff --git a/examples/baselines/ppo/ppo_rgb.py b/examples/baselines/ppo/ppo_rgb.py index f0ea4fe6e..a7b4a797d 100644 --- a/examples/baselines/ppo/ppo_rgb.py +++ b/examples/baselines/ppo/ppo_rgb.py @@ -48,6 +48,8 @@ class Args: """if toggled, only runs evaluation with the given model checkpoint and saves the evaluation trajectories""" checkpoint: str = None """path to a pretrained checkpoint file to start evaluation/training from""" + render_mode: str = 
"all" + """the environment rendering mode""" # Algorithm specific arguments env_id: str = "PickCube-v1" @@ -297,7 +299,7 @@ def get_action_and_value(self, x, action=None): device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") # env setup - env_kwargs = dict(obs_mode="rgbd", control_mode="pd_joint_delta_pos", render_mode="all", sim_backend="gpu") + env_kwargs = dict(obs_mode="rgbd", control_mode="pd_joint_delta_pos", render_mode=args.render_mode, sim_backend="gpu") eval_envs = gym.make(args.env_id, num_envs=args.num_eval_envs, **env_kwargs) envs = gym.make(args.env_id, num_envs=args.num_envs if not args.evaluate else 1, **env_kwargs) diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py index f6f96b666..e059742d7 100644 --- a/mani_skill/envs/tasks/control/__init__.py +++ b/mani_skill/envs/tasks/control/__init__.py @@ -1,9 +1,3 @@ from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv from .hopper import HopperHopEnv, HopperStandEnv -from .humanoid import ( - HumanoidRunHard, - HumanoidStand, - HumanoidStandHard, - HumanoidWalk, - HumanoidWalkHard, -) +from .humanoid import HumanoidRun, HumanoidStand, HumanoidWalk diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index be365c226..570805773 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -18,14 +18,13 @@ from mani_skill.utils.structs.pose import Pose from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig -# Height of head above which stand reward is 1. +# dm_control humanoid reward targets _STAND_HEIGHT = 1.4 -# Horizontal speeds above which move reward is 1. _WALK_SPEED = 1 _RUN_SPEED = 10 -################################################################### + class HumanoidEnvBase(BaseEnv): agent: Union[Humanoid] @@ -38,6 +37,7 @@ def _default_sim_config(self): scene_cfg=SceneConfig( solver_position_iterations=4, solver_velocity_iterations=1 ), + spacing=20, sim_freq=200, control_freq=40, ) @@ -45,34 +45,23 @@ def _default_sim_config(self): @property def _default_sensor_configs(self): return [ - # replicated from xml file CameraConfig( - uid="cam0", - pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), + uid="side_cam", + pose=sapien_utils.look_at(eye=[0, -3, 1], target=[0, 0, 0]), width=128, height=128, - fov=np.pi / 4, + fov=60 * np.pi / 180, near=0.01, far=100, mount=self.camera_mount, ), ] - def _before_control_step(self): - self.camera_mount.set_pose( - Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) - ) - if sapien.physx.is_gpu_enabled(): - # we update just actor pose here, no need to call apply_all/fetch_all - self.scene.px.gpu_apply_rigid_dynamic_data() - self.scene.px.gpu_fetch_rigid_dynamic_data() - @property def _default_human_render_camera_configs(self): return [ - # replicated from xml file CameraConfig( - uid="not_render_cam", + uid="training_side_vis", pose=sapien_utils.look_at(eye=[0, -3, 1], target=[0, 0, 0]), width=512, height=512, @@ -83,21 +72,24 @@ def _default_human_render_camera_configs(self): ), ] + # reset the mounted camera - camera only follows torso pos, not orientation + def _before_control_step(self): + self.camera_mount.set_pose( + Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) + ) + if sapien.physx.is_gpu_enabled(): + # we update just actor pose here, no need to call apply_all/fetch_all + self.scene.px.gpu_apply_rigid_dynamic_data() + 
self.scene.px.gpu_fetch_rigid_dynamic_data() + @property def head_height(self): """Returns the height of the head.""" return self.agent.robot.links_map["head"].pose.p[:, -1] def torso_upright(self, info): - # print("rot_mat") - # print(self.agent.robot.links_map["torso"].pose.to_transformation_matrix()) - # print("rot_mat_zz", self.agent.robot.links_map["torso"].pose.to_transformation_matrix()[:,2,2]) - # print("rot_mat.shape", self.agent.robot.links_map["torso"].pose.to_transformation_matrix().shape) return info["torso_xmat"][:, 2, 2] - # not sure this is correct - are our pose rotations the same as mujocos? - # test out the transpose (taking column instead of row) - # right now, it should represnt the z axis of global in the local frame - hm, no, this should be correct def torso_vertical_orientation(self, info): return info["torso_xmat"][:, 2, :3].view(-1, 3) @@ -126,6 +118,7 @@ def center_of_mass_velocity(self): return com_vel + # cache re-used computation def evaluate(self) -> Dict: return dict( torso_xmat=self.agent.robot.links_map[ @@ -142,7 +135,7 @@ def _load_scene(self, options: dict): for a in actor_builders: a.build(a.name) - self.ground = build_ground(self.scene) + self.ground = build_ground(self.scene, floor_width=500) # allow tracking of humanoid self.camera_mount = self.scene.create_actor_builder().build_kinematic( @@ -158,24 +151,7 @@ def _load_scene(self, options: dict): ).to(self.device) self.robot_mass = torch.sum(self.robot_links_mass).item() - def standing_rew(self): - return rewards.tolerance( - self.head_height, - lower=_STAND_HEIGHT, - upper=float("inf"), - margin=_STAND_HEIGHT / 4, - ).view(-1) - - def upright_rew(self, info: Dict): - return rewards.tolerance( - self.torso_upright(info), - lower=0.9, - upper=float("inf"), - sigmoid="linear", - margin=1.9, - value_at_margin=0, - ).view(-1) - + # humanoid reward components used across all tasks def control_rew(self, action: Array): return ( rewards.tolerance(action, margin=1, value_at_margin=0, sigmoid="quadratic") @@ -205,38 +181,23 @@ def move_rew(self, info, move_speed=10): .view(-1) ) # (b,3) -> (b) - # def compute_dense_reward(self, obs: Any, action: Array, info: Dict): - # standing = rewards.tolerance( - # self.head_height, - # lower=_STAND_HEIGHT, - # upper=float("inf"), - # margin=_STAND_HEIGHT / 4, - # ) - # upright = rewards.tolerance( - # self.torso_upright(info), - # lower=0.9, - # upper=float("inf"), - # sigmoid="linear", - # margin=1.9, - # value_at_margin=0, - # ) - # stand_reward = standing.view(-1) * upright.view(-1) - # small_control = rewards.tolerance( - # action, margin=1, value_at_margin=0, sigmoid="quadratic" - # ).mean( - # dim=-1 - # ) # (b, a) -> (b) - # small_control = (4 + small_control) / 5 - # horizontal_velocity = self.center_of_mass_velocity[:, :2] - # dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean( - # dim=-1 - # ) # (b,3) -> (b) - - # return small_control.view(-1) * stand_reward * dont_move.view(-1) - - # def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): - # max_reward = 1.0 - # return self.compute_dense_reward(obs, action, info) / max_reward + def standing_rew(self): + return rewards.tolerance( + self.head_height, + lower=_STAND_HEIGHT, + upper=float("inf"), + margin=_STAND_HEIGHT / 4, + ).view(-1) + + def upright_rew(self, info: Dict): + return rewards.tolerance( + self.torso_upright(info), + lower=0.9, + upper=float("inf"), + sigmoid="linear", + margin=1.9, + value_at_margin=0, + ).view(-1) ### @@ -248,8 +209,8 @@ def __init__(self, 
*args, robot_uids="humanoid", **kwargs): def _get_obs_state_dict(self, info: Dict): # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 - # global dpos/dt and root (torso) dquaterion/dt lost - # we replace with linear root linvel and root angularvel (equivalent info) + # global dpos/dt and root (torso) dquaterion/dt are lost as result + # we replace them with linear root linvel and root angularvel (equivalent info) return dict( agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos root_vel=self.agent.robot.links_map[ @@ -271,7 +232,8 @@ def _get_obs_state_dict(self, info: Dict): orient=self.agent.robot.links_map["dummy_root_0"].pose.q, ) - # our standard humanoid env resets if torso is below a certain point + # standard humanoid env resets if torso is below a certain point + # therefore, we disable all contacts except feet and floor def _load_scene(self, options: Dict): super()._load_scene(options) self.ground.set_collision_group_bit(group=2, bit_idx=30, bit=1) @@ -280,15 +242,7 @@ def _load_scene(self, options: Dict): if not "foot" in link.name: link.set_collision_group_bit(group=2, bit_idx=30, bit=1) - def evaluate(self) -> Dict: - info = super().evaluate() - torso_z = self.agent.robot.links_map["torso"].pose.p[:, -1] - failure = torch.logical_or(torso_z < 1.0, torso_z > 2.0) - info.update(fail=failure) - return info - def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): - # self.camera_mount.set_pose(Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p)) with torch.device(self.device): b = len(env_idx) # set agent root pose - torso now centered at dummy root at (0,0,0) @@ -307,6 +261,14 @@ def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): self.agent.robot.set_qpos(qpos_noise) self.agent.robot.set_qvel(qvel_noise) + # in standard (non-hard) version, we terminate early if the torso is in unacceptable range + def evaluate(self) -> Dict: + info = super().evaluate() + torso_z = self.agent.robot.links_map["torso"].pose.p[:, -1] + failure = torch.logical_or(torso_z < 0.7, torso_z > 2.0) + info.update(fail=failure) + return info + def move_x_rew(self, info, move_speed=10): com_vel_x = info["cmass_linvel"][:, 0] return rewards.tolerance( @@ -321,7 +283,7 @@ def move_x_rew(self, info, move_speed=10): ) # (b,3) -> (b) -@register_env("MS-HumanoidStand-v1", max_episode_steps=200) +@register_env("MS-HumanoidStand-v1", max_episode_steps=1000) class HumanoidStand(HumanoidEnvStandard): agent: Union[Humanoid] @@ -343,190 +305,152 @@ def compute_normalized_dense_reward( return self.compute_dense_reward(obs, action, info) -@register_env("MS-HumanoidWalk-v1", max_episode_steps=200) +@register_env("MS-HumanoidWalk-v1", max_episode_steps=1000) class HumanoidWalk(HumanoidEnvStandard): agent: Union[Humanoid] def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) - # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - # small_control = (4 + self.control_rew(action)) / 5 - # walk_rew = ( - # small_control - # * self.standing_rew() - # * self.upright_rew(info) - # * self.move_x_rew(info, _WALK_SPEED) - # ) - - # return (1 + 2*walk_rew) / 3 - - # def compute_normalized_dense_reward( - # self, obs: Any, action: torch.Tensor, info: Dict - # ): - # return self.compute_dense_reward(obs, action, info) - def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: Dict ): small_control = (4 + 
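# control_rew is a per-joint quadratic tolerance averaged over the action, in [0, 1];
# the (4 + x) / 5 map compresses it to [0.8, 1] so control effort only mildly
# discounts the reward, the same shaping dm_control uses for its humanoid task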
self.control_rew(action)) / 5 - walk_rew = self.move_x_rew(info, _WALK_SPEED) - - return ( - 1 - + (small_control * walk_rew * self.upright_rew(info) * self.standing_rew()) - ) / 2 - - # def compute_normalized_dense_reward( - # self, obs: Any, action: torch.Tensor, info: Dict - # ): - # #small_control = (4 + self.control_rew(action).pow(2)) / 5 - # small_control = (3 - action.pow(2).mean(-1)) / 3 - # walk_rew = self.move_x_rew(info, _WALK_SPEED) - - # return (1+(2*small_control*walk_rew*self.standing_rew())) / 3 - - -@register_env("MS-HumanoidRun-v1", max_episode_steps=200) -class HumanoidRun(HumanoidEnvStandard): - agent: Union[Humanoid] - - def __init__(self, *args, robot_uids="humanoid", **kwargs): - super().__init__(*args, robot_uids=robot_uids, **kwargs) - - # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - # small_control = (4 + self.control_rew(action)) / 5 - # return ( - # small_control - # * self.standing_rew() - # * self.upright_rew(info) - # * ((5*self.move_x_rew(info, _WALK_SPEED)+1)/6) - # ) - - # def compute_normalized_dense_reward( - # self, obs: Any, action: torch.Tensor, info: Dict - # ): - # return self.compute_dense_reward(obs, action, info) - - def compute_normalized_dense_reward( - self, obs: Any, action: torch.Tensor, info: Dict - ): - fail_rew = 5 # * ~info["fail"] - return 0.1 * ( - fail_rew - + ( - (1.25 * info["cmass_linvel"][:, 0]) - - (0.1 * (action.pow(2).sum(dim=-1))) - ) - ) - - -class HumanoidEnvHard(HumanoidEnvBase): - agent: Union[Humanoid] - - def __init__(self, *args, robot_uids="humanoid", **kwargs): - super().__init__(*args, robot_uids=robot_uids, **kwargs) - - def _get_obs_state_dict(self, info: Dict): - # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 - # global dpos/dt and root (torso) dquaterion/dt lost - # we replace with linear root linvel and root angularvel (equivalent info) - return dict( - agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos - head_height=self.head_height, # (b,1) - extremities=self.extremities(info), # (b, 12) - torso_vertical=self.torso_vertical_orientation(info), # (b, 3) - com_velocity=info["cmass_linvel"], # (b, 3) - root_vel=self.agent.robot.links_map[ - "dummy_root_0" - ].get_linear_velocity(), # free joint info, (b, 3) - root_quat_vel=self.agent.robot.links_map[ - "dummy_root_0" - ].get_angular_velocity(), # free joint info, (b, 3) - ) - - def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): - self.camera_mount.set_pose( - Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) - ) - with torch.device(self.device): - b = len(env_idx) - # set agent root pose - torso now centered at dummy root at (0,0,0) - pose = Pose.create_from_pq( - p=[0, 0, 1.5], q=randomization.random_quaternions(b) - ) - self.agent.robot.set_root_pose(pose) - # set randomized qpos - random_qpos = torch.rand(b, self.agent.robot.dof[0]) - q_lims = self.agent.robot.get_qlimits() - q_ranges = q_lims[..., 1] - q_lims[..., 0] - random_qpos *= q_ranges - random_qpos += q_lims[..., 0] - self.agent.reset(random_qpos) - - -### -@register_env("MS-HumanoidStandHard-v1", max_episode_steps=100) -class HumanoidStandHard(HumanoidEnvHard): - agent: Union[Humanoid] - - def __init__(self, *args, robot_uids="humanoid", **kwargs): - super().__init__(*args, robot_uids=robot_uids, **kwargs) - - def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action)) / 5 - return ( + walk_rew = ( 
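# product of shaping terms, each in [0, 1] (small_control sits in [0.8, 1]);
# averaging it with the constant alive_rew below keeps the per-step reward positive
# while the episode is alive, so early termination is penalized implicitly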
small_control - * self.standing_rew() + * self.move_x_rew(info, _WALK_SPEED) * self.upright_rew(info) - * self.dont_move_rew(info) + * self.standing_rew() ) - - def compute_normalized_dense_reward( - self, obs: Any, action: torch.Tensor, info: Dict - ): - return self.compute_dense_reward(obs, action, info) + alive_rew = 1 + return (alive_rew + walk_rew) / 2 -@register_env("MS-HumanoidWalkHard-v1", max_episode_steps=100) -class HumanoidWalkHard(HumanoidEnvHard): +@register_env("MS-HumanoidRun-v1", max_episode_steps=1000) +class HumanoidRun(HumanoidEnvStandard): agent: Union[Humanoid] def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) - def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action)) / 5 - return ( - small_control - * self.standing_rew() - * self.upright_rew(info) - * self.move_rew(info, _WALK_SPEED) - ) - def compute_normalized_dense_reward( self, obs: Any, action: torch.Tensor, info: Dict ): - return self.compute_dense_reward(obs, action, info) - - -@register_env("MS-HumanoidRunHard-v1", max_episode_steps=100) -class HumanoidRunHard(HumanoidEnvHard): - agent: Union[Humanoid] - - def __init__(self, *args, robot_uids="humanoid", **kwargs): - super().__init__(*args, robot_uids=robot_uids, **kwargs) - - def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): - small_control = (4 + self.control_rew(action)) / 5 - return ( - small_control - * self.standing_rew() - * self.upright_rew(info) - * self.move_rew(info, _RUN_SPEED) + # reward function used by mjx for ppo humanoid run + rew_scale = 0.1 + run_x_rew = info["cmass_linvel"][:, 0] + alive_rew = 5 + return rew_scale * ( + alive_rew + 1.25 * run_x_rew - 0.1 * action.pow(2).sum(dim=-1) ) - def compute_normalized_dense_reward( - self, obs: Any, action: torch.Tensor, info: Dict - ): - return self.compute_dense_reward(obs, action, info) + +# TODO (xhin): more sac testing of hard version of environments + +# class HumanoidEnvHard(HumanoidEnvBase): +# agent: Union[Humanoid] + +# def __init__(self, *args, robot_uids="humanoid", **kwargs): +# super().__init__(*args, robot_uids=robot_uids, **kwargs) + +# def _get_obs_state_dict(self, info: Dict): +# # our qpos model doesn't include the free joint, meaning qpos and qvel are 21 dims, not 27 +# # global dpos/dt and root (torso) dquaterion/dt lost +# # we replace with linear root linvel and root angularvel (equivalent info) +# return dict( +# agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos +# head_height=self.head_height, # (b,1) +# extremities=self.extremities(info), # (b, 12) +# torso_vertical=self.torso_vertical_orientation(info), # (b, 3) +# com_velocity=info["cmass_linvel"], # (b, 3) +# root_vel=self.agent.robot.links_map[ +# "dummy_root_0" +# ].get_linear_velocity(), # free joint info, (b, 3) +# root_quat_vel=self.agent.robot.links_map[ +# "dummy_root_0" +# ].get_angular_velocity(), # free joint info, (b, 3) +# ) + +# def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): +# self.camera_mount.set_pose( +# Pose.create_from_pq(p=self.agent.robot.links_map["torso"].pose.p) +# ) +# with torch.device(self.device): +# b = len(env_idx) +# # set agent root pose - torso now centered at dummy root at (0,0,0) +# pose = Pose.create_from_pq( +# p=[0, 0, 1.5], q=randomization.random_quaternions(b) +# ) +# self.agent.robot.set_root_pose(pose) +# # set randomized qpos +# random_qpos = torch.rand(b, 
self.agent.robot.dof[0]) +# q_lims = self.agent.robot.get_qlimits() +# q_ranges = q_lims[..., 1] - q_lims[..., 0] +# random_qpos *= q_ranges +# random_qpos += q_lims[..., 0] +# self.agent.reset(random_qpos) + +# @register_env("MS-HumanoidStandHard-v1", max_episode_steps=1000) +# class HumanoidStandHard(HumanoidEnvHard): +# agent: Union[Humanoid] + +# def __init__(self, *args, robot_uids="humanoid", **kwargs): +# super().__init__(*args, robot_uids=robot_uids, **kwargs) + +# def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): +# small_control = (4 + self.control_rew(action)) / 5 +# return ( +# small_control +# * self.standing_rew() +# * self.upright_rew(info) +# * self.dont_move_rew(info) +# ) + +# def compute_normalized_dense_reward( +# self, obs: Any, action: torch.Tensor, info: Dict +# ): +# return self.compute_dense_reward(obs, action, info) + + +# @register_env("MS-HumanoidWalkHard-v1", max_episode_steps=1000) +# class HumanoidWalkHard(HumanoidEnvHard): +# agent: Union[Humanoid] + +# def __init__(self, *args, robot_uids="humanoid", **kwargs): +# super().__init__(*args, robot_uids=robot_uids, **kwargs) + +# def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): +# small_control = (4 + self.control_rew(action)) / 5 +# return ( +# small_control +# * self.standing_rew() +# * self.upright_rew(info) +# * self.move_rew(info, _WALK_SPEED) +# ) + +# def compute_normalized_dense_reward( +# self, obs: Any, action: torch.Tensor, info: Dict +# ): +# return self.compute_dense_reward(obs, action, info) + + +# @register_env("MS-HumanoidRunHard-v1", max_episode_steps=1000) +# class HumanoidRunHard(HumanoidEnvHard): +# agent: Union[Humanoid] + +# def __init__(self, *args, robot_uids="humanoid", **kwargs): +# super().__init__(*args, robot_uids=robot_uids, **kwargs) + +# def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): +# small_control = (4 + self.control_rew(action)) / 5 +# return ( +# small_control +# * self.standing_rew() +# * self.upright_rew(info) +# * self.move_rew(info, _RUN_SPEED) +# ) + +# def compute_normalized_dense_reward( +# self, obs: Any, action: torch.Tensor, info: Dict +# ): +# return self.compute_dense_reward(obs, action, info) From cb263e52422f0249ecf8ed48efb88ab278f57cd3 Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 8 Aug 2024 10:41:41 -0700 Subject: [PATCH 09/13] cleaned up comments --- mani_skill/agents/robots/humanoid/humanoid.py | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/mani_skill/agents/robots/humanoid/humanoid.py b/mani_skill/agents/robots/humanoid/humanoid.py index 1dfa53400..c8ec9abe9 100644 --- a/mani_skill/agents/robots/humanoid/humanoid.py +++ b/mani_skill/agents/robots/humanoid/humanoid.py @@ -60,30 +60,7 @@ def _controller_configs(self): normalize_action=False, ) - # joints_dict = { - # "abdomen_y": {"damping":5, "stiffness":40}, - # "abdomen_z": {"damping":5, "stiffness":40}, - # "abdomen_x": {"damping":5, "stiffness":40}, - # "right_hip_x": {"damping":5, "stiffness":40}, - # "right_hip_z": {"damping":5, "stiffness":40}, - # "right_hip_y": {"damping":5, "stiffness":120}, - # "right_knee": {"damping":1, "stiffness":80}, - # "right_ankle_x": {"damping":3, "stiffness":20}, - # "right_ankle_y": {"damping":3, "stiffness":20}, - # "left_hip_x": {"damping":5, "stiffness":40}, - # "left_hip_z": {"damping":5, "stiffness":40}, - # "left_hip_y": {"damping":5, "stiffness":120}, - # "left_knee": {"damping":1, "stiffness":80}, - # "left_ankle_x": {"damping":3, 
"stiffness":20}, - # "left_ankle_y": {"damping":3, "stiffness":20}, - # "right_shoulder1":{"damping":1, "stiffness":20}, - # "right_shoulder2":{"damping":1, "stiffness":20}, - # "right_elbow": {"damping":0, "stiffness":40}, - # "left_shoulder1": {"damping":1, "stiffness":20}, - # "left_shoulder2": {"damping":1, "stiffness":20}, - # "left_elbow": {"damping":0, "stiffness":40}, - # } - + # for pd_joint_delta_pos control joints_dict = { "abdomen_y": {"damping": 5, "stiffness": 40}, "abdomen_z": {"damping": 5, "stiffness": 40}, From 23188af039388ee47637bcfef04973c610af8ade Mon Sep 17 00:00:00 2001 From: Xander Date: Thu, 8 Aug 2024 20:20:10 -0700 Subject: [PATCH 10/13] standing now works --- docs/source/tasks/control/index.md | 63 ++++++++++++++++++++++- mani_skill/envs/tasks/control/humanoid.py | 48 ++++++++++++++--- 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/docs/source/tasks/control/index.md b/docs/source/tasks/control/index.md index 2a7136610..cff39033d 100644 --- a/docs/source/tasks/control/index.md +++ b/docs/source/tasks/control/index.md @@ -87,4 +87,65 @@ Hopper robot stands upright **Success Conditions:** - No specific success conditions. We can threshold the episode accumulated reward to determine success. -::: \ No newline at end of file +::: + +## MS-HumanoidStand-v1 +![dense-reward][reward-badge] + +:::{dropdown} Task Card +:icon: note +:color: primary + +**Task Description:** +Humanoid robot stands upright + + +**Supported Robots: humanoid** + +**Randomizations:** +- Humanoid robot is randomly rotated [-pi, pi] radians about z axis. +- Humanoid qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2] + +**Fail Conditions:** +- Humanoid robot torso link leaves z range [0.7, 1.0] +::: + +## MS-HumanoidWalk-v1 +![dense-reward][reward-badge] + +:::{dropdown} Task Card +:icon: note +:color: primary + +**Task Description:** +Humanoid moves in x direction at walking pace + + +**Supported Robots: humanoid** + +**Randomizations:** +- Humanoid qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2] + +**Fail Conditions:** +- Humanoid robot torso link leaves z range [0.7, 1.0] +::: + +## MS-HumanoidRun-v1 +![dense-reward][reward-badge] + +:::{dropdown} Task Card +:icon: note +:color: primary + +**Task Description:** +Humanoid moves in x direction at running pace + + +**Supported Robots: humanoid** + +**Randomizations:** +- Humanoid qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2] + +**Fail Conditions:** +- Humanoid robot torso link leaves z range [0.7, 1.0] +::: diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index 570805773..84332829c 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -290,19 +290,55 @@ class HumanoidStand(HumanoidEnvStandard): def __init__(self, *args, robot_uids="humanoid", **kwargs): super().__init__(*args, robot_uids=robot_uids, **kwargs) - def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + def _get_obs_state_dict(self, info: Dict): + # make all obs completely egocentric, for z rotation invariance, since stand has z rot randomization + root_pose_mat = self.agent.robot.links_map[ + "dummy_root_0" + ].pose.to_transformation_matrix()[:, :3, :3] + lin_vels = [ + link.get_linear_velocity() for link in self.active_links + ] # (links, b, 3) + ang_vels = [ + link.get_angular_velocity() for link in self.active_links + ] # (links, b, 3) + non_ego_vels = torch.stack( + 
[*lin_vels, *ang_vels, info["cmass_linvel"]], dim=1 + ) # (b, len(lin_vels)+len(ang_vels)+1, 3) + ego_vels = (non_ego_vels @ root_pose_mat).view( + -1, (len(lin_vels) + len(ang_vels) + 1) * 3 + ) + return dict( + agent=self._get_obs_agent(), # (b, 21*2) root joint not included in our qpos + head_height=self.head_height, # (b,1) + egocentric_vels=ego_vels, + extremities=self.extremities(info), + ) + + # in stand, we also randomize the agent rotation around z axis + def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): + super()._initialize_episode(env_idx=env_idx, options=options) + with torch.device(self.device): + b = len(env_idx) + # randomize z rotation for humanoid pose + alphas = torch.rand(b) * 2 * torch.pi + quats = torch.zeros(b, 4) + quats[:, 0] = (alphas / 2).cos() + quats[:, -1] = (alphas / 2).sin() + pose = Pose.create_from_pq(p=[0, 0, 1.3], q=quats) + self.agent.robot.set_root_pose(pose) + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): small_control = (4 + self.control_rew(action)) / 5 - return ( + stand_rew = ( small_control * self.standing_rew() * self.upright_rew(info) * self.dont_move_rew(info) ) - def compute_normalized_dense_reward( - self, obs: Any, action: torch.Tensor, info: Dict - ): - return self.compute_dense_reward(obs, action, info) + return stand_rew @register_env("MS-HumanoidWalk-v1", max_episode_steps=1000) From 279b0f318a84563737f28a9ea5fc4388aeb776ff Mon Sep 17 00:00:00 2001 From: Xander Date: Mon, 19 Aug 2024 19:24:11 -0700 Subject: [PATCH 11/13] merge compatibility and typo fix --- mani_skill/envs/tasks/control/humanoid.py | 5 ++--- mani_skill/utils/building/_mjcf_loader.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mani_skill/envs/tasks/control/humanoid.py b/mani_skill/envs/tasks/control/humanoid.py index 84332829c..723901d2a 100644 --- a/mani_skill/envs/tasks/control/humanoid.py +++ b/mani_skill/envs/tasks/control/humanoid.py @@ -5,12 +5,11 @@ import numpy as np import sapien import torch -from transforms3d.euler import euler2quat from mani_skill.agents.robots.humanoid import Humanoid from mani_skill.envs.sapien_env import BaseEnv from mani_skill.envs.utils import randomization, rewards -from mani_skill.sensors.camera import CameraConfig, update_camera_cfgs_from_dict +from mani_skill.sensors.camera import CameraConfig from mani_skill.utils import common, sapien_utils from mani_skill.utils.building.ground import build_ground from mani_skill.utils.geometry import rotation_conversions @@ -34,7 +33,7 @@ def __init__(self, *args, robot_uids="humanoid", **kwargs): @property def _default_sim_config(self): return SimConfig( - scene_cfg=SceneConfig( + scene_config=SceneConfig( solver_position_iterations=4, solver_velocity_iterations=1 ), spacing=20, diff --git a/mani_skill/utils/building/_mjcf_loader.py b/mani_skill/utils/building/_mjcf_loader.py index da94d9217..df9257055 100644 --- a/mani_skill/utils/building/_mjcf_loader.py +++ b/mani_skill/utils/building/_mjcf_loader.py @@ -252,7 +252,7 @@ def _build_geom( if _parse_int(geom_attrib, "condim", 0) == 1: friction = _parse_float( geom_attrib, "friction", 0.3 - ) # maniskill default frection is 0.3 + ) # maniskill default friction is 0.3 physx_material = PhysxMaterial( static_friction=friction, dynamic_friction=friction, restitution=0 ) From 0556ee3bad0d6514e328594916a8e7090295d831 Mon Sep 17 00:00:00 2001 From: Xander Date: Mon, 19 Aug 2024 19:37:33 -0700 Subject: [PATCH 12/13] ppo args added to examples.sh --- 
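Note (not part of the commit): the discount factors in the commands below set the effective reward horizon — gamma = 0.95 weights rewards over roughly 1/(1 - 0.95) = 20 control steps, gamma = 0.97 over roughly 1/(1 - 0.97) ≈ 33, and gamma = 0.98 over roughly 50. The walk/run commands additionally use a small entropy bonus (ent_coef = 1e-3); the stand command does not.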
examples/baselines/ppo/examples.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/examples/baselines/ppo/examples.sh b/examples/baselines/ppo/examples.sh index 2bf865e73..c0dace125 100644 --- a/examples/baselines/ppo/examples.sh +++ b/examples/baselines/ppo/examples.sh @@ -49,6 +49,17 @@ python ppo.py --env_id="MS-CartpoleSwingUp-v1" \ --total_timesteps=10_000_000 --num-steps=250 --num-eval-steps=1000 \ --gamma=0.99 --gae_lambda=0.95 \ --eval_freq=5 +python ppo.py --env_id="MS-HumanoidStand-v1" --num_envs=2048 \ + --update_epochs=8 --num_minibatches=32 --total_timesteps=40_000_000 \ + --eval_freq=10 --num_eval_steps=1000 --num_steps=200 --gamma=0.95 +python ppo.py --env_id="MS-HumanoidWalk-v1" --num_envs=2048 \ + --update_epochs=8 --num_minibatches=32 --total_timesteps=80_000_000 \ + --eval_freq=10 --num_eval_steps=1000 --num_steps=200 --gamma=0.97 \ + --ent_coef=1e-3 +python ppo.py --env_id="MS-HumanoidRun-v1" --num_envs=2048 \ + --update_epochs=8 --num_minibatches=32 --total_timesteps=60_000_000 \ + --eval_freq=10 --num_eval_steps=1000 --num_steps=200 --gamma=0.97 \ + --ent_coef=1e-3 python ppo.py --env_id="UnitreeG1PlaceAppleInBowl-v1" \ --num_envs=512 --update_epochs=8 --num_minibatches=32 \ --total_timesteps=50_000_000 --num-steps=100 --num-eval-steps=100 @@ -98,3 +109,8 @@ python ppo_rgb.py --env_id="PickSingleYCB-v1" \ python ppo_rgb.py --env_id="PushT-v1" \ --num_envs=256 --update_epochs=8 --num_minibatches=8 \ --total_timesteps=25_000_000 --num-steps=100 --num_eval_steps=100 --gamma=0.99 +python ppo_rgb.py --env_id="MS-HumanoidRun-v1" \ + --num_envs=256 --update_epochs=8 --num_minibatches=32 \ + --total_timesteps=80_000_000 --eval_freq=15 --num_eval_steps=1000 \ + --num_steps=200 --gamma=0.98 --no-include-state --render_mode="rgb_array" \ + --ent_coef=1e-3 From 0e6e5f6da12350a73082bdb9628820e24b745630 Mon Sep 17 00:00:00 2001 From: Xander Date: Tue, 20 Aug 2024 16:44:14 -0700 Subject: [PATCH 13/13] merge issue fix --- examples/baselines/ppo/ppo_rgb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/baselines/ppo/ppo_rgb.py b/examples/baselines/ppo/ppo_rgb.py index 0158d1387..79e3d9e8d 100644 --- a/examples/baselines/ppo/ppo_rgb.py +++ b/examples/baselines/ppo/ppo_rgb.py @@ -290,7 +290,7 @@ def close(self): device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") # env setup - env_kwargs = dict(obs_mode="rgbd", control_mode="pd_joint_delta_pos", render_mode=args.render_mode, sim_backend="gpu") + env_kwargs = dict(obs_mode="rgb", control_mode="pd_joint_delta_pos", render_mode=args.render_mode, sim_backend="gpu") eval_envs = gym.make(args.env_id, num_envs=args.num_eval_envs, **env_kwargs) envs = gym.make(args.env_id, num_envs=args.num_envs if not args.evaluate else 1, **env_kwargs)
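Appendix (not part of any commit in this series): a minimal smoke test for the final state of the branch. This is only a sketch — it assumes that importing mani_skill.envs registers the new MS-Humanoid*-v1 tasks, and it reuses the env kwargs that ppo_rgb.py constructs above; it is not code from the patches themselves.

    import gymnasium as gym

    import mani_skill.envs  # noqa: F401  (importing registers the MS-Humanoid*-v1 tasks)

    # Same kwargs ppo_rgb.py builds after patch 13 switches obs_mode to "rgb".
    env_kwargs = dict(
        obs_mode="rgb",
        control_mode="pd_joint_delta_pos",
        render_mode="rgb_array",
        sim_backend="gpu",
    )
    env = gym.make("MS-HumanoidRun-v1", num_envs=4, **env_kwargs)
    obs, _ = env.reset(seed=0)
    for _ in range(5):
        # Step with random batched actions just to confirm the env runs end to end.
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    env.close()

If this runs without error on a GPU machine, the humanoid tasks, the rgb observation mode, and the pd_joint_delta_pos controller are wired up for the RGB PPO baseline.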