-
Notifications
You must be signed in to change notification settings - Fork 167
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Control Hopper Env, Stand + Hop #440
Changes from all commits
7cf8c3c
53479d8
c73c1be
e5bb4c5
b582deb
814c1b9
d2a1f17
4ce2293
03060c7
e16295d
2da3604
d4f0668
763ce6b
2e04179
0ba8dab
cbad1d6
2ae6a47
a75855d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv | ||
from .hopper import HopperHopEnv, HopperStandEnv |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
<mujoco model="planar hopper"> | ||
<include file="./common/skybox.xml"/> | ||
<include file="./common/visual.xml"/> | ||
<include file="./common/materials.xml"/> | ||
|
||
<statistic extent="2" center="0 0 .5"/> | ||
|
||
<default> | ||
<default class="hopper"> | ||
<joint type="hinge" axis="0 1 0" limited="true" damping=".05" armature=".2"/> | ||
<geom type="capsule" material="self"/> | ||
<site type="sphere" size="0.05" group="3"/> | ||
</default> | ||
<default class="free"> | ||
<joint limited="false" damping="0" armature="0" stiffness="0"/> | ||
</default> | ||
<motor ctrlrange="-1 1" ctrllimited="true"/> | ||
</default> | ||
|
||
<option timestep="0.005"/> | ||
|
||
<worldbody> | ||
<camera name="cam0" pos="0 -2.8 0.8" euler="90 0 0" mode="trackcom"/> | ||
<camera name="back" pos="-2 -.2 1.2" xyaxes="0.2 -1 0 .5 0 2" mode="trackcom"/> | ||
<geom name="floor" type="plane" conaffinity="1" pos="48 0 0" size="50 1 .2" material="grid"/> | ||
<body name="torso" pos="0 0 1" childclass="hopper"> | ||
<light name="top" pos="0 0 2" mode="trackcom"/> | ||
<joint name="rootx" type="slide" axis="1 0 0" class="free"/> | ||
<joint name="rootz" type="slide" axis="0 0 1" class="free"/> | ||
<joint name="rooty" type="hinge" axis="0 1 0" class="free"/> | ||
<geom name="torso" fromto="0 0 -.05 0 0 .2" size="0.0653"/> | ||
<geom name="nose" fromto=".08 0 .13 .15 0 .14" size="0.03"/> | ||
<body name="pelvis" pos="0 0 -.05"> | ||
<joint name="waist" range="-30 30"/> | ||
<geom name="pelvis" fromto="0 0 0 0 0 -.15" size="0.065"/> | ||
<body name="thigh" pos="0 0 -.2"> | ||
<joint name="hip" range="-144 10"/> | ||
<geom name="thigh" fromto="0 0 0 0 0 -.33" size="0.04"/> | ||
<body name="calf" pos="0 0 -.33"> | ||
<joint name="knee" range="5 150"/> | ||
<geom name="calf" fromto="0 0 0 0 0 -.32" size="0.03"/> | ||
<!-- foot slightly modified from original mjcf | ||
split into heel and toe for force calculation convienence --> | ||
<body name="foot_heel" pos="0 0 -.32"> | ||
<joint name="ankle" range="-45 45"/> | ||
<geom name="foot_heel" fromto="-.08 0 0 .11 0 0" size="0.04"/> | ||
<body name="foot_toe"> | ||
<joint name="fix_toe2heel" type="fixed"/> | ||
<geom name="foot_toe" fromto=".11 0 0 .17 0 0" size="0.04"/> | ||
</body> | ||
</body> | ||
</body> | ||
</body> | ||
</body> | ||
</body> | ||
</worldbody> | ||
|
||
<sensor> | ||
<subtreelinvel name="torso_subtreelinvel" body="torso"/> | ||
<touch name="touch_toe" site="touch_toe"/> | ||
<touch name="touch_heel" site="touch_heel"/> | ||
</sensor> | ||
|
||
<actuator> | ||
<motor name="waist" joint="waist" gear="30"/> | ||
<motor name="hip" joint="hip" gear="40"/> | ||
<motor name="knee" joint="knee" gear="30"/> | ||
<motor name="ankle" joint="ankle" gear="10"/> | ||
</actuator> | ||
</mujoco> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/hopper.py""" | ||
|
||
import os | ||
from typing import Any, Dict, Union | ||
|
||
import numpy as np | ||
import sapien | ||
import torch | ||
|
||
from mani_skill.agents.base_agent import BaseAgent | ||
from mani_skill.agents.controllers import * | ||
from mani_skill.envs.sapien_env import BaseEnv | ||
from mani_skill.envs.utils import randomization, rewards | ||
from mani_skill.sensors.camera import CameraConfig | ||
from mani_skill.utils import common, sapien_utils | ||
from mani_skill.utils.geometry import rotation_conversions | ||
from mani_skill.utils.registration import register_env | ||
from mani_skill.utils.scene_builder.control.planar.scene_builder import ( | ||
PlanarSceneBuilder, | ||
) | ||
from mani_skill.utils.structs.pose import Pose | ||
from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig | ||
|
||
MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}" | ||
|
||
# Minimal height of torso over foot above which stand reward is 1. | ||
_STAND_HEIGHT = 0.6 | ||
|
||
# Hopping speed above which hop reward is 1. | ||
_HOP_SPEED = 2 | ||
|
||
|
||
class HopperRobot(BaseAgent): | ||
uid = "hopper" | ||
mjcf_path = MJCF_FILE | ||
|
||
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
|
||
@property | ||
def _controller_configs(self): | ||
# NOTE joints in [rootx,rooty,rooz] are for planar tracking, not control joints | ||
max_delta = 2 # best by far | ||
stiffness = 100 | ||
damping = 10 # end best | ||
pd_joint_delta_pos_body = PDJointPosControllerConfig( | ||
["hip", "knee", "waist"], | ||
lower=-max_delta, | ||
upper=max_delta, | ||
damping=damping, | ||
stiffness=stiffness, | ||
use_delta=True, | ||
) | ||
pd_joint_delta_pos_ankle = PDJointPosControllerConfig( | ||
["ankle"], | ||
lower=-max_delta / 2.5, | ||
upper=max_delta / 2.5, | ||
damping=damping, | ||
stiffness=stiffness, | ||
use_delta=True, | ||
) | ||
rest = PassiveControllerConfig( | ||
[j.name for j in self.robot.active_joints if "root" in j.name], | ||
damping=0, | ||
friction=0, | ||
) | ||
return deepcopy_dict( | ||
dict( | ||
pd_joint_delta_pos=dict( | ||
body=pd_joint_delta_pos_body, | ||
ankle=pd_joint_delta_pos_ankle, | ||
rest=rest, | ||
balance_passive_force=False, | ||
), | ||
) | ||
) | ||
|
||
def _load_articulation(self): | ||
""" | ||
Load the robot articulation | ||
""" | ||
loader = self.scene.create_mjcf_loader() | ||
asset_path = str(self.mjcf_path) | ||
|
||
loader.name = self.uid | ||
|
||
self.robot = loader.parse(asset_path)[0][0].build() | ||
assert self.robot is not None, f"Fail to load URDF/MJCF from {asset_path}" | ||
self.robot_link_ids = [link.name for link in self.robot.get_links()] | ||
|
||
# cache robot mass for com computation | ||
self.robot_links_mass = [link.mass[0].item() for link in self.robot.get_links()] | ||
self.robot_mass = np.sum(self.robot_links_mass[3:]) | ||
|
||
# planar agent has root joints in range [-inf, inf], not ideal in obs space | ||
def get_proprioception(self): | ||
return dict( | ||
# don't include xslider qpos, for x trans invariance | ||
# x increases throughout successful episode | ||
qpos=self.robot.get_qpos()[:, 1:], | ||
qvel=self.robot.get_qvel(), | ||
) | ||
|
||
|
||
class HopperEnv(BaseEnv): | ||
agent: Union[HopperRobot] | ||
|
||
def __init__(self, *args, robot_uids=HopperRobot, **kwargs): | ||
super().__init__(*args, robot_uids=robot_uids, **kwargs) | ||
|
||
@property | ||
def _default_sim_config(self): | ||
return SimConfig( | ||
scene_cfg=SceneConfig( | ||
solver_position_iterations=4, solver_velocity_iterations=1 | ||
), | ||
sim_freq=100, | ||
control_freq=25, | ||
) | ||
|
||
@property | ||
def _default_sensor_configs(self): | ||
return [ | ||
# replicated from xml file | ||
CameraConfig( | ||
uid="cam0", | ||
pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), | ||
width=128, | ||
height=128, | ||
fov=np.pi / 4, | ||
near=0.01, | ||
far=100, | ||
mount=self.agent.robot.links_map["torso_dummy_1"], | ||
), | ||
] | ||
|
||
@property | ||
def _default_human_render_camera_configs(self): | ||
return [ | ||
# replicated from xml file | ||
CameraConfig( | ||
uid="render_cam", | ||
pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]), | ||
width=512, | ||
height=512, | ||
fov=np.pi / 4, | ||
near=0.01, | ||
far=100, | ||
mount=self.agent.robot.links_map["torso_dummy_1"], | ||
), | ||
] | ||
|
||
def _load_scene(self, options: dict): | ||
loader = self.scene.create_mjcf_loader() | ||
articulation_builders, actor_builders, sensor_configs = loader.parse(MJCF_FILE) | ||
for a in actor_builders: | ||
a.build(a.name) | ||
|
||
self.planar_scene = PlanarSceneBuilder(env=self) | ||
self.planar_scene.build() | ||
|
||
def _initialize_episode(self, env_idx: torch.Tensor, options: Dict): | ||
with torch.device(self.device): | ||
b = len(env_idx) | ||
# qpos sampled same as dm_control, but ensure no self intersection explicitly here | ||
random_qpos = torch.rand(b, self.agent.robot.dof[0]) | ||
q_lims = self.agent.robot.get_qlimits() | ||
q_ranges = q_lims[..., 1] - q_lims[..., 0] | ||
random_qpos *= q_ranges | ||
random_qpos += q_lims[..., 0] | ||
|
||
# overwrite planar joint qpos - these are special for planar robots | ||
# first two joints are dummy rootx and rootz | ||
random_qpos[:, :2] = 0 | ||
# y is axis of rotation of our planar robot (xz plane), so we randomize around it | ||
random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi) | ||
self.agent.reset(random_qpos) | ||
|
||
@property # dm_control mjc function adapted for maniskill | ||
def height(self): | ||
"""Returns relative height of the robot""" | ||
return ( | ||
self.agent.robot.links_map["torso"].pose.p[:, -1] | ||
- self.agent.robot.links_map["foot_heel"].pose.p[:, -1] | ||
).view(-1, 1) | ||
|
||
@property # dm_control mjc function adapted for maniskill | ||
def subtreelinvelx(self): | ||
# """Returns linvel x component of robot""" | ||
links = self.agent.robot.get_links()[3:] # skip first three dummy links | ||
vels = torch.stack( | ||
[link.get_linear_velocity() * link.mass[0].item() for link in links], dim=0 | ||
) # (num_links, b, 3) | ||
com_vel = vels.sum(dim=0) / self.agent.robot_mass # (b, 3) | ||
return com_vel[:, 0] | ||
|
||
# dm_control mjc function adapted for maniskill | ||
def touch(self, link_name): | ||
"""Returns function of sensor force values""" | ||
force_vec = self.agent.robot.get_net_contact_forces([link_name]) | ||
force_mag = torch.linalg.norm(force_vec, dim=-1) | ||
return torch.log1p(force_mag) | ||
|
||
# dm_control also includes foot pressures as state obs space | ||
def _get_obs_state_dict(self, info: Dict): | ||
return dict( | ||
agent=self._get_obs_agent(), | ||
toe_touch=self.touch("foot_toe"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does dm control with visual data input ever use state information? This data will be part of the observation (along with _get_obs_agent()) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am now explicitly overriding _get_obs_state_dict to include entire state (touch + proprioception), so the touch information isn't allowed for rgbd obs mode There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dm_control uses pixel wrapper, which allows for user to choose either entire state + pixels or only pixels There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok looking at what seems to be standard practice, DrQ and TDMPC both do not use state at all in training, e.g. https://github.com/nicklashansen/tdmpc2/blob/5f6fadec0fec78304b4b53e8171d348b58cac486/tdmpc2/envs/wrappers/pixels.py#L8 Given the tasks are not that hard, we can do image only ppo as well. The way I will do it is to fix the flatten rgbd obsevation wrapper to be like the pixel wrapper, and you just choose if you want rgb, depth, and / or state, and it will handle it accordingly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. re this, merge the main branch into your PR. I have updated code to do this exactly. The new ppo rgb script is this:
(the hyperparams might not be well tuned). |
||
heel_touch=self.touch("foot_heel"), | ||
) | ||
|
||
|
||
@register_env("MS-HopperStand-v1", max_episode_steps=600) | ||
class HopperStandEnv(HopperEnv): | ||
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
|
||
def compute_dense_reward(self, obs: Any, action: Array, info: Dict): | ||
standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) | ||
return standing.view(-1) | ||
|
||
def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): | ||
# this should be equal to compute_dense_reward / max possible reward | ||
max_reward = 1.0 | ||
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward | ||
|
||
|
||
@register_env("MS-HopperHop-v1", max_episode_steps=600) | ||
class HopperHopEnv(HopperEnv): | ||
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
|
||
def compute_dense_reward(self, obs: Any, action: Array, info: Dict): | ||
standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0) | ||
hopping = rewards.tolerance( | ||
self.subtreelinvelx, | ||
lower=_HOP_SPEED, | ||
upper=float("inf"), | ||
margin=_HOP_SPEED / 2, | ||
value_at_margin=0.5, | ||
sigmoid="linear", | ||
) | ||
|
||
return standing.view(-1) * hopping.view(-1) | ||
|
||
def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict): | ||
max_reward = 1.0 | ||
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how is control_freq here decided, was it to try and match dm-control?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
control_freq was set so the ratio of sim_freq to control is 4:1 (same as dm_control hopper), code now explicitly has sim_freq=100