Control Hopper Env, Stand + Hop #440

Merged
18 commits, merged Jul 26, 2024
42 changes: 42 additions & 0 deletions docs/source/tasks/control/index.md
@@ -45,4 +45,46 @@ Use the Cartpole robot to swing up a pole on a cart.

**Success Conditions:**
- No specific success conditions. The task is considered solved if the pole stays upright for the whole episode; in practice, we can threshold the episode's accumulated reward to determine success.
:::

## MS-HopperHop-v1
![dense-reward][reward-badge]

:::{dropdown} Task Card
:icon: note
:color: primary

**Task Description:**
The Hopper robot stays upright and moves in the positive x direction with a hopping motion.


**Supported Robots: Hopper**

**Randomizations:**
- The Hopper robot is randomly rotated [-pi, pi] radians about its y axis.
- The Hopper's joint positions (qpos) are uniformly sampled within their allowed ranges.

**Success Conditions:**
- No specific success conditions. We can threshold the episode accumulated reward to determine success.
:::

## MS-HopperStand-v1
![dense-reward][reward-badge]

:::{dropdown} Task Card
:icon: note
:color: primary

**Task Description:**
The Hopper robot stands upright.


**Supported Robots: Hopper**

**Randomizations:**
- The Hopper robot is randomly rotated [-pi, pi] radians about its y axis.
- The Hopper's joint positions (qpos) are uniformly sampled within their allowed ranges.

**Success Conditions:**
- No specific success conditions. We can threshold the episode accumulated reward to determine success.
:::
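Since neither hopper task defines an explicit success condition, one way to use these environments is to create them with Gymnasium and threshold the accumulated reward yourself, as the task cards suggest. Below is a minimal sketch assuming the usual ManiSkill entry points (`gym.make` after importing `mani_skill.envs` for registration, and the `"state"` / `"normalized_dense"` mode strings); the 0.9 threshold is an arbitrary illustrative value, not part of the task definition.

```python
import gymnasium as gym
import mani_skill.envs  # noqa: F401  (registers the MS-Hopper* environments; import path assumed)

env = gym.make("MS-HopperHop-v1", obs_mode="state", reward_mode="normalized_dense")
obs, _ = env.reset(seed=0)

episode_return = 0.0
for _ in range(600):  # max_episode_steps for both hopper tasks
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    episode_return += float(reward)
    if terminated or truncated:
        break
env.close()

# No built-in success flag: threshold the accumulated normalized reward instead.
# 0.9 * 600 is an arbitrary illustrative threshold, not a value from the task definition.
success = episode_return > 0.9 * 600
```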
1 change: 1 addition & 0 deletions mani_skill/envs/tasks/control/__init__.py
@@ -1 +1,2 @@
from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv
from .hopper import HopperHopEnv, HopperStandEnv
70 changes: 70 additions & 0 deletions mani_skill/envs/tasks/control/assets/hopper.xml
@@ -0,0 +1,70 @@
<mujoco model="planar hopper">
<include file="./common/skybox.xml"/>
<include file="./common/visual.xml"/>
<include file="./common/materials.xml"/>

<statistic extent="2" center="0 0 .5"/>

<default>
<default class="hopper">
<joint type="hinge" axis="0 1 0" limited="true" damping=".05" armature=".2"/>
<geom type="capsule" material="self"/>
<site type="sphere" size="0.05" group="3"/>
</default>
<default class="free">
<joint limited="false" damping="0" armature="0" stiffness="0"/>
</default>
<motor ctrlrange="-1 1" ctrllimited="true"/>
</default>

<option timestep="0.005"/>

<worldbody>
<camera name="cam0" pos="0 -2.8 0.8" euler="90 0 0" mode="trackcom"/>
<camera name="back" pos="-2 -.2 1.2" xyaxes="0.2 -1 0 .5 0 2" mode="trackcom"/>
<geom name="floor" type="plane" conaffinity="1" pos="48 0 0" size="50 1 .2" material="grid"/>
<body name="torso" pos="0 0 1" childclass="hopper">
<light name="top" pos="0 0 2" mode="trackcom"/>
<joint name="rootx" type="slide" axis="1 0 0" class="free"/>
<joint name="rootz" type="slide" axis="0 0 1" class="free"/>
<joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
<geom name="torso" fromto="0 0 -.05 0 0 .2" size="0.0653"/>
<geom name="nose" fromto=".08 0 .13 .15 0 .14" size="0.03"/>
<body name="pelvis" pos="0 0 -.05">
<joint name="waist" range="-30 30"/>
<geom name="pelvis" fromto="0 0 0 0 0 -.15" size="0.065"/>
<body name="thigh" pos="0 0 -.2">
<joint name="hip" range="-144 10"/>
<geom name="thigh" fromto="0 0 0 0 0 -.33" size="0.04"/>
<body name="calf" pos="0 0 -.33">
<joint name="knee" range="5 150"/>
<geom name="calf" fromto="0 0 0 0 0 -.32" size="0.03"/>
<!-- foot slightly modified from the original mjcf:
split into heel and toe for force calculation convenience -->
<body name="foot_heel" pos="0 0 -.32">
<joint name="ankle" range="-45 45"/>
<geom name="foot_heel" fromto="-.08 0 0 .11 0 0" size="0.04"/>
<body name="foot_toe">
<joint name="fix_toe2heel" type="fixed"/>
<geom name="foot_toe" fromto=".11 0 0 .17 0 0" size="0.04"/>
</body>
</body>
</body>
</body>
</body>
</body>
</worldbody>

<sensor>
<subtreelinvel name="torso_subtreelinvel" body="torso"/>
<touch name="touch_toe" site="touch_toe"/>
<touch name="touch_heel" site="touch_heel"/>
</sensor>

<actuator>
<motor name="waist" joint="waist" gear="30"/>
<motor name="hip" joint="hip" gear="40"/>
<motor name="knee" joint="knee" gear="30"/>
<motor name="ankle" joint="ankle" gear="10"/>
</actuator>
</mujoco>
13 changes: 13 additions & 0 deletions mani_skill/envs/tasks/control/cartpole.py
@@ -3,7 +3,9 @@
from typing import Any, Dict, Union

import numpy as np
import sapien
import torch
from transforms3d.euler import euler2quat

from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
@@ -110,6 +112,17 @@ def _load_scene(self, options: dict):
for a in actor_builders:
a.build(a.name)

# background visual wall
self.wall = self.scene.create_actor_builder()
self.wall.add_box_visual(
half_size=(1e-3, 20, 10),
pose=sapien.Pose(p=[0, 1, 1], q=euler2quat(0, 0, np.pi / 2)),
material=sapien.render.RenderMaterial(
base_color=np.array([0.3, 0.3, 0.3, 1])
),
)
self.wall.build_static(name="wall")

def evaluate(self):
return dict()

248 changes: 248 additions & 0 deletions mani_skill/envs/tasks/control/hopper.py
@@ -0,0 +1,248 @@
"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/hopper.py"""

import os
from typing import Any, Dict, Union

import numpy as np
import sapien
import torch

from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils import randomization, rewards
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.geometry import rotation_conversions
from mani_skill.utils.registration import register_env
from mani_skill.utils.scene_builder.control.planar.scene_builder import (
PlanarSceneBuilder,
)
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig

MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}"

# Minimal height of torso over foot above which stand reward is 1.
_STAND_HEIGHT = 0.6

# Hopping speed above which hop reward is 1.
_HOP_SPEED = 2


class HopperRobot(BaseAgent):
uid = "hopper"
mjcf_path = MJCF_FILE

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@property
def _controller_configs(self):
# NOTE joints in [rootx, rooty, rootz] are for planar tracking, not control joints
max_delta = 2 # best by far
stiffness = 100
damping = 10 # end best
pd_joint_delta_pos_body = PDJointPosControllerConfig(
["hip", "knee", "waist"],
lower=-max_delta,
upper=max_delta,
damping=damping,
stiffness=stiffness,
use_delta=True,
)
pd_joint_delta_pos_ankle = PDJointPosControllerConfig(
["ankle"],
lower=-max_delta / 2.5,
upper=max_delta / 2.5,
damping=damping,
stiffness=stiffness,
use_delta=True,
)
rest = PassiveControllerConfig(
[j.name for j in self.robot.active_joints if "root" in j.name],
damping=0,
friction=0,
)
return deepcopy_dict(
dict(
pd_joint_delta_pos=dict(
body=pd_joint_delta_pos_body,
ankle=pd_joint_delta_pos_ankle,
rest=rest,
balance_passive_force=False,
),
)
)

def _load_articulation(self):
"""
Load the robot articulation
"""
loader = self.scene.create_mjcf_loader()
asset_path = str(self.mjcf_path)

loader.name = self.uid

self.robot = loader.parse(asset_path)[0][0].build()
assert self.robot is not None, f"Failed to load URDF/MJCF from {asset_path}"
self.robot_link_ids = [link.name for link in self.robot.get_links()]

# cache robot mass for com computation
self.robot_links_mass = [link.mass[0].item() for link in self.robot.get_links()]
self.robot_mass = np.sum(self.robot_links_mass[3:])

# planar agent has root joints in range [-inf, inf], not ideal in obs space
def get_proprioception(self):
return dict(
# don't include xslider qpos, for x trans invariance
# x increases throughout successful episode
qpos=self.robot.get_qpos()[:, 1:],
qvel=self.robot.get_qvel(),
)


class HopperEnv(BaseEnv):
agent: Union[HopperRobot]

def __init__(self, *args, robot_uids=HopperRobot, **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)

@property
def _default_sim_config(self):
return SimConfig(
scene_cfg=SceneConfig(
solver_position_iterations=4, solver_velocity_iterations=1
),
sim_freq=100,
control_freq=25,
Review comment (Member):
how is control_freq here decided, was it to try and match dm-control?

@Xander-Hinrichsen (Collaborator, Author) replied on Jul 23, 2024:
control_freq was set so that the ratio of sim_freq to control_freq is 4:1 (the same as the dm_control hopper); the code now explicitly sets sim_freq=100.

)
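As a quick sanity check of the ratio mentioned in the review thread above, the numbers follow directly from this config (illustrative arithmetic only, not code from the PR):

```python
# Values taken from the SimConfig above.
sim_freq, control_freq = 100, 25
substeps_per_action = sim_freq // control_freq  # 4 physics steps per control step, the same 4:1 ratio as dm_control's hopper
sim_dt, control_dt = 1 / sim_freq, 1 / control_freq  # 0.01 s per physics step, 0.04 s per control step
```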

@property
def _default_sensor_configs(self):
return [
# replicated from xml file
CameraConfig(
uid="cam0",
pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]),
width=128,
height=128,
fov=np.pi / 4,
near=0.01,
far=100,
mount=self.agent.robot.links_map["torso_dummy_1"],
),
]

@property
def _default_human_render_camera_configs(self):
return [
# replicated from xml file
CameraConfig(
uid="render_cam",
pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]),
width=512,
height=512,
fov=np.pi / 4,
near=0.01,
far=100,
mount=self.agent.robot.links_map["torso_dummy_1"],
),
]

def _load_scene(self, options: dict):
loader = self.scene.create_mjcf_loader()
articulation_builders, actor_builders, sensor_configs = loader.parse(MJCF_FILE)
for a in actor_builders:
a.build(a.name)

self.planar_scene = PlanarSceneBuilder(env=self)
self.planar_scene.build()

def _initialize_episode(self, env_idx: torch.Tensor, options: Dict):
with torch.device(self.device):
b = len(env_idx)
# qpos sampled same as dm_control, but ensure no self intersection explicitly here
random_qpos = torch.rand(b, self.agent.robot.dof[0])
q_lims = self.agent.robot.get_qlimits()
q_ranges = q_lims[..., 1] - q_lims[..., 0]
random_qpos *= q_ranges
random_qpos += q_lims[..., 0]

# overwrite planar joint qpos - these are special for planar robots
# first two joints are dummy rootx and rootz
random_qpos[:, :2] = 0
# y is axis of rotation of our planar robot (xz plane), so we randomize around it
random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi)
self.agent.reset(random_qpos)

@property # dm_control mjc function adapted for maniskill
def height(self):
"""Returns relative height of the robot"""
return (
self.agent.robot.links_map["torso"].pose.p[:, -1]
- self.agent.robot.links_map["foot_heel"].pose.p[:, -1]
).view(-1, 1)

@property # dm_control mjc function adapted for maniskill
def subtreelinvelx(self):
"""Returns the x component of the robot's center-of-mass (subtree) linear velocity"""
links = self.agent.robot.get_links()[3:] # skip first three dummy links
vels = torch.stack(
[link.get_linear_velocity() * link.mass[0].item() for link in links], dim=0
) # (num_links, b, 3)
com_vel = vels.sum(dim=0) / self.agent.robot_mass # (b, 3)
return com_vel[:, 0]

# dm_control mjc function adapted for maniskill
def touch(self, link_name):
"""Returns function of sensor force values"""
force_vec = self.agent.robot.get_net_contact_forces([link_name])
force_mag = torch.linalg.norm(force_vec, dim=-1)
return torch.log1p(force_mag)

# dm_control also includes foot pressures as state obs space
def _get_obs_state_dict(self, info: Dict):
return dict(
agent=self._get_obs_agent(),
toe_touch=self.touch("foot_toe"),
Review comment (Member):
does dm_control with visual data input ever use state information? This data will be part of the observation (along with _get_obs_agent()).

Collaborator (Author) replied:
I am now explicitly overriding _get_obs_state_dict to include the entire state (touch + proprioception), so the touch information is not exposed in the rgbd obs mode.

Collaborator (Author) replied:
dm_control uses a pixel wrapper, which lets the user choose either the entire state + pixels or only pixels.

Review comment (Member):
Ok, looking at what seems to be standard practice, DrQ and TDMPC both do not use state at all in training, e.g. https://github.com/nicklashansen/tdmpc2/blob/5f6fadec0fec78304b4b53e8171d348b58cac486/tdmpc2/envs/wrappers/pixels.py#L8

Given the tasks are not that hard, we can do image-only PPO as well. The way I will do it is to fix the flatten RGBD observation wrapper to be like the pixel wrapper: you just choose whether you want rgb, depth, and/or state, and it will handle it accordingly.

Review comment (Member):
Re this, merge the main branch into your PR; I have updated the code to do this exactly. The new PPO RGB script is:

python ppo_rgb.py --exp_name="rgbhopseed1" --env_id="MS-HopperHop-v1" --num_envs=512 --update_epochs=8 --num_minibatches=32 --total_timesteps=20_000_000 --eval_freq=10 --num_eval_steps=600 --num_steps=50 --gamma=0.99 --seed=1 --no_include_state

(the hyperparams might not be well tuned)

heel_touch=self.touch("foot_heel"),
)
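Following up on the observation-wrapper discussion in the review thread above, here is a minimal sketch of a dm_control-style pixel wrapper that selects rgb, depth, and/or a flattened low-dimensional state at wrap time. The wrapper name and the observation dictionary layout (sensor_data/cam0, agent) are assumptions for illustration; this is not the actual ManiSkill wrapper that the maintainer added on the main branch.

```python
import gymnasium as gym
import torch


class PixelSelectWrapper(gym.ObservationWrapper):
    """Hypothetical wrapper: keep rgb / depth / low-dimensional state as requested."""

    def __init__(self, env, rgb=True, depth=False, state=False):
        super().__init__(env)
        self.rgb, self.depth, self.state = rgb, depth, state

    def observation(self, obs):
        out = {}
        # Key layout assumed from ManiSkill's "rgbd" observation mode.
        cam = obs["sensor_data"]["cam0"]
        if self.rgb:
            out["rgb"] = cam["rgb"]
        if self.depth:
            out["depth"] = cam["depth"]
        if self.state:
            # Flatten proprioception (and any other low-dimensional entries) into one vector.
            out["state"] = torch.cat(
                [v.reshape(v.shape[0], -1) for v in obs["agent"].values()], dim=-1
            )
        return out
```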


@register_env("MS-HopperStand-v1", max_episode_steps=600)
class HopperStandEnv(HopperEnv):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0)
return standing.view(-1)

def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
# this should be equal to compute_dense_reward / max possible reward
max_reward = 1.0
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward


@register_env("MS-HopperHop-v1", max_episode_steps=600)
class HopperHopEnv(HopperEnv):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0)
hopping = rewards.tolerance(
self.subtreelinvelx,
lower=_HOP_SPEED,
upper=float("inf"),
margin=_HOP_SPEED / 2,
value_at_margin=0.5,
sigmoid="linear",
)

return standing.view(-1) * hopping.view(-1)

def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
max_reward = 1.0
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
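Both reward functions rely on rewards.tolerance, the dm_control-style shaping utility imported from mani_skill.envs.utils. As a point of reference, here is a minimal sketch of the semantics of the linear-sigmoid variant used for the hopping term; this is an illustration of the behavior, not ManiSkill's actual implementation.

```python
import torch


def linear_tolerance(x, lower, upper, margin=0.0, value_at_margin=0.1):
    """Returns 1 inside [lower, upper]; outside the bounds the value decays
    linearly with distance, reaching value_at_margin at a distance of `margin`
    and 0 beyond that. With margin=0 it is a hard indicator."""
    in_bounds = (x >= lower) & (x <= upper)
    if margin == 0.0:
        return in_bounds.to(x.dtype)
    d = torch.where(x < lower, lower - x, x - upper) / margin
    decayed = torch.clamp(1 - (1 - value_at_margin) * d, min=0.0)
    return torch.where(in_bounds, torch.ones_like(x), decayed)


# e.g. the hopping term in HopperHopEnv: 1 once the COM x-velocity reaches _HOP_SPEED (2.0),
# decaying to 0.5 at a speed of 1.0 and to 0 at 0.0.
# hopping = linear_tolerance(com_vel_x, lower=2.0, upper=float("inf"), margin=1.0, value_at_margin=0.5)
```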