Control Hopper Env, Stand + Hop #440

Merged
18 commits, merged Jul 26, 2024
42 changes: 42 additions & 0 deletions docs/source/tasks/control/index.md
@@ -45,4 +45,46 @@ Use the Cartpole robot to swing up a pole on a cart.

**Success Conditions:**
- No specific success conditions. The task is considered solved if the pole stays upright for the whole episode; in practice, we can threshold the episode's accumulated reward to determine success.
:::

## MS-HopperHop-v1
![dense-reward][reward-badge]

:::{dropdown} Task Card
:icon: note
:color: primary

**Task Description:**
The Hopper robot stays upright and moves in the positive x direction with a hopping motion.


**Supported Robots: Hopper**

**Randomizations:**
- The Hopper robot is randomly rotated [-pi, pi] radians about its y axis.
- The Hopper's joint positions (qpos) are uniformly sampled within their allowed ranges.

**Success Conditions:**
- No specific success conditions. We can threshold the episode accumulated reward to determine success.
:::

## MS-HopperStand-v1
![dense-reward][reward-badge]

:::{dropdown} Task Card
:icon: note
:color: primary

**Task Description:**
The Hopper robot stands upright.


**Supported Robots: Hopper**

**Randomizations:**
- The Hopper robot is randomly rotated [-pi, pi] radians about its y axis.
- The Hopper's joint positions (qpos) are uniformly sampled within their allowed ranges.

**Success Conditions:**
- No specific success conditions. We can threshold the episode accumulated reward to determine success.
:::
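Since neither hopper task defines an explicit success condition, one way to use these environments is to create them with Gymnasium and threshold the accumulated reward yourself, as the task cards suggest. Below is a minimal sketch assuming the usual ManiSkill entry points (`gym.make` after importing `mani_skill.envs` for registration, and the `"state"` / `"normalized_dense"` mode strings); the 0.9 threshold is an arbitrary illustrative value, not part of the task definition.

```python
import gymnasium as gym
import mani_skill.envs  # noqa: F401  (registers the MS-Hopper* environments; import path assumed)

env = gym.make("MS-HopperHop-v1", obs_mode="state", reward_mode="normalized_dense")
obs, _ = env.reset(seed=0)

episode_return = 0.0
for _ in range(600):  # max_episode_steps for both hopper tasks
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    episode_return += float(reward)
    if terminated or truncated:
        break
env.close()

# No built-in success flag: threshold the accumulated normalized reward instead.
# 0.9 * 600 is an arbitrary illustrative threshold, not a value from the task definition.
success = episode_return > 0.9 * 600
```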
1 change: 1 addition & 0 deletions mani_skill/envs/tasks/control/__init__.py
@@ -1 +1,2 @@
from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv
from .hopper import HopperHopEnv, HopperStandEnv
70 changes: 70 additions & 0 deletions mani_skill/envs/tasks/control/assets/hopper.xml
@@ -0,0 +1,70 @@
<mujoco model="planar hopper">
<include file="./common/skybox.xml"/>
<include file="./common/visual.xml"/>
<include file="./common/materials.xml"/>

<statistic extent="2" center="0 0 .5"/>

<default>
<default class="hopper">
<joint type="hinge" axis="0 1 0" limited="true" damping=".05" armature=".2"/>
<geom type="capsule" material="self"/>
<site type="sphere" size="0.05" group="3"/>
</default>
<default class="free">
<joint limited="false" damping="0" armature="0" stiffness="0"/>
</default>
<motor ctrlrange="-1 1" ctrllimited="true"/>
</default>

<option timestep="0.005"/>

<worldbody>
<camera name="cam0" pos="0 -2.8 0.8" euler="90 0 0" mode="trackcom"/>
<camera name="back" pos="-2 -.2 1.2" xyaxes="0.2 -1 0 .5 0 2" mode="trackcom"/>
<geom name="floor" type="plane" conaffinity="1" pos="48 0 0" size="50 1 .2" material="grid"/>
<body name="torso" pos="0 0 1" childclass="hopper">
<light name="top" pos="0 0 2" mode="trackcom"/>
<joint name="rootx" type="slide" axis="1 0 0" class="free"/>
<joint name="rootz" type="slide" axis="0 0 1" class="free"/>
<joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
<geom name="torso" fromto="0 0 -.05 0 0 .2" size="0.0653"/>
<geom name="nose" fromto=".08 0 .13 .15 0 .14" size="0.03"/>
<body name="pelvis" pos="0 0 -.05">
<joint name="waist" range="-30 30"/>
<geom name="pelvis" fromto="0 0 0 0 0 -.15" size="0.065"/>
<body name="thigh" pos="0 0 -.2">
<joint name="hip" range="-144 10"/>
<geom name="thigh" fromto="0 0 0 0 0 -.33" size="0.04"/>
<body name="calf" pos="0 0 -.33">
<joint name="knee" range="5 150"/>
<geom name="calf" fromto="0 0 0 0 0 -.32" size="0.03"/>
<!-- foot slightly modified from the original mjcf:
split into heel and toe for force calculation convenience -->
<body name="foot_heel" pos="0 0 -.32">
<joint name="ankle" range="-45 45"/>
<geom name="foot_heel" fromto="-.08 0 0 .11 0 0" size="0.04"/>
<body name="foot_toe">
<joint name="fix_toe2heel" type="fixed"/>
<geom name="foot_toe" fromto=".11 0 0 .17 0 0" size="0.04"/>
</body>
</body>
</body>
</body>
</body>
</body>
</worldbody>

<sensor>
<subtreelinvel name="torso_subtreelinvel" body="torso"/>
<touch name="touch_toe" site="touch_toe"/>
<touch name="touch_heel" site="touch_heel"/>
</sensor>

<actuator>
<motor name="waist" joint="waist" gear="30"/>
<motor name="hip" joint="hip" gear="40"/>
<motor name="knee" joint="knee" gear="30"/>
<motor name="ankle" joint="ankle" gear="10"/>
</actuator>
</mujoco>
13 changes: 13 additions & 0 deletions mani_skill/envs/tasks/control/cartpole.py
@@ -3,7 +3,9 @@
from typing import Any, Dict, Union

import numpy as np
import sapien
import torch
from transforms3d.euler import euler2quat

from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
@@ -110,6 +112,17 @@ def _load_scene(self, options: dict):
for a in actor_builders:
a.build(a.name)

# background visual wall
self.wall = self.scene.create_actor_builder()
self.wall.add_box_visual(
half_size=(1e-3, 20, 10),
pose=sapien.Pose(p=[0, 1, 1], q=euler2quat(0, 0, np.pi / 2)),
material=sapien.render.RenderMaterial(
base_color=np.array([0.3, 0.3, 0.3, 1])
),
)
self.wall.build_static(name="wall")

def evaluate(self):
return dict()

248 changes: 248 additions & 0 deletions mani_skill/envs/tasks/control/hopper.py
@@ -0,0 +1,248 @@
"""Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/hopper.py"""

import os
from typing import Any, Dict, Union

import numpy as np
import sapien
import torch

from mani_skill.agents.base_agent import BaseAgent
from mani_skill.agents.controllers import *
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.envs.utils import randomization, rewards
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.geometry import rotation_conversions
from mani_skill.utils.registration import register_env
from mani_skill.utils.scene_builder.control.planar.scene_builder import (
PlanarSceneBuilder,
)
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import Array, SceneConfig, SimConfig

MJCF_FILE = f"{os.path.join(os.path.dirname(__file__), 'assets/hopper.xml')}"

# Minimal height of torso over foot above which stand reward is 1.
_STAND_HEIGHT = 0.6

# Hopping speed above which hop reward is 1.
_HOP_SPEED = 2


class HopperRobot(BaseAgent):
uid = "hopper"
mjcf_path = MJCF_FILE

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@property
def _controller_configs(self):
# NOTE joints in [rootx, rooty, rootz] are for planar tracking, not control joints
max_delta = 2 # best by far
stiffness = 100
damping = 10 # end best
pd_joint_delta_pos_body = PDJointPosControllerConfig(
["hip", "knee", "waist"],
lower=-max_delta,
upper=max_delta,
damping=damping,
stiffness=stiffness,
use_delta=True,
)
pd_joint_delta_pos_ankle = PDJointPosControllerConfig(
["ankle"],
lower=-max_delta / 2.5,
upper=max_delta / 2.5,
damping=damping,
stiffness=stiffness,
use_delta=True,
)
rest = PassiveControllerConfig(
[j.name for j in self.robot.active_joints if "root" in j.name],
damping=0,
friction=0,
)
return deepcopy_dict(
dict(
pd_joint_delta_pos=dict(
body=pd_joint_delta_pos_body,
ankle=pd_joint_delta_pos_ankle,
rest=rest,
balance_passive_force=False,
),
)
)

def _load_articulation(self):
"""
Load the robot articulation
"""
loader = self.scene.create_mjcf_loader()
asset_path = str(self.mjcf_path)

loader.name = self.uid

self.robot = loader.parse(asset_path)[0][0].build()
assert self.robot is not None, f"Failed to load URDF/MJCF from {asset_path}"
self.robot_link_ids = [link.name for link in self.robot.get_links()]

# cache robot mass for com computation
self.robot_links_mass = [link.mass[0].item() for link in self.robot.get_links()]
self.robot_mass = np.sum(self.robot_links_mass[3:])

# planar agent has root joints in range [-inf, inf], not ideal in obs space
def get_proprioception(self):
return dict(
# don't include xslider qpos, for x trans invariance
# x increases throughout successful episode
qpos=self.robot.get_qpos()[:, 1:],
qvel=self.robot.get_qvel(),
)


class HopperEnv(BaseEnv):
agent: Union[HopperRobot]

def __init__(self, *args, robot_uids=HopperRobot, **kwargs):
super().__init__(*args, robot_uids=robot_uids, **kwargs)

@property
def _default_sim_config(self):
return SimConfig(
scene_cfg=SceneConfig(
solver_position_iterations=4, solver_velocity_iterations=1
),
sim_freq=100,
control_freq=25,
Review comment (Member):
how is control_freq here decided, was it to try and match dm-control?

@Xander-Hinrichsen (Collaborator, Author) replied on Jul 23, 2024:
control_freq was set so that the ratio of sim_freq to control_freq is 4:1 (the same as the dm_control hopper); the code now explicitly sets sim_freq=100.

)
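As a quick sanity check of the ratio mentioned in the review thread above, the numbers follow directly from this config (illustrative arithmetic only, not code from the PR):

```python
# Values taken from the SimConfig above.
sim_freq, control_freq = 100, 25
substeps_per_action = sim_freq // control_freq  # 4 physics steps per control step, the same 4:1 ratio as dm_control's hopper
sim_dt, control_dt = 1 / sim_freq, 1 / control_freq  # 0.01 s per physics step, 0.04 s per control step
```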

@property
def _default_sensor_configs(self):
return [
# replicated from xml file
CameraConfig(
uid="cam0",
pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]),
width=128,
height=128,
fov=np.pi / 4,
near=0.01,
far=100,
mount=self.agent.robot.links_map["torso_dummy_1"],
),
]

@property
def _default_human_render_camera_configs(self):
return [
# replicated from xml file
CameraConfig(
uid="render_cam",
pose=sapien_utils.look_at(eye=[0, -2.8, 0.8], target=[0, 0, 0]),
width=512,
height=512,
fov=np.pi / 4,
near=0.01,
far=100,
mount=self.agent.robot.links_map["torso_dummy_1"],
),
]

def _load_scene(self, options: dict):
loader = self.scene.create_mjcf_loader()
articulation_builders, actor_builders, sensor_configs = loader.parse(MJCF_FILE)
for a in actor_builders:
a.build(a.name)

self.planar_scene = PlanarSceneBuilder(env=self)
self.planar_scene.build()

def _initialize_episode(self, env_idx: torch.Tensor, options: Dict):
with torch.device(self.device):
b = len(env_idx)
# qpos sampled same as dm_control, but ensure no self intersection explicitly here
random_qpos = torch.rand(b, self.agent.robot.dof[0])
q_lims = self.agent.robot.get_qlimits()
q_ranges = q_lims[..., 1] - q_lims[..., 0]
random_qpos *= q_ranges
random_qpos += q_lims[..., 0]

# overwrite planar joint qpos - these are special for planar robots
# first two joints are dummy rootx and rootz
random_qpos[:, :2] = 0
# y is axis of rotation of our planar robot (xz plane), so we randomize around it
random_qpos[:, 2] = torch.pi * (2 * torch.rand(b) - 1) # (-pi,pi)
self.agent.reset(random_qpos)

@property # dm_control mjc function adapted for maniskill
def height(self):
"""Returns relative height of the robot"""
return (
self.agent.robot.links_map["torso"].pose.p[:, -1]
- self.agent.robot.links_map["foot_heel"].pose.p[:, -1]
).view(-1, 1)

@property # dm_control mjc function adapted for maniskill
def subtreelinvelx(self):
"""Returns the x component of the robot's center-of-mass (subtree) linear velocity"""
links = self.agent.robot.get_links()[3:] # skip first three dummy links
vels = torch.stack(
[link.get_linear_velocity() * link.mass[0].item() for link in links], dim=0
) # (num_links, b, 3)
com_vel = vels.sum(dim=0) / self.agent.robot_mass # (b, 3)
return com_vel[:, 0]

# dm_control mjc function adapted for maniskill
def touch(self, link_name):
"""Returns function of sensor force values"""
force_vec = self.agent.robot.get_net_contact_forces([link_name])
force_mag = torch.linalg.norm(force_vec, dim=-1)
return torch.log1p(force_mag)

# dm_control also includes foot pressures as state obs space
def _get_obs_state_dict(self, info: Dict):
return dict(
agent=self._get_obs_agent(),
toe_touch=self.touch("foot_toe"),
Review comment (Member):
does dm_control with visual data input ever use state information? This data will be part of the observation (along with _get_obs_agent()).

Collaborator (Author) replied:
I am now explicitly overriding _get_obs_state_dict to include the entire state (touch + proprioception), so the touch information is not exposed in the rgbd obs mode.

Collaborator (Author) replied:
dm_control uses a pixel wrapper, which lets the user choose either the entire state + pixels or only pixels.

Review comment (Member):
Ok, looking at what seems to be standard practice, DrQ and TDMPC both do not use state at all in training, e.g. https://github.com/nicklashansen/tdmpc2/blob/5f6fadec0fec78304b4b53e8171d348b58cac486/tdmpc2/envs/wrappers/pixels.py#L8

Given the tasks are not that hard, we can do image-only PPO as well. The way I will do it is to fix the flatten RGBD observation wrapper to be like the pixel wrapper: you just choose whether you want rgb, depth, and/or state, and it will handle it accordingly.

Review comment (Member):
Re this, merge the main branch into your PR; I have updated the code to do this exactly. The new PPO RGB script is:

python ppo_rgb.py --exp_name="rgbhopseed1" --env_id="MS-HopperHop-v1" --num_envs=512 --update_epochs=8 --num_minibatches=32 --total_timesteps=20_000_000 --eval_freq=10 --num_eval_steps=600 --num_steps=50 --gamma=0.99 --seed=1 --no_include_state

(the hyperparams might not be well tuned)

heel_touch=self.touch("foot_heel"),
)
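Following up on the observation-wrapper discussion in the review thread above, here is a minimal sketch of a dm_control-style pixel wrapper that selects rgb, depth, and/or a flattened low-dimensional state at wrap time. The wrapper name and the observation dictionary layout (sensor_data/cam0, agent) are assumptions for illustration; this is not the actual ManiSkill wrapper that the maintainer added on the main branch.

```python
import gymnasium as gym
import torch


class PixelSelectWrapper(gym.ObservationWrapper):
    """Hypothetical wrapper: keep rgb / depth / low-dimensional state as requested."""

    def __init__(self, env, rgb=True, depth=False, state=False):
        super().__init__(env)
        self.rgb, self.depth, self.state = rgb, depth, state

    def observation(self, obs):
        out = {}
        # Key layout assumed from ManiSkill's "rgbd" observation mode.
        cam = obs["sensor_data"]["cam0"]
        if self.rgb:
            out["rgb"] = cam["rgb"]
        if self.depth:
            out["depth"] = cam["depth"]
        if self.state:
            # Flatten proprioception (and any other low-dimensional entries) into one vector.
            out["state"] = torch.cat(
                [v.reshape(v.shape[0], -1) for v in obs["agent"].values()], dim=-1
            )
        return out
```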


@register_env("MS-HopperStand-v1", max_episode_steps=600)
class HopperStandEnv(HopperEnv):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0)
return standing.view(-1)

def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
# this should be equal to compute_dense_reward / max possible reward
max_reward = 1.0
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward


@register_env("MS-HopperHop-v1", max_episode_steps=600)
class HopperHopEnv(HopperEnv):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
standing = rewards.tolerance(self.height, lower=_STAND_HEIGHT, upper=2.0)
hopping = rewards.tolerance(
self.subtreelinvelx,
lower=_HOP_SPEED,
upper=float("inf"),
margin=_HOP_SPEED / 2,
value_at_margin=0.5,
sigmoid="linear",
)

return standing.view(-1) * hopping.view(-1)

def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
max_reward = 1.0
return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
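Both reward functions rely on rewards.tolerance, the dm_control-style shaping utility imported from mani_skill.envs.utils. As a point of reference, here is a minimal sketch of the semantics of the linear-sigmoid variant used for the hopping term; this is an illustration of the behavior, not ManiSkill's actual implementation.

```python
import torch


def linear_tolerance(x, lower, upper, margin=0.0, value_at_margin=0.1):
    """Returns 1 inside [lower, upper]; outside the bounds the value decays
    linearly with distance, reaching value_at_margin at a distance of `margin`
    and 0 beyond that. With margin=0 it is a hard indicator."""
    in_bounds = (x >= lower) & (x <= upper)
    if margin == 0.0:
        return in_bounds.to(x.dtype)
    d = torch.where(x < lower, lower - x, x - upper) / margin
    decayed = torch.clamp(1 - (1 - value_at_margin) * d, min=0.0)
    return torch.where(in_bounds, torch.ones_like(x), decayed)


# e.g. the hopping term in HopperHopEnv: 1 once the COM x-velocity reaches _HOP_SPEED (2.0),
# decaying to 0.5 at a speed of 1.0 and to 0 at 0.0.
# hopping = linear_tolerance(com_vel_x, lower=2.0, upper=float("inf"), margin=1.0, value_at_margin=0.5)
```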