New trajectory format and bug fixes (#222)
* work

* work?

* bug fixes and support new state dict format, reduce robot name verbosity

* Update pick_cube.py

* Update test_gpu_envs.py

* Update test_gpu_envs.py

* bug fix with setting before hide poses

* bug fixes

* work

* work

* fix a bunch of bugs

* fix batching for scalar values

* Update benchmark_maniskill.py

* bug fix

* add more useful print for ppo

* fix bug where controller reset did not handle partial reset correctly and add new tests

* bug fixes

* bug fix with set state dict where the merged actor didn't have the right new name, and other bugs

* bug fix to handle both single and multiple envs

* bug fixes with formatting

* Update record.py

* sapien upgrade

* Update sapien_utils.py

* work
StoneT2000 authored and arth-shukla committed Mar 8, 2024
1 parent 84f2177 commit 6f8c671
Showing 40 changed files with 948 additions and 463 deletions.
2 changes: 1 addition & 1 deletion examples/baselines/ppo/README.md
@@ -5,7 +5,7 @@ Code adapted from [CleanRL](https://github.com/vwxyzjn/cleanrl/)
State based
```bash
python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=50000000
python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=100000000 --num-steps=12
python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=10000000 --eval_freq=10
python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="StackCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotStackCube-v1" --total_timesteps=100000000 --num-steps=100
python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotPickCube-v1" --total_timesteps=100000000 --num-steps=100
63 changes: 43 additions & 20 deletions examples/baselines/ppo/ppo.py
@@ -57,6 +57,8 @@ class Args:
"""the number of parallel evaluation environments"""
num_steps: int = 50
"""the number of steps to run in each environment per policy rollout"""
num_eval_steps: int = 50
"""the number of steps to run in each evaluation environment during evaluation"""
anneal_lr: bool = False
"""Toggle learning rate annealing for policy and value networks"""
gamma: float = 0.8
@@ -183,9 +185,9 @@ def get_action_and_value(self, x, action=None):
envs = FlattenActionSpaceWrapper(envs)
eval_envs = FlattenActionSpaceWrapper(eval_envs)
if args.capture_video:
eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, video_fps=30)
envs = ManiSkillVectorEnv(envs, args.num_envs, ignore_terminations=False, **env_kwargs)
eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=True, **env_kwargs)
eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, max_steps_per_video=args.num_eval_steps, video_fps=30)
envs = ManiSkillVectorEnv(envs, args.num_envs, **env_kwargs)
eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=False, **env_kwargs)
assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

agent = Agent(envs).to(device)
@@ -225,18 +227,37 @@ def clip_action(action: torch.Tensor):
if iteration % args.eval_freq == 1:
# evaluate
print("Evaluating")
eval_done = False
while not eval_done:
eval_envs.reset()
returns = []
eps_lens = []
successes = []
failures = []
for _ in range(args.num_eval_steps):
with torch.no_grad():
eval_obs, _, eval_terminations, eval_truncations, eval_infos = eval_envs.step(agent.get_action(eval_obs, deterministic=True))
if eval_truncations.any():
eval_done = True
info = eval_infos["final_info"]
episodic_return = info['episode']['r'].mean().cpu().numpy()
print(f"eval_episodic_return={episodic_return}")
writer.add_scalar("charts/eval_success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/eval_episodic_return", episodic_return, global_step)
writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
if "final_info" in eval_infos:
mask = eval_infos["_final_info"]
eps_lens.append(eval_infos["final_info"]["elapsed_steps"][mask].cpu().numpy())
returns.append(eval_infos["final_info"]["episode"]["r"][mask].cpu().numpy())
if "success" in eval_infos:
successes.append(eval_infos["final_info"]["success"][mask].cpu().numpy())
if "fail" in eval_infos:
failures.append(eval_infos["final_info"]["fail"][mask].cpu().numpy())
returns = np.concatenate(returns)
eps_lens = np.concatenate(eps_lens)
print(f"Evaluated {args.num_eval_steps * args.num_envs} steps resulting in {len(eps_lens)} episodes")
if len(successes) > 0:
successes = np.concatenate(successes)
writer.add_scalar("charts/eval_success_rate", successes.mean(), global_step)
print(f"eval_success_rate={successes.mean()}")
if len(failures) > 0:
failures = np.concatenate(failures)
writer.add_scalar("charts/eval_fail_rate", failures.mean(), global_step)
print(f"eval_fail_rate={failures.mean()}")

print(f"eval_episodic_return={returns.mean()}")
writer.add_scalar("charts/eval_episodic_return", returns.mean(), global_step)
writer.add_scalar("charts/eval_episodic_length", eps_lens.mean(), global_step)

if args.save_model and iteration % args.eval_freq == 1:
model_path = f"runs/{run_name}/{args.exp_name}_{iteration}.cleanrl_model"
@@ -266,15 +287,17 @@ def clip_action(action: torch.Tensor):
rewards[step] = reward.view(-1)

if "final_info" in infos:
info = infos["final_info"]
done_mask = info["_final_info"]
episodic_return = info['episode']['r'][done_mask].mean().cpu().numpy()
# print(f"global_step={global_step}, episodic_return={episodic_return}")
writer.add_scalar("charts/success_rate", info["success"][done_mask].float().mean().cpu().numpy(), global_step)
final_info = infos["final_info"]
done_mask = final_info["_final_info"]
episodic_return = final_info['episode']['r'][done_mask].cpu().numpy().mean()
if "success" in final_info:
writer.add_scalar("charts/success_rate", final_info["success"][done_mask].cpu().numpy().mean(), global_step)
if "fail" in final_info:
writer.add_scalar("charts/fail_rate", final_info["fail"][done_mask].cpu().numpy().mean(), global_step)
writer.add_scalar("charts/episodic_return", episodic_return, global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"][done_mask].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/episodic_length", final_info["elapsed_steps"][done_mask].cpu().numpy().mean(), global_step)

final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(info["final_observation"][done_mask]).view(-1)
final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(final_info["final_observation"][done_mask]).view(-1)

# bootstrap value according to termination and truncation
with torch.no_grad():
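
The new evaluation bookkeeping in the ppo.py hunk above only counts environments whose episodes actually finished, by masking `final_info` with the boolean `_final_info` entry before appending returns, lengths, and success flags. Below is a minimal, self-contained sketch of that aggregation pattern; the tensors are fabricated stand-ins for what `eval_envs.step()` would return, and the three-step loop exists only for illustration.

```python
# Sketch only (not part of this commit): the masked-aggregation pattern from
# the evaluation loop above, applied to fabricated tensors. "_final_info" is a
# boolean mask marking which vectorized envs finished an episode this step.
import numpy as np
import torch

returns, eps_lens, successes = [], [], []

for _ in range(3):  # pretend rollout of 3 evaluation steps
    eval_infos = {  # fabricated stand-in for the infos dict from eval_envs.step()
        "final_info": {
            "episode": {"r": torch.tensor([1.0, 2.0, 0.5, 3.0])},
            "elapsed_steps": torch.tensor([50, 48, 50, 47]),
            "success": torch.tensor([True, False, True, True]),
        },
        "_final_info": torch.tensor([True, False, True, False]),
    }
    if "final_info" in eval_infos:
        mask = eval_infos["_final_info"]  # only finished episodes are recorded
        eps_lens.append(eval_infos["final_info"]["elapsed_steps"][mask].cpu().numpy())
        returns.append(eval_infos["final_info"]["episode"]["r"][mask].cpu().numpy())
        if "success" in eval_infos["final_info"]:
            successes.append(eval_infos["final_info"]["success"][mask].cpu().numpy())

returns, eps_lens = np.concatenate(returns), np.concatenate(eps_lens)
print(f"{len(eps_lens)} finished episodes, mean return {returns.mean():.3f}")
if successes:
    print(f"success rate {np.concatenate(successes).mean():.2f}")
```
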
6 changes: 0 additions & 6 deletions examples/benchmarking/benchmark_maniskill.py
@@ -1,19 +1,13 @@
# py-spy record -f speedscope -r 1000 -o profile -- python manualtest/benchmark_gpu_sim.py
# python manualtest/benchmark_orbit_sim.py --task "Isaac-Lift-Cube-Franka-v0" --num_envs 512 --headless
import argparse
import time

import gymnasium as gym
import numpy as np
import sapien
import sapien.physx
import sapien.render
import torch
import tqdm

import mani_skill2.envs
from mani_skill2.utils.scene_builder.ai2thor.variants import ArchitecTHORSceneBuilder
from mani_skill2.utils.scene_builder.replicacad.scene_builder import ReplicaCADSceneBuilder
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv
from profiling import Profiler
from mani_skill2.utils.visualization.misc import images_to_video, tile_images
18 changes: 13 additions & 5 deletions mani_skill2/agents/controllers/pd_ee_pose.py
@@ -28,6 +28,7 @@
# NOTE(jigu): not necessary to inherit, just for convenience
class PDEEPosController(PDJointPosController):
config: "PDEEPosControllerConfig"
_target_pose = None

def _initialize_joints(self):
self.initial_qpos = None
@@ -87,7 +88,13 @@ def ee_pose_at_base(self):

def reset(self):
super().reset()
self._target_pose = self.ee_pose_at_base
if self._target_pose is None:
self._target_pose = self.ee_pose_at_base
else:
# TODO (stao): this is a strange way to mask setting individual batched pose parts
self._target_pose.raw_pose[
self.scene._reset_mask
] = self.ee_pose_at_base.raw_pose[self.scene._reset_mask]

def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100):
# Assume the target pose is defined in the base frame
@@ -152,13 +159,14 @@ def set_action(self, action: Array):

def get_state(self) -> dict:
if self.config.use_target:
return {"target_pose": vectorize_pose(self._target_pose)}
return {"target_pose": self._target_pose.raw_pose}
return {}

def set_state(self, state: dict):
if self.config.use_target:
target_pose = state["target_pose"]
self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
# if self.config.use_target:
# target_pose = state["target_pose"]
# self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
raise NotImplementedError()


@dataclass
23 changes: 19 additions & 4 deletions mani_skill2/agents/controllers/pd_joint_pos.py
@@ -13,6 +13,8 @@

class PDJointPosController(BaseController):
config: "PDJointPosControllerConfig"
_start_qpos = None
_target_qpos = None

def _get_joint_limits(self):
qlimits = self.articulation.get_qlimits()[0, self.joint_indices].cpu().numpy()
@@ -44,8 +46,19 @@ def set_drive_property(self):
def reset(self):
super().reset()
self._step = 0 # counter of simulation steps after action is set
self._start_qpos = self.qpos
self._target_qpos = self.qpos
if self._start_qpos is None:
self._start_qpos = self.qpos.clone()
else:

self._start_qpos[self.scene._reset_mask] = self.qpos[
self.scene._reset_mask
].clone()
if self._target_qpos is None:
self._target_qpos = self.qpos.clone()
else:
self._target_qpos[self.scene._reset_mask] = self.qpos[
self.scene._reset_mask
].clone()

def set_drive_targets(self, targets):
self.articulation.set_joint_drive_targets(
@@ -63,8 +76,10 @@ def set_action(self, action: Array):
else:
self._target_qpos = self._start_qpos + action
else:
# Compatible with mimic controllers
self._target_qpos = torch.broadcast_to(action, self._start_qpos.shape)
# Compatible with mimic controllers. Need to clone here otherwise cannot do in-place replacements in the reset function
self._target_qpos = torch.broadcast_to(
action, self._start_qpos.shape
).clone()
if self.config.interpolate:
self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps
else:
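
The two controller diffs above (pd_ee_pose.py and pd_joint_pos.py) stop rebuilding cached targets on every reset and instead overwrite them only for the environments that were actually reset, using `self.scene._reset_mask`. Here is a small illustration of that masked in-place update in plain PyTorch; the tensor names and values are invented for the example, and only the masking idiom mirrors the diffs.

```python
# Illustration only: the partial-reset idiom used by the controller resets
# above. Cached per-environment buffers are overwritten just for the envs
# selected by a boolean reset mask, so the other envs keep their old targets.
import torch

num_envs, dof = 4, 7
target_qpos = torch.zeros(num_envs, dof)        # cached drive targets (invented values)
current_qpos = torch.randn(num_envs, dof)       # joint positions after the reset

reset_mask = torch.tensor([True, False, True, False])  # envs 0 and 2 were reset

# masked in-place update: only reset envs receive fresh targets
target_qpos[reset_mask] = current_qpos[reset_mask].clone()

print(target_qpos)  # rows 1 and 3 keep the previous (zero) targets
```

The `.clone()` echoes the comment added in the set_action hunk: a broadcast view cannot be safely written into in place during later partial resets.
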
6 changes: 4 additions & 2 deletions mani_skill2/agents/controllers/pd_joint_pos_vel.py
@@ -5,12 +5,12 @@
import torch
from gymnasium import spaces

from .base_controller import BaseController, ControllerConfig
from .pd_joint_pos import PDJointPosController, PDJointPosControllerConfig


class PDJointPosVelController(PDJointPosController):
config: "PDJointPosVelControllerConfig"
_target_qvel = None

def _initialize_action_space(self):
joint_limits = self._get_joint_limits()
@@ -23,7 +23,9 @@ def _initialize_action_space(self):

def reset(self):
super().reset()
self._target_qvel = np.zeros_like(self._target_qpos)
self._target_qvel[self.scene._reset_mask] = torch.zeros_like(
self._target_qpos[self.scene._reset_mask], device=self.device
)

def set_drive_velocity_targets(self, targets):
self.articulation.set_joint_drive_velocity_targets(
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms1/move_bucket.py
@@ -417,8 +417,8 @@ def _get_task_articulations(self):
# bucket max dof is 1 in our data
return [(self.bucket, 2)]

def set_state(self, state: np.ndarray):
super().set_state(state)
def set_state_dict(self, state: np.ndarray):
super().set_state_dict(state)
self._prev_actor_pose = self.bucket.pose

def _get_obs_extra(self):
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms1/open_cabinet_door_drawer.py
@@ -423,8 +423,8 @@ def _get_task_articulations(self):
# The maximum DoF is 6 in our data.
return [(self.cabinet, 8)]

def set_state(self, state: np.ndarray):
super().set_state(state)
def set_state_dict(self, state: np.ndarray):
super().set_state_dict(state)
self._prev_actor_pose = self.target_link.pose


4 changes: 2 additions & 2 deletions mani_skill2/envs/ms1/push_chair.py
@@ -372,8 +372,8 @@ def _get_task_articulations(self):
# The maximum DoF is 20 in our data.
return [(self.chair, 25)]

def set_state(self, state: np.ndarray):
super().set_state(state)
def set_state_dict(self, state: np.ndarray):
super().set_state_dict(state)
self._prev_actor_pose = self.root_link.pose

def _get_obs_extra(self):
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms2/assembly/peg_insertion_side.py
@@ -285,8 +285,8 @@ def _register_human_render_cameras(self):
cam_cfg.pose = look_at([0.5, -0.5, 0.8], [0.05, -0.1, 0.4])
return cam_cfg

def set_state(self, state):
super().set_state(state)
def set_state_dict(self, state):
super().set_state_dict(state)
# NOTE(xuanlin): This way is specific to how we compute goals.
# The general way is to handle variables explicitly
# TODO (stao): can we refactor this out
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms2/assembly/plug_charger.py
@@ -312,6 +312,6 @@ def _setup_lighting(self):
[-0.2, -0.5, 1], [2, 2, 2], shadow=self.enable_shadow
)

def set_state(self, state):
super().set_state(state)
def set_state_dict(self, state):
super().set_state_dict(state)
self.goal_pose = self.receptacle.pose * Pose(q=euler2quat(0, 0, np.pi))
8 changes: 4 additions & 4 deletions mani_skill2/envs/ms2/misc/turn_faucet.py
@@ -418,11 +418,11 @@ def compute_dense_reward(self, info, **kwargs):
def compute_normalized_dense_reward(self, **kwargs):
return self.compute_dense_reward(**kwargs) / 10.0

def get_state(self) -> np.ndarray:
state = super().get_state()
def get_state_dict(self) -> np.ndarray:
state = super().get_state_dict()
return np.hstack([state, self.target_angle])

def set_state(self, state):
def set_state_dict(self, state):
self.target_angle = state[-1]
super().set_state(state[:-1])
super().set_state_dict(state[:-1])
self.last_angle_diff = self.target_angle - self.current_angle
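
In the MS2 task diffs above, the old get_state/set_state overrides are renamed to get_state_dict/set_state_dict but keep the same shape: each task appends its own variables to the parent state and strips them back off on restore. The sketch below illustrates just that override pattern; `BaseEnv` and the class name are stand-ins for this example, not the real ManiSkill classes.

```python
# Illustrative stand-in (not repository code) for the override pattern in the
# turn_faucet.py hunk above: task variables ride along at the end of the
# parent environment state.
import numpy as np


class BaseEnv:  # stand-in for the real base environment
    def get_state_dict(self) -> np.ndarray:
        return np.zeros(8)  # pretend simulation state

    def set_state_dict(self, state: np.ndarray):
        assert state.shape == (8,)  # pretend restore of the simulation state


class TurnFaucetLikeEnv(BaseEnv):
    target_angle: float = 0.3

    def get_state_dict(self) -> np.ndarray:
        state = super().get_state_dict()
        return np.hstack([state, self.target_angle])  # append the task variable

    def set_state_dict(self, state: np.ndarray):
        self.target_angle = state[-1]       # recover the task variable
        super().set_state_dict(state[:-1])  # hand the rest to the parent


env = TurnFaucetLikeEnv()
env.set_state_dict(env.get_state_dict())  # round-trips cleanly
```
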
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms2/mpm/base_env.py
@@ -453,7 +453,7 @@ def set_sim_state(self, state):
super().set_sim_state(state["sapien"])
self.set_mpm_state(state["mpm"])

def get_state(self):
def get_state_dict(self):
"""Get environment state. Override to include task information (e.g., goal)"""
# with sapien.ProfilerBlock("get_state"):
ret_state = []
@@ -472,7 +472,7 @@ def n_particles(self):
def n_particles(self):
return self.mpm_model.struct.n_particles

def set_state(self, state: np.ndarray):
def set_state_dict(self, state: np.ndarray):
"""Set environment state. Override to include task information (e.g., goal)"""
sim_state = OrderedDict()
mpm_state = OrderedDict()
12 changes: 6 additions & 6 deletions mani_skill2/envs/ms2/mpm/excavate_env.py
@@ -427,22 +427,22 @@ def render(self, draw_box=False, draw_target=False):
self._remove_draw_box(box)
return img

def get_state(self) -> np.ndarray:
state = super().get_state()
def get_state_dict(self) -> np.ndarray:
state = super().get_state_dict()
return np.hstack([state, self.target_num])

def set_state(self, state):
def set_state_dict(self, state):
self.target_num = state[-1]
super().set_state(state[:-1])
super().set_state_dict(state[:-1])


if __name__ == "__main__":
env = ExcavateEnv(reward_mode="dense")
env.reset()
env.agent.set_control_mode("pd_ee_delta_pose")

a = env.get_state()
env.set_state(a)
a = env.get_state_dict()
env.set_state_dict(a)

for i in range(100):
env.step(None)
12 changes: 6 additions & 6 deletions mani_skill2/envs/ms2/mpm/fill_env.py
@@ -311,22 +311,22 @@ def render(self, draw_box=False, draw_target=False):
self._remove_draw_box(box)
return img

def get_state(self) -> np.ndarray:
state = super().get_state()
def get_state_dict(self) -> np.ndarray:
state = super().get_state_dict()
return np.hstack([state, self.beaker_x, self.beaker_y])

def set_state(self, state):
def set_state_dict(self, state):
self.beaker_x = state[-2]
self.beaker_y = state[-1]
super().set_state(state[:-2])
super().set_state_dict(state[:-2])


if __name__ == "__main__":
env = FillEnv()
env.reset()

a = env.get_state()
env.set_state(a)
a = env.get_state_dict()
env.set_state_dict(a)

for _ in range(100):
env.step(None)