New trajectory format and bug fixes #222

Merged · 25 commits · Mar 5, 2024
2 changes: 1 addition & 1 deletion examples/baselines/ppo/README.md
@@ -5,7 +5,7 @@ Code adapted from [CleanRL](https://github.com/vwxyzjn/cleanrl/)
State based
```bash
python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=50000000
python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=100000000 --num-steps=12
python ppo.py --num_envs=2048 --update_epochs=8 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=10000000 --eval_freq=10
python ppo.py --num_envs=1024 --update_epochs=8 --num_minibatches=32 --env_id="StackCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotStackCube-v1" --total_timesteps=100000000 --num-steps=100
python ppo.py --num_envs=512 --update_epochs=8 --num_minibatches=32 --env_id="TwoRobotPickCube-v1" --total_timesteps=100000000 --num-steps=100
```
63 changes: 43 additions & 20 deletions examples/baselines/ppo/ppo.py
@@ -57,6 +57,8 @@ class Args:
"""the number of parallel evaluation environments"""
num_steps: int = 50
"""the number of steps to run in each environment per policy rollout"""
num_eval_steps: int = 50
"""the number of steps to run in each evaluation environment during evaluation"""
anneal_lr: bool = False
"""Toggle learning rate annealing for policy and value networks"""
gamma: float = 0.8
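
For orientation, here is a minimal sketch of how the new `num_eval_steps` field sits next to the other evaluation knobs in a CleanRL-style `Args` dataclass. The `eval_freq`/`num_eval_envs`/`num_envs` defaults and the `tyro` parsing are assumptions for illustration, not values copied from the file.

```python
# Illustrative only: defaults marked "placeholder" are not from ppo.py.
from dataclasses import dataclass

import tyro


@dataclass
class Args:
    num_envs: int = 512                # placeholder
    """the number of parallel environments"""
    num_eval_envs: int = 8             # placeholder
    """the number of parallel evaluation environments"""
    num_steps: int = 50
    """the number of steps to run in each environment per policy rollout"""
    num_eval_steps: int = 50
    """the number of steps to run in each evaluation environment during evaluation"""
    eval_freq: int = 25                # placeholder
    """evaluate (and optionally save a checkpoint) every eval_freq iterations"""


if __name__ == "__main__":
    args = tyro.cli(Args)
    # One evaluation pass steps every eval env num_eval_steps times.
    print(args.num_eval_steps * args.num_eval_envs, "eval env steps per evaluation")
```
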
@@ -183,9 +185,9 @@ def get_action_and_value(self, x, action=None):
envs = FlattenActionSpaceWrapper(envs)
eval_envs = FlattenActionSpaceWrapper(eval_envs)
if args.capture_video:
eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, video_fps=30)
envs = ManiSkillVectorEnv(envs, args.num_envs, ignore_terminations=False, **env_kwargs)
eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=True, **env_kwargs)
eval_envs = RecordEpisode(eval_envs, output_dir=f"runs/{run_name}/videos", save_trajectory=False, max_steps_per_video=args.num_eval_steps, video_fps=30)
envs = ManiSkillVectorEnv(envs, args.num_envs, **env_kwargs)
eval_envs = ManiSkillVectorEnv(eval_envs, args.num_eval_envs, ignore_terminations=False, **env_kwargs)
assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

agent = Agent(envs).to(device)
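
As a rough sketch of the evaluation-side wrapper order the hunk above establishes: record videos capped at one evaluation window, then vectorize with `ignore_terminations=False` so finished episodes report `final_info`. The environment id, the `num_envs` keyword on `gym.make`, the output path, and the `RecordEpisode` import path are assumptions here, not taken from the diff; only the wrapper arguments mirror the hunk.

```python
# Sketch under stated assumptions; only the wrapper arguments mirror the hunk above.
import gymnasium as gym

import mani_skill2.envs  # registers the ManiSkill environments
from mani_skill2.utils.wrappers import RecordEpisode  # assumed import path
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv

num_eval_envs, num_eval_steps = 8, 50
eval_envs = gym.make("PickCube-v1", num_envs=num_eval_envs)  # assumed gym.make signature

# Cap each saved clip at one evaluation window so videos line up with the eval loop.
eval_envs = RecordEpisode(
    eval_envs,
    output_dir="runs/example/videos",
    save_trajectory=False,
    max_steps_per_video=num_eval_steps,
    video_fps=30,
)
# Keep terminations during evaluation so "final_info" is populated when episodes end.
eval_envs = ManiSkillVectorEnv(eval_envs, num_eval_envs, ignore_terminations=False)
```
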
@@ -225,18 +227,37 @@ def clip_action(action: torch.Tensor):
if iteration % args.eval_freq == 1:
# evaluate
print("Evaluating")
eval_done = False
while not eval_done:
eval_envs.reset()
returns = []
eps_lens = []
successes = []
failures = []
for _ in range(args.num_eval_steps):
with torch.no_grad():
eval_obs, _, eval_terminations, eval_truncations, eval_infos = eval_envs.step(agent.get_action(eval_obs, deterministic=True))
if eval_truncations.any():
eval_done = True
info = eval_infos["final_info"]
episodic_return = info['episode']['r'].mean().cpu().numpy()
print(f"eval_episodic_return={episodic_return}")
writer.add_scalar("charts/eval_success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/eval_episodic_return", episodic_return, global_step)
writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
if "final_info" in eval_infos:
mask = eval_infos["_final_info"]
eps_lens.append(eval_infos["final_info"]["elapsed_steps"][mask].cpu().numpy())
returns.append(eval_infos["final_info"]["episode"]["r"][mask].cpu().numpy())
if "success" in eval_infos:
successes.append(eval_infos["final_info"]["success"][mask].cpu().numpy())
if "fail" in eval_infos:
failures.append(eval_infos["final_info"]["fail"][mask].cpu().numpy())
returns = np.concatenate(returns)
eps_lens = np.concatenate(eps_lens)
print(f"Evaluated {args.num_eval_steps * args.num_envs} steps resulting in {len(eps_lens)} episodes")
if len(successes) > 0:
successes = np.concatenate(successes)
writer.add_scalar("charts/eval_success_rate", successes.mean(), global_step)
print(f"eval_success_rate={successes.mean()}")
if len(failures) > 0:
failures = np.concatenate(failures)
writer.add_scalar("charts/eval_fail_rate", failures.mean(), global_step)
print(f"eval_fail_rate={failures.mean()}")

print(f"eval_episodic_return={returns.mean()}")
writer.add_scalar("charts/eval_episodic_return", returns.mean(), global_step)
writer.add_scalar("charts/eval_episodic_length", eps_lens.mean(), global_step)

if args.save_model and iteration % args.eval_freq == 1:
model_path = f"runs/{run_name}/{args.exp_name}_{iteration}.cleanrl_model"
@@ -266,15 +287,17 @@ def clip_action(action: torch.Tensor):
rewards[step] = reward.view(-1)

if "final_info" in infos:
info = infos["final_info"]
done_mask = info["_final_info"]
episodic_return = info['episode']['r'][done_mask].mean().cpu().numpy()
# print(f"global_step={global_step}, episodic_return={episodic_return}")
writer.add_scalar("charts/success_rate", info["success"][done_mask].float().mean().cpu().numpy(), global_step)
final_info = infos["final_info"]
done_mask = final_info["_final_info"]
episodic_return = final_info['episode']['r'][done_mask].cpu().numpy().mean()
if "success" in final_info:
writer.add_scalar("charts/success_rate", final_info["success"][done_mask].cpu().numpy().mean(), global_step)
if "fail" in final_info:
writer.add_scalar("charts/fail_rate", final_info["fail"][done_mask].cpu().numpy().mean(), global_step)
writer.add_scalar("charts/episodic_return", episodic_return, global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"][done_mask].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/episodic_length", final_info["elapsed_steps"][done_mask].cpu().numpy().mean(), global_step)

final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(info["final_observation"][done_mask]).view(-1)
final_values[step, torch.arange(args.num_envs, device=device)[done_mask]] = agent.get_value(final_info["final_observation"][done_mask]).view(-1)

# bootstrap value according to termination and truncation
with torch.no_grad():
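The `final_values` row filled in above feeds the advantage computation: when an episode is cut off (truncated) rather than genuinely terminated, the value of its final observation should still be bootstrapped. The exact GAE loop is outside this diff; the following is a generic sketch, assuming a CleanRL-style backward pass and that `final_values` is non-zero only at steps where bootstrapping is wanted.

```python
# Generic GAE-with-truncation sketch, not the repository's exact implementation.
import torch


def compute_gae(rewards, values, dones, final_values, next_value, next_done,
                gamma=0.8, gae_lambda=0.9):
    """rewards/values/dones/final_values are [num_steps, num_envs] tensors."""
    num_steps, num_envs = rewards.shape
    advantages = torch.zeros_like(rewards)
    lastgaelam = torch.zeros(num_envs)
    for t in reversed(range(num_steps)):
        if t == num_steps - 1:
            nextnonterminal = 1.0 - next_done
            nextvalues = next_value
        else:
            nextnonterminal = 1.0 - dones[t + 1]
            nextvalues = values[t + 1]
        # A done flag zeroes the usual bootstrap; adding final_values[t] restores
        # V(s_final) for the steps where the episode was merely truncated.
        real_next_values = nextnonterminal * nextvalues + final_values[t]
        delta = rewards[t] + gamma * real_next_values - values[t]
        lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
        advantages[t] = lastgaelam
    return advantages, advantages + values
```
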
6 changes: 0 additions & 6 deletions examples/benchmarking/benchmark_maniskill.py
@@ -1,19 +1,13 @@
# py-spy record -f speedscope -r 1000 -o profile -- python manualtest/benchmark_gpu_sim.py
# python manualtest/benchmark_orbit_sim.py --task "Isaac-Lift-Cube-Franka-v0" --num_envs 512 --headless
import argparse
import time

import gymnasium as gym
import numpy as np
import sapien
import sapien.physx
import sapien.render
import torch
import tqdm

import mani_skill2.envs
from mani_skill2.utils.scene_builder.ai2thor.variants import ArchitecTHORSceneBuilder
from mani_skill2.utils.scene_builder.replicacad.scene_builder import ReplicaCADSceneBuilder
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv
from profiling import Profiler
from mani_skill2.utils.visualization.misc import images_to_video, tile_images
18 changes: 13 additions & 5 deletions mani_skill2/agents/controllers/pd_ee_pose.py
@@ -28,6 +28,7 @@
# NOTE(jigu): not necessary to inherit, just for convenience
class PDEEPosController(PDJointPosController):
config: "PDEEPosControllerConfig"
_target_pose = None

def _initialize_joints(self):
self.initial_qpos = None
@@ -87,7 +88,13 @@ def ee_pose_at_base(self):

def reset(self):
super().reset()
self._target_pose = self.ee_pose_at_base
if self._target_pose is None:
self._target_pose = self.ee_pose_at_base
else:
# TODO (stao): this is a strange way to mask setting individual batched pose parts
self._target_pose.raw_pose[
self.scene._reset_mask
] = self.ee_pose_at_base.raw_pose[self.scene._reset_mask]

def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100):
# Assume the target pose is defined in the base frame
@@ -152,13 +159,14 @@ def set_action(self, action: Array):

def get_state(self) -> dict:
if self.config.use_target:
return {"target_pose": vectorize_pose(self._target_pose)}
return {"target_pose": self._target_pose.raw_pose}
return {}

def set_state(self, state: dict):
if self.config.use_target:
target_pose = state["target_pose"]
self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
# if self.config.use_target:
# target_pose = state["target_pose"]
# self._target_pose = sapien.Pose(target_pose[:3], target_pose[3:])
raise NotImplementedError()


@dataclass
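The masked assignment in `reset()` above is what makes partial resets work for batched controllers: only the environments flagged in `scene._reset_mask` get a fresh target pose, while the rest keep theirs. A toy standalone version of the same idea on a raw `[N, 7]` pose tensor (plain tensors standing in for the `Pose` wrapper and the scene mask):

```python
# Toy masked reset of a batched pose; raw_pose is [N, 7] = xyz position + wxyz quaternion.
import torch

num_envs = 4
target_raw_pose = torch.zeros(num_envs, 7)
target_raw_pose[:, 3] = 1.0                      # stale targets, identity orientation
current_ee_raw_pose = torch.randn(num_envs, 7)   # stand-in for ee_pose_at_base.raw_pose

reset_mask = torch.tensor([True, False, True, False])  # stand-in for scene._reset_mask
# Only the environments being reset pick up a fresh target; rows 1 and 3 are untouched.
target_raw_pose[reset_mask] = current_ee_raw_pose[reset_mask]
print(target_raw_pose)
```
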
23 changes: 19 additions & 4 deletions mani_skill2/agents/controllers/pd_joint_pos.py
@@ -13,6 +13,8 @@

class PDJointPosController(BaseController):
config: "PDJointPosControllerConfig"
_start_qpos = None
_target_qpos = None

def _get_joint_limits(self):
qlimits = self.articulation.get_qlimits()[0, self.joint_indices].cpu().numpy()
@@ -44,8 +46,19 @@ def set_drive_property(self):
def reset(self):
super().reset()
self._step = 0 # counter of simulation steps after action is set
self._start_qpos = self.qpos
self._target_qpos = self.qpos
if self._start_qpos is None:
self._start_qpos = self.qpos.clone()
else:

self._start_qpos[self.scene._reset_mask] = self.qpos[
self.scene._reset_mask
].clone()
if self._target_qpos is None:
self._target_qpos = self.qpos.clone()
else:
self._target_qpos[self.scene._reset_mask] = self.qpos[
self.scene._reset_mask
].clone()

def set_drive_targets(self, targets):
self.articulation.set_joint_drive_targets(
@@ -63,8 +76,10 @@ def set_action(self, action: Array):
else:
self._target_qpos = self._start_qpos + action
else:
# Compatible with mimic controllers
self._target_qpos = torch.broadcast_to(action, self._start_qpos.shape)
# Compatible with mimic controllers. Need to clone here otherwise cannot do in-place replacements in the reset function
self._target_qpos = torch.broadcast_to(
action, self._start_qpos.shape
).clone()
if self.config.interpolate:
self._step_size = (self._target_qpos - self._start_qpos) / self._sim_steps
else:
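The `.clone()` added after `torch.broadcast_to` in `set_action` is what allows the masked in-place update in `reset()` above: a broadcast result is a zero-stride view whose rows share memory, and PyTorch generally refuses (or would silently alias) indexed in-place writes into it. A small standalone demonstration of the failure mode and the fix, using plain tensors rather than the controller:

```python
# Minimal repro of why the clone() matters; toy tensors only.
import torch

action = torch.tensor([0.1, 0.2, 0.3])        # one action shared across 4 envs
target = torch.broadcast_to(action, (4, 3))   # view: all rows alias the same memory

reset_mask = torch.tensor([True, False, False, False])
fresh_qpos = torch.zeros(4, 3)

try:
    target[reset_mask] = fresh_qpos[reset_mask]       # in-place write into the view
except RuntimeError as err:
    print("without clone:", err)                      # overlapping-write error from PyTorch

target = torch.broadcast_to(action, (4, 3)).clone()   # owns its memory now
target[reset_mask] = fresh_qpos[reset_mask]           # plain masked update succeeds
print(target)
```
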
6 changes: 4 additions & 2 deletions mani_skill2/agents/controllers/pd_joint_pos_vel.py
@@ -5,12 +5,12 @@
import torch
from gymnasium import spaces

from .base_controller import BaseController, ControllerConfig
from .pd_joint_pos import PDJointPosController, PDJointPosControllerConfig


class PDJointPosVelController(PDJointPosController):
config: "PDJointPosVelControllerConfig"
_target_qvel = None

def _initialize_action_space(self):
joint_limits = self._get_joint_limits()
@@ -23,7 +23,9 @@ def _initialize_action_space(self):

def reset(self):
super().reset()
self._target_qvel = np.zeros_like(self._target_qpos)
self._target_qvel[self.scene._reset_mask] = torch.zeros_like(
self._target_qpos[self.scene._reset_mask], device=self.device
)

def set_drive_velocity_targets(self, targets):
self.articulation.set_joint_drive_velocity_targets(
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms1/move_bucket.py
@@ -417,8 +417,8 @@ def _get_task_articulations(self):
# bucket max dof is 1 in our data
return [(self.bucket, 2)]

def set_state(self, state: np.ndarray):
super().set_state(state)
def set_state_dict(self, state: np.ndarray):
super().set_state_dict(state)
self._prev_actor_pose = self.bucket.pose

def _get_obs_extra(self):
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms1/open_cabinet_door_drawer.py
@@ -423,8 +423,8 @@ def _get_task_articulations(self):
# The maximum DoF is 6 in our data.
return [(self.cabinet, 8)]

def set_state(self, state: np.ndarray):
super().set_state(state)
def set_state_dict(self, state: np.ndarray):
super().set_state_dict(state)
self._prev_actor_pose = self.target_link.pose


4 changes: 2 additions & 2 deletions mani_skill2/envs/ms1/push_chair.py
@@ -372,8 +372,8 @@ def _get_task_articulations(self):
# The maximum DoF is 20 in our data.
return [(self.chair, 25)]

def set_state(self, state: np.ndarray):
super().set_state(state)
def set_state_dict(self, state: np.ndarray):
super().set_state_dict(state)
self._prev_actor_pose = self.root_link.pose

def _get_obs_extra(self):
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms2/assembly/peg_insertion_side.py
@@ -285,8 +285,8 @@ def _register_human_render_cameras(self):
cam_cfg.pose = look_at([0.5, -0.5, 0.8], [0.05, -0.1, 0.4])
return cam_cfg

def set_state(self, state):
super().set_state(state)
def set_state_dict(self, state):
super().set_state_dict(state)
# NOTE(xuanlin): This way is specific to how we compute goals.
# The general way is to handle variables explicitly
# TODO (stao): can we refactor this out
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms2/assembly/plug_charger.py
@@ -312,6 +312,6 @@ def _setup_lighting(self):
[-0.2, -0.5, 1], [2, 2, 2], shadow=self.enable_shadow
)

def set_state(self, state):
super().set_state(state)
def set_state_dict(self, state):
super().set_state_dict(state)
self.goal_pose = self.receptacle.pose * Pose(q=euler2quat(0, 0, np.pi))
8 changes: 4 additions & 4 deletions mani_skill2/envs/ms2/misc/turn_faucet.py
@@ -418,11 +418,11 @@ def compute_dense_reward(self, info, **kwargs):
def compute_normalized_dense_reward(self, **kwargs):
return self.compute_dense_reward(**kwargs) / 10.0

def get_state(self) -> np.ndarray:
state = super().get_state()
def get_state_dict(self) -> np.ndarray:
state = super().get_state_dict()
return np.hstack([state, self.target_angle])

def set_state(self, state):
def set_state_dict(self, state):
self.target_angle = state[-1]
super().set_state(state[:-1])
super().set_state_dict(state[:-1])
self.last_angle_diff = self.target_angle - self.current_angle
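
The get_state/set_state to get_state_dict/set_state_dict rename repeats across all of the ported MS1/MS2 tasks above and below; the override pattern itself is unchanged: serialize the base state, append the task-specific variables, and peel them off again when restoring. A schematic, self-contained version of the pattern (the base class and the flat-array layout here are hypothetical stand-ins, not the real environment classes):

```python
# Schematic only: BaseTaskEnv is a hypothetical stand-in for the real base class.
import numpy as np


class BaseTaskEnv:
    def __init__(self):
        self._sim_state = np.zeros(6)   # pretend simulation state

    def get_state_dict(self) -> np.ndarray:
        return self._sim_state.copy()

    def set_state_dict(self, state: np.ndarray):
        self._sim_state = state.copy()


class TurnFaucetLikeEnv(BaseTaskEnv):
    """Appends one task variable (the target angle) to the serialized state."""

    def __init__(self):
        super().__init__()
        self.target_angle = 0.0

    def get_state_dict(self) -> np.ndarray:
        state = super().get_state_dict()
        return np.hstack([state, self.target_angle])

    def set_state_dict(self, state: np.ndarray):
        self.target_angle = state[-1]
        super().set_state_dict(state[:-1])


env = TurnFaucetLikeEnv()
env.target_angle = 1.57
snapshot = env.get_state_dict()
env.target_angle = 0.0
env.set_state_dict(snapshot)            # round trip restores the task variable
assert np.isclose(env.target_angle, 1.57)
```
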
4 changes: 2 additions & 2 deletions mani_skill2/envs/ms2/mpm/base_env.py
@@ -453,7 +453,7 @@ def set_sim_state(self, state):
super().set_sim_state(state["sapien"])
self.set_mpm_state(state["mpm"])

def get_state(self):
def get_state_dict(self):
"""Get environment state. Override to include task information (e.g., goal)"""
# with sapien.ProfilerBlock("get_state"):
ret_state = []
@@ -472,7 +472,7 @@ def n_particles(self):
def n_particles(self):
return self.mpm_model.struct.n_particles

def set_state(self, state: np.ndarray):
def set_state_dict(self, state: np.ndarray):
"""Set environment state. Override to include task information (e.g., goal)"""
sim_state = OrderedDict()
mpm_state = OrderedDict()
12 changes: 6 additions & 6 deletions mani_skill2/envs/ms2/mpm/excavate_env.py
@@ -427,22 +427,22 @@ def render(self, draw_box=False, draw_target=False):
self._remove_draw_box(box)
return img

def get_state(self) -> np.ndarray:
state = super().get_state()
def get_state_dict(self) -> np.ndarray:
state = super().get_state_dict()
return np.hstack([state, self.target_num])

def set_state(self, state):
def set_state_dict(self, state):
self.target_num = state[-1]
super().set_state(state[:-1])
super().set_state_dict(state[:-1])


if __name__ == "__main__":
env = ExcavateEnv(reward_mode="dense")
env.reset()
env.agent.set_control_mode("pd_ee_delta_pose")

a = env.get_state()
env.set_state(a)
a = env.get_state_dict()
env.set_state_dict(a)

for i in range(100):
env.step(None)
12 changes: 6 additions & 6 deletions mani_skill2/envs/ms2/mpm/fill_env.py
@@ -311,22 +311,22 @@ def render(self, draw_box=False, draw_target=False):
self._remove_draw_box(box)
return img

def get_state(self) -> np.ndarray:
state = super().get_state()
def get_state_dict(self) -> np.ndarray:
state = super().get_state_dict()
return np.hstack([state, self.beaker_x, self.beaker_y])

def set_state(self, state):
def set_state_dict(self, state):
self.beaker_x = state[-2]
self.beaker_y = state[-1]
super().set_state(state[:-2])
super().set_state_dict(state[:-2])


if __name__ == "__main__":
env = FillEnv()
env.reset()

a = env.get_state()
env.set_state(a)
a = env.get_state_dict()
env.set_state_dict(a)

for _ in range(100):
env.step(None)