Skip to content

Commit

Permalink
speed task done
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhengyiLuo committed Mar 19, 2024
1 parent 4fd77a9 commit 2860a44
Show file tree
Hide file tree
Showing 14 changed files with 275 additions and 29 deletions.
2 changes: 1 addition & 1 deletion README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Press M (disable termination), and press I (start sampling), to see ramdomly sam

Speed:
```
python phc/run_hydra.py env.task=HumanoidSpeedZ env=env_im_vae exp_name=pulse_vae_iclr robot.real_weight_porpotion_boxes=False learning=im_z_fit env.models=['output/HumanoidIm/phc_3/Humanoid_00258000.pth','output/HumanoidIm/phc_comp_3/Humanoid_00023501.pth'] env.motion_file=sample_data//amass_isaac_standing_upright_slim.pkl test=True env.num_envs=1 headless=False epoch=-1
python phc/run_hydra.py env.task=HumanoidSpeedZ env=env_pulse_amp exp_name=pulse_speed robot.real_weight_porpotion_boxes=False learning=pulse_z_task env.models=['output/HumanoidIm/pulse_vae_iclr/Humanoid.pth'] env.motion_file=sample_data/amass_isaac_standing_upright_slim.pkl
```


Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
task: HumanoidSpeedZ
motion_file: ""
num_envs: 1536
env_spacing: 5
Expand All @@ -10,20 +11,13 @@ embedding_size: 32

z_readout: False
fitting: False
z_model: True # For motion symm loss
freeze_hand: False
distill: false
save_kin_info: False
distill_z_model: false
z_read: False

use_vae_prior: True
use_vae_sphere_posterior: False
use_vae_fixed_prior: False
use_vae_sphere_prior: False
use_vae_prior_loss: False


distill: false
save_kin_info: False
distill_z_model: false
Expand All @@ -37,16 +31,8 @@ distill_model_config:
numTrajSamples: 10
z_activation: "silu"
z_type: "vae"

models: ['output/HumanoidIm/pulse_vae_iclr/Humanoid.pth']

real_weight: True
box_body: True
kp_scale: 1
real_weight: True
freeze_hand: False
freeze_toe: False

power_reward: False
power_usage_reward: False
power_usage_coefficient: 0.01
Expand All @@ -63,13 +49,12 @@ speedChangeStepsMin: 100
speedChangeStepsMax: 200
enableTaskObs: True

pdControl: True
control_mode: "isaac_pd"
power_scale: 1.0
controlFrequencyInv: 2 # 30 Hz
stateInit: "Random"
hybridInitProb: 0.5
numAMPObsSteps: 10
enableTaskObs: True

local_root_obs: True
root_height_obs: True
Expand Down
94 changes: 94 additions & 0 deletions phc/data/cfg/learning/pulse_z_task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
params:
seed: 0

algo:
name: amp

model:
name: amp

network:
name: amp_z_reader
separate: True

space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: -1
fixed_sigma: True
learn_sigma: False

mlp:
units: [2048, 1024, 512]
activation: silu
d2rl: False

initializer:
name: default
regularizer:
name: None

disc:
units: [1024, 512]
activation: relu

initializer:
name: default

load_checkpoint: False

config:
name: Humanoid
env_name: rlgpu
multi_gpu: False
mixed_precision: False
normalize_input: True
normalize_value: True
reward_shaper:
scale_value: 1
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 2e-5
lr_schedule: constant
score_to_win: 20000
max_epochs: 10000000
save_best_after: 100
save_frequency: 500
print_stats: False
save_intermediate: True
entropy_coef: 0.0
truncate_grads: True
grad_norm: 50.0
ppo: True
e_clip: 0.2
horizon_length: 32
minibatch_size: 16384
mini_epochs: 6
critic_coef: 5
clip_value: False
clip_actions: False

bounds_loss_coef: 10
amp_obs_demo_buffer_size: 200000
amp_replay_buffer_size: 200000
amp_replay_keep_prob: 0.01
amp_batch_size: 512
amp_minibatch_size: 4096
disc_coef: 5
disc_logit_reg: 0.01
disc_grad_penalty: 5
disc_reward_scale: 2
disc_weight_decay: 0.0001
normalize_amp_input: True

task_reward_w: 1
disc_reward_w: 0

player:
games_num: 50000000
2 changes: 2 additions & 0 deletions phc/env/tasks/humanoid_amp.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, head
self._reset_default_env_ids = []
self._reset_ref_env_ids = []
self._state_reset_happened = False

self._min_motion_len = cfg["env"].get("min_length", -1)

super().__init__(cfg=cfg, sim_params=sim_params, physics_engine=physics_engine, device_type=device_type, device_id=device_id, headless=headless)

Expand Down
86 changes: 86 additions & 0 deletions phc/env/tasks/humanoid_amp_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import torch

import phc.env.tasks.humanoid_amp as humanoid_amp
import phc.env.tasks.humanoid_amp_z as humanoid_amp_z
from phc.utils.flags import flags
class HumanoidAMPTask(humanoid_amp.HumanoidAMP):
def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
Expand Down Expand Up @@ -114,3 +115,88 @@ def _compute_reward(self, actions):

def _draw_task(self):
return

class HumanoidAMPZTask(humanoid_amp_z.HumanoidAMPZ):
    """Task-conditioned base environment built on ``HumanoidAMPZ``.

    Optionally appends task observations to the humanoid observations and
    provides hooks that concrete tasks override:

    * ``get_task_obs_size`` / ``_compute_task_obs`` -- task observation.
    * ``_compute_reward`` -- task reward.
    * ``_update_task`` / ``_reset_task`` / ``_draw_task`` -- no-op by default.
    """

    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
        """Read the task-observation switch, then build the parent environment.

        ``cfg["env"]["enableTaskObs"]`` is required (a missing key raises
        ``KeyError``).
        """
        # Set before calling the parent constructor -- presumably because the
        # parent queries get_obs_size() while it is being built (TODO confirm).
        self._enable_task_obs = cfg["env"]["enableTaskObs"]

        super().__init__(cfg=cfg,
                         sim_params=sim_params,
                         physics_engine=physics_engine,
                         device_type=device_type,
                         device_id=device_id,
                         headless=headless)
        self.has_task = True

    def get_obs_size(self):
        """Return the parent's observation size plus the task part, if enabled."""
        obs_size = super().get_obs_size()
        if self._enable_task_obs:
            obs_size += self.get_task_obs_size()
        return obs_size

    def get_task_obs_size(self):
        """Return the width of the task observation; this base class has none."""
        return 0

    def pre_physics_step(self, actions):
        """Run the parent's pre-physics step, then update the task state."""
        super().pre_physics_step(actions)
        self._update_task()

    def render(self, sync_frame_time=False):
        """Render through the parent, then draw the task overlay.

        The overlay is drawn only when a viewer exists or
        ``flags.server_mode`` is set.
        """
        super().render(sync_frame_time)

        if self.viewer or flags.server_mode:
            self._draw_task()

    def _update_task(self):
        """Hook called on every ``pre_physics_step``; no-op by default."""
        return

    def _reset_envs(self, env_ids):
        """Reset the given environments, then their task state."""
        super()._reset_envs(env_ids)
        self._reset_task(env_ids)

    def _reset_task(self, env_ids):
        """Hook called after environments are reset; no-op by default."""
        return

    def _compute_observations(self, env_ids=None):
        """Write observations for ``env_ids`` into ``self.obs_buf``.

        Args:
            env_ids: indices of the environments to update (used when
                resetting). ``None`` means every environment.
        """
        if env_ids is None:
            # Allocate directly on the target device instead of building the
            # index tensor on the CPU and copying it over.
            env_ids = torch.arange(self.num_envs, device=self.device)

        humanoid_obs = self._compute_humanoid_obs(env_ids)
        if self._enable_task_obs:
            task_obs = self._compute_task_obs(env_ids)
            obs = torch.cat([humanoid_obs, task_obs], dim=-1)
        else:
            obs = humanoid_obs

        if self.obs_v != 2:
            self.obs_buf[env_ids] = obs
            return

        # obs_v == 2: each obs_buf row holds a history of consecutive frames.
        # Unpacking requires obs to be 2-D; only the per-frame width is used.
        _, frame_width = obs.shape

        # Rows whose leading entries are all zero are treated as having no
        # history yet (presumably freshly reset -- TODO confirm).
        leading_abs_sum = self.obs_buf[env_ids, 0:self.past_track_steps].abs().sum(dim=1)
        is_empty = leading_abs_sum == 0
        has_history = ~is_empty

        # Indexing with env_ids yields a copy, so edit the copy and write it
        # back in one assignment.
        history = self.obs_buf[env_ids]
        # No history: fill every slot with the current frame.
        history[is_empty] = torch.tile(obs[is_empty], (1, self.past_track_steps))
        # Existing history: drop the oldest frame and append the current one.
        history[has_history] = torch.cat([history[has_history, frame_width:], obs[has_history]], dim=-1)
        self.obs_buf[env_ids] = history

    def _compute_task_obs(self, env_ids=None):
        """Placeholder for the task observation; subclasses must override.

        NOTE(review): this returns the ``NotImplemented`` sentinel rather than
        raising ``NotImplementedError``; kept as-is because callers outside
        this view may depend on it.
        """
        return NotImplemented

    def _compute_reward(self, actions):
        """Placeholder for the task reward; subclasses must override.

        NOTE(review): returns ``NotImplemented`` instead of raising; see
        ``_compute_task_obs``.
        """
        return NotImplemented

    def _draw_task(self):
        """Hook for drawing task debug visuals; no-op by default."""
        return
25 changes: 22 additions & 3 deletions phc/env/tasks/humanoid_amp_z.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
from collections import deque
from phc.utils.torch_utils import project_to_norm

from phc.utils.motion_lib import MotionLib
from phc.utils.motion_lib_smpl import MotionLibSMPL

from phc.learning.network_loader import load_z_encoder, load_z_decoder

from easydict import EasyDict
from phc.utils.motion_lib_base import FixHeightMode

HACK_MOTION_SYNC = False

class HumanoidAMPZ(humanoid_amp.HumanoidAMP):
Expand Down Expand Up @@ -66,10 +67,28 @@ def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, head

return

def _setup_character_props(self, key_bodies):
    """Set up character properties, then override the action count.

    After the parent's setup runs, ``_num_actions`` is replaced with
    ``cfg['env']['embedding_size']`` (256 when the key is absent) --
    presumably because actions are latent codes of that size (TODO confirm).
    """
    super()._setup_character_props(key_bodies)

    env_cfg = self.cfg['env']
    self._num_actions = env_cfg.get("embedding_size", 256)

def _load_motion(self, motion_file):
assert (self._dof_offsets[-1] == self.num_dof)
if self.humanoid_type in ["smpl", "smplh", "smplx"]:
self._motion_lib = MotionLibSMPL(motion_file=motion_file, device=self.device, masterfoot_conifg=self._masterfoot_config)
motion_lib_cfg = EasyDict({
"motion_file": motion_file,
"device": torch.device("cpu"),
"fix_height": FixHeightMode.full_fix,
"min_length": self._min_motion_len,
"max_length": -1,
"im_eval": flags.im_eval,
"multi_thread": True ,
"smpl_type": self.humanoid_type,
"randomrize_heading": True,
"device": self.device,
})
self._motion_lib = MotionLibSMPL(motion_lib_cfg)

self._motion_lib.load_motions(skeleton_trees=self.skeleton_trees, gender_betas=self.humanoid_shapes.cpu(), limb_weights=self.humanoid_limb_and_weights.cpu(), random_sample=not HACK_MOTION_SYNC)

Expand Down
1 change: 0 additions & 1 deletion phc/env/tasks/humanoid_im.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, head
self._num_traj_samples = cfg["env"]["numTrajSamples"]
else:
self._num_traj_samples = 1
self._min_motion_len = cfg["env"].get("min_length", -1)
self._traj_sample_timestep = 1 / cfg["env"].get("trajSampleTimestepInv", 30)

self.load_humanoid_configs(cfg)
Expand Down
1 change: 0 additions & 1 deletion phc/env/tasks/humanoid_im_distill.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def step(self, actions):

################ GT-Action ################
# actions = gt_action; print("using gt action") # Debugging

# apply actions
self.pre_physics_step(actions)

Expand Down
4 changes: 2 additions & 2 deletions phc/env/tasks/humanoid_reach.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
return

def _load_marker_asset(self):
asset_root = "pulse/data/assets/mjcf/"
asset_root = "phc/data/assets/mjcf/"
asset_file = "location_marker.urdf"

asset_options = gymapi.AssetOptions()
Expand Down Expand Up @@ -261,7 +261,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
return

def _load_marker_asset(self):
asset_root = "pulse/data/assets/mjcf/"
asset_root = "pulse/data/assets/urdf/"
asset_file = "location_marker.urdf"

asset_options = gymapi.AssetOptions()
Expand Down
4 changes: 2 additions & 2 deletions phc/env/tasks/humanoid_speed.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
return

def _load_marker_asset(self):
asset_root = "pulse/data/assets/mjcf/"
asset_root = "phc/data/assets/urdf/"
asset_file = "heading_marker.urdf"

asset_options = gymapi.AssetOptions()
Expand Down Expand Up @@ -378,7 +378,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
return

def _load_marker_asset(self):
asset_root = "pulse/data/assets/mjcf/"
asset_root = "phc/data/assets/urdf/"
asset_file = "heading_marker.urdf"

asset_options = gymapi.AssetOptions()
Expand Down
4 changes: 2 additions & 2 deletions phc/env/tasks/humanoid_strike.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def _build_env(self, env_id, env_ptr, humanoid_asset):
return

def _load_target_asset(self):
asset_root = "pulse/data/assets/mjcf/"
asset_root = "pulse/data/assets/urdf/"
asset_file = "strike_target.urdf"

asset_options = gymapi.AssetOptions()
Expand Down Expand Up @@ -305,7 +305,7 @@ def _build_env(self, env_id, env_ptr, humanoid_asset):
return

def _load_target_asset(self):
asset_root = "pulse/data/assets/mjcf/"
asset_root = "pulse/data/assets/urdf/"
asset_file = "strike_target.urdf"

asset_options = gymapi.AssetOptions()
Expand Down
Loading

0 comments on commit 2860a44

Please sign in to comment.