speed task done

ZhengyiLuo · Mar 19, 2024 · 2860a44 · 2860a44
1 parent 4fd77a9
commit 2860a44
Show file tree

Hide file tree

Showing 14 changed files with 275 additions and 29 deletions.
diff --git a/README.MD b/README.MD
@@ -94,7 +94,7 @@ Press M (disable termination), and press I (start sampling), to see ramdomly sam
 
 Speed:
 ```
-python phc/run_hydra.py env.task=HumanoidSpeedZ env=env_im_vae exp_name=pulse_vae_iclr robot.real_weight_porpotion_boxes=False learning=im_z_fit env.models=['output/HumanoidIm/phc_3/Humanoid_00258000.pth','output/HumanoidIm/phc_comp_3/Humanoid_00023501.pth'] env.motion_file=sample_data//amass_isaac_standing_upright_slim.pkl test=True env.num_envs=1  headless=False epoch=-1
+python phc/run_hydra.py env.task=HumanoidSpeedZ env=env_pulse_amp exp_name=pulse_speed robot.real_weight_porpotion_boxes=False learning=pulse_z_task env.models=['output/HumanoidIm/pulse_vae_iclr/Humanoid.pth'] env.motion_file=sample_data/amass_isaac_standing_upright_slim.pkl
 ```
 
 

diff --git a/phc/data/cfg/env/pulse_amp.yaml → phc/data/cfg/env/env_pulse_amp.yaml b/phc/data/cfg/env/pulse_amp.yaml → phc/data/cfg/env/env_pulse_amp.yaml
@@ -1,3 +1,4 @@
+task: HumanoidSpeedZ
 motion_file: ""
 num_envs: 1536
 env_spacing:  5
@@ -10,20 +11,13 @@ embedding_size: 32
 
 z_readout: False
 fitting: False
-z_model: True # For motion symm loss
-freeze_hand: False
-distill: false
-save_kin_info: False
-distill_z_model: false
-z_read: False
 
 use_vae_prior: True
 use_vae_sphere_posterior: False
 use_vae_fixed_prior: False
 use_vae_sphere_prior: False
 use_vae_prior_loss: False
 
-
 distill: false
 save_kin_info: False
 distill_z_model: false
@@ -37,16 +31,8 @@ distill_model_config:
   numTrajSamples: 10
   z_activation: "silu"
   z_type: "vae"
-
 models: ['output/HumanoidIm/pulse_vae_iclr/Humanoid.pth']
 
-real_weight: True
-box_body: True
-kp_scale: 1
-real_weight: True
-freeze_hand: False
-freeze_toe: False
-
 power_reward: False
 power_usage_reward: False
 power_usage_coefficient: 0.01
@@ -63,13 +49,12 @@ speedChangeStepsMin: 100
 speedChangeStepsMax: 200
 enableTaskObs: True
 
-pdControl: True
+control_mode: "isaac_pd"
 power_scale: 1.0
 controlFrequencyInv: 2 # 30 Hz
 stateInit: "Random"
 hybridInitProb: 0.5
 numAMPObsSteps: 10
-enableTaskObs: True
 
 local_root_obs: True
 root_height_obs: True

diff --git a/phc/data/cfg/learning/pulse_z_task.yaml b/phc/data/cfg/learning/pulse_z_task.yaml
@@ -0,0 +1,94 @@
+params:
+  seed: 0
+  # algo, model and network below all select AMP variants by name.
+  algo:
+    name: amp
+
+  model:
+    name: amp
+
+  network:
+    name: amp_z_reader
+    separate: True
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: -1
+        fixed_sigma: True
+        learn_sigma: False  # presumably sigma stays at the const_initializer value (-1) for the whole run -- confirm
+
+    mlp:
+      units: [2048, 1024, 512]
+      activation: silu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+
+    disc:
+      units: [1024, 512]
+      activation: relu
+
+      initializer:
+        name: default
+
+  load_checkpoint: False
+
+  config:
+    name: Humanoid
+    env_name: rlgpu
+    multi_gpu: False
+    mixed_precision: False
+    normalize_input: True
+    normalize_value: True
+    reward_shaper:
+      scale_value: 1
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 2e-5
+    lr_schedule: constant
+    score_to_win: 20000
+    max_epochs: 10000000
+    save_best_after: 100
+    save_frequency: 500
+    print_stats: False
+    save_intermediate: True
+    entropy_coef: 0.0
+    truncate_grads: True
+    grad_norm: 50.0
+    ppo: True
+    e_clip: 0.2
+    horizon_length: 32
+    minibatch_size: 16384
+    mini_epochs: 6
+    critic_coef: 5
+    clip_value: False
+    clip_actions: False
+
+    bounds_loss_coef: 10
+    amp_obs_demo_buffer_size: 200000
+    amp_replay_buffer_size: 200000
+    amp_replay_keep_prob: 0.01
+    amp_batch_size: 512
+    amp_minibatch_size: 4096
+    disc_coef: 5
+    disc_logit_reg: 0.01
+    disc_grad_penalty: 5
+    disc_reward_scale: 2
+    disc_weight_decay: 0.0001
+    normalize_amp_input: True
+    # Reward mix: disc_reward_w is 0, so presumably only the task reward reaches the policy -- confirm in the agent code.
+    task_reward_w: 1
+    disc_reward_w: 0
+
+    player: 
+      games_num: 50000000
diff --git a/phc/env/tasks/humanoid_amp.py b/phc/env/tasks/humanoid_amp.py
@@ -102,6 +102,8 @@ def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, head
         self._reset_default_env_ids = []
         self._reset_ref_env_ids = []
         self._state_reset_happened = False
+
+        self._min_motion_len = cfg["env"].get("min_length", -1)
 
         super().__init__(cfg=cfg, sim_params=sim_params, physics_engine=physics_engine, device_type=device_type, device_id=device_id, headless=headless)
 

diff --git a/phc/env/tasks/humanoid_amp_task.py b/phc/env/tasks/humanoid_amp_task.py
@@ -29,6 +29,7 @@
 import torch
 
 import phc.env.tasks.humanoid_amp as humanoid_amp
+import phc.env.tasks.humanoid_amp_z as humanoid_amp_z
 from phc.utils.flags import flags
 class HumanoidAMPTask(humanoid_amp.HumanoidAMP):
     def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
@@ -114,3 +115,88 @@ def _compute_reward(self, actions):
 
     def _draw_task(self):
         return
+
+class HumanoidAMPZTask(humanoid_amp_z.HumanoidAMPZ):  # HumanoidAMPZ plus overridable task hooks (obs, update, reset, reward, draw)
+    def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless):
+        self._enable_task_obs = cfg["env"]["enableTaskObs"]
+        # Set before super().__init__(); presumably the parent constructor calls get_obs_size() -- TODO confirm.
+        super().__init__(cfg=cfg,
+                         sim_params=sim_params,
+                         physics_engine=physics_engine,
+                         device_type=device_type,
+                         device_id=device_id,
+                         headless=headless)
+        self.has_task = True
+        return
+
+    # Observation width = parent width, plus get_task_obs_size() when enableTaskObs is set.
+    def get_obs_size(self):
+        obs_size = super().get_obs_size()
+        if (self._enable_task_obs):
+            task_obs_size = self.get_task_obs_size()
+            obs_size += task_obs_size
+        return obs_size
+
+    def get_task_obs_size(self):  # base class adds no task features; presumably overridden by concrete tasks
+        return 0
+
+    # Runs the parent's pre-physics step first, then the _update_task() hook.
+    def pre_physics_step(self, actions):
+        super().pre_physics_step(actions)
+        self._update_task()
+
+        return
+
+    def render(self, sync_frame_time=False):  # parent render, then the task overlay
+        super().render(sync_frame_time)
+
+        if self.viewer or flags.server_mode:
+            self._draw_task()
+        return
+
+    def _update_task(self):  # no-op hook invoked from pre_physics_step()
+        return
+
+    def _reset_envs(self, env_ids):  # parent reset, then the task reset hook for the same env_ids
+        super()._reset_envs(env_ids)
+        self._reset_task(env_ids)
+        return
+
+    def _reset_task(self, env_ids):  # no-op hook invoked from _reset_envs()
+        return
+
+    def _compute_observations(self, env_ids=None):  # fills self.obs_buf rows for env_ids (all envs when None)
+        # env_ids is used for resetting
+        if env_ids is None:
+            env_ids = torch.arange(self.num_envs).to(self.device)
+        humanoid_obs = self._compute_humanoid_obs(env_ids)
+
+        if (self._enable_task_obs):
+            task_obs = self._compute_task_obs(env_ids)
+            obs = torch.cat([humanoid_obs, task_obs], dim=-1)
+        else:
+            obs = humanoid_obs
+        # obs_v == 2 keeps a rolling history per row: N new columns are appended and the oldest N dropped.
+        if self.obs_v == 2:
+            # Tensor-indexing obs_buf[env_ids] yields a copy, so rows are edited in obs_slice and written back below.
+            B, N = obs.shape  # only N (width of the current obs) is used; B is unused
+            sums = self.obs_buf[env_ids, 0:self.past_track_steps].abs().sum(dim=1)
+            zeros = sums == 0  # rows whose first past_track_steps columns are all zero
+            nonzero = ~zeros
+            obs_slice = self.obs_buf[env_ids]
+            obs_slice[zeros] = torch.tile(obs[zeros], (1, self.past_track_steps))  # such rows: repeat the current obs past_track_steps times
+            obs_slice[nonzero] = torch.cat([obs_slice[nonzero, N:], obs[nonzero]], dim=-1)  # others: shift left by N, append current obs
+            self.obs_buf[env_ids] = obs_slice
+        else:
+            self.obs_buf[env_ids] = obs
+
+        return
+
+    def _compute_task_obs(self, env_ids=None):  # hook; base returns the NotImplemented sentinel instead of raising
+        return NotImplemented  # NOTE(review): raising NotImplementedError would fail louder -- confirm intent
+
+    def _compute_reward(self, actions):  # hook; base returns the NotImplemented sentinel instead of raising
+        return NotImplemented  # NOTE(review): same as _compute_task_obs -- confirm intent
+
+    def _draw_task(self):  # no-op hook invoked from render() when a viewer or server mode is active
+        return
diff --git a/phc/env/tasks/humanoid_amp_z.py b/phc/env/tasks/humanoid_amp_z.py
@@ -13,11 +13,12 @@
 from collections import deque
 from phc.utils.torch_utils import project_to_norm
 
-from phc.utils.motion_lib import MotionLib
 from phc.utils.motion_lib_smpl import MotionLibSMPL 
-
 from phc.learning.network_loader import load_z_encoder, load_z_decoder
 
+from easydict import EasyDict
+from phc.utils.motion_lib_base import FixHeightMode
+
 HACK_MOTION_SYNC = False
 
 class HumanoidAMPZ(humanoid_amp.HumanoidAMP):
@@ -66,10 +67,28 @@ def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, head
 
         return
 
+    def _setup_character_props(self, key_bodies):  # parent setup, then set _num_actions from the env config
+        super()._setup_character_props(key_bodies)
+        self._num_actions = self.cfg['env'].get("embedding_size", 256)  # env.embedding_size, falling back to 256 when absent
+
+        return
+
     def _load_motion(self, motion_file):
         assert (self._dof_offsets[-1] == self.num_dof)
         if self.humanoid_type in ["smpl", "smplh", "smplx"]:
-            self._motion_lib = MotionLibSMPL(motion_file=motion_file, device=self.device, masterfoot_conifg=self._masterfoot_config)
+            motion_lib_cfg = EasyDict({
+                "motion_file": motion_file,
+                "device": torch.device("cpu"),
+                "fix_height": FixHeightMode.full_fix,
+                "min_length": self._min_motion_len,
+                "max_length": -1,
+                "im_eval": flags.im_eval,
+                "multi_thread": True ,
+                "smpl_type": self.humanoid_type,
+                "randomrize_heading": True,
+                "device": self.device,
+            })
+            self._motion_lib = MotionLibSMPL(motion_lib_cfg)
 
             self._motion_lib.load_motions(skeleton_trees=self.skeleton_trees, gender_betas=self.humanoid_shapes.cpu(), limb_weights=self.humanoid_limb_and_weights.cpu(), random_sample=not HACK_MOTION_SYNC)
 

diff --git a/phc/env/tasks/humanoid_im.py b/phc/env/tasks/humanoid_im.py
@@ -42,7 +42,6 @@ def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, head
             self._num_traj_samples = cfg["env"]["numTrajSamples"]
         else:
             self._num_traj_samples = 1
-        self._min_motion_len = cfg["env"].get("min_length", -1)
         self._traj_sample_timestep = 1 / cfg["env"].get("trajSampleTimestepInv", 30)
 
         self.load_humanoid_configs(cfg)

diff --git a/phc/env/tasks/humanoid_im_distill.py b/phc/env/tasks/humanoid_im_distill.py
@@ -206,7 +206,6 @@ def step(self, actions):
 
             ################ GT-Action ################
             # actions = gt_action; print("using gt action") # Debugging 
-
         # apply actions
         self.pre_physics_step(actions)
 

diff --git a/phc/env/tasks/humanoid_reach.py b/phc/env/tasks/humanoid_reach.py
@@ -77,7 +77,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         return
 
     def _load_marker_asset(self):
-        asset_root = "pulse/data/assets/mjcf/"
+        asset_root = "phc/data/assets/mjcf/"
         asset_file = "location_marker.urdf"
 
         asset_options = gymapi.AssetOptions()
@@ -261,7 +261,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         return
 
     def _load_marker_asset(self):
-        asset_root = "pulse/data/assets/mjcf/"
+        asset_root = "pulse/data/assets/urdf/"
         asset_file = "location_marker.urdf"
 
         asset_options = gymapi.AssetOptions()

diff --git a/phc/env/tasks/humanoid_speed.py b/phc/env/tasks/humanoid_speed.py
@@ -108,7 +108,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         return
 
     def _load_marker_asset(self):
-        asset_root = "pulse/data/assets/mjcf/"
+        asset_root = "phc/data/assets/urdf/"
         asset_file = "heading_marker.urdf"
 
         asset_options = gymapi.AssetOptions()
@@ -378,7 +378,7 @@ def _create_envs(self, num_envs, spacing, num_per_row):
         return
 
     def _load_marker_asset(self):
-        asset_root = "pulse/data/assets/mjcf/"
+        asset_root = "phc/data/assets/urdf/"
         asset_file = "heading_marker.urdf"
 
         asset_options = gymapi.AssetOptions()

diff --git a/phc/env/tasks/humanoid_strike.py b/phc/env/tasks/humanoid_strike.py
@@ -69,7 +69,7 @@ def _build_env(self, env_id, env_ptr, humanoid_asset):
         return
 
     def _load_target_asset(self):
-        asset_root = "pulse/data/assets/mjcf/"
+        asset_root = "pulse/data/assets/urdf/"
         asset_file = "strike_target.urdf"
 
         asset_options = gymapi.AssetOptions()
@@ -305,7 +305,7 @@ def _build_env(self, env_id, env_ptr, humanoid_asset):
         return
 
     def _load_target_asset(self):
-        asset_root = "pulse/data/assets/mjcf/"
+        asset_root = "pulse/data/assets/urdf/"
         asset_file = "strike_target.urdf"
 
         asset_options = gymapi.AssetOptions()
-Original file line number
+Diff line change
@@ Expand Up @@
     Speed:
     ```
-    python phc/run_hydra.py env.task=HumanoidSpeedZ env=env_im_vae exp_name=pulse_vae_iclr robot.real_weight_porpotion_boxes=False learning=im_z_fit env.models=['output/HumanoidIm/phc_3/Humanoid_00258000.pth','output/HumanoidIm/phc_comp_3/Humanoid_00023501.pth'] env.motion_file=sample_data//amass_isaac_standing_upright_slim.pkl test=True env.num_envs=1  headless=False epoch=-1
+    python phc/run_hydra.py env.task=HumanoidSpeedZ env=env_pulse_amp exp_name=pulse_speed robot.real_weight_porpotion_boxes=False learning=pulse_z_task env.models=['output/HumanoidIm/pulse_vae_iclr/Humanoid.pth'] env.motion_file=sample_data/amass_isaac_standing_upright_slim.pkl
     ```
@@ Expand Down @@