🚀 [RofuncRL] Update tasks
Skylark0924 committed Aug 24, 2023
1 parent 70db278 commit e64f3f1
Showing 30 changed files with 363 additions and 341 deletions.
2 changes: 1 addition & 1 deletion doc/source/lfd/RofuncRL/ASE.md
@@ -65,7 +65,7 @@ python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidASESt

### Motion visualization

Visualize the motion `reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy` by using `HumanoidViewMotion`.
If you want to visualize a motion, you can use the `HumanoidViewMotion` task. For example, the following command visualizes the motion `reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy`.

```shell
python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidViewMotion --motion_file reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy --inference --headless=False
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py
@@ -33,8 +33,8 @@
from isaacgym import gymtorch

from rofunc.utils.oslab.path import get_rofunc_path
from ..base.vec_task import VecTask
from ..utils.torch_jit_utils import *
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *

DOF_BODY_IDS = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14]
DOF_OFFSETS = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28]
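Across these task files, the commit replaces package-relative imports such as `from ..base.vec_task import VecTask` with absolute module paths. One practical benefit, sketched below under the assumption that the `rofunc` package is importable, is that absolute paths let a task class be resolved by name regardless of where the caller lives (e.g. from a task registry); `load_task_class` is a hypothetical helper, not part of this commit.

```python
import importlib

def load_task_class(module_path: str, class_name: str):
    """Resolve a task class from its fully qualified module path."""
    module = importlib.import_module(module_path)  # independent of the caller's package
    return getattr(module, class_name)

# Hypothetical usage (requires rofunc and isaacgym to be installed):
# VecTask = load_task_class("rofunc.learning.RofuncRL.tasks.base.vec_task", "VecTask")
```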
78 changes: 45 additions & 33 deletions rofunc/learning/RofuncRL/tasks/ant.py
@@ -28,10 +28,11 @@

import os

from isaacgym import gymtorch
from isaacgym import gymapi
from .base.vec_task import VecTask
from .utils.torch_jit_utils import *
from isaacgym import gymtorch

from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
from rofunc.utils.oslab.path import get_rofunc_path


@@ -64,7 +65,9 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.cfg["env"]["numObservations"] = 60
self.cfg["env"]["numActions"] = 8

super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render)
super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device,
graphics_device_id=graphics_device_id, headless=headless,
virtual_screen_capture=virtual_screen_capture, force_render=force_render)

if self.viewer != None:
cam_pos = gymapi.Vec3(50.0, 25.0, 2.4)
@@ -93,7 +96,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float)
zero_tensor = torch.tensor([0.0], device=self.device)
self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower,
torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, self.initial_dof_pos))
torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper,
self.initial_dof_pos))
self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float)

# initialize some data used later on
@@ -107,11 +111,11 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.targets = to_torch([1000, 0, 0], device=self.device).repeat((self.num_envs, 1))
self.target_dirs = to_torch([1, 0, 0], device=self.device).repeat((self.num_envs, 1))
self.dt = self.cfg["sim"]["dt"]
self.potentials = to_torch([-1000./self.dt], device=self.device).repeat(self.num_envs)
self.potentials = to_torch([-1000. / self.dt], device=self.device).repeat(self.num_envs)
self.prev_potentials = self.potentials.clone()

def create_sim(self):
self.up_axis_idx = 2 # index of up axis: Y=1, Z=2
self.up_axis_idx = 2 # index of up axis: Y=1, Z=2
self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)

self._create_ground_plane()
@@ -162,7 +166,8 @@ def _create_envs(self, num_envs, spacing, num_per_row):
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*get_axis_params(0.44, self.up_axis_idx))

self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device)
self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w],
device=self.device)

self.torso_index = 0
self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset)
@@ -208,7 +213,8 @@ def _create_envs(self, num_envs, spacing, num_per_row):
self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device)

for i in range(len(extremity_names)):
self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i])
self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0],
extremity_names[i])

def compute_reward(self, actions):
self.rew_buf[:], self.reset_buf[:] = compute_ant_reward(
@@ -232,10 +238,11 @@ def compute_observations(self):
self.gym.refresh_dof_state_tensor(self.sim)
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_force_sensor_tensor(self.sim)
#print("Feet forces and torques: ", self.vec_sensor_tensor[0, :])
# print("Feet forces and torques: ", self.vec_sensor_tensor[0, :])
# print(self.vec_sensor_tensor.shape)

self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[:] = compute_ant_observations(
self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[
:] = compute_ant_observations(
self.obs_buf, self.root_states, self.targets, self.potentials,
self.inv_start_rot, self.dof_pos, self.dof_vel,
self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale,
@@ -250,7 +257,8 @@ def reset_idx(self, env_ids):
positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=str(self.device))
velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=str(self.device))

self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, self.dof_limits_upper)
self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
self.dof_limits_upper)
self.dof_vel[env_ids] = velocities

env_ids_int32 = env_ids.to(dtype=torch.int32)
@@ -299,37 +307,40 @@ def post_physics_step(self):
origin = self.gym.get_env_origin(self.envs[i])
pose = self.root_states[:, 0:3][i].cpu().numpy()
glob_pos = gymapi.Vec3(origin.x + pose[0], origin.y + pose[1], origin.z + pose[2])
points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(),
glob_pos.y + 4 * self.heading_vec[i, 1].cpu().numpy(),
glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()])
points.append(
[glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(),
glob_pos.y + 4 * self.heading_vec[i, 1].cpu().numpy(),
glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()])
colors.append([0.97, 0.1, 0.06])
points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(), glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(),
points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(),
glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(),
glob_pos.z + 4 * self.up_vec[i, 2].cpu().numpy()])
colors.append([0.05, 0.99, 0.04])

self.gym.add_lines(self.viewer, None, self.num_envs * 2, points, colors)


#####################################################################
###=========================jit functions=========================###
#####################################################################


@torch.jit.script
def compute_ant_reward(
obs_buf,
reset_buf,
progress_buf,
actions,
up_weight,
heading_weight,
potentials,
prev_potentials,
actions_cost_scale,
energy_cost_scale,
joints_at_limit_cost_scale,
termination_height,
death_cost,
max_episode_length
obs_buf,
reset_buf,
progress_buf,
actions,
up_weight,
heading_weight,
potentials,
prev_potentials,
actions_cost_scale,
energy_cost_scale,
joints_at_limit_cost_scale,
termination_height,
death_cost,
max_episode_length
):
# type: (Tensor, Tensor, Tensor, Tensor, float, float, Tensor, Tensor, float, float, float, float, float, float) -> Tuple[Tensor, Tensor]

@@ -351,10 +362,11 @@ def compute_ant_reward(
progress_reward = potentials - prev_potentials

total_reward = progress_reward + alive_reward + up_reward + heading_reward - \
actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale
actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale

# adjust reward for fallen agents
total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost, total_reward)
total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost,
total_reward)

# reset agents
reset = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(reset_buf), reset_buf)
@@ -397,4 +409,4 @@ def compute_ant_observations(obs_buf, root_states, targets, potentials,
dof_vel * dof_vel_scale, sensor_force_torques.view(-1, 24) * contact_force_scale,
actions), dim=-1)

return obs, potentials, prev_potentials_new, up_vec, heading_vec
return obs, potentials, prev_potentials_new, up_vec, heading_vec
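For orientation, `compute_ant_reward` above sums a potential-based progress term with alive, upright, and heading bonuses, then subtracts action, energy, and joint-limit penalties. A simplified, non-jitted sketch of that composition; the scale values are illustrative placeholders, the real ones come from the task config.

```python
import torch

def ant_total_reward(progress_reward: torch.Tensor, alive_reward: torch.Tensor,
                     up_reward: torch.Tensor, heading_reward: torch.Tensor,
                     actions_cost: torch.Tensor, electricity_cost: torch.Tensor,
                     dof_at_limit_cost: torch.Tensor,
                     actions_cost_scale: float = 0.005,        # placeholder weight
                     energy_cost_scale: float = 0.05,          # placeholder weight
                     joints_at_limit_cost_scale: float = 0.1,  # placeholder weight
                     ) -> torch.Tensor:
    # Bonuses minus weighted penalties, mirroring the structure in the hunk above.
    return (progress_reward + alive_reward + up_reward + heading_reward
            - actions_cost_scale * actions_cost
            - energy_cost_scale * electricity_cost
            - dof_at_limit_cost * joints_at_limit_cost_scale)
```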
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/anymal.py
@@ -33,7 +33,7 @@
from isaacgym import gymtorch
from isaacgym.torch_utils import *

from .base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask


class Anymal(VecTask):
10 changes: 5 additions & 5 deletions rofunc/learning/RofuncRL/tasks/anymal_terrain.py
@@ -31,7 +31,7 @@
from isaacgym import gymtorch
from isaacgym.torch_utils import *

from .base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask


class AnymalTerrain(VecTask):
@@ -358,7 +358,7 @@ def compute_reward(self):

# stumbling penalty
stumble = (torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) > 5.) * (
torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.)
torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.)
rew_stumble = torch.sum(stumble, dim=1) * self.rew_scales["stumble"]

# action rate penalty
@@ -456,7 +456,7 @@ def update_terrain_level(self, env_ids):
return
distance = torch.norm(self.root_states[env_ids, :2] - self.env_origins[env_ids, :2], dim=1)
self.terrain_levels[env_ids] -= 1 * (
distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25)
distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25)
self.terrain_levels[env_ids] += 1 * (distance > self.terrain.env_length / 2)
self.terrain_levels[env_ids] = torch.clip(self.terrain_levels[env_ids], 0) % self.terrain.env_rows
self.env_origins[env_ids] = self.terrain_origins[self.terrain_levels[env_ids], self.terrain_types[env_ids]]
@@ -469,7 +469,7 @@ def pre_physics_step(self, actions):
self.actions = actions.clone().to(self.device)
for i in range(self.decimation):
torques = torch.clip(self.Kp * (
self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel,
self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel,
-80., 80.)
self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(torques))
self.torques = torques.view(self.torques.shape)
@@ -554,7 +554,7 @@ def get_heights(self, env_ids=None):
self.height_points[env_ids]) + (self.root_states[env_ids, :3]).unsqueeze(1)
else:
points = quat_apply_yaw(self.base_quat.repeat(1, self.num_height_points), self.height_points) + (
self.root_states[:, :3]).unsqueeze(1)
self.root_states[:, :3]).unsqueeze(1)

points += self.terrain.border_size
points = (points / self.terrain.horizontal_scale).long()
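The `pre_physics_step` hunk above applies a clipped PD law: actions are scaled into joint-position targets around the default pose, and the resulting torques are limited to ±80. A standalone sketch with illustrative gains (the task itself reads `Kp`, `Kd`, and the action scale from its config):

```python
import torch

def pd_torques(actions: torch.Tensor, dof_pos: torch.Tensor, dof_vel: torch.Tensor,
               default_dof_pos: torch.Tensor, action_scale: float = 0.5,
               kp: float = 80.0, kd: float = 2.0, limit: float = 80.0) -> torch.Tensor:
    # Joint-position targets are offsets from the default pose.
    targets = action_scale * actions + default_dof_pos
    # Proportional-derivative torque, clipped to the actuator limit.
    return torch.clip(kp * (targets - dof_pos) - kd * dof_vel, -limit, limit)
```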
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid.py
@@ -33,8 +33,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from ..base.vec_task import VecTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class Humanoid(VecTask):
13 changes: 9 additions & 4 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py
@@ -32,9 +32,10 @@
from gym import spaces
from isaacgym.torch_utils import *

from .humanoid import Humanoid, dof_to_obs
import rofunc as rf
from rofunc.learning.RofuncRL.tasks.ase.motion_lib import MotionLib
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid import Humanoid, dof_to_obs
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidAMP(Humanoid):
Expand All @@ -61,8 +62,12 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
virtual_screen_capture=virtual_screen_capture, force_render=force_render)

motion_file = cfg['env'].get('motion_file', None)
motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../../../../../examples/data/amp/" + motion_file)
if rf.oslab.is_absl_path(motion_file):
motion_file_path = motion_file
else:
motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../../../../examples/data/amp/" + motion_file)
self._load_motion(motion_file_path)

self._load_motion(motion_file_path)

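The new branch in `HumanoidAMP.__init__` lets `motion_file` be either an absolute path (used as-is) or a file name resolved against the bundled `examples/data/amp` directory. A rough standard-library equivalent, assuming `rf.oslab.is_absl_path` behaves like `os.path.isabs`; the default `data_root` below is illustrative:

```python
import os

def resolve_motion_file(motion_file: str,
                        data_root: str = "examples/data/amp") -> str:
    # Absolute paths are used directly; bare names are looked up in the data root.
    if os.path.isabs(motion_file):
        return motion_file
    return os.path.join(data_root, motion_file)
```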
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py
@@ -29,7 +29,7 @@
from isaacgym import gymtorch
from isaacgym.torch_utils import *

from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP


class HumanoidAMPGetupTask(HumanoidAMP):
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py
@@ -28,7 +28,7 @@

import torch

from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP


class HumanoidAMPTask(HumanoidAMP):
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils

TAR_ACTOR_ID = 1
TAR_FACING_ACTOR_ID = 2
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidLocationTask(HumanoidAMPTask):
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py
@@ -32,7 +32,7 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP

# import env.tasks.humanoid_amp_getup as humanoid_amp_getup
# # import env.tasks.humanoid_strike as humanoid_strike
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidReachTask(HumanoidAMPTask):
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidStrikeTask(HumanoidAMPTask):
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py
@@ -30,7 +30,7 @@

from isaacgym import gymtorch

from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP


class HumanoidViewMotionTask(HumanoidAMP):