🚀 [RofuncRL] Update tasks
Skylark0924 committed Aug 24, 2023
1 parent 70db278 commit e64f3f1
Showing 30 changed files with 363 additions and 341 deletions.
2 changes: 1 addition & 1 deletion doc/source/lfd/RofuncRL/ASE.md
@@ -65,7 +65,7 @@ python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidASESt

### Motion visualization

Visualize the motion `reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy` by using `HumanoidViewMotion`.
If you want to visualize a motion, you can use the `HumanoidViewMotion` task. For example, the following command visualizes the motion `reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy`.

```shell
python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidViewMotion --motion_file reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy --inference --headless=False
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py
@@ -33,8 +33,8 @@
from isaacgym import gymtorch

from rofunc.utils.oslab.path import get_rofunc_path
from ..base.vec_task import VecTask
from ..utils.torch_jit_utils import *
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *

DOF_BODY_IDS = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14]
DOF_OFFSETS = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28]
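Across these task files, the commit replaces package-relative imports such as `from ..base.vec_task import VecTask` with absolute module paths. One practical benefit, sketched below under the assumption that the `rofunc` package is importable, is that absolute paths let a task class be resolved by name regardless of where the caller lives (e.g. from a task registry); `load_task_class` is a hypothetical helper, not part of this commit.

```python
import importlib

def load_task_class(module_path: str, class_name: str):
    """Resolve a task class from its fully qualified module path."""
    module = importlib.import_module(module_path)  # independent of the caller's package
    return getattr(module, class_name)

# Hypothetical usage (requires rofunc and isaacgym to be installed):
# VecTask = load_task_class("rofunc.learning.RofuncRL.tasks.base.vec_task", "VecTask")
```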
78 changes: 45 additions & 33 deletions rofunc/learning/RofuncRL/tasks/ant.py
@@ -28,10 +28,11 @@

import os

from isaacgym import gymtorch
from isaacgym import gymapi
from .base.vec_task import VecTask
from .utils.torch_jit_utils import *
from isaacgym import gymtorch

from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
from rofunc.utils.oslab.path import get_rofunc_path


@@ -64,7 +65,9 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.cfg["env"]["numObservations"] = 60
self.cfg["env"]["numActions"] = 8

super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render)
super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device,
graphics_device_id=graphics_device_id, headless=headless,
virtual_screen_capture=virtual_screen_capture, force_render=force_render)

if self.viewer != None:
cam_pos = gymapi.Vec3(50.0, 25.0, 2.4)
@@ -93,7 +96,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float)
zero_tensor = torch.tensor([0.0], device=self.device)
self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower,
torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, self.initial_dof_pos))
torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper,
self.initial_dof_pos))
self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float)

# initialize some data used later on
@@ -107,11 +111,11 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
self.targets = to_torch([1000, 0, 0], device=self.device).repeat((self.num_envs, 1))
self.target_dirs = to_torch([1, 0, 0], device=self.device).repeat((self.num_envs, 1))
self.dt = self.cfg["sim"]["dt"]
self.potentials = to_torch([-1000./self.dt], device=self.device).repeat(self.num_envs)
self.potentials = to_torch([-1000. / self.dt], device=self.device).repeat(self.num_envs)
self.prev_potentials = self.potentials.clone()

def create_sim(self):
self.up_axis_idx = 2 # index of up axis: Y=1, Z=2
self.up_axis_idx = 2 # index of up axis: Y=1, Z=2
self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)

self._create_ground_plane()
@@ -162,7 +166,8 @@ def _create_envs(self, num_envs, spacing, num_per_row):
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*get_axis_params(0.44, self.up_axis_idx))

self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device)
self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w],
device=self.device)

self.torso_index = 0
self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset)
@@ -208,7 +213,8 @@ def _create_envs(self, num_envs, spacing, num_per_row):
self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device)

for i in range(len(extremity_names)):
self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i])
self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0],
extremity_names[i])

def compute_reward(self, actions):
self.rew_buf[:], self.reset_buf[:] = compute_ant_reward(
@@ -232,10 +238,11 @@ def compute_observations(self):
self.gym.refresh_dof_state_tensor(self.sim)
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_force_sensor_tensor(self.sim)
#print("Feet forces and torques: ", self.vec_sensor_tensor[0, :])
# print("Feet forces and torques: ", self.vec_sensor_tensor[0, :])
# print(self.vec_sensor_tensor.shape)

self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[:] = compute_ant_observations(
self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[
:] = compute_ant_observations(
self.obs_buf, self.root_states, self.targets, self.potentials,
self.inv_start_rot, self.dof_pos, self.dof_vel,
self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale,
@@ -250,7 +257,8 @@ def reset_idx(self, env_ids):
positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=str(self.device))
velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=str(self.device))

self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, self.dof_limits_upper)
self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower,
self.dof_limits_upper)
self.dof_vel[env_ids] = velocities

env_ids_int32 = env_ids.to(dtype=torch.int32)
@@ -299,37 +307,40 @@ def post_physics_step(self):
origin = self.gym.get_env_origin(self.envs[i])
pose = self.root_states[:, 0:3][i].cpu().numpy()
glob_pos = gymapi.Vec3(origin.x + pose[0], origin.y + pose[1], origin.z + pose[2])
points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(),
glob_pos.y + 4 * self.heading_vec[i, 1].cpu().numpy(),
glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()])
points.append(
[glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(),
glob_pos.y + 4 * self.heading_vec[i, 1].cpu().numpy(),
glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()])
colors.append([0.97, 0.1, 0.06])
points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(), glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(),
points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(),
glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(),
glob_pos.z + 4 * self.up_vec[i, 2].cpu().numpy()])
colors.append([0.05, 0.99, 0.04])

self.gym.add_lines(self.viewer, None, self.num_envs * 2, points, colors)


#####################################################################
###=========================jit functions=========================###
#####################################################################


@torch.jit.script
def compute_ant_reward(
obs_buf,
reset_buf,
progress_buf,
actions,
up_weight,
heading_weight,
potentials,
prev_potentials,
actions_cost_scale,
energy_cost_scale,
joints_at_limit_cost_scale,
termination_height,
death_cost,
max_episode_length
obs_buf,
reset_buf,
progress_buf,
actions,
up_weight,
heading_weight,
potentials,
prev_potentials,
actions_cost_scale,
energy_cost_scale,
joints_at_limit_cost_scale,
termination_height,
death_cost,
max_episode_length
):
# type: (Tensor, Tensor, Tensor, Tensor, float, float, Tensor, Tensor, float, float, float, float, float, float) -> Tuple[Tensor, Tensor]

@@ -351,10 +362,11 @@ def compute_ant_reward(
progress_reward = potentials - prev_potentials

total_reward = progress_reward + alive_reward + up_reward + heading_reward - \
actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale
actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale

# adjust reward for fallen agents
total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost, total_reward)
total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost,
total_reward)

# reset agents
reset = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(reset_buf), reset_buf)
@@ -397,4 +409,4 @@ def compute_ant_observations(obs_buf, root_states, targets, potentials,
dof_vel * dof_vel_scale, sensor_force_torques.view(-1, 24) * contact_force_scale,
actions), dim=-1)

return obs, potentials, prev_potentials_new, up_vec, heading_vec
return obs, potentials, prev_potentials_new, up_vec, heading_vec
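For orientation, `compute_ant_reward` above sums a potential-based progress term with alive, upright, and heading bonuses, then subtracts action, energy, and joint-limit penalties. A simplified, non-jitted sketch of that composition; the scale values are illustrative placeholders, the real ones come from the task config.

```python
import torch

def ant_total_reward(progress_reward: torch.Tensor, alive_reward: torch.Tensor,
                     up_reward: torch.Tensor, heading_reward: torch.Tensor,
                     actions_cost: torch.Tensor, electricity_cost: torch.Tensor,
                     dof_at_limit_cost: torch.Tensor,
                     actions_cost_scale: float = 0.005,        # placeholder weight
                     energy_cost_scale: float = 0.05,          # placeholder weight
                     joints_at_limit_cost_scale: float = 0.1,  # placeholder weight
                     ) -> torch.Tensor:
    # Bonuses minus weighted penalties, mirroring the structure in the hunk above.
    return (progress_reward + alive_reward + up_reward + heading_reward
            - actions_cost_scale * actions_cost
            - energy_cost_scale * electricity_cost
            - dof_at_limit_cost * joints_at_limit_cost_scale)
```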
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/anymal.py
@@ -33,7 +33,7 @@
from isaacgym import gymtorch
from isaacgym.torch_utils import *

from .base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask


class Anymal(VecTask):
10 changes: 5 additions & 5 deletions rofunc/learning/RofuncRL/tasks/anymal_terrain.py
@@ -31,7 +31,7 @@
from isaacgym import gymtorch
from isaacgym.torch_utils import *

from .base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask


class AnymalTerrain(VecTask):
@@ -358,7 +358,7 @@ def compute_reward(self):

# stumbling penalty
stumble = (torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) > 5.) * (
torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.)
torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.)
rew_stumble = torch.sum(stumble, dim=1) * self.rew_scales["stumble"]

# action rate penalty
@@ -456,7 +456,7 @@ def update_terrain_level(self, env_ids):
return
distance = torch.norm(self.root_states[env_ids, :2] - self.env_origins[env_ids, :2], dim=1)
self.terrain_levels[env_ids] -= 1 * (
distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25)
distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25)
self.terrain_levels[env_ids] += 1 * (distance > self.terrain.env_length / 2)
self.terrain_levels[env_ids] = torch.clip(self.terrain_levels[env_ids], 0) % self.terrain.env_rows
self.env_origins[env_ids] = self.terrain_origins[self.terrain_levels[env_ids], self.terrain_types[env_ids]]
@@ -469,7 +469,7 @@ def pre_physics_step(self, actions):
self.actions = actions.clone().to(self.device)
for i in range(self.decimation):
torques = torch.clip(self.Kp * (
self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel,
self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel,
-80., 80.)
self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(torques))
self.torques = torques.view(self.torques.shape)
@@ -554,7 +554,7 @@ def get_heights(self, env_ids=None):
self.height_points[env_ids]) + (self.root_states[env_ids, :3]).unsqueeze(1)
else:
points = quat_apply_yaw(self.base_quat.repeat(1, self.num_height_points), self.height_points) + (
self.root_states[:, :3]).unsqueeze(1)
self.root_states[:, :3]).unsqueeze(1)

points += self.terrain.border_size
points = (points / self.terrain.horizontal_scale).long()
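The `pre_physics_step` hunk above applies a clipped PD law: actions are scaled into joint-position targets around the default pose, and the resulting torques are limited to ±80. A standalone sketch with illustrative gains (the task itself reads `Kp`, `Kd`, and the action scale from its config):

```python
import torch

def pd_torques(actions: torch.Tensor, dof_pos: torch.Tensor, dof_vel: torch.Tensor,
               default_dof_pos: torch.Tensor, action_scale: float = 0.5,
               kp: float = 80.0, kd: float = 2.0, limit: float = 80.0) -> torch.Tensor:
    # Joint-position targets are offsets from the default pose.
    targets = action_scale * actions + default_dof_pos
    # Proportional-derivative torque, clipped to the actuator limit.
    return torch.clip(kp * (targets - dof_pos) - kd * dof_vel, -limit, limit)
```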
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid.py
@@ -33,8 +33,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from ..base.vec_task import VecTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class Humanoid(VecTask):
13 changes: 9 additions & 4 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py
@@ -32,9 +32,10 @@
from gym import spaces
from isaacgym.torch_utils import *

from .humanoid import Humanoid, dof_to_obs
import rofunc as rf
from rofunc.learning.RofuncRL.tasks.ase.motion_lib import MotionLib
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid import Humanoid, dof_to_obs
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidAMP(Humanoid):
Expand All @@ -61,8 +62,12 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir
virtual_screen_capture=virtual_screen_capture, force_render=force_render)

motion_file = cfg['env'].get('motion_file', None)
motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../../../../../examples/data/amp/" + motion_file)
if rf.oslab.is_absl_path(motion_file):
motion_file_path = motion_file
else:
motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../../../../examples/data/amp/" + motion_file)
self._load_motion(motion_file_path)

self._load_motion(motion_file_path)

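The new branch in `HumanoidAMP.__init__` lets `motion_file` be either an absolute path (used as-is) or a file name resolved against the bundled `examples/data/amp` directory. A rough standard-library equivalent, assuming `rf.oslab.is_absl_path` behaves like `os.path.isabs`; the default `data_root` below is illustrative:

```python
import os

def resolve_motion_file(motion_file: str,
                        data_root: str = "examples/data/amp") -> str:
    # Absolute paths are used directly; bare names are looked up in the data root.
    if os.path.isabs(motion_file):
        return motion_file
    return os.path.join(data_root, motion_file)
```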
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py
@@ -29,7 +29,7 @@
from isaacgym import gymtorch
from isaacgym.torch_utils import *

from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP


class HumanoidAMPGetupTask(HumanoidAMP):
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py
@@ -28,7 +28,7 @@

import torch

from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP


class HumanoidAMPTask(HumanoidAMP):
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils

TAR_ACTOR_ID = 1
TAR_FACING_ACTOR_ID = 2
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidLocationTask(HumanoidAMPTask):
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py
@@ -32,7 +32,7 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP

# import env.tasks.humanoid_amp_getup as humanoid_amp_getup
# # import env.tasks.humanoid_strike as humanoid_strike
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidReachTask(HumanoidAMPTask):
4 changes: 2 additions & 2 deletions rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py
@@ -32,8 +32,8 @@
from isaacgym.torch_utils import *

from rofunc.utils.oslab.path import get_rofunc_path
from .humanoid_amp_task import HumanoidAMPTask
from ..utils import torch_jit_utils as torch_utils
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask
from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils


class HumanoidStrikeTask(HumanoidAMPTask):
2 changes: 1 addition & 1 deletion rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py
@@ -30,7 +30,7 @@

from isaacgym import gymtorch

from .humanoid_amp import HumanoidAMP
from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP


class HumanoidViewMotionTask(HumanoidAMP):