diff --git a/doc/source/lfd/RofuncRL/ASE.md b/doc/source/lfd/RofuncRL/ASE.md index 3940b0ee6..00b73bee2 100644 --- a/doc/source/lfd/RofuncRL/ASE.md +++ b/doc/source/lfd/RofuncRL/ASE.md @@ -65,7 +65,7 @@ python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidASESt ### Motion visualization -Visualize the motion `reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy` by using `HumanoidViewMotion`. +If you want to visualize a motion, you can use the `HumanoidViewMotion` task. For example, the following command visualizes the motion `reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy`: ```shell python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidViewMotion --motion_file reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy --inference --headless=False diff --git a/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py b/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py index 8ad4d9026..eef041751 100644 --- a/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py +++ b/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py @@ -33,8 +33,8 @@ from isaacgym import gymtorch from rofunc.utils.oslab.path import get_rofunc_path -from ..base.vec_task import VecTask -from ..utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * DOF_BODY_IDS = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14] DOF_OFFSETS = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28] diff --git a/rofunc/learning/RofuncRL/tasks/ant.py b/rofunc/learning/RofuncRL/tasks/ant.py index df6c2e766..548bdaeda 100644 --- a/rofunc/learning/RofuncRL/tasks/ant.py +++ b/rofunc/learning/RofuncRL/tasks/ant.py @@ -28,10 +28,11 @@ import os -from isaacgym import gymtorch from isaacgym import gymapi -from .base.vec_task import VecTask -from .utils.torch_jit_utils import * +from isaacgym import gymtorch + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * from rofunc.utils.oslab.path import get_rofunc_path @@ -64,7 +65,9 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.cfg["env"]["numObservations"] = 60 self.cfg["env"]["numActions"] = 8 - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) if self.viewer != None: cam_pos = gymapi.Vec3(50.0, 25.0, 2.4) @@ -93,7 +96,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float) zero_tensor = torch.tensor([0.0], device=self.device) self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower, - torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, self.initial_dof_pos)) + torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, + self.initial_dof_pos)) self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float) # initialize some data used later on @@ -107,11 +111,11 @@ def __init__(self, cfg, rl_device,
sim_device, graphics_device_id, headless, vir self.targets = to_torch([1000, 0, 0], device=self.device).repeat((self.num_envs, 1)) self.target_dirs = to_torch([1, 0, 0], device=self.device).repeat((self.num_envs, 1)) self.dt = self.cfg["sim"]["dt"] - self.potentials = to_torch([-1000./self.dt], device=self.device).repeat(self.num_envs) + self.potentials = to_torch([-1000. / self.dt], device=self.device).repeat(self.num_envs) self.prev_potentials = self.potentials.clone() def create_sim(self): - self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 + self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() @@ -162,7 +166,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(*get_axis_params(0.44, self.up_axis_idx)) - self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device) + self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], + device=self.device) self.torso_index = 0 self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset) @@ -208,7 +213,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) for i in range(len(extremity_names)): - self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i]) + self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], + extremity_names[i]) def compute_reward(self, actions): self.rew_buf[:], self.reset_buf[:] = compute_ant_reward( @@ -232,10 +238,11 @@ def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_force_sensor_tensor(self.sim) - #print("Feet forces and torques: ", self.vec_sensor_tensor[0, :]) + # print("Feet forces and torques: ", self.vec_sensor_tensor[0, :]) # print(self.vec_sensor_tensor.shape) - self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[:] = compute_ant_observations( + self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[ + :] = compute_ant_observations( self.obs_buf, self.root_states, self.targets, self.potentials, self.inv_start_rot, self.dof_pos, self.dof_vel, self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale, @@ -250,7 +257,8 @@ def reset_idx(self, env_ids): positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=str(self.device)) velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=str(self.device)) - self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, self.dof_limits_upper) + self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, + self.dof_limits_upper) self.dof_vel[env_ids] = velocities env_ids_int32 = env_ids.to(dtype=torch.int32) @@ -299,16 +307,19 @@ def post_physics_step(self): origin = self.gym.get_env_origin(self.envs[i]) pose = self.root_states[:, 0:3][i].cpu().numpy() glob_pos = gymapi.Vec3(origin.x + pose[0], origin.y + pose[1], origin.z + pose[2]) - points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(), - glob_pos.y + 4 * self.heading_vec[i, 
1].cpu().numpy(), - glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()]) + points.append( + [glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(), + glob_pos.y + 4 * self.heading_vec[i, 1].cpu().numpy(), + glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()]) colors.append([0.97, 0.1, 0.06]) - points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(), glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(), + points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(), + glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(), glob_pos.z + 4 * self.up_vec[i, 2].cpu().numpy()]) colors.append([0.05, 0.99, 0.04]) self.gym.add_lines(self.viewer, None, self.num_envs * 2, points, colors) + ##################################################################### ###=========================jit functions=========================### ##################################################################### @@ -316,20 +327,20 @@ def post_physics_step(self): @torch.jit.script def compute_ant_reward( - obs_buf, - reset_buf, - progress_buf, - actions, - up_weight, - heading_weight, - potentials, - prev_potentials, - actions_cost_scale, - energy_cost_scale, - joints_at_limit_cost_scale, - termination_height, - death_cost, - max_episode_length + obs_buf, + reset_buf, + progress_buf, + actions, + up_weight, + heading_weight, + potentials, + prev_potentials, + actions_cost_scale, + energy_cost_scale, + joints_at_limit_cost_scale, + termination_height, + death_cost, + max_episode_length ): # type: (Tensor, Tensor, Tensor, Tensor, float, float, Tensor, Tensor, float, float, float, float, float, float) -> Tuple[Tensor, Tensor] @@ -351,10 +362,11 @@ def compute_ant_reward( progress_reward = potentials - prev_potentials total_reward = progress_reward + alive_reward + up_reward + heading_reward - \ - actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale + actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale # adjust reward for fallen agents - total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost, total_reward) + total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost, + total_reward) # reset agents reset = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(reset_buf), reset_buf) @@ -397,4 +409,4 @@ def compute_ant_observations(obs_buf, root_states, targets, potentials, dof_vel * dof_vel_scale, sensor_force_torques.view(-1, 24) * contact_force_scale, actions), dim=-1) - return obs, potentials, prev_potentials_new, up_vec, heading_vec \ No newline at end of file + return obs, potentials, prev_potentials_new, up_vec, heading_vec diff --git a/rofunc/learning/RofuncRL/tasks/anymal.py b/rofunc/learning/RofuncRL/tasks/anymal.py index adfd23539..91abec1db 100644 --- a/rofunc/learning/RofuncRL/tasks/anymal.py +++ b/rofunc/learning/RofuncRL/tasks/anymal.py @@ -33,7 +33,7 @@ from isaacgym import gymtorch from isaacgym.torch_utils import * -from .base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask class Anymal(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/anymal_terrain.py b/rofunc/learning/RofuncRL/tasks/anymal_terrain.py index 9f7a2d449..03844e01f 100644 --- a/rofunc/learning/RofuncRL/tasks/anymal_terrain.py +++ 
b/rofunc/learning/RofuncRL/tasks/anymal_terrain.py @@ -31,7 +31,7 @@ from isaacgym import gymtorch from isaacgym.torch_utils import * -from .base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask class AnymalTerrain(VecTask): @@ -358,7 +358,7 @@ def compute_reward(self): # stumbling penalty stumble = (torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) > 5.) * ( - torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.) + torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.) rew_stumble = torch.sum(stumble, dim=1) * self.rew_scales["stumble"] # action rate penalty @@ -456,7 +456,7 @@ def update_terrain_level(self, env_ids): return distance = torch.norm(self.root_states[env_ids, :2] - self.env_origins[env_ids, :2], dim=1) self.terrain_levels[env_ids] -= 1 * ( - distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25) + distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25) self.terrain_levels[env_ids] += 1 * (distance > self.terrain.env_length / 2) self.terrain_levels[env_ids] = torch.clip(self.terrain_levels[env_ids], 0) % self.terrain.env_rows self.env_origins[env_ids] = self.terrain_origins[self.terrain_levels[env_ids], self.terrain_types[env_ids]] @@ -469,7 +469,7 @@ def pre_physics_step(self, actions): self.actions = actions.clone().to(self.device) for i in range(self.decimation): torques = torch.clip(self.Kp * ( - self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel, + self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel, -80., 80.) self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(torques)) self.torques = torques.view(self.torques.shape) @@ -554,7 +554,7 @@ def get_heights(self, env_ids=None): self.height_points[env_ids]) + (self.root_states[env_ids, :3]).unsqueeze(1) else: points = quat_apply_yaw(self.base_quat.repeat(1, self.num_height_points), self.height_points) + ( - self.root_states[:, :3]).unsqueeze(1) + self.root_states[:, :3]).unsqueeze(1) points += self.terrain.border_size points = (points / self.terrain.horizontal_scale).long() diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid.py index b9ee9fae5..649ab846f 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid.py @@ -33,8 +33,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from ..base.vec_task import VecTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class Humanoid(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py index 4e4083382..abdcd3bc2 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py @@ -32,9 +32,10 @@ from gym import spaces from isaacgym.torch_utils import * -from .humanoid import Humanoid, dof_to_obs +import rofunc as rf from rofunc.learning.RofuncRL.tasks.ase.motion_lib import MotionLib -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid import Humanoid, dof_to_obs +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidAMP(Humanoid): @@ -61,8 
+62,11 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir virtual_screen_capture=virtual_screen_capture, force_render=force_render) motion_file = cfg['env'].get('motion_file', None) - motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "../../../../../examples/data/amp/" + motion_file) + if rf.oslab.is_absl_path(motion_file): + motion_file_path = motion_file + else: + motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "../../../../examples/data/amp/" + motion_file) self._load_motion(motion_file_path) diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py index ebb0453d1..4013cd57c 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py @@ -29,7 +29,7 @@ from isaacgym import gymtorch from isaacgym.torch_utils import * -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP class HumanoidAMPGetupTask(HumanoidAMP): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py index cc76e6ef5..5f1564ed3 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py @@ -28,7 +28,7 @@ import torch -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP class HumanoidAMPTask(HumanoidAMP): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py index e039ec2ec..95ad73b11 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils TAR_ACTOR_ID = 1 TAR_FACING_ACTOR_ID = 2 diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py index 7be2c5c96..74902b75a 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidLocationTask(HumanoidAMPTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py index a30546a37..21c9d0a34 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py @@ -32,7 +32,7 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP # import env.tasks.humanoid_amp_getup as
humanoid_amp_getup # # import env.tasks.humanoid_strike as humanoid_strike diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py index b0b9a1d05..02fdf3f23 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidReachTask(HumanoidAMPTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py index 4b2315960..f05045264 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidStrikeTask(HumanoidAMPTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py index 870d2f7c3..37f7db4a2 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py @@ -30,7 +30,7 @@ from isaacgym import gymtorch -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP class HumanoidViewMotionTask(HumanoidAMP): diff --git a/rofunc/learning/RofuncRL/tasks/base.py b/rofunc/learning/RofuncRL/tasks/base.py deleted file mode 100644 index 7c23cba0b..000000000 --- a/rofunc/learning/RofuncRL/tasks/base.py +++ /dev/null @@ -1,94 +0,0 @@ -""" Define all environments and provide helper functions to load environments. """ - -# OpenAI gym interface -import gym - -from ..util.subproc_vec_env import SubprocVecEnv - -REGISTERED_ENVS = {} - - -def register_env(target_class): - REGISTERED_ENVS[target_class.__name__] = target_class - - -def get_env(name): - """ - Gets the environment class given @name. - """ - if name not in REGISTERED_ENVS: - raise Exception( - "Unknown environment name: {}\nAvailable environments: {}".format( - name, ", ".join(REGISTERED_ENVS) - ) - ) - return REGISTERED_ENVS[name] - - -def make_env(name, config=None): - """ - Creates a new environment instance with @name and @config. - """ - env = get_env(name) - - # get default config if not provided - if config is None: - import argparse - import config.furniture as furniture_config - from util import str2bool - - parser = argparse.ArgumentParser() - furniture_config.add_argument(parser) - parser.add_argument("--seed", type=int, default=123) - parser.add_argument("--debug", type=str2bool, default=False) - - config, unparsed = parser.parse_known_args() - - return env(config) - - -def get_gym_env(env_id, env_kwargs): - env = gym.make(env_id, **env_kwargs) - return env - - -def make_vec_env(env_id, num_env, config=None, env_kwargs=None): - """ - Creates a wrapped SubprocVecEnv using OpenAI gym interface. 
- Unity app will use the port number from @config.port to (@config.port + @num_env - 1). - - Code modified based on - https://github.com/openai/baselines/blob/master/baselines/common/cmd_util.py - - Args: - env_id: environment id registered in in `env/__init__.py`. - num_env: number of environments to launch. - config: general configuration for the environment. - """ - env_kwargs = env_kwargs or {} - - if config is not None: - for key, value in config.__dict__.items(): - env_kwargs[key] = value - - def make_thunk(rank): - new_env_kwargs = env_kwargs.copy() - new_env_kwargs["port"] = env_kwargs["port"] + rank - new_env_kwargs["seed"] = env_kwargs["seed"] + rank - return lambda: get_gym_env(env_id, new_env_kwargs) - - return SubprocVecEnv([make_thunk(i) for i in range(num_env)]) - - -class EnvMeta(type): - """ Meta class for registering environments. """ - - def __new__(meta, name, bases, class_dict): - cls = super().__new__(meta, name, bases, class_dict) - - # List all environments that should not be registered here. - _unregistered_envs = ["FurnitureEnv"] - - if cls.__name__ not in _unregistered_envs: - register_env(cls) - return cls diff --git a/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py b/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py index 3aacdd503..09f121f68 100644 --- a/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py +++ b/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py @@ -15,7 +15,7 @@ from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask class CURIBaseTask(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/cartpole.py b/rofunc/learning/RofuncRL/tasks/cartpole.py index cd9b0847e..74ee95580 100644 --- a/rofunc/learning/RofuncRL/tasks/cartpole.py +++ b/rofunc/learning/RofuncRL/tasks/cartpole.py @@ -26,12 +26,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import numpy as np import os + +import numpy as np import torch +from isaacgym import gymtorch, gymapi -from isaacgym import gymutil, gymtorch, gymapi -from .base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask from rofunc.utils.oslab.path import get_rofunc_path @@ -48,7 +49,9 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.cfg["env"]["numObservations"] = 4 self.cfg["env"]["numActions"] = 1 - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) @@ -71,7 +74,8 @@ def _create_ground_plane(self): def _create_envs(self, num_envs, spacing, num_per_row): # define plane on which environments are initialized - lower = gymapi.Vec3(0.5 * -spacing, -spacing, 0.0) if self.up_axis == 'z' else gymapi.Vec3(0.5 * -spacing, 0.0, -spacing) + lower = gymapi.Vec3(0.5 * -spacing, -spacing, 0.0) if self.up_axis == 'z' else gymapi.Vec3(0.5 * -spacing, 0.0, + -spacing) upper = gymapi.Vec3(0.5 * spacing, spacing, spacing) # get rofunc path from rofunc package metadata @@ -99,7 +103,7 @@ def _create_envs(self, num_envs, spacing, num_per_row): pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0) else: pose.p.y = 2.0 - pose.r = gymapi.Quat(-np.sqrt(2)/2, 0.0, 0.0, np.sqrt(2)/2) + pose.r = gymapi.Quat(-np.sqrt(2) / 2, 0.0, 0.0, np.sqrt(2) / 2) self.cartpole_handles = [] self.envs = [] @@ -176,6 +180,7 @@ def post_physics_step(self): self.compute_observations() self.compute_reward() + ##################################################################### ###=========================jit functions=========================### ##################################################################### diff --git a/rofunc/learning/RofuncRL/tasks/curi_cabinet.py b/rofunc/learning/RofuncRL/tasks/curi_cabinet.py index 7c9d82d0a..7edad6d1d 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_cabinet.py +++ b/rofunc/learning/RofuncRL/tasks/curi_cabinet.py @@ -17,8 +17,8 @@ from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from rofunc.utils.oslab.path import get_rofunc_path from rofunc.learning.RofuncRL.tasks.base.curi_base_task import CURIBaseTask +from rofunc.utils.oslab.path import get_rofunc_path class CURICabinetTask(CURIBaseTask): diff --git a/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py b/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py index df6c596f0..0d1d3f2da 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py +++ b/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py @@ -16,8 +16,9 @@ from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from rofunc.utils.oslab.path import get_rofunc_path + from rofunc.learning.RofuncRL.tasks.base.curi_base_task import CURIBaseTask +from rofunc.utils.oslab.path import get_rofunc_path class CURICabinetBimanualTask(CURIBaseTask): @@ -237,14 +238,14 @@ def _create_envs(self, num_envs, spacing, num_per_row): # Left arm self.hand_handle_l, self.lfinger_handle_l, self.rfinger_handle_l, self.curi_local_grasp_pos, \ - self.curi_local_grasp_rot, self.curi_grasp_pos_l, 
self.curi_grasp_rot_l, self.curi_lfinger_pos_l, \ - self.curi_rfinger_pos_l, self.curi_lfinger_rot_l, self.curi_rfinger_rot_l \ + self.curi_local_grasp_rot, self.curi_grasp_pos_l, self.curi_grasp_rot_l, self.curi_lfinger_pos_l, \ + self.curi_rfinger_pos_l, self.curi_lfinger_rot_l, self.curi_rfinger_rot_l \ = self.init_data_curi("panda_left_link7", "panda_left_leftfinger", "panda_left_rightfinger", env_ptr, curi_actor) # Right arm self.hand_handle_r, self.lfinger_handle_r, self.rfinger_handle_r, self.curi_local_grasp_pos, \ - self.curi_local_grasp_rot, self.curi_grasp_pos_r, self.curi_grasp_rot_r, self.curi_lfinger_pos_r, \ - self.curi_rfinger_pos_r, self.curi_lfinger_rot_r, self.curi_rfinger_rot_r \ + self.curi_local_grasp_rot, self.curi_grasp_pos_r, self.curi_grasp_rot_r, self.curi_lfinger_pos_r, \ + self.curi_rfinger_pos_r, self.curi_lfinger_rot_r, self.curi_rfinger_rot_r \ = self.init_data_curi("panda_right_link7", "panda_right_leftfinger", "panda_right_rightfinger", env_ptr, curi_actor) @@ -291,7 +292,7 @@ def init_data_curi(self, hand_dof_name, lfinger_dof_name, rfinger_dof_name, env_ curi_rfinger_rot = torch.zeros_like(curi_local_grasp_rot) return hand_handle, lfinger_handle, rfinger_handle, curi_local_grasp_pos, curi_local_grasp_rot, curi_grasp_pos, \ - curi_grasp_rot, curi_lfinger_pos, curi_rfinger_pos, curi_lfinger_rot, curi_rfinger_rot + curi_grasp_rot, curi_lfinger_pos, curi_rfinger_pos, curi_lfinger_rot, curi_rfinger_rot def init_data_cabinet(self, env_ptr, cabinet_actor): self.drawer_handle_top = self.gym.find_actor_rigid_body_handle(env_ptr, cabinet_actor, "drawer_top") diff --git a/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py b/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py index 633e8cdf3..1b13b39f4 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py +++ b/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os -import torch + +import tqdm from isaacgym import gymapi, gymtorch from isaacgym.torch_utils import * -import tqdm import rofunc as rf from rofunc.learning.RofuncRL.tasks.base.curi_base_task import CURIBaseTask diff --git a/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py b/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py index f3d622670..dab3bb30e 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py +++ b/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py @@ -14,10 +14,11 @@ import os -from rofunc.utils.oslab.path import get_rofunc_path from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .base.vec_task import VecTask + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.utils.oslab.path import get_rofunc_path class CURICoffeeStirringTask(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/franka_cabinet.py b/rofunc/learning/RofuncRL/tasks/franka_cabinet.py index a32ce67ca..eb73e8ce0 100644 --- a/rofunc/learning/RofuncRL/tasks/franka_cabinet.py +++ b/rofunc/learning/RofuncRL/tasks/franka_cabinet.py @@ -28,10 +28,11 @@ import os -from rofunc.utils.oslab.path import get_rofunc_path from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .base.vec_task import VecTask + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.utils.oslab.path import get_rofunc_path class FrankaCabinetTask(VecTask): @@ -406,7 +407,7 @@ def reset_idx(self, env_ids): # reset franka pos = tensor_clamp( self.franka_default_dof_pos.unsqueeze(0) + 0.25 * ( - torch.rand((len(env_ids), self.num_franka_dofs), device=self.device) - 0.5), + torch.rand((len(env_ids), self.num_franka_dofs), device=self.device) - 0.5), self.franka_dof_lower_limits, self.franka_dof_upper_limits) self.franka_dof_pos[env_ids, :] = pos self.franka_dof_vel[env_ids, :] = torch.zeros_like(self.franka_dof_vel[env_ids]) @@ -602,4 +603,4 @@ def compute_grasp_transforms(hand_rot, hand_pos, franka_local_grasp_rot, franka_ global_drawer_rot, global_drawer_pos = tf_combine( drawer_rot, drawer_pos, drawer_local_grasp_rot, drawer_local_grasp_pos) - return global_franka_rot, global_franka_pos, global_drawer_rot, global_drawer_pos \ No newline at end of file + return global_franka_rot, global_franka_pos, global_drawer_rot, global_drawer_pos diff --git a/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py b/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py index d98c8b35f..bec3f7855 100644 --- a/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py +++ b/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py @@ -31,8 +31,8 @@ from isaacgym import gymapi from isaacgym import gymtorch -from .base.vec_task import VecTask -from .utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * @torch.jit.script diff --git a/rofunc/learning/RofuncRL/tasks/humanoid.py b/rofunc/learning/RofuncRL/tasks/humanoid.py index f57cd1e64..11ea78129 100644 --- a/rofunc/learning/RofuncRL/tasks/humanoid.py +++ b/rofunc/learning/RofuncRL/tasks/humanoid.py @@ -31,9 +31,9 @@ from isaacgym import gymapi from isaacgym import gymtorch +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .base.vec_task import VecTask -from .utils.torch_jit_utils import * class HumanoidTask(VecTask): diff --git 
a/rofunc/learning/RofuncRL/tasks/humanoid_amp.py b/rofunc/learning/RofuncRL/tasks/humanoid_amp.py index 155c8d863..9935f479f 100644 --- a/rofunc/learning/RofuncRL/tasks/humanoid_amp.py +++ b/rofunc/learning/RofuncRL/tasks/humanoid_amp.py @@ -32,9 +32,10 @@ from gym import spaces from isaacgym import gymtorch -from .amp.humanoid_amp_base import HumanoidAMPBase, dof_to_obs +import rofunc as rf +from rofunc.learning.RofuncRL.tasks.amp.humanoid_amp_base import HumanoidAMPBase, dof_to_obs from rofunc.learning.RofuncRL.tasks.amp.motion_lib import MotionLib -from .utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * NUM_AMP_OBS_PER_STEP = 13 + 52 + 28 + 12 # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos] @@ -63,8 +64,11 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir virtual_screen_capture=virtual_screen_capture, force_render=force_render) motion_file = cfg['env'].get('motion_file', "amp_humanoid_backflip.npy") - motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "../../../../examples/data/amp/" + motion_file) + if rf.oslab.is_absl_path(motion_file): + motion_file_path = motion_file + else: + motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "../../../../examples/data/amp/" + motion_file) self._load_motion(motion_file_path) self.num_amp_obs = self._num_amp_obs_steps * NUM_AMP_OBS_PER_STEP diff --git a/rofunc/learning/RofuncRL/tasks/ikea.py b/rofunc/learning/RofuncRL/tasks/ikea.py index 494b1672d..521f15650 100644 --- a/rofunc/learning/RofuncRL/tasks/ikea.py +++ b/rofunc/learning/RofuncRL/tasks/ikea.py @@ -14,10 +14,11 @@ import os -from rofunc.utils.oslab.path import get_rofunc_path from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .base.vec_task import VecTask + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.utils.oslab.path import get_rofunc_path class IKEABaseTask(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/shadow_hand.py b/rofunc/learning/RofuncRL/tasks/shadow_hand.py index 30fa3613b..8206a5aea 100644 --- a/rofunc/learning/RofuncRL/tasks/shadow_hand.py +++ b/rofunc/learning/RofuncRL/tasks/shadow_hand.py @@ -26,16 +26,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import numpy as np import os -import torch -from isaacgym import gymtorch from isaacgym import gymapi -from isaacgym.torch_utils import * +from isaacgym import gymtorch -from isaacgymenvs.utils.torch_jit_utils import * -from isaacgymenvs.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * class ShadowHand(VecTask): @@ -94,9 +91,12 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir } if "asset" in self.cfg["env"]: - self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock", self.asset_files_dict["block"]) - self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg", self.asset_files_dict["egg"]) - self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen", self.asset_files_dict["pen"]) + self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock", + self.asset_files_dict["block"]) + self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg", + self.asset_files_dict["egg"]) + self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen", + self.asset_files_dict["pen"]) # can be "openai", "full_no_vel", "full", "full_state" self.obs_type = self.cfg["env"]["observationType"] @@ -116,7 +116,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.up_axis = 'z' - self.fingertips = ["robot0:ffdistal", "robot0:mfdistal", "robot0:rfdistal", "robot0:lfdistal", "robot0:thdistal"] + self.fingertips = ["robot0:ffdistal", "robot0:mfdistal", "robot0:rfdistal", "robot0:lfdistal", + "robot0:thdistal"] self.num_fingertips = len(self.fingertips) self.use_vel_obs = False @@ -131,12 +132,14 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.cfg["env"]["numStates"] = num_states self.cfg["env"]["numActions"] = 20 - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) self.dt = self.sim_params.dt control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1) if self.reset_time > 0.0: - self.max_episode_length = int(round(self.reset_time/(control_freq_inv * self.dt))) + self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.dt))) print("Reset time: ", self.reset_time) print("New episode length: ", self.max_episode_length) @@ -155,7 +158,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6) dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) - self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, self.num_shadow_hand_dofs) + self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, + self.num_shadow_hand_dofs) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_dof_state_tensor(self.sim) @@ -177,7 +181,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, 
device=self.device) self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device) - self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs, -1) + self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs, + -1) self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) @@ -195,13 +200,14 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.force_decay = to_torch(self.force_decay, dtype=torch.float, device=self.device) self.force_prob_range = to_torch(self.force_prob_range, dtype=torch.float, device=self.device) self.random_force_prob = torch.exp((torch.log(self.force_prob_range[0]) - torch.log(self.force_prob_range[1])) - * torch.rand(self.num_envs, device=self.device) + torch.log(self.force_prob_range[1])) + * torch.rand(self.num_envs, device=self.device) + torch.log( + self.force_prob_range[1])) self.rb_forces = torch.zeros((self.num_envs, self.num_bodies, 3), dtype=torch.float, device=self.device) def create_sim(self): self.dt = self.cfg["sim"]["dt"] - self.up_axis_idx = 2 if self.up_axis == 'z' else 1 # index of up axis: Y=1, Z=2 + self.up_axis_idx = 2 if self.up_axis == 'z' else 1 # index of up axis: Y=1, Z=2 self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() @@ -225,7 +231,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): if "asset" in self.cfg["env"]: # asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root) - shadow_hand_asset_file = os.path.normpath(self.cfg["env"]["asset"].get("assetFileName", shadow_hand_asset_file)) + shadow_hand_asset_file = os.path.normpath( + self.cfg["env"]["asset"].get("assetFileName", shadow_hand_asset_file)) object_asset_file = self.asset_files_dict[self.object_type] @@ -264,8 +271,10 @@ def _create_envs(self, num_envs, spacing, num_per_row): tendon_props[i].damping = t_damping self.gym.set_asset_tendon_properties(shadow_hand_asset, tendon_props) - actuated_dof_names = [self.gym.get_asset_actuator_joint_name(shadow_hand_asset, i) for i in range(self.num_shadow_hand_actuators)] - self.actuated_dof_indices = [self.gym.find_asset_dof_index(shadow_hand_asset, name) for name in actuated_dof_names] + actuated_dof_names = [self.gym.get_asset_actuator_joint_name(shadow_hand_asset, i) for i in + range(self.num_shadow_hand_actuators)] + self.actuated_dof_indices = [self.gym.find_asset_dof_index(shadow_hand_asset, name) for name in + actuated_dof_names] # get shadow_hand dof properties, loaded by Isaac Gym from the MJCF file shadow_hand_dof_props = self.gym.get_asset_dof_properties(shadow_hand_asset) @@ -287,7 +296,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.shadow_hand_dof_default_pos = to_torch(self.shadow_hand_dof_default_pos, device=self.device) self.shadow_hand_dof_default_vel = to_torch(self.shadow_hand_dof_default_vel, device=self.device) - self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in self.fingertips] + self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in + self.fingertips] # create 
fingertip force sensors, if needed if self.obs_type == "full_state" or self.asymmetric_obs: @@ -308,7 +318,7 @@ def _create_envs(self, num_envs, spacing, num_per_row): object_start_pose = gymapi.Transform() object_start_pose.p = gymapi.Vec3() object_start_pose.p.x = shadow_hand_start_pose.p.x - pose_dy, pose_dz = -0.39, 0.10 + pose_dy, pose_dz = -0.39, 0.10 object_start_pose.p.y = shadow_hand_start_pose.p.y + pose_dy object_start_pose.p.z = shadow_hand_start_pose.p.z + pose_dz @@ -339,7 +349,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.object_indices = [] self.goal_object_indices = [] - self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in self.fingertips] + self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in + self.fingertips] shadow_hand_rb_count = self.gym.get_asset_rigid_body_count(shadow_hand_asset) object_rb_count = self.gym.get_asset_rigid_body_count(object_asset) @@ -355,10 +366,13 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True) # add hand - collision filter = -1 to use asset collision filters set in mjcf loader - shadow_hand_actor = self.gym.create_actor(env_ptr, shadow_hand_asset, shadow_hand_start_pose, "hand", i, -1, 0) - self.hand_start_states.append([shadow_hand_start_pose.p.x, shadow_hand_start_pose.p.y, shadow_hand_start_pose.p.z, - shadow_hand_start_pose.r.x, shadow_hand_start_pose.r.y, shadow_hand_start_pose.r.z, shadow_hand_start_pose.r.w, - 0, 0, 0, 0, 0, 0]) + shadow_hand_actor = self.gym.create_actor(env_ptr, shadow_hand_asset, shadow_hand_start_pose, "hand", i, -1, + 0) + self.hand_start_states.append( + [shadow_hand_start_pose.p.x, shadow_hand_start_pose.p.y, shadow_hand_start_pose.p.z, + shadow_hand_start_pose.r.x, shadow_hand_start_pose.r.y, shadow_hand_start_pose.r.z, + shadow_hand_start_pose.r.w, + 0, 0, 0, 0, 0, 0]) self.gym.set_actor_dof_properties(env_ptr, shadow_hand_actor, shadow_hand_dof_props) hand_idx = self.gym.get_actor_index(env_ptr, shadow_hand_actor, gymapi.DOMAIN_SIM) self.hand_indices.append(hand_idx) @@ -370,13 +384,15 @@ def _create_envs(self, num_envs, spacing, num_per_row): # add object object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0) self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z, - object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z, object_start_pose.r.w, + object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z, + object_start_pose.r.w, 0, 0, 0, 0, 0, 0]) object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM) self.object_indices.append(object_idx) # add goal object - goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs, 0, 0) + goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs, + 0, 0) goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM) self.goal_object_indices.append(goal_object_idx) @@ -397,7 +413,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): object_rb_props = self.gym.get_actor_rigid_body_properties(env_ptr, object_handle) self.object_rb_masses = [prop.mass for prop in object_rb_props] - self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view(self.num_envs, 13) + self.object_init_state = 
to_torch(self.object_init_state, device=self.device, dtype=torch.float).view( + self.num_envs, 13) self.goal_states = self.object_init_state.clone() self.goal_states[:, self.up_axis_idx] -= 0.04 self.goal_init_state = self.goal_states.clone() @@ -412,8 +429,11 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device) def compute_reward(self, actions): - self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[:], self.consecutive_successes[:] = compute_hand_reward( - self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes, + self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[ + :], self.consecutive_successes[ + :] = compute_hand_reward( + self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, + self.consecutive_successes, self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale, self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty, @@ -429,9 +449,11 @@ def compute_reward(self, actions): # The direct average shows the overall result more quickly, but slightly undershoots long term # policy performance. - print("Direct average consecutive successes = {:.1f}".format(direct_average_successes/(self.total_resets + self.num_envs))) + print("Direct average consecutive successes = {:.1f}".format( + direct_average_successes / (self.total_resets + self.num_envs))) if self.total_resets > 0: - print("Post-Reset average consecutive successes = {:.1f}".format(self.total_successes/self.total_resets)) + print("Post-Reset average consecutive successes = {:.1f}".format( + self.total_successes / self.total_resets)) def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) @@ -497,7 +519,8 @@ def compute_fingertip_observations(self, no_vel=False): def compute_full_observations(self, no_vel=False): if no_vel: self.obs_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) self.obs_buf[:, 24:31] = self.object_pose self.obs_buf[:, 31:38] = self.goal_pose @@ -509,8 +532,10 @@ def compute_full_observations(self, no_vel=False): self.obs_buf[:, 57:77] = self.actions else: self.obs_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) - self.obs_buf[:, self.num_shadow_hand_dofs:2*self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_shadow_hand_dofs:2 * self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel self.obs_buf[:, 48:55] = self.object_pose self.obs_buf[:, 55:58] = self.object_linvel @@ -527,27 +552,32 @@ def compute_full_observations(self, no_vel=False): def compute_full_state(self, asymm_obs=False): if asymm_obs: self.states_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) - self.states_buf[:, self.num_shadow_hand_dofs:2*self.num_shadow_hand_dofs] = self.vel_obs_scale * 
self.shadow_hand_dof_vel - self.states_buf[:, 2*self.num_shadow_hand_dofs:3*self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor - - obj_obs_start = 3*self.num_shadow_hand_dofs # 72 + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.states_buf[:, + self.num_shadow_hand_dofs:2 * self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel + self.states_buf[:, + 2 * self.num_shadow_hand_dofs:3 * self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor + + obj_obs_start = 3 * self.num_shadow_hand_dofs # 72 self.states_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose self.states_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel self.states_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel goal_obs_start = obj_obs_start + 13 # 85 self.states_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose - self.states_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot)) + self.states_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, + quat_conjugate(self.goal_rot)) # fingertip observations, state(pose and vel) + force-torque sensors num_ft_states = 13 * self.num_fingertips # 65 num_ft_force_torques = 6 * self.num_fingertips # 30 fingertip_obs_start = goal_obs_start + 11 # 96 - self.states_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(self.num_envs, num_ft_states) + self.states_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape( + self.num_envs, num_ft_states) self.states_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + - num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor # obs_end = 96 + 65 + 30 = 191 # obs_total = obs_end + num_actions = 211 @@ -555,27 +585,32 @@ def compute_full_state(self, asymm_obs=False): self.states_buf[:, obs_end:obs_end + self.num_actions] = self.actions else: self.obs_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) - self.obs_buf[:, self.num_shadow_hand_dofs:2*self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel - self.obs_buf[:, 2*self.num_shadow_hand_dofs:3*self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor - - obj_obs_start = 3*self.num_shadow_hand_dofs # 72 + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_shadow_hand_dofs:2 * self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel + self.obs_buf[:, + 2 * self.num_shadow_hand_dofs:3 * self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor + + obj_obs_start = 3 * self.num_shadow_hand_dofs # 72 self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel goal_obs_start = obj_obs_start + 13 # 85 self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose - self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot)) + self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = 
quat_mul(self.object_rot, + quat_conjugate(self.goal_rot)) # fingertip observations, state(pose and vel) + force-torque sensors num_ft_states = 13 * self.num_fingertips # 65 num_ft_force_torques = 6 * self.num_fingertips # 30 fingertip_obs_start = goal_obs_start + 11 # 96 - self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(self.num_envs, num_ft_states) + self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape( + self.num_envs, num_ft_states) self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + - num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor # obs_end = 96 + 65 + 30 = 191 # obs_total = obs_end + num_actions = 211 @@ -585,13 +620,16 @@ def compute_full_state(self, asymm_obs=False): def reset_target_pose(self, env_ids, apply_reset=False): rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device) - new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids]) + new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3] self.goal_states[env_ids, 3:7] = new_rot - self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids, 0:3] + self.goal_displacement_tensor + self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids, + 0:3] + self.goal_displacement_tensor self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7] - self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like(self.root_state_tensor[self.goal_object_indices[env_ids], 7:13]) + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13]) if apply_reset: goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32) @@ -617,18 +655,24 @@ def reset_idx(self, env_ids, goal_env_ids): # reset object self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone() self.root_state_tensor[self.object_indices[env_ids], 0:2] = self.object_init_state[env_ids, 0:2] + \ - self.reset_position_noise * rand_floats[:, 0:2] - self.root_state_tensor[self.object_indices[env_ids], self.up_axis_idx] = self.object_init_state[env_ids, self.up_axis_idx] + \ - self.reset_position_noise * rand_floats[:, self.up_axis_idx] - - new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids]) + self.reset_position_noise * rand_floats[:, 0:2] + self.root_state_tensor[self.object_indices[env_ids], self.up_axis_idx] = self.object_init_state[ + env_ids, self.up_axis_idx] + \ + self.reset_position_noise * rand_floats[ + :, + self.up_axis_idx] + + new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) if self.object_type == "pen": rand_angle_y = torch.tensor(0.3) new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y, - self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids], self.z_unit_tensor[env_ids]) + self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids], + 
self.z_unit_tensor[env_ids]) self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot - self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like(self.root_state_tensor[self.object_indices[env_ids], 7:13]) + self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.object_indices[env_ids], 7:13]) object_indices = torch.unique(torch.cat([self.object_indices[env_ids], self.goal_object_indices[env_ids], @@ -638,18 +682,20 @@ def reset_idx(self, env_ids, goal_env_ids): gymtorch.unwrap_tensor(object_indices), len(object_indices)) # reset random force probabilities - self.random_force_prob[env_ids] = torch.exp((torch.log(self.force_prob_range[0]) - torch.log(self.force_prob_range[1])) - * torch.rand(len(env_ids), device=self.device) + torch.log(self.force_prob_range[1])) + self.random_force_prob[env_ids] = torch.exp( + (torch.log(self.force_prob_range[0]) - torch.log(self.force_prob_range[1])) + * torch.rand(len(env_ids), device=self.device) + torch.log(self.force_prob_range[1])) # reset shadow hand delta_max = self.shadow_hand_dof_upper_limits - self.shadow_hand_dof_default_pos delta_min = self.shadow_hand_dof_lower_limits - self.shadow_hand_dof_default_pos - rand_delta = delta_min + (delta_max - delta_min) * 0.5 * (rand_floats[:, 5:5+self.num_shadow_hand_dofs] + 1) + rand_delta = delta_min + (delta_max - delta_min) * 0.5 * (rand_floats[:, 5:5 + self.num_shadow_hand_dofs] + 1) pos = self.shadow_hand_default_dof_pos + self.reset_dof_pos_noise * rand_delta self.shadow_hand_dof_pos[env_ids, :] = pos self.shadow_hand_dof_vel[env_ids, :] = self.shadow_hand_dof_default_vel + \ - self.reset_dof_vel_noise * rand_floats[:, 5+self.num_shadow_hand_dofs:5+self.num_shadow_hand_dofs*2] + self.reset_dof_vel_noise * rand_floats[:, + 5 + self.num_shadow_hand_dofs:5 + self.num_shadow_hand_dofs * 2] self.prev_targets[env_ids, :self.num_shadow_hand_dofs] = pos self.cur_targets[env_ids, :self.num_shadow_hand_dofs] = pos @@ -682,16 +728,28 @@ def pre_physics_step(self, actions): self.actions = actions.clone().to(self.device) if self.use_relative_control: - targets = self.prev_targets[:, self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions + targets = self.prev_targets[:, + self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets, - self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) else: self.cur_targets[:, self.actuated_dof_indices] = scale(self.actions, - self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) self.cur_targets[:, self.actuated_dof_indices] = self.act_moving_average * self.cur_targets[:, - self.actuated_dof_indices] + (1.0 - self.act_moving_average) * self.prev_targets[:, self.actuated_dof_indices] - self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(self.cur_targets[:, self.actuated_dof_indices], - self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + self.actuated_dof_indices] + ( + 1.0 - 
self.act_moving_average) * self.prev_targets[ + :, + self.actuated_dof_indices] + self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp( + self.cur_targets[:, self.actuated_dof_indices], + self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices] self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.cur_targets)) @@ -702,9 +760,11 @@ def pre_physics_step(self, actions): # apply new forces force_indices = (torch.rand(self.num_envs, device=self.device) < self.random_force_prob).nonzero() self.rb_forces[force_indices, self.object_rb_handles, :] = torch.randn( - self.rb_forces[force_indices, self.object_rb_handles, :].shape, device=self.device) * self.object_rb_masses * self.force_scale + self.rb_forces[force_indices, self.object_rb_handles, :].shape, + device=self.device) * self.object_rb_masses * self.force_scale - self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.rb_forces), None, gymapi.LOCAL_SPACE) + self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.rb_forces), None, + gymapi.LOCAL_SPACE) def post_physics_step(self): self.progress_buf += 1 @@ -719,23 +779,36 @@ def post_physics_step(self): self.gym.refresh_rigid_body_state_tensor(self.sim) for i in range(self.num_envs): - targetx = (self.goal_pos[i] + quat_apply(self.goal_rot[i], to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() - targety = (self.goal_pos[i] + quat_apply(self.goal_rot[i], to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() - targetz = (self.goal_pos[i] + quat_apply(self.goal_rot[i], to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() + targetx = (self.goal_pos[i] + quat_apply(self.goal_rot[i], + to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() + targety = (self.goal_pos[i] + quat_apply(self.goal_rot[i], + to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() + targetz = (self.goal_pos[i] + quat_apply(self.goal_rot[i], + to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() p0 = self.goal_pos[i].cpu().numpy() + self.goal_displacement_tensor.cpu().numpy() - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], targetx[0], targetx[1], targetx[2]], [0.85, 0.1, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], targety[0], targety[1], targety[2]], [0.1, 0.85, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], targetz[0], targetz[1], targetz[2]], [0.1, 0.1, 0.85]) - - objectx = (self.object_pos[i] + quat_apply(self.object_rot[i], to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() - objecty = (self.object_pos[i] + quat_apply(self.object_rot[i], to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() - objectz = (self.object_pos[i] + quat_apply(self.object_rot[i], to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], targetx[0], targetx[1], targetx[2]], [0.85, 0.1, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], targety[0], targety[1], targety[2]], [0.1, 0.85, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], targetz[0], targetz[1], targetz[2]], [0.1, 0.1, 0.85]) + + objectx = (self.object_pos[i] + quat_apply(self.object_rot[i], + to_torch([1, 0, 0], device=self.device) * 
0.2)).cpu().numpy() + objecty = (self.object_pos[i] + quat_apply(self.object_rot[i], + to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() + objectz = (self.object_pos[i] + quat_apply(self.object_rot[i], + to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() p0 = self.object_pos[i].cpu().numpy() - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], objectx[0], objectx[1], objectx[2]], [0.85, 0.1, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], objecty[0], objecty[1], objecty[2]], [0.1, 0.85, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], objectz[0], objectz[1], objectz[2]], [0.1, 0.1, 0.85]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], objectx[0], objectx[1], objectx[2]], [0.85, 0.1, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], objecty[0], objecty[1], objecty[2]], [0.1, 0.85, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], objectz[0], objectz[1], objectz[2]], [0.1, 0.1, 0.85]) + ##################################################################### ###=========================jit functions=========================### @@ -744,12 +817,12 @@ def post_physics_step(self): @torch.jit.script def compute_hand_reward( - rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes, - max_episode_length: float, object_pos, object_rot, target_pos, target_rot, - dist_reward_scale: float, rot_reward_scale: float, rot_eps: float, - actions, action_penalty_scale: float, - success_tolerance: float, reach_goal_bonus: float, fall_dist: float, - fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool + rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes, + max_episode_length: float, object_pos, object_rot, target_pos, target_rot, + dist_reward_scale: float, rot_reward_scale: float, rot_eps: float, + actions, action_penalty_scale: float, + success_tolerance: float, reach_goal_bonus: float, fall_dist: float, + fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool ): # Distance from the hand to the object goal_dist = torch.norm(object_pos - target_pos, p=2, dim=-1) @@ -762,7 +835,7 @@ def compute_hand_reward( rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0)) dist_rew = goal_dist * dist_reward_scale - rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale + rot_rew = 1.0 / (torch.abs(rot_dist) + rot_eps) * rot_reward_scale action_penalty = torch.sum(actions ** 2, dim=-1) @@ -783,7 +856,8 @@ def compute_hand_reward( resets = torch.where(goal_dist >= fall_dist, torch.ones_like(reset_buf), reset_buf) if max_consecutive_successes > 0: # Reset progress buffer on goal envs if max_consecutive_successes > 0 - progress_buf = torch.where(torch.abs(rot_dist) <= success_tolerance, torch.zeros_like(progress_buf), progress_buf) + progress_buf = torch.where(torch.abs(rot_dist) <= success_tolerance, torch.zeros_like(progress_buf), + progress_buf) resets = torch.where(successes >= max_consecutive_successes, torch.ones_like(resets), resets) resets = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(resets), resets) @@ -794,7 +868,8 @@ def compute_hand_reward( num_resets = torch.sum(resets) finished_cons_successes = torch.sum(successes * resets.float()) - cons_successes = torch.where(num_resets > 0, av_factor*finished_cons_successes/num_resets + 
(1.0 - av_factor)*consecutive_successes, consecutive_successes) + cons_successes = torch.where(num_resets > 0, av_factor * finished_cons_successes / num_resets + ( + 1.0 - av_factor) * consecutive_successes, consecutive_successes) return reward, resets, goal_resets, progress_buf, successes, cons_successes diff --git a/rofunc/learning/RofuncRL/tasks/trifinger.py b/rofunc/learning/RofuncRL/tasks/trifinger.py index f6f632acf..d5d4aa83c 100644 --- a/rofunc/learning/RofuncRL/tasks/trifinger.py +++ b/rofunc/learning/RofuncRL/tasks/trifinger.py @@ -26,30 +26,26 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import numpy as np import os -import torch +from collections import OrderedDict -from isaacgym import gymtorch from isaacgym import gymapi -from isaacgym.torch_utils import * - -from collections import OrderedDict +from isaacgym import gymtorch project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -from isaacgymenvs.utils.torch_jit_utils import * -from isaacgymenvs.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask from types import SimpleNamespace from collections import deque from typing import Deque, Dict, Tuple, Union - # python import enum import numpy as np + # ################### # # Dimensions of robot # # ################### # @@ -85,6 +81,7 @@ class TrifingerDimensions(enum.Enum): ObjectPoseDim = 7 ObjectVelocityDim = 6 + # ################# # # Different objects # # ################# # @@ -176,7 +173,6 @@ def __compute(self): class Trifinger(VecTask): - # constants # directory where assets for the simulator are present _trifinger_assets_dir = os.path.join(project_dir, "../", "assets", "trifinger") @@ -345,7 +341,7 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.state_spec = self.obs_spec self.action_spec = { - "command": self.action_dim + "command": self.action_dim } self.cfg["env"]["numObservations"] = sum(self.obs_spec.values()) @@ -355,7 +351,6 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.randomize = self.cfg["task"]["randomize"] self.randomization_params = self.cfg["task"]["randomization_params"] - # define prims present in the scene prim_names = ["robot", "table", "boundary", "object", "goal_object"] # mapping from name to asset instance @@ -374,14 +369,15 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir f'finger_middle_to_lower_joint_{finger_pos}'] self._robot_dof_indices = OrderedDict.fromkeys(robot_dof_names, None) - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) if self.viewer != None: cam_pos = gymapi.Vec3(0.7, 0.0, 0.7) cam_target = gymapi.Vec3(0.0, 0.0, 0.0) self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) - # change constant buffers from numpy/lists into torch tensors # limits for robot for limit_name in self._robot_limits: @@ -418,7 +414,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, 
headless, vir self._ft_sensors_values = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, num_ft_dims) dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) - self._dof_torque = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, self._dims.JointTorqueDim.value) + self._dof_torque = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, + self._dims.JointTorqueDim.value) # get gym GPU state tensors actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim) @@ -460,11 +457,10 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self._successes_pos = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) self._successes_quat = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) - self.__configure_mdp_spaces() def create_sim(self): - self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 + self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() @@ -528,7 +524,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): # define lower and upper region bound for each environment env_lower_bound = gymapi.Vec3(-self.cfg["env"]["envSpacing"], -self.cfg["env"]["envSpacing"], 0.0) - env_upper_bound = gymapi.Vec3(self.cfg["env"]["envSpacing"], self.cfg["env"]["envSpacing"], self.cfg["env"]["envSpacing"]) + env_upper_bound = gymapi.Vec3(self.cfg["env"]["envSpacing"], self.cfg["env"]["envSpacing"], + self.cfg["env"]["envSpacing"]) num_envs_per_row = int(np.sqrt(self.num_envs)) # initialize gym indices buffer as a list # note: later the list is converted to torch tensor for ease in interfacing with IsaacGym. @@ -549,26 +546,26 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True) # add trifinger robot to environment trifinger_actor = self.gym.create_actor(env_ptr, self.gym_assets["robot"], gymapi.Transform(), - "robot", env_index, 0, 0) + "robot", env_index, 0, 0) trifinger_idx = self.gym.get_actor_index(env_ptr, trifinger_actor, gymapi.DOMAIN_SIM) # add table to environment table_handle = self.gym.create_actor(env_ptr, self.gym_assets["table"], gymapi.Transform(), - "table", env_index, 1, 0) + "table", env_index, 1, 0) table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM) # add stage to environment boundary_handle = self.gym.create_actor(env_ptr, self.gym_assets["boundary"], gymapi.Transform(), - "boundary", env_index, 1, 0) + "boundary", env_index, 1, 0) boundary_idx = self.gym.get_actor_index(env_ptr, boundary_handle, gymapi.DOMAIN_SIM) # add object to environment object_handle = self.gym.create_actor(env_ptr, self.gym_assets["object"], gymapi.Transform(), - "object", env_index, 0, 0) + "object", env_index, 0, 0) object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM) # add goal object to environment goal_handle = self.gym.create_actor(env_ptr, self.gym_assets["goal_object"], gymapi.Transform(), - "goal_object", env_index + self.num_envs, 0, 0) + "goal_object", env_index + self.num_envs, 0, 0) goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM) # change settings of DOF self.gym.set_actor_dof_properties(env_ptr, trifinger_actor, robot_dof_props) @@ -621,11 +618,11 @@ def __configure_mdp_spaces(self): object_obs_low = torch.cat([ self._object_limits["position"].low, self._object_limits["orientation"].low, - ]*2) + ] * 2) 
object_obs_high = torch.cat([ self._object_limits["position"].high, self._object_limits["orientation"].high, - ]*2) + ] * 2) # Note: This is order sensitive. self._observations_scale.low = torch.cat([ @@ -702,14 +699,14 @@ def __configure_mdp_spaces(self): raise AssertionError(msg) # print the scaling print(f'MDP Raw observation bounds\n' - f'\tLow: {self._observations_scale.low}\n' - f'\tHigh: {self._observations_scale.high}') + f'\tLow: {self._observations_scale.low}\n' + f'\tHigh: {self._observations_scale.high}') print(f'MDP Raw state bounds\n' - f'\tLow: {self._states_scale.low}\n' - f'\tHigh: {self._states_scale.high}') + f'\tLow: {self._states_scale.low}\n' + f'\tHigh: {self._states_scale.high}') print(f'MDP Raw action bounds\n' - f'\tLow: {self._action_scale.low}\n' - f'\tHigh: {self._action_scale.high}') + f'\tLow: {self._action_scale.low}\n' + f'\tHigh: {self._action_scale.high}') def compute_reward(self, actions): self.rew_buf[:] = 0. @@ -734,7 +731,7 @@ def compute_reward(self, actions): self.cfg["env"]["reward_terms"]["keypoints_dist"]["activate"] ) - self.extras.update({"env/rewards/"+k: v.mean() for k, v in log_dict.items()}) + self.extras.update({"env/rewards/" + k: v.mean() for k, v in log_dict.items()}) def compute_observations(self): # refresh memory buffers @@ -749,8 +746,10 @@ def compute_observations(self): tip_wrenches = self._ft_sensors_values else: - joint_torques = torch.zeros(self.num_envs, self._dims.JointTorqueDim.value, dtype=torch.float32, device=self.device) - tip_wrenches = torch.zeros(self.num_envs, self._dims.NumFingers.value * self._dims.WrenchDim.value, dtype=torch.float32, device=self.device) + joint_torques = torch.zeros(self.num_envs, self._dims.JointTorqueDim.value, dtype=torch.float32, + device=self.device) + tip_wrenches = torch.zeros(self.num_envs, self._dims.NumFingers.value * self._dims.WrenchDim.value, + dtype=torch.float32, device=self.device) # extract frame handles fingertip_handles_indices = list(self._fingertips_handles.values()) @@ -826,13 +825,13 @@ def reset_idx(self, env_ids): # D) Set values into simulator # -- DOF self.gym.set_dof_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(robot_indices), len(robot_indices)) + gymtorch.unwrap_tensor(robot_indices), len(robot_indices)) # -- actor root states self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._actors_root_state), - gymtorch.unwrap_tensor(all_indices), len(all_indices)) + gymtorch.unwrap_tensor(all_indices), len(all_indices)) def _sample_robot_state(self, instances: torch.Tensor, distribution: str = 'default', - dof_pos_stddev: float = 0.0, dof_vel_stddev: float = 0.0): + dof_pos_stddev: float = 0.0, dof_vel_stddev: float = 0.0): """Samples the robot DOF state based on the settings. 
Type of robot initial state distribution: ["default", "random"] @@ -1083,7 +1082,7 @@ def _check_termination(self): # Check for distance within tolerance goal_orientation_reset = torch.le(object_goal_orientation_dist, termination_config["success"]["orientation_tolerance"]) - self._step_info['env/current_orientation_goal/per_env'] = np.mean(goal_orientation_reset.float().cpu().numpy()) + self._step_info['env/current_orientation_goal/per_env'] = np.mean(goal_orientation_reset.float().cpu().numpy()) if self.cfg["env"]['task_difficulty'] < 4: # Check for task completion if position goal is within a threshold @@ -1099,7 +1098,6 @@ def _check_termination(self): self._successes_pos = goal_position_reset self._successes_quat = goal_orientation_reset - """ Helper functions - define assets """ @@ -1130,7 +1128,7 @@ def __define_robot_asset(self): robot_asset_options.use_physx_armature = True # load tri-finger asset trifinger_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._robot_urdf_file, robot_asset_options) + self._robot_urdf_file, robot_asset_options) # set the link properties for the robot # Ref: https://github.com/rr-learning/rrc_simulation/blob/master/python/rrc_simulation/sim_finger.py#L563 trifinger_props = self.gym.get_asset_rigid_shape_properties(trifinger_asset) @@ -1142,7 +1140,7 @@ def __define_robot_asset(self): # extract the frame handles for frame_name in self._fingertips_handles.keys(): self._fingertips_handles[frame_name] = self.gym.find_asset_rigid_body_index(trifinger_asset, - frame_name) + frame_name) # check valid handle if self._fingertips_handles[frame_name] == gymapi.INVALID_HANDLE: msg = f"Invalid handle received for frame: `{frame_name}`." @@ -1174,7 +1172,7 @@ def __define_table_asset(self): # load stage asset table_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._table_urdf_file, table_asset_options) + self._table_urdf_file, table_asset_options) # set stage properties table_props = self.gym.get_asset_rigid_shape_properties(table_asset) # iterate over each mesh @@ -1204,7 +1202,7 @@ def __define_boundary_asset(self): # load stage asset boundary_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._boundary_urdf_file, boundary_asset_options) + self._boundary_urdf_file, boundary_asset_options) # set stage properties boundary_props = self.gym.get_asset_rigid_shape_properties(boundary_asset) @@ -1222,7 +1220,7 @@ def __define_object_asset(self): object_asset_options.flip_visual_attachments = True # load object asset object_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._object_urdf_file, object_asset_options) + self._object_urdf_file, object_asset_options) # set object properties # Ref: https://github.com/rr-learning/rrc_simulation/blob/master/python/rrc_simulation/collision_objects.py#L96 object_props = self.gym.get_asset_rigid_shape_properties(object_asset) @@ -1245,7 +1243,7 @@ def __define_goal_object_asset(self): object_asset_options.flip_visual_attachments = True # load object asset goal_object_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._object_urdf_file, object_asset_options) + self._object_urdf_file, object_asset_options) # return the asset return goal_object_asset @@ -1254,12 +1252,13 @@ def env_steps_count(self) -> int: """Returns the total number of environment steps aggregated across parallel environments.""" return self.gym.get_frame_count(self.sim) * self.num_envs + ##################################################################### 
 ###=========================jit functions=========================###
 #####################################################################
 
 @torch.jit.script
-def lgsk_kernel(x: torch.Tensor, scale: float = 50.0, eps:float=2) -> torch.Tensor:
+def lgsk_kernel(x: torch.Tensor, scale: float = 50.0, eps: float = 2) -> torch.Tensor:
     """Defines logistic kernel function to bound input to [-0.25, 0)
 
     Ref: https://arxiv.org/abs/1901.08652 (page 15)
 
@@ -1275,9 +1274,9 @@ def lgsk_kernel(x: torch.Tensor, scale: float = 50.0, eps:float=2) -> torch.Tens
     scaled = x * scale
     return 1.0 / (scaled.exp() + eps + (-scaled).exp())
 
+
 @torch.jit.script
 def gen_keypoints(pose: torch.Tensor, num_keypoints: int = 8, size: Tuple[float, float, float] = (0.065, 0.065, 0.065)):
-
     num_envs = pose.shape[0]
 
     keypoints_buf = torch.ones(num_envs, num_keypoints, 3, dtype=torch.float32, device=pose.device)
@@ -1290,6 +1289,7 @@ def gen_keypoints(pose: torch.Tensor, num_keypoints: int = 8, size: Tuple[float,
         keypoints_buf[:, i, :] = local_to_world_space(corner, pose)
     return keypoints_buf
 
+
 @torch.jit.script
 def compute_trifinger_reward(
         obs_buf: torch.Tensor,
@@ -1309,7 +1309,6 @@ def compute_trifinger_reward(
         last_fingertip_state: torch.Tensor,
         use_keypoints: bool
 ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
-
     ft_sched_start = 0
     ft_sched_end = 5e7
 
@@ -1360,7 +1359,7 @@ def compute_trifinger_reward(
     quat_b = object_goal_poses_buf[:, 3:7]
 
     angles = quat_diff_rad(quat_a, quat_b)
-    object_rot_reward = object_rot_weight * dt / (3. * torch.abs(angles) + 0.01)
+    object_rot_reward = object_rot_weight * dt / (3. * torch.abs(angles) + 0.01)
 
     pose_reward = object_dist_reward + object_rot_reward
 
@@ -1396,13 +1395,12 @@ def compute_trifinger_observations_states(
         joint_torques: torch.Tensor,
         tip_wrenches: torch.Tensor
 ):
-
     num_envs = dof_position.shape[0]
 
     obs_buf = torch.cat([
         dof_position,
         dof_velocity,
-        object_state[:, 0:7], # pose
+        object_state[:, 0:7],  # pose
         object_goal_poses,
         actions
     ], dim=-1)
@@ -1410,7 +1408,7 @@ def compute_trifinger_observations_states(
     if asymmetric_obs:
         states_buf = torch.cat([
             obs_buf,
-            object_state[:, 7:13], # linear / angular velocity
+            object_state[:, 7:13],  # linear / angular velocity
             fingertip_state.reshape(num_envs, -1),
             joint_torques,
             tip_wrenches
@@ -1420,6 +1418,7 @@ def compute_trifinger_observations_states(
 
     return obs_buf, states_buf
 
+
 """
 Sampling of cuboidal object
 """
@@ -1470,8 +1469,9 @@ def random_orientation(num: int, device: str) -> torch.Tensor:
 
     return quat
 
+
 @torch.jit.script
-def random_orientation_within_angle(num: int, device:str, base: torch.Tensor, max_angle: float):
+def random_orientation_within_angle(num: int, device: str, base: torch.Tensor, max_angle: float):
     """ Generates random quaternions within max_angle of base
     Ref: https://math.stackexchange.com/a/3448434
     """
@@ -1479,13 +1479,13 @@ def random_orientation_within_angle(num: int, device:str, base: torch.Tensor, ma
 
     rand = torch.rand((num, 3), dtype=torch.float, device=device)
 
-    c = torch.cos(rand[:, 0]*max_angle)
-    n = torch.sqrt((1.-c)/2.)
+    c = torch.cos(rand[:, 0] * max_angle)
+    n = torch.sqrt((1. - c) / 2.)
 
-    quat[:, 3] = torch.sqrt((1+c)/2.)
-    quat[:, 2] = (rand[:, 1]*2.-1.) * n
-    quat[:, 0] = (torch.sqrt(1-quat[:, 2]**2.) * torch.cos(2*np.pi*rand[:, 2])) * n
-    quat[:, 1] = (torch.sqrt(1-quat[:, 2]**2.) * torch.sin(2*np.pi*rand[:, 2])) * n
+    quat[:, 3] = torch.sqrt((1 + c) / 2.)
+    quat[:, 2] = (rand[:, 1] * 2. - 1.) * n
+    quat[:, 0] = (torch.sqrt(1 - quat[:, 2] ** 2.)
                   * torch.cos(2 * np.pi * rand[:, 2])) * n
+    quat[:, 1] = (torch.sqrt(1 - quat[:, 2] ** 2.) * torch.sin(2 * np.pi * rand[:, 2])) * n
 
     # floating point errors can cause it to be slightly off, re-normalise
     quat = torch.nn.functional.normalize(quat, p=2., dim=-1, eps=1e-12)
@@ -1503,6 +1503,7 @@ def random_angular_vel(num: int, device: str, magnitude_stdev: float) -> torch.T
     magnitude *= magnitude_stdev
     return magnitude * axis
 
+
 @torch.jit.script
 def random_yaw_orientation(num: int, device: str) -> torch.Tensor:
     """Returns sampled rotation around z-axis."""
diff --git a/rofunc/utils/oslab/path.py b/rofunc/utils/oslab/path.py
index 06fc9fabb..25304eeb5 100644
--- a/rofunc/utils/oslab/path.py
+++ b/rofunc/utils/oslab/path.py
@@ -55,3 +55,13 @@ def check_ckpt_exist(ckpt_name):
         return True
     else:
         return False
+
+
+def is_absl_path(path):
+    """
+    Check if the path is an absolute path.
+
+    :param path: the path to be checked
+    :return: True if the path is an absolute path, False otherwise
+    """
+    return os.path.isabs(path)
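A minimal usage sketch for the new `is_absl_path` helper added to `rofunc/utils/oslab/path.py`. The direct module import mirrors the file touched above; the sample paths and POSIX-style results are illustrative assumptions, not part of the patch:

```python
# Sketch only: the sample paths below are made up for illustration and assume a
# POSIX filesystem (on Windows, drive-letter paths are what count as absolute).
from rofunc.utils.oslab.path import is_absl_path

print(is_absl_path("/home/user/motions/demo_motion.npy"))  # True: already absolute
print(is_absl_path("examples/data/demo_motion.npy"))       # False: relative to the CWD
```

A typical use is to branch on the result before resolving relative paths against the package root (for example via the existing `get_rofunc_path()`), so callers can accept either form.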