diff --git a/doc/source/lfd/RofuncRL/ASE.md b/doc/source/lfd/RofuncRL/ASE.md index 3940b0ee6..00b73bee2 100644 --- a/doc/source/lfd/RofuncRL/ASE.md +++ b/doc/source/lfd/RofuncRL/ASE.md @@ -65,7 +65,7 @@ python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidASESt ### Motion visualization -Visualize the motion `reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy` by using `HumanoidViewMotion`. +If you want to visualize a motion, you can use the `HumanoidViewMotion` task. For example, the following command visualizes the motion `reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy`: ```shell python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task HumanoidViewMotion --motion_file reallusion_sword_shield/RL_Avatar_Atk_2xCombo01_Motion.npy --inference --headless=False diff --git a/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py b/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py index 8ad4d9026..eef041751 100644 --- a/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py +++ b/rofunc/learning/RofuncRL/tasks/amp/humanoid_amp_base.py @@ -33,8 +33,8 @@ from isaacgym import gymtorch from rofunc.utils.oslab.path import get_rofunc_path -from ..base.vec_task import VecTask -from ..utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * DOF_BODY_IDS = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14] DOF_OFFSETS = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28] diff --git a/rofunc/learning/RofuncRL/tasks/ant.py b/rofunc/learning/RofuncRL/tasks/ant.py index df6c2e766..548bdaeda 100644 --- a/rofunc/learning/RofuncRL/tasks/ant.py +++ b/rofunc/learning/RofuncRL/tasks/ant.py @@ -28,10 +28,11 @@ import os -from isaacgym import gymtorch from isaacgym import gymapi -from .base.vec_task import VecTask -from .utils.torch_jit_utils import * +from isaacgym import gymtorch + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * from rofunc.utils.oslab.path import get_rofunc_path @@ -64,7 +65,9 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.cfg["env"]["numObservations"] = 60 self.cfg["env"]["numActions"] = 8 - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) if self.viewer != None: cam_pos = gymapi.Vec3(50.0, 25.0, 2.4) @@ -93,7 +96,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.initial_dof_pos = torch.zeros_like(self.dof_pos, device=self.device, dtype=torch.float) zero_tensor = torch.tensor([0.0], device=self.device) self.initial_dof_pos = torch.where(self.dof_limits_lower > zero_tensor, self.dof_limits_lower, - torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, self.initial_dof_pos)) + torch.where(self.dof_limits_upper < zero_tensor, self.dof_limits_upper, + self.initial_dof_pos)) self.initial_dof_vel = torch.zeros_like(self.dof_vel, device=self.device, dtype=torch.float) # initialize some data used later on @@ -107,11 +111,11 @@ def __init__(self, cfg, rl_device,
sim_device, graphics_device_id, headless, vir self.targets = to_torch([1000, 0, 0], device=self.device).repeat((self.num_envs, 1)) self.target_dirs = to_torch([1, 0, 0], device=self.device).repeat((self.num_envs, 1)) self.dt = self.cfg["sim"]["dt"] - self.potentials = to_torch([-1000./self.dt], device=self.device).repeat(self.num_envs) + self.potentials = to_torch([-1000. / self.dt], device=self.device).repeat(self.num_envs) self.prev_potentials = self.potentials.clone() def create_sim(self): - self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 + self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() @@ -162,7 +166,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(*get_axis_params(0.44, self.up_axis_idx)) - self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], device=self.device) + self.start_rotation = torch.tensor([start_pose.r.x, start_pose.r.y, start_pose.r.z, start_pose.r.w], + device=self.device) self.torso_index = 0 self.num_bodies = self.gym.get_asset_rigid_body_count(ant_asset) @@ -208,7 +213,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) for i in range(len(extremity_names)): - self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], extremity_names[i]) + self.extremities_index[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.ant_handles[0], + extremity_names[i]) def compute_reward(self, actions): self.rew_buf[:], self.reset_buf[:] = compute_ant_reward( @@ -232,10 +238,11 @@ def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_force_sensor_tensor(self.sim) - #print("Feet forces and torques: ", self.vec_sensor_tensor[0, :]) + # print("Feet forces and torques: ", self.vec_sensor_tensor[0, :]) # print(self.vec_sensor_tensor.shape) - self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[:] = compute_ant_observations( + self.obs_buf[:], self.potentials[:], self.prev_potentials[:], self.up_vec[:], self.heading_vec[ + :] = compute_ant_observations( self.obs_buf, self.root_states, self.targets, self.potentials, self.inv_start_rot, self.dof_pos, self.dof_vel, self.dof_limits_lower, self.dof_limits_upper, self.dof_vel_scale, @@ -250,7 +257,8 @@ def reset_idx(self, env_ids): positions = torch_rand_float(-0.2, 0.2, (len(env_ids), self.num_dof), device=str(self.device)) velocities = torch_rand_float(-0.1, 0.1, (len(env_ids), self.num_dof), device=str(self.device)) - self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, self.dof_limits_upper) + self.dof_pos[env_ids] = tensor_clamp(self.initial_dof_pos[env_ids] + positions, self.dof_limits_lower, + self.dof_limits_upper) self.dof_vel[env_ids] = velocities env_ids_int32 = env_ids.to(dtype=torch.int32) @@ -299,16 +307,19 @@ def post_physics_step(self): origin = self.gym.get_env_origin(self.envs[i]) pose = self.root_states[:, 0:3][i].cpu().numpy() glob_pos = gymapi.Vec3(origin.x + pose[0], origin.y + pose[1], origin.z + pose[2]) - points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(), - glob_pos.y + 4 * self.heading_vec[i, 
1].cpu().numpy(), - glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()]) + points.append( + [glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.heading_vec[i, 0].cpu().numpy(), + glob_pos.y + 4 * self.heading_vec[i, 1].cpu().numpy(), + glob_pos.z + 4 * self.heading_vec[i, 2].cpu().numpy()]) colors.append([0.97, 0.1, 0.06]) - points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(), glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(), + points.append([glob_pos.x, glob_pos.y, glob_pos.z, glob_pos.x + 4 * self.up_vec[i, 0].cpu().numpy(), + glob_pos.y + 4 * self.up_vec[i, 1].cpu().numpy(), glob_pos.z + 4 * self.up_vec[i, 2].cpu().numpy()]) colors.append([0.05, 0.99, 0.04]) self.gym.add_lines(self.viewer, None, self.num_envs * 2, points, colors) + ##################################################################### ###=========================jit functions=========================### ##################################################################### @@ -316,20 +327,20 @@ def post_physics_step(self): @torch.jit.script def compute_ant_reward( - obs_buf, - reset_buf, - progress_buf, - actions, - up_weight, - heading_weight, - potentials, - prev_potentials, - actions_cost_scale, - energy_cost_scale, - joints_at_limit_cost_scale, - termination_height, - death_cost, - max_episode_length + obs_buf, + reset_buf, + progress_buf, + actions, + up_weight, + heading_weight, + potentials, + prev_potentials, + actions_cost_scale, + energy_cost_scale, + joints_at_limit_cost_scale, + termination_height, + death_cost, + max_episode_length ): # type: (Tensor, Tensor, Tensor, Tensor, float, float, Tensor, Tensor, float, float, float, float, float, float) -> Tuple[Tensor, Tensor] @@ -351,10 +362,11 @@ def compute_ant_reward( progress_reward = potentials - prev_potentials total_reward = progress_reward + alive_reward + up_reward + heading_reward - \ - actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale + actions_cost_scale * actions_cost - energy_cost_scale * electricity_cost - dof_at_limit_cost * joints_at_limit_cost_scale # adjust reward for fallen agents - total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost, total_reward) + total_reward = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(total_reward) * death_cost, + total_reward) # reset agents reset = torch.where(obs_buf[:, 0] < termination_height, torch.ones_like(reset_buf), reset_buf) @@ -397,4 +409,4 @@ def compute_ant_observations(obs_buf, root_states, targets, potentials, dof_vel * dof_vel_scale, sensor_force_torques.view(-1, 24) * contact_force_scale, actions), dim=-1) - return obs, potentials, prev_potentials_new, up_vec, heading_vec \ No newline at end of file + return obs, potentials, prev_potentials_new, up_vec, heading_vec diff --git a/rofunc/learning/RofuncRL/tasks/anymal.py b/rofunc/learning/RofuncRL/tasks/anymal.py index adfd23539..91abec1db 100644 --- a/rofunc/learning/RofuncRL/tasks/anymal.py +++ b/rofunc/learning/RofuncRL/tasks/anymal.py @@ -33,7 +33,7 @@ from isaacgym import gymtorch from isaacgym.torch_utils import * -from .base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask class Anymal(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/anymal_terrain.py b/rofunc/learning/RofuncRL/tasks/anymal_terrain.py index 9f7a2d449..03844e01f 100644 --- a/rofunc/learning/RofuncRL/tasks/anymal_terrain.py +++ 
b/rofunc/learning/RofuncRL/tasks/anymal_terrain.py @@ -31,7 +31,7 @@ from isaacgym import gymtorch from isaacgym.torch_utils import * -from .base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask class AnymalTerrain(VecTask): @@ -358,7 +358,7 @@ def compute_reward(self): # stumbling penalty stumble = (torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) > 5.) * ( - torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.) + torch.abs(self.contact_forces[:, self.feet_indices, 2]) < 1.) rew_stumble = torch.sum(stumble, dim=1) * self.rew_scales["stumble"] # action rate penalty @@ -456,7 +456,7 @@ def update_terrain_level(self, env_ids): return distance = torch.norm(self.root_states[env_ids, :2] - self.env_origins[env_ids, :2], dim=1) self.terrain_levels[env_ids] -= 1 * ( - distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25) + distance < torch.norm(self.commands[env_ids, :2]) * self.max_episode_length_s * 0.25) self.terrain_levels[env_ids] += 1 * (distance > self.terrain.env_length / 2) self.terrain_levels[env_ids] = torch.clip(self.terrain_levels[env_ids], 0) % self.terrain.env_rows self.env_origins[env_ids] = self.terrain_origins[self.terrain_levels[env_ids], self.terrain_types[env_ids]] @@ -469,7 +469,7 @@ def pre_physics_step(self, actions): self.actions = actions.clone().to(self.device) for i in range(self.decimation): torques = torch.clip(self.Kp * ( - self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel, + self.action_scale * self.actions + self.default_dof_pos - self.dof_pos) - self.Kd * self.dof_vel, -80., 80.) self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(torques)) self.torques = torques.view(self.torques.shape) @@ -554,7 +554,7 @@ def get_heights(self, env_ids=None): self.height_points[env_ids]) + (self.root_states[env_ids, :3]).unsqueeze(1) else: points = quat_apply_yaw(self.base_quat.repeat(1, self.num_height_points), self.height_points) + ( - self.root_states[:, :3]).unsqueeze(1) + self.root_states[:, :3]).unsqueeze(1) points += self.terrain.border_size points = (points / self.terrain.horizontal_scale).long() diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid.py index b9ee9fae5..649ab846f 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid.py @@ -33,8 +33,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from ..base.vec_task import VecTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class Humanoid(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py index 4e4083382..abdcd3bc2 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp.py @@ -32,9 +32,10 @@ from gym import spaces from isaacgym.torch_utils import * -from .humanoid import Humanoid, dof_to_obs +import rofunc as rf from rofunc.learning.RofuncRL.tasks.ase.motion_lib import MotionLib -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid import Humanoid, dof_to_obs +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidAMP(Humanoid): @@ -61,8 
+62,11 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir virtual_screen_capture=virtual_screen_capture, force_render=force_render) motion_file = cfg['env'].get('motion_file', None) - motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "../../../../../examples/data/amp/" + motion_file) + if rf.oslab.is_absl_path(motion_file): + motion_file_path = motion_file + else: + motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "../../../../examples/data/amp/" + motion_file) self._load_motion(motion_file_path) diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py index ebb0453d1..4013cd57c 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_getup.py @@ -29,7 +29,7 @@ from isaacgym import gymtorch from isaacgym.torch_utils import * -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP class HumanoidAMPGetupTask(HumanoidAMP): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py index cc76e6ef5..5f1564ed3 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_amp_task.py @@ -28,7 +28,7 @@ import torch -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP class HumanoidAMPTask(HumanoidAMP): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py index e039ec2ec..95ad73b11 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_heading.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils TAR_ACTOR_ID = 1 TAR_FACING_ACTOR_ID = 2 diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py index 7be2c5c96..74902b75a 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_location.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidLocationTask(HumanoidAMPTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py index a30546a37..21c9d0a34 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_perturb.py @@ -32,7 +32,7 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP # import env.tasks.humanoid_amp_getup as
humanoid_amp_getup # # import env.tasks.humanoid_strike as humanoid_strike diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py index b0b9a1d05..02fdf3f23 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_reach.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidReachTask(HumanoidAMPTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py index 4b2315960..f05045264 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_strike.py @@ -32,8 +32,8 @@ from isaacgym.torch_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .humanoid_amp_task import HumanoidAMPTask -from ..utils import torch_jit_utils as torch_utils +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp_task import HumanoidAMPTask +from rofunc.learning.RofuncRL.tasks.utils import torch_jit_utils as torch_utils class HumanoidStrikeTask(HumanoidAMPTask): diff --git a/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py b/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py index 870d2f7c3..37f7db4a2 100644 --- a/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py +++ b/rofunc/learning/RofuncRL/tasks/ase/humanoid_view_motion.py @@ -30,7 +30,7 @@ from isaacgym import gymtorch -from .humanoid_amp import HumanoidAMP +from rofunc.learning.RofuncRL.tasks.ase.humanoid_amp import HumanoidAMP class HumanoidViewMotionTask(HumanoidAMP): diff --git a/rofunc/learning/RofuncRL/tasks/base.py b/rofunc/learning/RofuncRL/tasks/base.py deleted file mode 100644 index 7c23cba0b..000000000 --- a/rofunc/learning/RofuncRL/tasks/base.py +++ /dev/null @@ -1,94 +0,0 @@ -""" Define all environments and provide helper functions to load environments. """ - -# OpenAI gym interface -import gym - -from ..util.subproc_vec_env import SubprocVecEnv - -REGISTERED_ENVS = {} - - -def register_env(target_class): - REGISTERED_ENVS[target_class.__name__] = target_class - - -def get_env(name): - """ - Gets the environment class given @name. - """ - if name not in REGISTERED_ENVS: - raise Exception( - "Unknown environment name: {}\nAvailable environments: {}".format( - name, ", ".join(REGISTERED_ENVS) - ) - ) - return REGISTERED_ENVS[name] - - -def make_env(name, config=None): - """ - Creates a new environment instance with @name and @config. - """ - env = get_env(name) - - # get default config if not provided - if config is None: - import argparse - import config.furniture as furniture_config - from util import str2bool - - parser = argparse.ArgumentParser() - furniture_config.add_argument(parser) - parser.add_argument("--seed", type=int, default=123) - parser.add_argument("--debug", type=str2bool, default=False) - - config, unparsed = parser.parse_known_args() - - return env(config) - - -def get_gym_env(env_id, env_kwargs): - env = gym.make(env_id, **env_kwargs) - return env - - -def make_vec_env(env_id, num_env, config=None, env_kwargs=None): - """ - Creates a wrapped SubprocVecEnv using OpenAI gym interface. 
- Unity app will use the port number from @config.port to (@config.port + @num_env - 1). - - Code modified based on - https://github.com/openai/baselines/blob/master/baselines/common/cmd_util.py - - Args: - env_id: environment id registered in in `env/__init__.py`. - num_env: number of environments to launch. - config: general configuration for the environment. - """ - env_kwargs = env_kwargs or {} - - if config is not None: - for key, value in config.__dict__.items(): - env_kwargs[key] = value - - def make_thunk(rank): - new_env_kwargs = env_kwargs.copy() - new_env_kwargs["port"] = env_kwargs["port"] + rank - new_env_kwargs["seed"] = env_kwargs["seed"] + rank - return lambda: get_gym_env(env_id, new_env_kwargs) - - return SubprocVecEnv([make_thunk(i) for i in range(num_env)]) - - -class EnvMeta(type): - """ Meta class for registering environments. """ - - def __new__(meta, name, bases, class_dict): - cls = super().__new__(meta, name, bases, class_dict) - - # List all environments that should not be registered here. - _unregistered_envs = ["FurnitureEnv"] - - if cls.__name__ not in _unregistered_envs: - register_env(cls) - return cls diff --git a/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py b/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py index 3aacdd503..09f121f68 100644 --- a/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py +++ b/rofunc/learning/RofuncRL/tasks/base/curi_base_task.py @@ -15,7 +15,7 @@ from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask class CURIBaseTask(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/cartpole.py b/rofunc/learning/RofuncRL/tasks/cartpole.py index cd9b0847e..74ee95580 100644 --- a/rofunc/learning/RofuncRL/tasks/cartpole.py +++ b/rofunc/learning/RofuncRL/tasks/cartpole.py @@ -26,12 +26,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import numpy as np import os + +import numpy as np import torch +from isaacgym import gymtorch, gymapi -from isaacgym import gymutil, gymtorch, gymapi -from .base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask from rofunc.utils.oslab.path import get_rofunc_path @@ -48,7 +49,9 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.cfg["env"]["numObservations"] = 4 self.cfg["env"]["numActions"] = 1 - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) @@ -71,7 +74,8 @@ def _create_ground_plane(self): def _create_envs(self, num_envs, spacing, num_per_row): # define plane on which environments are initialized - lower = gymapi.Vec3(0.5 * -spacing, -spacing, 0.0) if self.up_axis == 'z' else gymapi.Vec3(0.5 * -spacing, 0.0, -spacing) + lower = gymapi.Vec3(0.5 * -spacing, -spacing, 0.0) if self.up_axis == 'z' else gymapi.Vec3(0.5 * -spacing, 0.0, + -spacing) upper = gymapi.Vec3(0.5 * spacing, spacing, spacing) # get rofunc path from rofunc package metadata @@ -99,7 +103,7 @@ def _create_envs(self, num_envs, spacing, num_per_row): pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0) else: pose.p.y = 2.0 - pose.r = gymapi.Quat(-np.sqrt(2)/2, 0.0, 0.0, np.sqrt(2)/2) + pose.r = gymapi.Quat(-np.sqrt(2) / 2, 0.0, 0.0, np.sqrt(2) / 2) self.cartpole_handles = [] self.envs = [] @@ -176,6 +180,7 @@ def post_physics_step(self): self.compute_observations() self.compute_reward() + ##################################################################### ###=========================jit functions=========================### ##################################################################### diff --git a/rofunc/learning/RofuncRL/tasks/curi_cabinet.py b/rofunc/learning/RofuncRL/tasks/curi_cabinet.py index 7c9d82d0a..7edad6d1d 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_cabinet.py +++ b/rofunc/learning/RofuncRL/tasks/curi_cabinet.py @@ -17,8 +17,8 @@ from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from rofunc.utils.oslab.path import get_rofunc_path from rofunc.learning.RofuncRL.tasks.base.curi_base_task import CURIBaseTask +from rofunc.utils.oslab.path import get_rofunc_path class CURICabinetTask(CURIBaseTask): diff --git a/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py b/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py index df6c596f0..0d1d3f2da 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py +++ b/rofunc/learning/RofuncRL/tasks/curi_cabinet_bimanual.py @@ -16,8 +16,9 @@ from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from rofunc.utils.oslab.path import get_rofunc_path + from rofunc.learning.RofuncRL.tasks.base.curi_base_task import CURIBaseTask +from rofunc.utils.oslab.path import get_rofunc_path class CURICabinetBimanualTask(CURIBaseTask): @@ -237,14 +238,14 @@ def _create_envs(self, num_envs, spacing, num_per_row): # Left arm self.hand_handle_l, self.lfinger_handle_l, self.rfinger_handle_l, self.curi_local_grasp_pos, \ - self.curi_local_grasp_rot, self.curi_grasp_pos_l, 
self.curi_grasp_rot_l, self.curi_lfinger_pos_l, \ - self.curi_rfinger_pos_l, self.curi_lfinger_rot_l, self.curi_rfinger_rot_l \ + self.curi_local_grasp_rot, self.curi_grasp_pos_l, self.curi_grasp_rot_l, self.curi_lfinger_pos_l, \ + self.curi_rfinger_pos_l, self.curi_lfinger_rot_l, self.curi_rfinger_rot_l \ = self.init_data_curi("panda_left_link7", "panda_left_leftfinger", "panda_left_rightfinger", env_ptr, curi_actor) # Right arm self.hand_handle_r, self.lfinger_handle_r, self.rfinger_handle_r, self.curi_local_grasp_pos, \ - self.curi_local_grasp_rot, self.curi_grasp_pos_r, self.curi_grasp_rot_r, self.curi_lfinger_pos_r, \ - self.curi_rfinger_pos_r, self.curi_lfinger_rot_r, self.curi_rfinger_rot_r \ + self.curi_local_grasp_rot, self.curi_grasp_pos_r, self.curi_grasp_rot_r, self.curi_lfinger_pos_r, \ + self.curi_rfinger_pos_r, self.curi_lfinger_rot_r, self.curi_rfinger_rot_r \ = self.init_data_curi("panda_right_link7", "panda_right_leftfinger", "panda_right_rightfinger", env_ptr, curi_actor) @@ -291,7 +292,7 @@ def init_data_curi(self, hand_dof_name, lfinger_dof_name, rfinger_dof_name, env_ curi_rfinger_rot = torch.zeros_like(curi_local_grasp_rot) return hand_handle, lfinger_handle, rfinger_handle, curi_local_grasp_pos, curi_local_grasp_rot, curi_grasp_pos, \ - curi_grasp_rot, curi_lfinger_pos, curi_rfinger_pos, curi_lfinger_rot, curi_rfinger_rot + curi_grasp_rot, curi_lfinger_pos, curi_rfinger_pos, curi_lfinger_rot, curi_rfinger_rot def init_data_cabinet(self, env_ptr, cabinet_actor): self.drawer_handle_top = self.gym.find_actor_rigid_body_handle(env_ptr, cabinet_actor, "drawer_top") diff --git a/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py b/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py index 633e8cdf3..1b13b39f4 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py +++ b/rofunc/learning/RofuncRL/tasks/curi_cabinet_image.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os -import torch + +import tqdm from isaacgym import gymapi, gymtorch from isaacgym.torch_utils import * -import tqdm import rofunc as rf from rofunc.learning.RofuncRL.tasks.base.curi_base_task import CURIBaseTask diff --git a/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py b/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py index f3d622670..dab3bb30e 100644 --- a/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py +++ b/rofunc/learning/RofuncRL/tasks/curi_coffee_stirring.py @@ -14,10 +14,11 @@ import os -from rofunc.utils.oslab.path import get_rofunc_path from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .base.vec_task import VecTask + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.utils.oslab.path import get_rofunc_path class CURICoffeeStirringTask(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/franka_cabinet.py b/rofunc/learning/RofuncRL/tasks/franka_cabinet.py index a32ce67ca..eb73e8ce0 100644 --- a/rofunc/learning/RofuncRL/tasks/franka_cabinet.py +++ b/rofunc/learning/RofuncRL/tasks/franka_cabinet.py @@ -28,10 +28,11 @@ import os -from rofunc.utils.oslab.path import get_rofunc_path from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .base.vec_task import VecTask + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.utils.oslab.path import get_rofunc_path class FrankaCabinetTask(VecTask): @@ -406,7 +407,7 @@ def reset_idx(self, env_ids): # reset franka pos = tensor_clamp( self.franka_default_dof_pos.unsqueeze(0) + 0.25 * ( - torch.rand((len(env_ids), self.num_franka_dofs), device=self.device) - 0.5), + torch.rand((len(env_ids), self.num_franka_dofs), device=self.device) - 0.5), self.franka_dof_lower_limits, self.franka_dof_upper_limits) self.franka_dof_pos[env_ids, :] = pos self.franka_dof_vel[env_ids, :] = torch.zeros_like(self.franka_dof_vel[env_ids]) @@ -602,4 +603,4 @@ def compute_grasp_transforms(hand_rot, hand_pos, franka_local_grasp_rot, franka_ global_drawer_rot, global_drawer_pos = tf_combine( drawer_rot, drawer_pos, drawer_local_grasp_rot, drawer_local_grasp_pos) - return global_franka_rot, global_franka_pos, global_drawer_rot, global_drawer_pos \ No newline at end of file + return global_franka_rot, global_franka_pos, global_drawer_rot, global_drawer_pos diff --git a/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py b/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py index d98c8b35f..bec3f7855 100644 --- a/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py +++ b/rofunc/learning/RofuncRL/tasks/franka_cube_stack.py @@ -31,8 +31,8 @@ from isaacgym import gymapi from isaacgym import gymtorch -from .base.vec_task import VecTask -from .utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * @torch.jit.script diff --git a/rofunc/learning/RofuncRL/tasks/humanoid.py b/rofunc/learning/RofuncRL/tasks/humanoid.py index f57cd1e64..11ea78129 100644 --- a/rofunc/learning/RofuncRL/tasks/humanoid.py +++ b/rofunc/learning/RofuncRL/tasks/humanoid.py @@ -31,9 +31,9 @@ from isaacgym import gymapi from isaacgym import gymtorch +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * from rofunc.utils.oslab.path import get_rofunc_path -from .base.vec_task import VecTask -from .utils.torch_jit_utils import * class HumanoidTask(VecTask): diff --git 
a/rofunc/learning/RofuncRL/tasks/humanoid_amp.py b/rofunc/learning/RofuncRL/tasks/humanoid_amp.py index 155c8d863..9935f479f 100644 --- a/rofunc/learning/RofuncRL/tasks/humanoid_amp.py +++ b/rofunc/learning/RofuncRL/tasks/humanoid_amp.py @@ -32,9 +32,10 @@ from gym import spaces from isaacgym import gymtorch -from .amp.humanoid_amp_base import HumanoidAMPBase, dof_to_obs +import rofunc as rf +from rofunc.learning.RofuncRL.tasks.amp.humanoid_amp_base import HumanoidAMPBase, dof_to_obs from rofunc.learning.RofuncRL.tasks.amp.motion_lib import MotionLib -from .utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * NUM_AMP_OBS_PER_STEP = 13 + 52 + 28 + 12 # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos] @@ -63,8 +64,11 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir virtual_screen_capture=virtual_screen_capture, force_render=force_render) motion_file = cfg['env'].get('motion_file', "amp_humanoid_backflip.npy") - motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "../../../../examples/data/amp/" + motion_file) + if rf.oslab.is_absl_path(motion_file): + motion_file_path = motion_file + else: + motion_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "../../../../examples/data/amp/" + motion_file) self._load_motion(motion_file_path) self.num_amp_obs = self._num_amp_obs_steps * NUM_AMP_OBS_PER_STEP diff --git a/rofunc/learning/RofuncRL/tasks/ikea.py b/rofunc/learning/RofuncRL/tasks/ikea.py index 494b1672d..521f15650 100644 --- a/rofunc/learning/RofuncRL/tasks/ikea.py +++ b/rofunc/learning/RofuncRL/tasks/ikea.py @@ -14,10 +14,11 @@ import os -from rofunc.utils.oslab.path import get_rofunc_path from isaacgym import gymtorch, gymapi from isaacgym.torch_utils import * -from .base.vec_task import VecTask + +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.utils.oslab.path import get_rofunc_path class IKEABaseTask(VecTask): diff --git a/rofunc/learning/RofuncRL/tasks/shadow_hand.py b/rofunc/learning/RofuncRL/tasks/shadow_hand.py index 30fa3613b..8206a5aea 100644 --- a/rofunc/learning/RofuncRL/tasks/shadow_hand.py +++ b/rofunc/learning/RofuncRL/tasks/shadow_hand.py @@ -26,16 +26,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import numpy as np import os -import torch -from isaacgym import gymtorch from isaacgym import gymapi -from isaacgym.torch_utils import * +from isaacgym import gymtorch -from isaacgymenvs.utils.torch_jit_utils import * -from isaacgymenvs.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * class ShadowHand(VecTask): @@ -94,9 +91,12 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir } if "asset" in self.cfg["env"]: - self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock", self.asset_files_dict["block"]) - self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg", self.asset_files_dict["egg"]) - self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen", self.asset_files_dict["pen"]) + self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock", + self.asset_files_dict["block"]) + self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg", + self.asset_files_dict["egg"]) + self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen", + self.asset_files_dict["pen"]) # can be "openai", "full_no_vel", "full", "full_state" self.obs_type = self.cfg["env"]["observationType"] @@ -116,7 +116,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.up_axis = 'z' - self.fingertips = ["robot0:ffdistal", "robot0:mfdistal", "robot0:rfdistal", "robot0:lfdistal", "robot0:thdistal"] + self.fingertips = ["robot0:ffdistal", "robot0:mfdistal", "robot0:rfdistal", "robot0:lfdistal", + "robot0:thdistal"] self.num_fingertips = len(self.fingertips) self.use_vel_obs = False @@ -131,12 +132,14 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.cfg["env"]["numStates"] = num_states self.cfg["env"]["numActions"] = 20 - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) self.dt = self.sim_params.dt control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1) if self.reset_time > 0.0: - self.max_episode_length = int(round(self.reset_time/(control_freq_inv * self.dt))) + self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.dt))) print("Reset time: ", self.reset_time) print("New episode length: ", self.max_episode_length) @@ -155,7 +158,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6) dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) - self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, self.num_shadow_hand_dofs) + self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, + self.num_shadow_hand_dofs) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_dof_state_tensor(self.sim) @@ -177,7 +181,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, 
device=self.device) self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device) - self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs, -1) + self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs, + -1) self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1)) @@ -195,13 +200,14 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.force_decay = to_torch(self.force_decay, dtype=torch.float, device=self.device) self.force_prob_range = to_torch(self.force_prob_range, dtype=torch.float, device=self.device) self.random_force_prob = torch.exp((torch.log(self.force_prob_range[0]) - torch.log(self.force_prob_range[1])) - * torch.rand(self.num_envs, device=self.device) + torch.log(self.force_prob_range[1])) + * torch.rand(self.num_envs, device=self.device) + torch.log( + self.force_prob_range[1])) self.rb_forces = torch.zeros((self.num_envs, self.num_bodies, 3), dtype=torch.float, device=self.device) def create_sim(self): self.dt = self.cfg["sim"]["dt"] - self.up_axis_idx = 2 if self.up_axis == 'z' else 1 # index of up axis: Y=1, Z=2 + self.up_axis_idx = 2 if self.up_axis == 'z' else 1 # index of up axis: Y=1, Z=2 self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() @@ -225,7 +231,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): if "asset" in self.cfg["env"]: # asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root) - shadow_hand_asset_file = os.path.normpath(self.cfg["env"]["asset"].get("assetFileName", shadow_hand_asset_file)) + shadow_hand_asset_file = os.path.normpath( + self.cfg["env"]["asset"].get("assetFileName", shadow_hand_asset_file)) object_asset_file = self.asset_files_dict[self.object_type] @@ -264,8 +271,10 @@ def _create_envs(self, num_envs, spacing, num_per_row): tendon_props[i].damping = t_damping self.gym.set_asset_tendon_properties(shadow_hand_asset, tendon_props) - actuated_dof_names = [self.gym.get_asset_actuator_joint_name(shadow_hand_asset, i) for i in range(self.num_shadow_hand_actuators)] - self.actuated_dof_indices = [self.gym.find_asset_dof_index(shadow_hand_asset, name) for name in actuated_dof_names] + actuated_dof_names = [self.gym.get_asset_actuator_joint_name(shadow_hand_asset, i) for i in + range(self.num_shadow_hand_actuators)] + self.actuated_dof_indices = [self.gym.find_asset_dof_index(shadow_hand_asset, name) for name in + actuated_dof_names] # get shadow_hand dof properties, loaded by Isaac Gym from the MJCF file shadow_hand_dof_props = self.gym.get_asset_dof_properties(shadow_hand_asset) @@ -287,7 +296,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.shadow_hand_dof_default_pos = to_torch(self.shadow_hand_dof_default_pos, device=self.device) self.shadow_hand_dof_default_vel = to_torch(self.shadow_hand_dof_default_vel, device=self.device) - self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in self.fingertips] + self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in + self.fingertips] # create 
fingertip force sensors, if needed if self.obs_type == "full_state" or self.asymmetric_obs: @@ -308,7 +318,7 @@ def _create_envs(self, num_envs, spacing, num_per_row): object_start_pose = gymapi.Transform() object_start_pose.p = gymapi.Vec3() object_start_pose.p.x = shadow_hand_start_pose.p.x - pose_dy, pose_dz = -0.39, 0.10 + pose_dy, pose_dz = -0.39, 0.10 object_start_pose.p.y = shadow_hand_start_pose.p.y + pose_dy object_start_pose.p.z = shadow_hand_start_pose.p.z + pose_dz @@ -339,7 +349,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.object_indices = [] self.goal_object_indices = [] - self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in self.fingertips] + self.fingertip_handles = [self.gym.find_asset_rigid_body_index(shadow_hand_asset, name) for name in + self.fingertips] shadow_hand_rb_count = self.gym.get_asset_rigid_body_count(shadow_hand_asset) object_rb_count = self.gym.get_asset_rigid_body_count(object_asset) @@ -355,10 +366,13 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True) # add hand - collision filter = -1 to use asset collision filters set in mjcf loader - shadow_hand_actor = self.gym.create_actor(env_ptr, shadow_hand_asset, shadow_hand_start_pose, "hand", i, -1, 0) - self.hand_start_states.append([shadow_hand_start_pose.p.x, shadow_hand_start_pose.p.y, shadow_hand_start_pose.p.z, - shadow_hand_start_pose.r.x, shadow_hand_start_pose.r.y, shadow_hand_start_pose.r.z, shadow_hand_start_pose.r.w, - 0, 0, 0, 0, 0, 0]) + shadow_hand_actor = self.gym.create_actor(env_ptr, shadow_hand_asset, shadow_hand_start_pose, "hand", i, -1, + 0) + self.hand_start_states.append( + [shadow_hand_start_pose.p.x, shadow_hand_start_pose.p.y, shadow_hand_start_pose.p.z, + shadow_hand_start_pose.r.x, shadow_hand_start_pose.r.y, shadow_hand_start_pose.r.z, + shadow_hand_start_pose.r.w, + 0, 0, 0, 0, 0, 0]) self.gym.set_actor_dof_properties(env_ptr, shadow_hand_actor, shadow_hand_dof_props) hand_idx = self.gym.get_actor_index(env_ptr, shadow_hand_actor, gymapi.DOMAIN_SIM) self.hand_indices.append(hand_idx) @@ -370,13 +384,15 @@ def _create_envs(self, num_envs, spacing, num_per_row): # add object object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0) self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z, - object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z, object_start_pose.r.w, + object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z, + object_start_pose.r.w, 0, 0, 0, 0, 0, 0]) object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM) self.object_indices.append(object_idx) # add goal object - goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs, 0, 0) + goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs, + 0, 0) goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM) self.goal_object_indices.append(goal_object_idx) @@ -397,7 +413,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): object_rb_props = self.gym.get_actor_rigid_body_properties(env_ptr, object_handle) self.object_rb_masses = [prop.mass for prop in object_rb_props] - self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view(self.num_envs, 13) + self.object_init_state = 
to_torch(self.object_init_state, device=self.device, dtype=torch.float).view( + self.num_envs, 13) self.goal_states = self.object_init_state.clone() self.goal_states[:, self.up_axis_idx] -= 0.04 self.goal_init_state = self.goal_states.clone() @@ -412,8 +429,11 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device) def compute_reward(self, actions): - self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[:], self.consecutive_successes[:] = compute_hand_reward( - self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes, + self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[ + :], self.consecutive_successes[ + :] = compute_hand_reward( + self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, + self.consecutive_successes, self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale, self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty, @@ -429,9 +449,11 @@ def compute_reward(self, actions): # The direct average shows the overall result more quickly, but slightly undershoots long term # policy performance. - print("Direct average consecutive successes = {:.1f}".format(direct_average_successes/(self.total_resets + self.num_envs))) + print("Direct average consecutive successes = {:.1f}".format( + direct_average_successes / (self.total_resets + self.num_envs))) if self.total_resets > 0: - print("Post-Reset average consecutive successes = {:.1f}".format(self.total_successes/self.total_resets)) + print("Post-Reset average consecutive successes = {:.1f}".format( + self.total_successes / self.total_resets)) def compute_observations(self): self.gym.refresh_dof_state_tensor(self.sim) @@ -497,7 +519,8 @@ def compute_fingertip_observations(self, no_vel=False): def compute_full_observations(self, no_vel=False): if no_vel: self.obs_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) self.obs_buf[:, 24:31] = self.object_pose self.obs_buf[:, 31:38] = self.goal_pose @@ -509,8 +532,10 @@ def compute_full_observations(self, no_vel=False): self.obs_buf[:, 57:77] = self.actions else: self.obs_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) - self.obs_buf[:, self.num_shadow_hand_dofs:2*self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_shadow_hand_dofs:2 * self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel self.obs_buf[:, 48:55] = self.object_pose self.obs_buf[:, 55:58] = self.object_linvel @@ -527,27 +552,32 @@ def compute_full_observations(self, no_vel=False): def compute_full_state(self, asymm_obs=False): if asymm_obs: self.states_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) - self.states_buf[:, self.num_shadow_hand_dofs:2*self.num_shadow_hand_dofs] = self.vel_obs_scale * 
self.shadow_hand_dof_vel - self.states_buf[:, 2*self.num_shadow_hand_dofs:3*self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor - - obj_obs_start = 3*self.num_shadow_hand_dofs # 72 + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.states_buf[:, + self.num_shadow_hand_dofs:2 * self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel + self.states_buf[:, + 2 * self.num_shadow_hand_dofs:3 * self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor + + obj_obs_start = 3 * self.num_shadow_hand_dofs # 72 self.states_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose self.states_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel self.states_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel goal_obs_start = obj_obs_start + 13 # 85 self.states_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose - self.states_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot)) + self.states_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, + quat_conjugate(self.goal_rot)) # fingertip observations, state(pose and vel) + force-torque sensors num_ft_states = 13 * self.num_fingertips # 65 num_ft_force_torques = 6 * self.num_fingertips # 30 fingertip_obs_start = goal_obs_start + 11 # 96 - self.states_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(self.num_envs, num_ft_states) + self.states_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape( + self.num_envs, num_ft_states) self.states_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + - num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor # obs_end = 96 + 65 + 30 = 191 # obs_total = obs_end + num_actions = 211 @@ -555,27 +585,32 @@ def compute_full_state(self, asymm_obs=False): self.states_buf[:, obs_end:obs_end + self.num_actions] = self.actions else: self.obs_buf[:, 0:self.num_shadow_hand_dofs] = unscale(self.shadow_hand_dof_pos, - self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits) - self.obs_buf[:, self.num_shadow_hand_dofs:2*self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel - self.obs_buf[:, 2*self.num_shadow_hand_dofs:3*self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor - - obj_obs_start = 3*self.num_shadow_hand_dofs # 72 + self.shadow_hand_dof_lower_limits, + self.shadow_hand_dof_upper_limits) + self.obs_buf[:, + self.num_shadow_hand_dofs:2 * self.num_shadow_hand_dofs] = self.vel_obs_scale * self.shadow_hand_dof_vel + self.obs_buf[:, + 2 * self.num_shadow_hand_dofs:3 * self.num_shadow_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor + + obj_obs_start = 3 * self.num_shadow_hand_dofs # 72 self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel goal_obs_start = obj_obs_start + 13 # 85 self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose - self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot)) + self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = 
quat_mul(self.object_rot, + quat_conjugate(self.goal_rot)) # fingertip observations, state(pose and vel) + force-torque sensors num_ft_states = 13 * self.num_fingertips # 65 num_ft_force_torques = 6 * self.num_fingertips # 30 fingertip_obs_start = goal_obs_start + 11 # 96 - self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape(self.num_envs, num_ft_states) + self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.fingertip_state.reshape( + self.num_envs, num_ft_states) self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states + - num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor + num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor # obs_end = 96 + 65 + 30 = 191 # obs_total = obs_end + num_actions = 211 @@ -585,13 +620,16 @@ def compute_full_state(self, asymm_obs=False): def reset_target_pose(self, env_ids, apply_reset=False): rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device) - new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids]) + new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3] self.goal_states[env_ids, 3:7] = new_rot - self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids, 0:3] + self.goal_displacement_tensor + self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids, + 0:3] + self.goal_displacement_tensor self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7] - self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like(self.root_state_tensor[self.goal_object_indices[env_ids], 7:13]) + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.goal_object_indices[env_ids], 7:13]) if apply_reset: goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32) @@ -617,18 +655,24 @@ def reset_idx(self, env_ids, goal_env_ids): # reset object self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone() self.root_state_tensor[self.object_indices[env_ids], 0:2] = self.object_init_state[env_ids, 0:2] + \ - self.reset_position_noise * rand_floats[:, 0:2] - self.root_state_tensor[self.object_indices[env_ids], self.up_axis_idx] = self.object_init_state[env_ids, self.up_axis_idx] + \ - self.reset_position_noise * rand_floats[:, self.up_axis_idx] - - new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids]) + self.reset_position_noise * rand_floats[:, 0:2] + self.root_state_tensor[self.object_indices[env_ids], self.up_axis_idx] = self.object_init_state[ + env_ids, self.up_axis_idx] + \ + self.reset_position_noise * rand_floats[ + :, + self.up_axis_idx] + + new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids], + self.y_unit_tensor[env_ids]) if self.object_type == "pen": rand_angle_y = torch.tensor(0.3) new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y, - self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids], self.z_unit_tensor[env_ids]) + self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids], + 
self.z_unit_tensor[env_ids]) self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot - self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like(self.root_state_tensor[self.object_indices[env_ids], 7:13]) + self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like( + self.root_state_tensor[self.object_indices[env_ids], 7:13]) object_indices = torch.unique(torch.cat([self.object_indices[env_ids], self.goal_object_indices[env_ids], @@ -638,18 +682,20 @@ def reset_idx(self, env_ids, goal_env_ids): gymtorch.unwrap_tensor(object_indices), len(object_indices)) # reset random force probabilities - self.random_force_prob[env_ids] = torch.exp((torch.log(self.force_prob_range[0]) - torch.log(self.force_prob_range[1])) - * torch.rand(len(env_ids), device=self.device) + torch.log(self.force_prob_range[1])) + self.random_force_prob[env_ids] = torch.exp( + (torch.log(self.force_prob_range[0]) - torch.log(self.force_prob_range[1])) + * torch.rand(len(env_ids), device=self.device) + torch.log(self.force_prob_range[1])) # reset shadow hand delta_max = self.shadow_hand_dof_upper_limits - self.shadow_hand_dof_default_pos delta_min = self.shadow_hand_dof_lower_limits - self.shadow_hand_dof_default_pos - rand_delta = delta_min + (delta_max - delta_min) * 0.5 * (rand_floats[:, 5:5+self.num_shadow_hand_dofs] + 1) + rand_delta = delta_min + (delta_max - delta_min) * 0.5 * (rand_floats[:, 5:5 + self.num_shadow_hand_dofs] + 1) pos = self.shadow_hand_default_dof_pos + self.reset_dof_pos_noise * rand_delta self.shadow_hand_dof_pos[env_ids, :] = pos self.shadow_hand_dof_vel[env_ids, :] = self.shadow_hand_dof_default_vel + \ - self.reset_dof_vel_noise * rand_floats[:, 5+self.num_shadow_hand_dofs:5+self.num_shadow_hand_dofs*2] + self.reset_dof_vel_noise * rand_floats[:, + 5 + self.num_shadow_hand_dofs:5 + self.num_shadow_hand_dofs * 2] self.prev_targets[env_ids, :self.num_shadow_hand_dofs] = pos self.cur_targets[env_ids, :self.num_shadow_hand_dofs] = pos @@ -682,16 +728,28 @@ def pre_physics_step(self, actions): self.actions = actions.clone().to(self.device) if self.use_relative_control: - targets = self.prev_targets[:, self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions + targets = self.prev_targets[:, + self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets, - self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) else: self.cur_targets[:, self.actuated_dof_indices] = scale(self.actions, - self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + self.shadow_hand_dof_lower_limits[ + self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[ + self.actuated_dof_indices]) self.cur_targets[:, self.actuated_dof_indices] = self.act_moving_average * self.cur_targets[:, - self.actuated_dof_indices] + (1.0 - self.act_moving_average) * self.prev_targets[:, self.actuated_dof_indices] - self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(self.cur_targets[:, self.actuated_dof_indices], - self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) + self.actuated_dof_indices] + ( + 1.0 - 
self.act_moving_average) * self.prev_targets[ + :, + self.actuated_dof_indices] + self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp( + self.cur_targets[:, self.actuated_dof_indices], + self.shadow_hand_dof_lower_limits[self.actuated_dof_indices], + self.shadow_hand_dof_upper_limits[self.actuated_dof_indices]) self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices] self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.cur_targets)) @@ -702,9 +760,11 @@ def pre_physics_step(self, actions): # apply new forces force_indices = (torch.rand(self.num_envs, device=self.device) < self.random_force_prob).nonzero() self.rb_forces[force_indices, self.object_rb_handles, :] = torch.randn( - self.rb_forces[force_indices, self.object_rb_handles, :].shape, device=self.device) * self.object_rb_masses * self.force_scale + self.rb_forces[force_indices, self.object_rb_handles, :].shape, + device=self.device) * self.object_rb_masses * self.force_scale - self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.rb_forces), None, gymapi.LOCAL_SPACE) + self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.rb_forces), None, + gymapi.LOCAL_SPACE) def post_physics_step(self): self.progress_buf += 1 @@ -719,23 +779,36 @@ def post_physics_step(self): self.gym.refresh_rigid_body_state_tensor(self.sim) for i in range(self.num_envs): - targetx = (self.goal_pos[i] + quat_apply(self.goal_rot[i], to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() - targety = (self.goal_pos[i] + quat_apply(self.goal_rot[i], to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() - targetz = (self.goal_pos[i] + quat_apply(self.goal_rot[i], to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() + targetx = (self.goal_pos[i] + quat_apply(self.goal_rot[i], + to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() + targety = (self.goal_pos[i] + quat_apply(self.goal_rot[i], + to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() + targetz = (self.goal_pos[i] + quat_apply(self.goal_rot[i], + to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() p0 = self.goal_pos[i].cpu().numpy() + self.goal_displacement_tensor.cpu().numpy() - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], targetx[0], targetx[1], targetx[2]], [0.85, 0.1, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], targety[0], targety[1], targety[2]], [0.1, 0.85, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], targetz[0], targetz[1], targetz[2]], [0.1, 0.1, 0.85]) - - objectx = (self.object_pos[i] + quat_apply(self.object_rot[i], to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy() - objecty = (self.object_pos[i] + quat_apply(self.object_rot[i], to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() - objectz = (self.object_pos[i] + quat_apply(self.object_rot[i], to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], targetx[0], targetx[1], targetx[2]], [0.85, 0.1, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], targety[0], targety[1], targety[2]], [0.1, 0.85, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], targetz[0], targetz[1], targetz[2]], [0.1, 0.1, 0.85]) + + objectx = (self.object_pos[i] + quat_apply(self.object_rot[i], + to_torch([1, 0, 0], device=self.device) * 
0.2)).cpu().numpy() + objecty = (self.object_pos[i] + quat_apply(self.object_rot[i], + to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy() + objectz = (self.object_pos[i] + quat_apply(self.object_rot[i], + to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy() p0 = self.object_pos[i].cpu().numpy() - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], objectx[0], objectx[1], objectx[2]], [0.85, 0.1, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], objecty[0], objecty[1], objecty[2]], [0.1, 0.85, 0.1]) - self.gym.add_lines(self.viewer, self.envs[i], 1, [p0[0], p0[1], p0[2], objectz[0], objectz[1], objectz[2]], [0.1, 0.1, 0.85]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], objectx[0], objectx[1], objectx[2]], [0.85, 0.1, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], objecty[0], objecty[1], objecty[2]], [0.1, 0.85, 0.1]) + self.gym.add_lines(self.viewer, self.envs[i], 1, + [p0[0], p0[1], p0[2], objectz[0], objectz[1], objectz[2]], [0.1, 0.1, 0.85]) + ##################################################################### ###=========================jit functions=========================### @@ -744,12 +817,12 @@ def post_physics_step(self): @torch.jit.script def compute_hand_reward( - rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes, - max_episode_length: float, object_pos, object_rot, target_pos, target_rot, - dist_reward_scale: float, rot_reward_scale: float, rot_eps: float, - actions, action_penalty_scale: float, - success_tolerance: float, reach_goal_bonus: float, fall_dist: float, - fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool + rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes, + max_episode_length: float, object_pos, object_rot, target_pos, target_rot, + dist_reward_scale: float, rot_reward_scale: float, rot_eps: float, + actions, action_penalty_scale: float, + success_tolerance: float, reach_goal_bonus: float, fall_dist: float, + fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool ): # Distance from the hand to the object goal_dist = torch.norm(object_pos - target_pos, p=2, dim=-1) @@ -762,7 +835,7 @@ def compute_hand_reward( rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0)) dist_rew = goal_dist * dist_reward_scale - rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale + rot_rew = 1.0 / (torch.abs(rot_dist) + rot_eps) * rot_reward_scale action_penalty = torch.sum(actions ** 2, dim=-1) @@ -783,7 +856,8 @@ def compute_hand_reward( resets = torch.where(goal_dist >= fall_dist, torch.ones_like(reset_buf), reset_buf) if max_consecutive_successes > 0: # Reset progress buffer on goal envs if max_consecutive_successes > 0 - progress_buf = torch.where(torch.abs(rot_dist) <= success_tolerance, torch.zeros_like(progress_buf), progress_buf) + progress_buf = torch.where(torch.abs(rot_dist) <= success_tolerance, torch.zeros_like(progress_buf), + progress_buf) resets = torch.where(successes >= max_consecutive_successes, torch.ones_like(resets), resets) resets = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(resets), resets) @@ -794,7 +868,8 @@ def compute_hand_reward( num_resets = torch.sum(resets) finished_cons_successes = torch.sum(successes * resets.float()) - cons_successes = torch.where(num_resets > 0, av_factor*finished_cons_successes/num_resets + 
(1.0 - av_factor)*consecutive_successes, consecutive_successes) + cons_successes = torch.where(num_resets > 0, av_factor * finished_cons_successes / num_resets + ( + 1.0 - av_factor) * consecutive_successes, consecutive_successes) return reward, resets, goal_resets, progress_buf, successes, cons_successes diff --git a/rofunc/learning/RofuncRL/tasks/trifinger.py b/rofunc/learning/RofuncRL/tasks/trifinger.py index f6f632acf..d5d4aa83c 100644 --- a/rofunc/learning/RofuncRL/tasks/trifinger.py +++ b/rofunc/learning/RofuncRL/tasks/trifinger.py @@ -26,30 +26,26 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import numpy as np import os -import torch +from collections import OrderedDict -from isaacgym import gymtorch from isaacgym import gymapi -from isaacgym.torch_utils import * - -from collections import OrderedDict +from isaacgym import gymtorch project_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -from isaacgymenvs.utils.torch_jit_utils import * -from isaacgymenvs.tasks.base.vec_task import VecTask +from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import * +from rofunc.learning.RofuncRL.tasks.base.vec_task import VecTask from types import SimpleNamespace from collections import deque from typing import Deque, Dict, Tuple, Union - # python import enum import numpy as np + # ################### # # Dimensions of robot # # ################### # @@ -85,6 +81,7 @@ class TrifingerDimensions(enum.Enum): ObjectPoseDim = 7 ObjectVelocityDim = 6 + # ################# # # Different objects # # ################# # @@ -176,7 +173,6 @@ def __compute(self): class Trifinger(VecTask): - # constants # directory where assets for the simulator are present _trifinger_assets_dir = os.path.join(project_dir, "../", "assets", "trifinger") @@ -345,7 +341,7 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.state_spec = self.obs_spec self.action_spec = { - "command": self.action_dim + "command": self.action_dim } self.cfg["env"]["numObservations"] = sum(self.obs_spec.values()) @@ -355,7 +351,6 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self.randomize = self.cfg["task"]["randomize"] self.randomization_params = self.cfg["task"]["randomization_params"] - # define prims present in the scene prim_names = ["robot", "table", "boundary", "object", "goal_object"] # mapping from name to asset instance @@ -374,14 +369,15 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir f'finger_middle_to_lower_joint_{finger_pos}'] self._robot_dof_indices = OrderedDict.fromkeys(robot_dof_names, None) - super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, graphics_device_id=graphics_device_id, headless=headless, virtual_screen_capture=virtual_screen_capture, force_render=force_render) + super().__init__(config=self.cfg, rl_device=rl_device, sim_device=sim_device, + graphics_device_id=graphics_device_id, headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) if self.viewer != None: cam_pos = gymapi.Vec3(0.7, 0.0, 0.7) cam_target = gymapi.Vec3(0.0, 0.0, 0.0) self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) - # change constant buffers from numpy/lists into torch tensors # limits for robot for limit_name in self._robot_limits: @@ -418,7 +414,8 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, 
headless, vir self._ft_sensors_values = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, num_ft_dims) dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) - self._dof_torque = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, self._dims.JointTorqueDim.value) + self._dof_torque = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, + self._dims.JointTorqueDim.value) # get gym GPU state tensors actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim) @@ -460,11 +457,10 @@ def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, vir self._successes_pos = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) self._successes_quat = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) - self.__configure_mdp_spaces() def create_sim(self): - self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 + self.up_axis_idx = 2 # index of up axis: Y=1, Z=2 self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() @@ -528,7 +524,8 @@ def _create_envs(self, num_envs, spacing, num_per_row): # define lower and upper region bound for each environment env_lower_bound = gymapi.Vec3(-self.cfg["env"]["envSpacing"], -self.cfg["env"]["envSpacing"], 0.0) - env_upper_bound = gymapi.Vec3(self.cfg["env"]["envSpacing"], self.cfg["env"]["envSpacing"], self.cfg["env"]["envSpacing"]) + env_upper_bound = gymapi.Vec3(self.cfg["env"]["envSpacing"], self.cfg["env"]["envSpacing"], + self.cfg["env"]["envSpacing"]) num_envs_per_row = int(np.sqrt(self.num_envs)) # initialize gym indices buffer as a list # note: later the list is converted to torch tensor for ease in interfacing with IsaacGym. @@ -549,26 +546,26 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True) # add trifinger robot to environment trifinger_actor = self.gym.create_actor(env_ptr, self.gym_assets["robot"], gymapi.Transform(), - "robot", env_index, 0, 0) + "robot", env_index, 0, 0) trifinger_idx = self.gym.get_actor_index(env_ptr, trifinger_actor, gymapi.DOMAIN_SIM) # add table to environment table_handle = self.gym.create_actor(env_ptr, self.gym_assets["table"], gymapi.Transform(), - "table", env_index, 1, 0) + "table", env_index, 1, 0) table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM) # add stage to environment boundary_handle = self.gym.create_actor(env_ptr, self.gym_assets["boundary"], gymapi.Transform(), - "boundary", env_index, 1, 0) + "boundary", env_index, 1, 0) boundary_idx = self.gym.get_actor_index(env_ptr, boundary_handle, gymapi.DOMAIN_SIM) # add object to environment object_handle = self.gym.create_actor(env_ptr, self.gym_assets["object"], gymapi.Transform(), - "object", env_index, 0, 0) + "object", env_index, 0, 0) object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM) # add goal object to environment goal_handle = self.gym.create_actor(env_ptr, self.gym_assets["goal_object"], gymapi.Transform(), - "goal_object", env_index + self.num_envs, 0, 0) + "goal_object", env_index + self.num_envs, 0, 0) goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM) # change settings of DOF self.gym.set_actor_dof_properties(env_ptr, trifinger_actor, robot_dof_props) @@ -621,11 +618,11 @@ def __configure_mdp_spaces(self): object_obs_low = torch.cat([ self._object_limits["position"].low, self._object_limits["orientation"].low, - ]*2) + ] * 2) 
object_obs_high = torch.cat([ self._object_limits["position"].high, self._object_limits["orientation"].high, - ]*2) + ] * 2) # Note: This is order sensitive. self._observations_scale.low = torch.cat([ @@ -702,14 +699,14 @@ def __configure_mdp_spaces(self): raise AssertionError(msg) # print the scaling print(f'MDP Raw observation bounds\n' - f'\tLow: {self._observations_scale.low}\n' - f'\tHigh: {self._observations_scale.high}') + f'\tLow: {self._observations_scale.low}\n' + f'\tHigh: {self._observations_scale.high}') print(f'MDP Raw state bounds\n' - f'\tLow: {self._states_scale.low}\n' - f'\tHigh: {self._states_scale.high}') + f'\tLow: {self._states_scale.low}\n' + f'\tHigh: {self._states_scale.high}') print(f'MDP Raw action bounds\n' - f'\tLow: {self._action_scale.low}\n' - f'\tHigh: {self._action_scale.high}') + f'\tLow: {self._action_scale.low}\n' + f'\tHigh: {self._action_scale.high}') def compute_reward(self, actions): self.rew_buf[:] = 0. @@ -734,7 +731,7 @@ def compute_reward(self, actions): self.cfg["env"]["reward_terms"]["keypoints_dist"]["activate"] ) - self.extras.update({"env/rewards/"+k: v.mean() for k, v in log_dict.items()}) + self.extras.update({"env/rewards/" + k: v.mean() for k, v in log_dict.items()}) def compute_observations(self): # refresh memory buffers @@ -749,8 +746,10 @@ def compute_observations(self): tip_wrenches = self._ft_sensors_values else: - joint_torques = torch.zeros(self.num_envs, self._dims.JointTorqueDim.value, dtype=torch.float32, device=self.device) - tip_wrenches = torch.zeros(self.num_envs, self._dims.NumFingers.value * self._dims.WrenchDim.value, dtype=torch.float32, device=self.device) + joint_torques = torch.zeros(self.num_envs, self._dims.JointTorqueDim.value, dtype=torch.float32, + device=self.device) + tip_wrenches = torch.zeros(self.num_envs, self._dims.NumFingers.value * self._dims.WrenchDim.value, + dtype=torch.float32, device=self.device) # extract frame handles fingertip_handles_indices = list(self._fingertips_handles.values()) @@ -826,13 +825,13 @@ def reset_idx(self, env_ids): # D) Set values into simulator # -- DOF self.gym.set_dof_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(robot_indices), len(robot_indices)) + gymtorch.unwrap_tensor(robot_indices), len(robot_indices)) # -- actor root states self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._actors_root_state), - gymtorch.unwrap_tensor(all_indices), len(all_indices)) + gymtorch.unwrap_tensor(all_indices), len(all_indices)) def _sample_robot_state(self, instances: torch.Tensor, distribution: str = 'default', - dof_pos_stddev: float = 0.0, dof_vel_stddev: float = 0.0): + dof_pos_stddev: float = 0.0, dof_vel_stddev: float = 0.0): """Samples the robot DOF state based on the settings. 
Type of robot initial state distribution: ["default", "random"] @@ -1083,7 +1082,7 @@ def _check_termination(self): # Check for distance within tolerance goal_orientation_reset = torch.le(object_goal_orientation_dist, termination_config["success"]["orientation_tolerance"]) - self._step_info['env/current_orientation_goal/per_env'] = np.mean(goal_orientation_reset.float().cpu().numpy()) + self._step_info['env/current_orientation_goal/per_env'] = np.mean(goal_orientation_reset.float().cpu().numpy()) if self.cfg["env"]['task_difficulty'] < 4: # Check for task completion if position goal is within a threshold @@ -1099,7 +1098,6 @@ def _check_termination(self): self._successes_pos = goal_position_reset self._successes_quat = goal_orientation_reset - """ Helper functions - define assets """ @@ -1130,7 +1128,7 @@ def __define_robot_asset(self): robot_asset_options.use_physx_armature = True # load tri-finger asset trifinger_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._robot_urdf_file, robot_asset_options) + self._robot_urdf_file, robot_asset_options) # set the link properties for the robot # Ref: https://github.com/rr-learning/rrc_simulation/blob/master/python/rrc_simulation/sim_finger.py#L563 trifinger_props = self.gym.get_asset_rigid_shape_properties(trifinger_asset) @@ -1142,7 +1140,7 @@ def __define_robot_asset(self): # extract the frame handles for frame_name in self._fingertips_handles.keys(): self._fingertips_handles[frame_name] = self.gym.find_asset_rigid_body_index(trifinger_asset, - frame_name) + frame_name) # check valid handle if self._fingertips_handles[frame_name] == gymapi.INVALID_HANDLE: msg = f"Invalid handle received for frame: `{frame_name}`." @@ -1174,7 +1172,7 @@ def __define_table_asset(self): # load stage asset table_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._table_urdf_file, table_asset_options) + self._table_urdf_file, table_asset_options) # set stage properties table_props = self.gym.get_asset_rigid_shape_properties(table_asset) # iterate over each mesh @@ -1204,7 +1202,7 @@ def __define_boundary_asset(self): # load stage asset boundary_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._boundary_urdf_file, boundary_asset_options) + self._boundary_urdf_file, boundary_asset_options) # set stage properties boundary_props = self.gym.get_asset_rigid_shape_properties(boundary_asset) @@ -1222,7 +1220,7 @@ def __define_object_asset(self): object_asset_options.flip_visual_attachments = True # load object asset object_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._object_urdf_file, object_asset_options) + self._object_urdf_file, object_asset_options) # set object properties # Ref: https://github.com/rr-learning/rrc_simulation/blob/master/python/rrc_simulation/collision_objects.py#L96 object_props = self.gym.get_asset_rigid_shape_properties(object_asset) @@ -1245,7 +1243,7 @@ def __define_goal_object_asset(self): object_asset_options.flip_visual_attachments = True # load object asset goal_object_asset = self.gym.load_asset(self.sim, self._trifinger_assets_dir, - self._object_urdf_file, object_asset_options) + self._object_urdf_file, object_asset_options) # return the asset return goal_object_asset @@ -1254,12 +1252,13 @@ def env_steps_count(self) -> int: """Returns the total number of environment steps aggregated across parallel environments.""" return self.gym.get_frame_count(self.sim) * self.num_envs + ##################################################################### 
 ###=========================jit functions=========================###
 #####################################################################
 
 @torch.jit.script
-def lgsk_kernel(x: torch.Tensor, scale: float = 50.0, eps:float=2) -> torch.Tensor:
+def lgsk_kernel(x: torch.Tensor, scale: float = 50.0, eps: float = 2) -> torch.Tensor:
     """Defines logistic kernel function to bound input to [-0.25, 0)
 
     Ref: https://arxiv.org/abs/1901.08652 (page 15)
 
@@ -1275,9 +1274,9 @@ def lgsk_kernel(x: torch.Tensor, scale: float = 50.0, eps:float=2) -> torch.Tens
     scaled = x * scale
     return 1.0 / (scaled.exp() + eps + (-scaled).exp())
 
+
 @torch.jit.script
 def gen_keypoints(pose: torch.Tensor, num_keypoints: int = 8, size: Tuple[float, float, float] = (0.065, 0.065, 0.065)):
-
     num_envs = pose.shape[0]
 
     keypoints_buf = torch.ones(num_envs, num_keypoints, 3, dtype=torch.float32, device=pose.device)
@@ -1290,6 +1289,7 @@ def gen_keypoints(pose: torch.Tensor, num_keypoints: int = 8, size: Tuple[float,
         keypoints_buf[:, i, :] = local_to_world_space(corner, pose)
     return keypoints_buf
 
+
 @torch.jit.script
 def compute_trifinger_reward(
         obs_buf: torch.Tensor,
@@ -1309,7 +1309,6 @@ def compute_trifinger_reward(
         last_fingertip_state: torch.Tensor,
         use_keypoints: bool
 ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
-
     ft_sched_start = 0
     ft_sched_end = 5e7
 
@@ -1360,7 +1359,7 @@ def compute_trifinger_reward(
     quat_b = object_goal_poses_buf[:, 3:7]
 
     angles = quat_diff_rad(quat_a, quat_b)
-    object_rot_reward = object_rot_weight * dt / (3. * torch.abs(angles) + 0.01)
+    object_rot_reward = object_rot_weight * dt / (3. * torch.abs(angles) + 0.01)
 
     pose_reward = object_dist_reward + object_rot_reward
 
@@ -1396,13 +1395,12 @@ def compute_trifinger_observations_states(
         joint_torques: torch.Tensor,
         tip_wrenches: torch.Tensor
 ):
-
     num_envs = dof_position.shape[0]
 
     obs_buf = torch.cat([
         dof_position,
         dof_velocity,
-        object_state[:, 0:7], # pose
+        object_state[:, 0:7],  # pose
         object_goal_poses,
         actions
     ], dim=-1)
@@ -1410,7 +1408,7 @@ def compute_trifinger_observations_states(
     if asymmetric_obs:
         states_buf = torch.cat([
             obs_buf,
-            object_state[:, 7:13], # linear / angular velocity
+            object_state[:, 7:13],  # linear / angular velocity
             fingertip_state.reshape(num_envs, -1),
             joint_torques,
             tip_wrenches
@@ -1420,6 +1418,7 @@ def compute_trifinger_observations_states(
 
     return obs_buf, states_buf
 
+
 """
 Sampling of cuboidal object
 """
@@ -1470,8 +1469,9 @@ def random_orientation(num: int, device: str) -> torch.Tensor:
 
     return quat
 
+
 @torch.jit.script
-def random_orientation_within_angle(num: int, device:str, base: torch.Tensor, max_angle: float):
+def random_orientation_within_angle(num: int, device: str, base: torch.Tensor, max_angle: float):
     """ Generates random quaternions within max_angle of base
     Ref: https://math.stackexchange.com/a/3448434
     """
@@ -1479,13 +1479,13 @@ def random_orientation_within_angle(num: int, device:str, base: torch.Tensor, ma
 
     rand = torch.rand((num, 3), dtype=torch.float, device=device)
 
-    c = torch.cos(rand[:, 0]*max_angle)
-    n = torch.sqrt((1.-c)/2.)
+    c = torch.cos(rand[:, 0] * max_angle)
+    n = torch.sqrt((1. - c) / 2.)
 
-    quat[:, 3] = torch.sqrt((1+c)/2.)
-    quat[:, 2] = (rand[:, 1]*2.-1.) * n
-    quat[:, 0] = (torch.sqrt(1-quat[:, 2]**2.) * torch.cos(2*np.pi*rand[:, 2])) * n
-    quat[:, 1] = (torch.sqrt(1-quat[:, 2]**2.) * torch.sin(2*np.pi*rand[:, 2])) * n
+    quat[:, 3] = torch.sqrt((1 + c) / 2.)
+    quat[:, 2] = (rand[:, 1] * 2. - 1.) * n
+    quat[:, 0] = (torch.sqrt(1 - quat[:, 2] ** 2.)
                   * torch.cos(2 * np.pi * rand[:, 2])) * n
+    quat[:, 1] = (torch.sqrt(1 - quat[:, 2] ** 2.) * torch.sin(2 * np.pi * rand[:, 2])) * n
 
     # floating point errors can cause it to be slightly off, re-normalise
     quat = torch.nn.functional.normalize(quat, p=2., dim=-1, eps=1e-12)
@@ -1503,6 +1503,7 @@ def random_angular_vel(num: int, device: str, magnitude_stdev: float) -> torch.T
     magnitude *= magnitude_stdev
     return magnitude * axis
 
+
 @torch.jit.script
 def random_yaw_orientation(num: int, device: str) -> torch.Tensor:
     """Returns sampled rotation around z-axis."""
diff --git a/rofunc/utils/oslab/path.py b/rofunc/utils/oslab/path.py
index 06fc9fabb..25304eeb5 100644
--- a/rofunc/utils/oslab/path.py
+++ b/rofunc/utils/oslab/path.py
@@ -55,3 +55,13 @@ def check_ckpt_exist(ckpt_name):
         return True
     else:
         return False
+
+
+def is_absl_path(path):
+    """
+    Check if the path is an absolute path.
+
+    :param path: the path to be checked
+    :return: True if the path is an absolute path, False otherwise
+    """
+    return os.path.isabs(path)
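A minimal usage sketch for the new `is_absl_path` helper added to `rofunc/utils/oslab/path.py`. The direct module import mirrors the file touched above; the sample paths and POSIX-style results are illustrative assumptions, not part of the patch:

```python
# Sketch only: the sample paths below are made up for illustration and assume a
# POSIX filesystem (on Windows, drive-letter paths are what count as absolute).
from rofunc.utils.oslab.path import is_absl_path

print(is_absl_path("/home/user/motions/demo_motion.npy"))  # True: already absolute
print(is_absl_path("examples/data/demo_motion.npy"))       # False: relative to the CWD
```

A typical use is to branch on the result before resolving relative paths against the package root (for example via the existing `get_rofunc_path()`), so callers can accept either form.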