Skip to content

Commit

Permalink
Clamp and normalize reward of RotateCubeEnv (#261)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kami-code authored Apr 4, 2024
1 parent 3142a0e commit 833d97d
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions mani_skill/envs/tasks/rotate_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,13 +338,15 @@ def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+ object_rot_weight * object_rot_reward
)
total_reward = finger_reach_object_reward + pose_reward
total_reward[info["success"]] = 3
total_reward = total_reward.clamp(-15, 15)
total_reward[info["success"]] = 15
return total_reward

def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
    """Return the dense reward rescaled from [-max_reward, max_reward] to [0, 1].

    ``compute_dense_reward`` clamps its result to ``[-15, 15]`` and assigns
    ``15`` on success, so an affine map with ``max_reward = 15`` sends the
    lower bound to ``0`` and a successful step to ``1``.

    Args:
        obs: Observation, forwarded unchanged to ``compute_dense_reward``.
        action: Action, forwarded unchanged to ``compute_dense_reward``.
        info: Info dict, forwarded unchanged to ``compute_dense_reward``.

    Returns:
        ``dense_reward / (2 * max_reward) + 0.5``.
    """
    # Must stay in sync with the clamp bound used by compute_dense_reward.
    # Stored on the instance (not a local) so the bound stays readable from
    # outside after the call.
    self.max_reward = 15
    dense_reward = self.compute_dense_reward(obs=obs, action=action, info=info)
    # Divide by the full width of the range (2 * max_reward), then shift by
    # 0.5 so that the symmetric range is mapped onto [0, 1].
    norm_dense_reward = dense_reward / (2 * self.max_reward) + 0.5
    return norm_dense_reward

# TODO (stao): pick a better name, perhaps TrifingerRotateCube?
@register_env("RotateCubeLevel0-v1", max_episode_steps=250)
Expand Down

0 comments on commit 833d97d

Please sign in to comment.