
Commit

Maniskill RGB and Depth only observations. Added resnet and impala cube pickup configs.
ViktorM committed Sep 1, 2024
1 parent 8be57aa commit d21fd8f
Showing 7 changed files with 318 additions and 128 deletions.
5 changes: 3 additions & 2 deletions rl_games/algos_torch/a2c_continuous.py
@@ -95,7 +95,7 @@ def restore_central_value_function(self, fn):
     def get_masked_action_values(self, obs, action_masks):
         assert False
 
-    @torch.compile() #(mode='max-autotune')
+    # @torch.compile() #(mode='max-autotune')
     def calc_losses(self, actor_loss_func, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip, value_preds_batch, values, return_batch, mu, entropy, rnn_masks):
         a_loss = actor_loss_func(old_action_log_probs_batch, action_log_probs, advantage, self.ppo, curr_e_clip)
 
@@ -109,7 +109,8 @@ def calc_losses(self, actor_loss_func, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip, value_preds_batch, values, return_batch, mu, entropy, rnn_masks):
             b_loss = self.bound_loss(mu)
         else:
             b_loss = torch.zeros(1, device=self.ppo_device)
-        losses, sum_mask = torch_ext.apply_masks([a_loss.unsqueeze(1), c_loss , entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks)
+
+        losses, sum_mask = torch_ext.apply_masks([a_loss.unsqueeze(1), c_loss, entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks)
         a_loss, c_loss, entropy, b_loss = losses[0], losses[1], losses[2], losses[3]
 
         loss = a_loss + 0.5 * c_loss * self.critic_coef - entropy * self.entropy_coef + b_loss * self.bounds_loss_coef
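The change above disables the torch.compile decorator on calc_losses. A minimal sketch of how compilation could instead be gated behind a flag, assuming PyTorch 2.x where torch.compile accepts mode='max-autotune'; the use_compile flag is a hypothetical name, not an existing rl_games option:

import torch

def maybe_compile(fn, use_compile=False):
    # Sketch only: gate compilation behind a flag so the eager path stays
    # available for debugging. 'max-autotune' trades longer warm-up for
    # faster steady-state kernels.
    if use_compile:
        return torch.compile(fn, mode='max-autotune')
    return fn

# Example: compile an arbitrary loss-style function.
mse = maybe_compile(lambda a, b: ((a - b) ** 2).mean(), use_compile=True)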
101 changes: 101 additions & 0 deletions rl_games/configs/maniskill/maniskill_pickcube_impala.yaml
@@ -0,0 +1,101 @@
params:
  seed: 42

  # environment wrapper clipping
  env:
    # added to the wrapper
    clip_observations: 5.0
    # can make custom wrapper?
    clip_actions: 1.0
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: vision_actor_critic
    require_rewards: False
    require_last_actions: False
    separate: False
    value_shape: 1
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    cnn:
      permute_input: True
      conv_depths: [16, 32, 32]
      activation: relu
      initializer:
        name: default
      regularizer:
        name: None
    mlp:
      units: [512, 256]
      activation: elu
      regularizer:
        name: None
      initializer:
        name: default
    # rnn:
    #   name: lstm
    #   units: 512
    #   layers: 1
    #   before_mlp: True
    #   concat_output: True

  config:
    name: PickCube_RGB_impala
    env_name: maniskill
    reward_shaper:
      scale_value: 1.0
    device: cuda:0
    device_name: cuda:0
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: False
    normalize_value: True
    normalize_advantage: True
    num_actors: 256
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    max_epochs: 10000
    save_best_after: 25
    save_frequency: 500
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 16
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 1
    clip_value: True
    seq_length: 8
    bounds_loss_coef: 0.0001
    #weight_decay: 0.001

    env_config:
      env_name: PickCube-v1
      obs_mode: rgbd
      control_mode: pd_ee_delta_pose
      reward_mode: dense

    player:
      render: True
      deterministic: True

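For reference, conv_depths: [16, 32, 32] in the cnn section mirrors the three-stage IMPALA encoder (Espeholt et al., 2018). A minimal sketch of such a stack under that assumption; this is an illustration, not the actual vision_actor_critic code:

import torch.nn as nn

class ImpalaStage(nn.Module):
    # One IMPALA stage: 3x3 conv, 3x3 stride-2 max-pool, two residual blocks.
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, 3, padding=1)
        self.pool = nn.MaxPool2d(3, stride=2, padding=1)
        self.res1 = self._res_block(out_ch)
        self.res2 = self._res_block(out_ch)

    @staticmethod
    def _res_block(ch):
        return nn.Sequential(
            nn.ReLU(), nn.Conv2d(ch, ch, 3, padding=1),
            nn.ReLU(), nn.Conv2d(ch, ch, 3, padding=1),
        )

    def forward(self, x):
        x = self.pool(self.conv(x))
        x = x + self.res1(x)
        x = x + self.res2(x)
        return x

# conv_depths: [16, 32, 32], starting from 3 RGB channels.
encoder = nn.Sequential(
    ImpalaStage(3, 16), ImpalaStage(16, 32), ImpalaStage(32, 32),
    nn.ReLU(), nn.Flatten(),
)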
9 changes: 6 additions & 3 deletions rl_games/configs/maniskill/maniskill_pickcube_state.yaml
@@ -28,11 +28,11 @@ params:
   config:
     name: PickCube_state
     env_name: maniskill
-    reward_shaper:
-      scale_value: 1.0
     normalize_input: True
     normalize_value: True
     value_bootstrap: True
+    reward_shaper:
+      scale_value: 1.0
     normalize_advantage: True
     gamma: 0.99
     tau: 0.95
@@ -48,7 +48,7 @@ params:
     use_smooth_clamp: True
     bound_loss_type: regularisation
     bounds_loss_coef: 0.0005
-    max_epochs: 4000
+    max_epochs: 5000
     save_best_after: 25
     save_frequency: 100
     num_actors: 4096
@@ -59,6 +59,9 @@ params:
 
     env_config:
       env_name: PickCube-v1 # todo: add list of all envs
+      observation_mode: state
+      control_mode: pd_ee_delta_pose
+      reward_mode: dense
 
     player:
       render: True
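Any of these PickCube configs can then be launched through the rl_games runner. A minimal sketch, assuming rl_games and ManiSkill are installed and the maniskill env is registered in the local setup:

import yaml
from rl_games.torch_runner import Runner

# Path taken from this commit; swap in the impala or vision config as needed.
with open('rl_games/configs/maniskill/maniskill_pickcube_state.yaml') as f:
    config = yaml.safe_load(f)

runner = Runner()
runner.load(config)
runner.run({'train': True, 'play': False})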
101 changes: 101 additions & 0 deletions rl_games/configs/maniskill/maniskill_pickcube_vision.yaml
@@ -0,0 +1,101 @@
params:
  seed: 42

  # environment wrapper clipping
  env:
    # added to the wrapper
    clip_observations: 5.0
    # can make custom wrapper?
    clip_actions: 1.0
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: e2e_vision_actor_critic
    separate: False
    value_shape: 1
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True

    backbone:
      type: resnet18 # can be efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
      pretrained: True
      permute_input: True
      freeze: False
      preprocess_image: True
      args:
        zero_init_residual: True
        norm_layer: None
    mlp:
      units: [512, 256]
      activation: elu
      regularizer:
        name: None
      initializer:
        name: default
    # rnn:
    #   name: lstm
    #   units: 512
    #   layers: 1
    #   before_mlp: True
    #   concat_output: True

  config:
    name: PickCube_RGB_resnet18
    env_name: maniskill
    reward_shaper:
      scale_value: 1.0
    device: cuda:0
    device_name: cuda:0
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: False
    normalize_value: True
    normalize_advantage: True
    num_actors: 128
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    max_epochs: 20000
    save_best_after: 25
    save_frequency: 500
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 16
    minibatch_size: 512
    mini_epochs: 2
    critic_coef: 1
    clip_value: True
    seq_length: 8
    bounds_loss_coef: 0.0001
    #weight_decay: 0.001

    env_config:
      env_name: PickCube-v1
      obs_mode: rgbd
      control_mode: pd_ee_delta_pose
      reward_mode: dense

    player:
      render: True
      deterministic: True

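The backbone section selects a torchvision model by name. Roughly, resnet18 with pretrained: True and args.zero_init_residual: True corresponds to the torchvision call below; the head-stripping is a sketch of the usual feature-extractor pattern, not the e2e_vision_actor_critic implementation itself:

import torch
import torchvision.models as models

# pretrained: True -> ImageNet weights; zero_init_residual is forwarded
# from the args block. norm_layer: None keeps the default BatchNorm2d.
backbone = models.resnet18(
    weights=models.ResNet18_Weights.DEFAULT,
    zero_init_residual=True,
)
backbone.fc = torch.nn.Identity()  # drop the classifier; keep 512-d features

obs = torch.rand(2, 3, 128, 128)   # a batch of RGB observations
features = backbone(obs)           # shape (2, 512), fed to the mlp [512, 256]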
2 changes: 1 addition & 1 deletion rl_games/configs/maniskill/maniskill_pushcube_state.yaml
@@ -26,7 +26,7 @@ params:
       name: default
 
   config:
-    name: PushCube_state
+    name: PushCube_EE_state
     env_name: maniskill
     normalize_input: True
     normalize_value: True
77 changes: 0 additions & 77 deletions rl_games/configs/maniskill/maniskill_resnet.yaml

This file was deleted.

(Diff for one more changed file did not load.)
