
Commit

Maniskill RGB and Depth only observations. Added resnet and impala cube pickup configs.
ViktorM committed Sep 1, 2024
1 parent 8be57aa commit d21fd8f
Showing 7 changed files with 318 additions and 128 deletions.
5 changes: 3 additions & 2 deletions rl_games/algos_torch/a2c_continuous.py
@@ -95,7 +95,7 @@ def restore_central_value_function(self, fn):
     def get_masked_action_values(self, obs, action_masks):
         assert False
 
-    @torch.compile() #(mode='max-autotune')
+    # @torch.compile() #(mode='max-autotune')
     def calc_losses(self, actor_loss_func, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip, value_preds_batch, values, return_batch, mu, entropy, rnn_masks):
         a_loss = actor_loss_func(old_action_log_probs_batch, action_log_probs, advantage, self.ppo, curr_e_clip)
 
@@ -109,7 +109,8 @@ def calc_losses(self, actor_loss_func, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip, value_preds_batch, values, return_batch, mu, entropy, rnn_masks):
             b_loss = self.bound_loss(mu)
         else:
             b_loss = torch.zeros(1, device=self.ppo_device)
-        losses, sum_mask = torch_ext.apply_masks([a_loss.unsqueeze(1), c_loss , entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks)
+
+        losses, sum_mask = torch_ext.apply_masks([a_loss.unsqueeze(1), c_loss, entropy.unsqueeze(1), b_loss.unsqueeze(1)], rnn_masks)
         a_loss, c_loss, entropy, b_loss = losses[0], losses[1], losses[2], losses[3]
 
         loss = a_loss + 0.5 * c_loss * self.critic_coef - entropy * self.entropy_coef + b_loss * self.bounds_loss_coef
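The change above disables the torch.compile decorator on calc_losses. A minimal sketch of how compilation could instead be gated behind a flag, assuming PyTorch 2.x where torch.compile accepts mode='max-autotune'; the use_compile flag is a hypothetical name, not an existing rl_games option:

import torch

def maybe_compile(fn, use_compile=False):
    # Sketch only: gate compilation behind a flag so the eager path stays
    # available for debugging. 'max-autotune' trades longer warm-up for
    # faster steady-state kernels.
    if use_compile:
        return torch.compile(fn, mode='max-autotune')
    return fn

# Example: compile an arbitrary loss-style function.
mse = maybe_compile(lambda a, b: ((a - b) ** 2).mean(), use_compile=True)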
101 changes: 101 additions & 0 deletions rl_games/configs/maniskill/maniskill_pickcube_impala.yaml
@@ -0,0 +1,101 @@
params:
  seed: 42

  # environment wrapper clipping
  env:
    # added to the wrapper
    clip_observations: 5.0
    # can make custom wrapper?
    clip_actions: 1.0
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: vision_actor_critic
    require_rewards: False
    require_last_actions: False
    separate: False
    value_shape: 1
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True
    cnn:
      permute_input: True
      conv_depths: [16, 32, 32]
      activation: relu
      initializer:
        name: default
      regularizer:
        name: None
    mlp:
      units: [512, 256]
      activation: elu
      regularizer:
        name: None
      initializer:
        name: default
    # rnn:
    #   name: lstm
    #   units: 512
    #   layers: 1
    #   before_mlp: True
    #   concat_output: True

  config:
    name: PickCube_RGB_impala
    env_name: maniskill
    reward_shaper:
      scale_value: 1.0
    device: cuda:0
    device_name: cuda:0
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: False
    normalize_value: True
    normalize_advantage: True
    num_actors: 256
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    max_epochs: 10000
    save_best_after: 25
    save_frequency: 500
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 16
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 1
    clip_value: True
    seq_length: 8
    bounds_loss_coef: 0.0001
    #weight_decay: 0.001

    env_config:
      env_name: PickCube-v1
      obs_mode: rgbd
      control_mode: pd_ee_delta_pose
      reward_mode: dense

    player:
      render: True
      deterministic: True

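For reference, conv_depths: [16, 32, 32] in the cnn section mirrors the three-stage IMPALA encoder (Espeholt et al., 2018). A minimal sketch of such a stack under that assumption; this is an illustration, not the actual vision_actor_critic code:

import torch.nn as nn

class ImpalaStage(nn.Module):
    # One IMPALA stage: 3x3 conv, 3x3 stride-2 max-pool, two residual blocks.
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Conv2d(in_ch, out_ch, 3, padding=1)
        self.pool = nn.MaxPool2d(3, stride=2, padding=1)
        self.res1 = self._res_block(out_ch)
        self.res2 = self._res_block(out_ch)

    @staticmethod
    def _res_block(ch):
        return nn.Sequential(
            nn.ReLU(), nn.Conv2d(ch, ch, 3, padding=1),
            nn.ReLU(), nn.Conv2d(ch, ch, 3, padding=1),
        )

    def forward(self, x):
        x = self.pool(self.conv(x))
        x = x + self.res1(x)
        x = x + self.res2(x)
        return x

# conv_depths: [16, 32, 32], starting from 3 RGB channels.
encoder = nn.Sequential(
    ImpalaStage(3, 16), ImpalaStage(16, 32), ImpalaStage(32, 32),
    nn.ReLU(), nn.Flatten(),
)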
9 changes: 6 additions & 3 deletions rl_games/configs/maniskill/maniskill_pickcube_state.yaml
@@ -28,11 +28,11 @@ params:
   config:
     name: PickCube_state
     env_name: maniskill
-    reward_shaper:
-      scale_value: 1.0
     normalize_input: True
     normalize_value: True
     value_bootstrap: True
+    reward_shaper:
+      scale_value: 1.0
     normalize_advantage: True
     gamma: 0.99
     tau: 0.95
@@ -48,7 +48,7 @@ params:
     use_smooth_clamp: True
     bound_loss_type: regularisation
     bounds_loss_coef: 0.0005
-    max_epochs: 4000
+    max_epochs: 5000
     save_best_after: 25
     save_frequency: 100
     num_actors: 4096
@@ -59,6 +59,9 @@ params:
 
     env_config:
       env_name: PickCube-v1 # todo: add list of all envs
+      observation_mode: state
+      control_mode: pd_ee_delta_pose
+      reward_mode: dense
 
     player:
       render: True
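Any of these PickCube configs can then be launched through the rl_games runner. A minimal sketch, assuming rl_games and ManiSkill are installed and the maniskill env is registered in the local setup:

import yaml
from rl_games.torch_runner import Runner

# Path taken from this commit; swap in the impala or vision config as needed.
with open('rl_games/configs/maniskill/maniskill_pickcube_state.yaml') as f:
    config = yaml.safe_load(f)

runner = Runner()
runner.load(config)
runner.run({'train': True, 'play': False})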
101 changes: 101 additions & 0 deletions rl_games/configs/maniskill/maniskill_pickcube_vision.yaml
@@ -0,0 +1,101 @@
params:
  seed: 42

  # environment wrapper clipping
  env:
    # added to the wrapper
    clip_observations: 5.0
    # can make custom wrapper?
    clip_actions: 1.0
  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: e2e_vision_actor_critic
    separate: False
    value_shape: 1
    space:
      continuous:
        mu_activation: None
        sigma_activation: None

        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: True

    backbone:
      type: resnet18 # can be efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
      pretrained: True
      permute_input: True
      freeze: False
      preprocess_image: True
      args:
        zero_init_residual: True
        norm_layer: None
    mlp:
      units: [512, 256]
      activation: elu
      regularizer:
        name: None
      initializer:
        name: default
    # rnn:
    #   name: lstm
    #   units: 512
    #   layers: 1
    #   before_mlp: True
    #   concat_output: True

  config:
    name: PickCube_RGB_resnet18
    env_name: maniskill
    reward_shaper:
      scale_value: 1.0
    device: cuda:0
    device_name: cuda:0
    multi_gpu: False
    ppo: True
    mixed_precision: True
    normalize_input: False
    normalize_value: True
    normalize_advantage: True
    num_actors: 128
    reward_shaper:
      scale_value: 1.0
    gamma: 0.99
    tau: 0.95
    learning_rate: 1e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    max_epochs: 20000
    save_best_after: 25
    save_frequency: 500
    grad_norm: 1.0
    entropy_coef: 0.0
    truncate_grads: True
    e_clip: 0.2
    horizon_length: 16
    minibatch_size: 512
    mini_epochs: 2
    critic_coef: 1
    clip_value: True
    seq_length: 8
    bounds_loss_coef: 0.0001
    #weight_decay: 0.001

    env_config:
      env_name: PickCube-v1
      obs_mode: rgbd
      control_mode: pd_ee_delta_pose
      reward_mode: dense

    player:
      render: True
      deterministic: True

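The backbone section selects a torchvision model by name. Roughly, resnet18 with pretrained: True and args.zero_init_residual: True corresponds to the torchvision call below; the head-stripping is a sketch of the usual feature-extractor pattern, not the e2e_vision_actor_critic implementation itself:

import torch
import torchvision.models as models

# pretrained: True -> ImageNet weights; zero_init_residual is forwarded
# from the args block. norm_layer: None keeps the default BatchNorm2d.
backbone = models.resnet18(
    weights=models.ResNet18_Weights.DEFAULT,
    zero_init_residual=True,
)
backbone.fc = torch.nn.Identity()  # drop the classifier; keep 512-d features

obs = torch.rand(2, 3, 128, 128)   # a batch of RGB observations
features = backbone(obs)           # shape (2, 512), fed to the mlp [512, 256]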
2 changes: 1 addition & 1 deletion rl_games/configs/maniskill/maniskill_pushcube_state.yaml
@@ -26,7 +26,7 @@ params:
       name: default
 
   config:
-    name: PushCube_state
+    name: PushCube_EE_state
     env_name: maniskill
     normalize_input: True
     normalize_value: True
77 changes: 0 additions & 77 deletions rl_games/configs/maniskill/maniskill_resnet.yaml

This file was deleted.

(Diff for one more changed file did not load.)
