Commit 305f17b: added humanoid
DenSumy committed Oct 19, 2024
1 parent 6fe2039
Showing 3 changed files with 78 additions and 7 deletions.
6 changes: 3 additions & 3 deletions rl_games/configs/mujoco/ant_envpool_moe.yaml
@@ -21,9 +21,9 @@ params:
     num_experts: 4
     hidden_size: 256
     gating_hidden_size: 128
-    use_sparse_gating: true
-    use_entropy_loss: true
-    use_diversity_loss: true
+    use_sparse_gating: True
+    use_entropy_loss: True
+    use_diversity_loss: False
     top_k: 2
     lambda_entropy: 0.01
     lambda_diversity: 0.01
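
This hunk capitalizes the YAML booleans to match the rest of the config and disables the diversity term for Ant. For intuition, with use_sparse_gating: True and top_k: 2, only the two highest-weighted experts stay active per sample. A minimal Python sketch of that behavior (the helper name sparse_gating is hypothetical, not part of rl_games):

    import torch
    import torch.nn.functional as F

    def sparse_gating(gating_logits: torch.Tensor, top_k: int = 2) -> torch.Tensor:
        # Dense softmax over experts, then keep only the top-k weights per sample.
        weights = F.softmax(gating_logits, dim=-1)            # [batch, num_experts]
        _, topk_idx = torch.topk(weights, top_k, dim=-1)
        mask = torch.zeros_like(weights).scatter(-1, topk_idx, 1.0)
        sparse = weights * mask
        return sparse / sparse.sum(dim=-1, keepdim=True)      # renormalize over the top-k

    gates = sparse_gating(torch.randn(8, 4))                  # 4 experts, as configured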
71 changes: 71 additions & 0 deletions rl_games/configs/mujoco/humanoid_envpool_moe.yaml
@@ -0,0 +1,71 @@
+params:
+  seed: 5
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: moe
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    num_experts: 4
+    hidden_size: 512
+    gating_hidden_size: 128
+    use_sparse_gating: True
+    use_entropy_loss: True
+    use_diversity_loss: False
+    top_k: 2
+    lambda_entropy: 0.01
+    lambda_diversity: 0.01
+
+  config:
+    name: Humanoid-v4_envpool_moe
+    env_name: envpool
+    score_to_win: 20000
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: True
+    normalize_advantage: True
+    reward_shaper:
+      scale_value: 1
+
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: True
+    use_smooth_clamp: True
+    bound_loss_type: regularisation
+    bounds_loss_coef: 0.0
+    max_epochs: 2000
+    num_actors: 64
+    horizon_length: 64
+    minibatch_size: 2048
+    mini_epochs: 4
+    critic_coef: 2
+
+    env_config:
+      env_name: Humanoid-v4
+      seed: 5
+      #flat_observation: True
+
+    player:
+      render: False
+      num_actors: 64
+      games_num: 1000
+      use_vecenv: True
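
Assuming the standard rl_games entry points, the new config can be exercised either via python runner.py --train --file rl_games/configs/mujoco/humanoid_envpool_moe.yaml or programmatically. A minimal sketch (requires envpool to be installed, since env_name is envpool):

    import yaml
    from rl_games.torch_runner import Runner

    with open('rl_games/configs/mujoco/humanoid_envpool_moe.yaml') as f:
        config = yaml.safe_load(f)

    runner = Runner()
    runner.load(config)        # reads config['params']
    runner.run({'train': True, 'play': False, 'checkpoint': None, 'sigma': None})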
8 changes: 4 additions & 4 deletions rl_games/networks/moe.py
@@ -81,9 +81,9 @@ def __init__(self, params, **kwargs):
         self.aux_loss_map = {
         }
         if self.use_diversity_loss:
-            self.aux_loss_map['diversity_loss'] = 0.0
+            self.aux_loss_map['moe_diversity_loss'] = 0.0
         if self.use_entropy_loss:
-            self.aux_loss_map['entropy_loss'] = 0.0
+            self.aux_loss_map['moe_entropy_loss'] = 0.0
 
     def is_rnn(self):
         return False
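
Prefixing the keys with moe_ keeps these entries from colliding with other auxiliary losses aggregated by name. A sketch of that aggregation pattern (illustrative only, not the exact rl_games code):

    from typing import Dict
    import torch

    def total_loss(main_loss: torch.Tensor,
                   aux_loss_map: Dict[str, torch.Tensor]) -> torch.Tensor:
        # Each entry is already scaled by its lambda in forward(), so a plain
        # sum suffices; distinct keys also keep per-loss logging unambiguous.
        return main_loss + sum(aux_loss_map.values())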
@@ -111,7 +111,7 @@ def forward(self, obs_dict):
         # Compute Entropy Loss for Gating Weights
         entropy = -torch.sum(gating_weights * torch.log(gating_weights + 1e-8), dim=1)
         entropy_loss = torch.mean(entropy)
-        self.aux_loss_map['entropy_loss'] = self.lambda_entropy * entropy_loss
+        self.aux_loss_map['moe_entropy_loss'] = self.lambda_entropy * entropy_loss
 
         # Expert Networks Forward Pass
         expert_outputs = []
@@ -129,7 +129,7 @@ def forward(self, obs_dict):
                 diversity_loss += torch.mean(similarity)
         num_pairs = num_experts * (num_experts - 1) / 2
         diversity_loss = diversity_loss / num_pairs
-        self.aux_loss_map['diversity_loss'] = self.lambda_diversity * diversity_loss
+        self.aux_loss_map['moe_diversity_loss'] = self.lambda_diversity * diversity_loss
 
         # Aggregate Expert Outputs
         gating_weights = gating_weights.unsqueeze(-1)  # Shape: [batch_size, num_experts, 1]
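
For context, the diversity term penalizes experts that produce similar outputs, averaged over all expert pairs. A self-contained sketch consistent with the fragments above, assuming cosine similarity over [batch, hidden] expert outputs (the similarity measure is inferred from the variable names, not shown in this hunk):

    import torch
    import torch.nn.functional as F

    def moe_diversity_loss(expert_outputs):
        # expert_outputs: list of [batch, hidden] tensors, one per expert.
        num_experts = len(expert_outputs)
        loss = 0.0
        for i in range(num_experts):
            for j in range(i + 1, num_experts):
                similarity = F.cosine_similarity(expert_outputs[i], expert_outputs[j], dim=-1)
                loss += torch.mean(similarity)            # mean over the batch
        num_pairs = num_experts * (num_experts - 1) / 2
        return loss / num_pairs                           # average over expert pairs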
