From 305f17b7a9c86b9759b6637f6d1c5d1f0223d555 Mon Sep 17 00:00:00 2001
From: Denys Makoviichuk
Date: Sat, 19 Oct 2024 13:48:12 -0700
Subject: [PATCH] added humanoid

---
 rl_games/configs/mujoco/ant_envpool_moe.yaml |  6 +-
 .../configs/mujoco/humanoid_envpool_moe.yaml | 71 +++++++++++++++++++
 rl_games/networks/moe.py                     |  8 +--
 3 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 rl_games/configs/mujoco/humanoid_envpool_moe.yaml

diff --git a/rl_games/configs/mujoco/ant_envpool_moe.yaml b/rl_games/configs/mujoco/ant_envpool_moe.yaml
index 814850cc..41d39dcd 100644
--- a/rl_games/configs/mujoco/ant_envpool_moe.yaml
+++ b/rl_games/configs/mujoco/ant_envpool_moe.yaml
@@ -21,9 +21,9 @@ params:
     num_experts: 4
     hidden_size: 256
     gating_hidden_size: 128
-    use_sparse_gating: true
-    use_entropy_loss: true
-    use_diversity_loss: true
+    use_sparse_gating: True
+    use_entropy_loss: True
+    use_diversity_loss: False
     top_k: 2
     lambda_entropy: 0.01
     lambda_diversity: 0.01
diff --git a/rl_games/configs/mujoco/humanoid_envpool_moe.yaml b/rl_games/configs/mujoco/humanoid_envpool_moe.yaml
new file mode 100644
index 00000000..95dc22df
--- /dev/null
+++ b/rl_games/configs/mujoco/humanoid_envpool_moe.yaml
@@ -0,0 +1,71 @@
+params:
+  seed: 5
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: moe
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    num_experts: 4
+    hidden_size: 512
+    gating_hidden_size: 128
+    use_sparse_gating: True
+    use_entropy_loss: True
+    use_diversity_loss: False
+    top_k: 2
+    lambda_entropy: 0.01
+    lambda_diversity: 0.01
+
+  config:
+    name: Humanoid-v4_envpool_moe
+    env_name: envpool
+    score_to_win: 20000
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: True
+    normalize_advantage: True
+    reward_shaper:
+      scale_value: 1
+
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: True
+    use_smooth_clamp: True
+    bound_loss_type: regularisation
+    bounds_loss_coef: 0.0
+    max_epochs: 2000
+    num_actors: 64
+    horizon_length: 64
+    minibatch_size: 2048
+    mini_epochs: 4
+    critic_coef: 2
+
+    env_config:
+      env_name: Humanoid-v4
+      seed: 5
+      #flat_observation: True
+
+    player:
+      render: False
+      num_actors: 64
+      games_num: 1000
+      use_vecenv: True
\ No newline at end of file
diff --git a/rl_games/networks/moe.py b/rl_games/networks/moe.py
index 081699e8..37d33e79 100644
--- a/rl_games/networks/moe.py
+++ b/rl_games/networks/moe.py
@@ -81,9 +81,9 @@ def __init__(self, params, **kwargs):
         self.aux_loss_map = {
         }
         if self.use_diversity_loss:
-            self.aux_loss_map['diversity_loss'] = 0.0
+            self.aux_loss_map['moe_diversity_loss'] = 0.0
         if self.use_entropy_loss:
-            self.aux_loss_map['entropy_loss'] = 0.0
+            self.aux_loss_map['moe_entropy_loss'] = 0.0
 
     def is_rnn(self):
         return False
@@ -111,7 +111,7 @@ def forward(self, obs_dict):
         # Compute Entropy Loss for Gating Weights
         entropy = -torch.sum(gating_weights * torch.log(gating_weights + 1e-8), dim=1)
         entropy_loss = torch.mean(entropy)
-        self.aux_loss_map['entropy_loss'] = self.lambda_entropy * entropy_loss
+        self.aux_loss_map['moe_entropy_loss'] = self.lambda_entropy * entropy_loss
 
         # Expert Networks Forward Pass
         expert_outputs = []
@@ -129,7 +129,7 @@ def forward(self, obs_dict):
             diversity_loss += torch.mean(similarity)
         num_pairs = num_experts * (num_experts - 1) / 2
         diversity_loss = diversity_loss / num_pairs
-        self.aux_loss_map['diversity_loss'] = self.lambda_diversity * diversity_loss
+        self.aux_loss_map['moe_diversity_loss'] = self.lambda_diversity * diversity_loss
 
         # Aggregate Expert Outputs
         gating_weights = gating_weights.unsqueeze(-1)  # Shape: [batch_size, num_experts, 1]
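
Note for reviewers: this patch renames the aux-loss keys to moe_entropy_loss and moe_diversity_loss, so anything that reads aux_loss_map by the old names needs updating. The sketch below reproduces only the two auxiliary terms from moe.py so they can be sanity-checked in isolation. It is a minimal standalone approximation: the moe_aux_losses helper, the tensor shapes, and the top-k renormalization step are assumptions inferred from the hunks and from the config (use_sparse_gating: True, top_k: 2), not code from this repository.

    # Minimal sketch of the MoE auxiliary losses touched by this patch.
    # Assumed shapes (the full moe.py is not shown here): gating_logits is
    # [batch_size, num_experts]; expert_outputs is a list of num_experts
    # tensors of shape [batch_size, hidden_size].
    import torch
    import torch.nn.functional as F

    def moe_aux_losses(gating_logits, expert_outputs, top_k=2,
                       lambda_entropy=0.01, lambda_diversity=0.01):
        gating_weights = F.softmax(gating_logits, dim=1)

        # Sparse gating (assumed form): keep the top_k experts per sample
        # and renormalize so the kept weights sum to 1.
        topk_vals, topk_idx = torch.topk(gating_weights, top_k, dim=1)
        sparse = torch.zeros_like(gating_weights).scatter(1, topk_idx, topk_vals)
        gating_weights = sparse / sparse.sum(dim=1, keepdim=True)

        # Entropy of the gating distribution, as in the @@ -111 hunk.
        entropy = -torch.sum(gating_weights * torch.log(gating_weights + 1e-8), dim=1)
        moe_entropy_loss = lambda_entropy * torch.mean(entropy)

        # Mean pairwise cosine similarity between expert outputs, averaged
        # over num_experts * (num_experts - 1) / 2 pairs, as in the @@ -129 hunk.
        num_experts = len(expert_outputs)
        diversity_loss = torch.zeros((), device=gating_logits.device)
        for i in range(num_experts):
            for j in range(i + 1, num_experts):
                similarity = F.cosine_similarity(expert_outputs[i],
                                                 expert_outputs[j], dim=1)
                diversity_loss = diversity_loss + torch.mean(similarity)
        diversity_loss = diversity_loss / (num_experts * (num_experts - 1) / 2)
        moe_diversity_loss = lambda_diversity * diversity_loss

        return {'moe_entropy_loss': moe_entropy_loss,
                'moe_diversity_loss': moe_diversity_loss}

    # Example with the humanoid config's dimensions (4 experts, hidden_size 512):
    logits = torch.randn(8, 4)
    experts = [torch.randn(8, 512) for _ in range(4)]
    print(moe_aux_losses(logits, experts))

Both terms are scaled by their lambdas before being written into aux_loss_map, matching how the patch stores them, so downstream code can sum the map's values into the total loss directly.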