Commit 305f17b: added humanoid
DenSumy committed Oct 19, 2024
1 parent 6fe2039
Showing 3 changed files with 78 additions and 7 deletions.
6 changes: 3 additions & 3 deletions rl_games/configs/mujoco/ant_envpool_moe.yaml
@@ -21,9 +21,9 @@ params:
     num_experts: 4
     hidden_size: 256
     gating_hidden_size: 128
-    use_sparse_gating: true
-    use_entropy_loss: true
-    use_diversity_loss: true
+    use_sparse_gating: True
+    use_entropy_loss: True
+    use_diversity_loss: False
     top_k: 2
     lambda_entropy: 0.01
     lambda_diversity: 0.01
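
This hunk capitalizes the YAML booleans to match the rest of the config and disables the diversity term for Ant. For intuition, with use_sparse_gating: True and top_k: 2, only the two highest-weighted experts stay active per sample. A minimal Python sketch of that behavior (the helper name sparse_gating is hypothetical, not part of rl_games):

    import torch
    import torch.nn.functional as F

    def sparse_gating(gating_logits: torch.Tensor, top_k: int = 2) -> torch.Tensor:
        # Dense softmax over experts, then keep only the top-k weights per sample.
        weights = F.softmax(gating_logits, dim=-1)            # [batch, num_experts]
        _, topk_idx = torch.topk(weights, top_k, dim=-1)
        mask = torch.zeros_like(weights).scatter(-1, topk_idx, 1.0)
        sparse = weights * mask
        return sparse / sparse.sum(dim=-1, keepdim=True)      # renormalize over the top-k

    gates = sparse_gating(torch.randn(8, 4))                  # 4 experts, as configured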
71 changes: 71 additions & 0 deletions rl_games/configs/mujoco/humanoid_envpool_moe.yaml
@@ -0,0 +1,71 @@
+params:
+  seed: 5
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: moe
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    num_experts: 4
+    hidden_size: 512
+    gating_hidden_size: 128
+    use_sparse_gating: True
+    use_entropy_loss: True
+    use_diversity_loss: False
+    top_k: 2
+    lambda_entropy: 0.01
+    lambda_diversity: 0.01
+
+  config:
+    name: Humanoid-v4_envpool_moe
+    env_name: envpool
+    score_to_win: 20000
+    normalize_input: True
+    normalize_value: True
+    value_bootstrap: True
+    normalize_advantage: True
+    reward_shaper:
+      scale_value: 1
+
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: True
+    use_smooth_clamp: True
+    bound_loss_type: regularisation
+    bounds_loss_coef: 0.0
+    max_epochs: 2000
+    num_actors: 64
+    horizon_length: 64
+    minibatch_size: 2048
+    mini_epochs: 4
+    critic_coef: 2
+
+    env_config:
+      env_name: Humanoid-v4
+      seed: 5
+      #flat_observation: True
+
+    player:
+      render: False
+      num_actors: 64
+      games_num: 1000
+      use_vecenv: True
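
Assuming the standard rl_games entry points, the new config can be exercised either via python runner.py --train --file rl_games/configs/mujoco/humanoid_envpool_moe.yaml or programmatically. A minimal sketch (requires envpool to be installed, since env_name is envpool):

    import yaml
    from rl_games.torch_runner import Runner

    with open('rl_games/configs/mujoco/humanoid_envpool_moe.yaml') as f:
        config = yaml.safe_load(f)

    runner = Runner()
    runner.load(config)        # reads config['params']
    runner.run({'train': True, 'play': False, 'checkpoint': None, 'sigma': None})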
8 changes: 4 additions & 4 deletions rl_games/networks/moe.py
@@ -81,9 +81,9 @@ def __init__(self, params, **kwargs):
         self.aux_loss_map = {
         }
         if self.use_diversity_loss:
-            self.aux_loss_map['diversity_loss'] = 0.0
+            self.aux_loss_map['moe_diversity_loss'] = 0.0
         if self.use_entropy_loss:
-            self.aux_loss_map['entropy_loss'] = 0.0
+            self.aux_loss_map['moe_entropy_loss'] = 0.0
 
     def is_rnn(self):
         return False
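
Prefixing the keys with moe_ keeps these entries from colliding with other auxiliary losses aggregated by name. A sketch of that aggregation pattern (illustrative only, not the exact rl_games code):

    from typing import Dict
    import torch

    def total_loss(main_loss: torch.Tensor,
                   aux_loss_map: Dict[str, torch.Tensor]) -> torch.Tensor:
        # Each entry is already scaled by its lambda in forward(), so a plain
        # sum suffices; distinct keys also keep per-loss logging unambiguous.
        return main_loss + sum(aux_loss_map.values())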
@@ -111,7 +111,7 @@ def forward(self, obs_dict):
         # Compute Entropy Loss for Gating Weights
         entropy = -torch.sum(gating_weights * torch.log(gating_weights + 1e-8), dim=1)
         entropy_loss = torch.mean(entropy)
-        self.aux_loss_map['entropy_loss'] = self.lambda_entropy * entropy_loss
+        self.aux_loss_map['moe_entropy_loss'] = self.lambda_entropy * entropy_loss
 
         # Expert Networks Forward Pass
         expert_outputs = []
@@ -129,7 +129,7 @@ def forward(self, obs_dict):
                 diversity_loss += torch.mean(similarity)
         num_pairs = num_experts * (num_experts - 1) / 2
         diversity_loss = diversity_loss / num_pairs
-        self.aux_loss_map['diversity_loss'] = self.lambda_diversity * diversity_loss
+        self.aux_loss_map['moe_diversity_loss'] = self.lambda_diversity * diversity_loss
 
         # Aggregate Expert Outputs
         gating_weights = gating_weights.unsqueeze(-1)  # Shape: [batch_size, num_experts, 1]
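
For context, the diversity term penalizes experts that produce similar outputs, averaged over all expert pairs. A self-contained sketch consistent with the fragments above, assuming cosine similarity over [batch, hidden] expert outputs (the similarity measure is inferred from the variable names, not shown in this hunk):

    import torch
    import torch.nn.functional as F

    def moe_diversity_loss(expert_outputs):
        # expert_outputs: list of [batch, hidden] tensors, one per expert.
        num_experts = len(expert_outputs)
        loss = 0.0
        for i in range(num_experts):
            for j in range(i + 1, num_experts):
                similarity = F.cosine_similarity(expert_outputs[i], expert_outputs[j], dim=-1)
                loss += torch.mean(similarity)            # mean over the batch
        num_pairs = num_experts * (num_experts - 1) / 2
        return loss / num_pairs                           # average over expert pairs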
