-
Notifications
You must be signed in to change notification settings - Fork 42
/
config.yaml
62 lines (48 loc) · 1.23 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Hydra defaults list: one entry per config group, composed in the order given.
defaults:
  - experiment: base_experiment
  # `???` leaves the group without a default choice, so `algorithm` and `task`
  # have to be supplied when the config is composed.
  - algorithm: ???
  - task: ???
  - model: layers/mlp
  # Same `model` group again, placed under the `critic_model` package.
  - model@critic_model: layers/mlp
  # This file's own keys come last in the list.
  - _self_
# Hydra runtime settings. `searchpath` has to be nested under `hydra`: written
# at column 0 it parses as a separate top-level key and `hydra` becomes null.
hydra:
  searchpath:
    # Tells hydra to add the default benchmarl configuration to its path
    - pkg://benchmarl/conf
# Top-level seed value; presumably the random seed for the run — confirm against the consumer.
seed: 0
# Values for the `experiment` config group (the defaults list selects
# `base_experiment` for it). Every key below has to be nested under
# `experiment:`; written at column 0 they parse as unrelated top-level keys and
# `experiment` itself becomes null.
# NOTE(review): integers such as 1_000_000 use underscore digit separators,
# which YAML 1.1 loaders read as ints; a strict YAML 1.2 loader would read them
# as strings — confirm the loader in use.
experiment:
  # Devices for sampling, training and the buffer.
  sampling_device: "cuda"
  train_device: "cuda"
  buffer_device: "cuda"

  share_policy_params: True
  prefer_continuous_actions: True

  # Optimisation settings (presumably discount factor and learning rate —
  # confirm against the consumer's schema).
  gamma: 0.9
  lr: 0.00005
  clip_grad_norm: True
  clip_grad_val: 5

  # Target-update settings.
  # NOTE(review): soft updates are enabled, so `hard_target_update_frequency`
  # is presumably unused — confirm.
  soft_target_update: True
  polyak_tau: 0.005
  hard_target_update_frequency: 5

  # Exploration epsilon: initial value, final value and annealing length.
  exploration_eps_init: 0.8
  exploration_eps_end: 0.01
  exploration_anneal_frames: 1_000_000

  # Run-length limits; `max_n_iters` is null, only `max_n_frames` is set.
  max_n_iters: null
  max_n_frames: 10_000_000

  # Settings prefixed `on_policy_`.
  on_policy_collected_frames_per_batch: 60_000
  on_policy_n_envs_per_worker: 600
  on_policy_n_minibatch_iters: 45
  on_policy_minibatch_size: 4096

  # Settings prefixed `off_policy_`.
  off_policy_collected_frames_per_batch: 6000
  off_policy_n_envs_per_worker: 60
  off_policy_n_optimizer_steps: 1000
  off_policy_train_batch_size: 128
  off_policy_memory_size: 1_000_000

  # Evaluation settings. 120_000 is a whole multiple of both
  # collected_frames_per_batch values above (60_000 and 6000).
  evaluation: True
  render: True
  evaluation_interval: 120_000
  evaluation_episodes: 200

  # Logging outputs.
  loggers: [wandb]
  create_json: True

  # Saving and restoring; both paths are null here.
  # NOTE(review): `checkpoint_interval: 0` presumably disables checkpointing —
  # confirm.
  save_folder: null
  restore_file: null
  checkpoint_interval: 0