-
Notifications
You must be signed in to change notification settings - Fork 515
/
dqn.yml
82 lines (78 loc) · 1.86 KB
/
dqn.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# DQN hyperparameters for the `atari` entry.
# All settings must be nested under the `atari` key; at the top level they
# would collide with the identically named keys of the other entries.
# The explicit `!!float` tags are kept on purpose: exponent literals without
# a decimal point (e.g. 1e7) are loaded as strings by YAML 1.1 parsers such
# as PyYAML unless the type is forced.
atari:
  # Wrapper classes given as dotted import paths.
  env_wrapper:
    - stable_baselines3.common.atari_wrappers.AtariWrapper
  frame_stack: 4
  policy: 'CnnPolicy'
  n_timesteps: !!float 1e7
  buffer_size: 100000
  learning_rate: !!float 1e-4
  batch_size: 32
  learning_starts: 100000
  target_update_interval: 1000
  train_freq: 4
  gradient_steps: 1
  exploration_fraction: 0.1
  exploration_final_eps: 0.01
  # If set to true, handle_timeout_termination must be deactivated
  # in the replay_buffer_kwargs.
  optimize_memory_usage: false
# Almost Tuned
# DQN hyperparameters for CartPole-v1, nested under the environment id so
# they do not collide with the same keys of the other entries.
# `!!float` forces a float: some YAML 1.1 parsers (e.g. PyYAML) would load
# exponent literals such as 5e4 as strings.
CartPole-v1:
  n_timesteps: !!float 5e4
  policy: 'MlpPolicy'
  learning_rate: !!float 2.3e-3
  batch_size: 64
  buffer_size: 100000
  learning_starts: 1000
  gamma: 0.99
  target_update_interval: 10
  train_freq: 256
  gradient_steps: 128
  exploration_fraction: 0.16
  exploration_final_eps: 0.04
  # Kept as a quoted string; presumably evaluated as a Python expression by
  # the consumer of this file -- TODO confirm.
  policy_kwargs: "dict(net_arch=[256, 256])"
# Tuned
# DQN hyperparameters for MountainCar-v0, nested under the environment id so
# they do not collide with the same keys of the other entries.
# `!!float` forces a float: some YAML 1.1 parsers (e.g. PyYAML) would load
# exponent literals such as 1.2e5 as strings.
MountainCar-v0:
  n_timesteps: !!float 1.2e5
  policy: 'MlpPolicy'
  learning_rate: !!float 4e-3
  batch_size: 128
  buffer_size: 10000
  learning_starts: 1000
  gamma: 0.98
  target_update_interval: 600
  train_freq: 16
  gradient_steps: 8
  exploration_fraction: 0.2
  exploration_final_eps: 0.07
  # Kept as a quoted string; presumably evaluated as a Python expression by
  # the consumer of this file -- TODO confirm.
  policy_kwargs: "dict(net_arch=[256, 256])"
# Tuned
# DQN hyperparameters for LunarLander-v2, nested under the environment id so
# they do not collide with the same keys of the other entries.
# `!!float` forces a float: some YAML 1.1 parsers (e.g. PyYAML) would load
# exponent literals such as 1e5 as strings.
LunarLander-v2:
  n_timesteps: !!float 1e5
  policy: 'MlpPolicy'
  learning_rate: !!float 6.3e-4
  batch_size: 128
  buffer_size: 50000
  learning_starts: 0
  gamma: 0.99
  target_update_interval: 250
  train_freq: 4
  # NOTE(review): -1 looks like the Stable-Baselines3 convention for "as many
  # gradient steps as environment steps collected" -- confirm in the SB3 docs.
  gradient_steps: -1
  exploration_fraction: 0.12
  exploration_final_eps: 0.1
  # Kept as a quoted string; presumably evaluated as a Python expression by
  # the consumer of this file -- TODO confirm.
  policy_kwargs: "dict(net_arch=[256, 256])"
# Tuned
# DQN hyperparameters for Acrobot-v1, nested under the environment id so
# they do not collide with the same keys of the other entries.
# `!!float` forces a float: some YAML 1.1 parsers (e.g. PyYAML) would load
# exponent literals such as 1e5 as strings.
Acrobot-v1:
  n_timesteps: !!float 1e5
  policy: 'MlpPolicy'
  learning_rate: !!float 6.3e-4
  batch_size: 128
  buffer_size: 50000
  learning_starts: 0
  gamma: 0.99
  target_update_interval: 250
  train_freq: 4
  # NOTE(review): -1 looks like the Stable-Baselines3 convention for "as many
  # gradient steps as environment steps collected" -- confirm in the SB3 docs.
  gradient_steps: -1
  exploration_fraction: 0.12
  exploration_final_eps: 0.1
  # Kept as a quoted string; presumably evaluated as a Python expression by
  # the consumer of this file -- TODO confirm.
  policy_kwargs: "dict(net_arch=[256, 256])"