-
Notifications
You must be signed in to change notification settings - Fork 2
/
TD.py
59 lines (45 loc) · 1.12 KB
/
TD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np
import os
import os.path as osp
from utils import TD_learning
# Global Variables
# These module-level settings are forwarded, by keyword, to TD_learning in
# the __main__ section of this script.

# learning rate
alpha = 0.1
# factor (presumably the discount factor -- confirm against utils.TD_learning)
gamma = 0.95
# maximum number of episodes
max_episodes = 100000
# epsilon for action choice
epsilon = 0.05
# only the 'MountainCar-v0' environment is supported
env_name = 'MountainCar-v0'
# if continuous, please specify n_actions
n_actions = 200
# pickle_path
pickle_path = 'pickles'
# Create the directory when it is missing. exist_ok=True replaces the former
# "check with osp.exists, then create" sequence, which could fail if the
# directory appeared between the check and the creation.
os.makedirs(pickle_path, exist_ok=True)
# discretized state value
min_state_val = 0
max_state_val = 40
# random seed
seed = 42
# init mode: "zeros" or "random"
init_mode = "random"
# learning mode: "Q-learning", "SARSA" or "Expected-SARSA"
learning_mode = "Expected-SARSA"
if __name__ == "__main__":
    # Collect the module-level settings into one keyword mapping so the
    # call below stays a single line; keys match TD_learning's keyword names.
    td_settings = dict(
        env_name=env_name,
        alpha=alpha,
        gamma=gamma,
        epsilon=epsilon,
        max_episodes=max_episodes,
        min_state_val=min_state_val,
        max_state_val=max_state_val,
        seed=seed,
        pickle_path=pickle_path,
        init_mode=init_mode,
        learning_mode=learning_mode,
        n_actions=n_actions,
    )
    # TD_learning returns a pair; the first element is discarded and the
    # second is bound to score_list.
    _, score_list = TD_learning(**td_settings)