-
Notifications
You must be signed in to change notification settings - Fork 0
/
MAIN.py
76 lines (67 loc) · 1.63 KB
/
MAIN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from MemoryClasses import *
from AgentClasses import *
from GymRunner import GymRunner
from PolicyClasses import *
import TorchModelClasses as models
import numpy as np
"""
TODO:
- Отключать обучение при нескольких последовательных rewardax более 200
- Явное переобучение на эпизодах-аутлиерах
"""
# (TODO above, in Russian: stop training after several consecutive episode
#  rewards above 200; explicitly re-train on outlier episodes.)

# --- Experiment configuration -------------------------------------------
ENV_NAME = "LunarLander-v2"  # Gym environment id
OBS_SPACE = 8                # observation vector length for LunarLander-v2
ACTION_SPACE = 4             # number of discrete actions
AGENT = DQN_agent            # agent class (imported from AgentClasses)
MEMORY = MemoryNumpy         # replay-memory implementation (MemoryClasses)
MODEL = models.HuberNet      # network architecture (TorchModelClasses)
SEED = 228                   # RNG seed for reproducibility
def kostil(reward):
    """Return True if *reward* is one of the "special" reward values.

    Passed to GymRunner as ``behavior_func``; flags the discrete bonus /
    penalty rewards (±100 crash/landing, 10, 200) as opposed to ordinary
    shaped-reward values.  (The name is Russian slang for "kludge".)
    """
    # Membership test instead of a chain of == comparisons; behaviorally
    # identical, including for float rewards (100.0 in (100, ...) is True).
    return reward in (100, -100, 10, 200)
def main():
    """Train and evaluate a DQN agent on LunarLander-v2.

    Builds the agent from the module-level configuration constants,
    pre-fills the replay memory with random-action transitions, trains
    for a fixed number of episodes, then runs a final evaluation pass
    that saves a video and the model.
    """
    agent = AGENT(
        MODEL,
        MEMORY,
        # Epsilon-greedy exploration: starts fully random (eps=1.0) and
        # decays toward min_eps; exact decay semantics of eps_delta are
        # defined in BasePolicy — TODO confirm.
        BasePolicy(
            eps=1.0,
            min_eps=0.05,
            eps_delta=0.0996,
            action_space = ACTION_SPACE
        ),
        gamma=0.99,               # discount factor (presumably)
        alpha=1e-3,               # learning rate (presumably)
        maxMemorySize=15000,      # replay-buffer capacity
        tau=1e-3,                 # NOTE(review): likely target-network soft-update rate — confirm in AgentClasses
        action_space=ACTION_SPACE,
        observation_space=OBS_SPACE,
        seed=SEED
    )
    gR = GymRunner(
        env_name=ENV_NAME,
        behavior_func=kostil,     # flags "special" reward values (see kostil)
        seed=SEED
    )
    # Pre-fill replay memory with random-action transitions so the first
    # learning batches have data to sample from.
    gR.random_actions(agent, 64)
    print("Заполнение памяти случайными действиями завершено")
    gR.fit(
        agent,
        n_iters = 5000,
        batch_size=64,
        LEARN_FREQ=2,             # presumably a learning-step interval — verify in GymRunner
        visualize=False
    )
    # Final evaluation: 10 episodes, save a video and the model weights.
    gR.test_agent(
        agent,
        n_iters=10,
        render=False,
        save_video=True,
        save_model=True)
    # Best model
    print("Готово!")
# Run the experiment only when executed as a script, not on import.
if __name__ == "__main__":
    main()