-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgame_state.py
85 lines (70 loc) · 2.44 KB
/
game_state.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from skimage.transform import resize
from skimage.color import rgb2gray
import numpy as np
import matplotlib.pyplot as plt
import time
import gym
class GameState(object):
    """Wrap a Gym environment and keep a stack of four processed frames.

    Every raw observation is cropped, converted to grayscale and resized to
    84x84.  The state exposed to the caller is an (84, 84, 4) array that
    holds the four most recent processed frames along its last axis.

    Typical use is ``reset()`` once, then repeatedly ``step(action)``
    followed by ``update_state()``.
    """

    # Games that are driven with a reduced set of three actions.
    _REDUCED_ACTION_GAMES = (
        'PongDeterministic-v0',
        'BreakoutDeterministic-v0',
    )

    # Region of the raw frame that is kept before resizing.
    _CROP_ROWS = slice(30, 195)
    _CROP_COLS = slice(10, 150)

    # Side length of a processed frame and number of frames in a state.
    _FRAME_SIZE = 84
    _STACK_DEPTH = 4

    def __init__(self, random_seed, log, game, display, frame_skip):
        """Create the environment and work out the action mapping.

        Args:
            random_seed: Seed applied to NumPy's global RNG and to the
                environment.
            log: If truthy, the ``info`` value returned by the environment
                is printed on every call to ``step``.
            game: Environment id passed to ``gym.make``.
            display: If truthy, the environment is rendered on every call
                to ``step``.
            frame_skip: Number of additional repetitions of each action;
                one ``step`` applies the action ``frame_skip + 1`` times.

        Raises:
            ValueError: If ``frame_skip`` is negative.
        """
        # Validate first so that a bad argument has no side effects.  With a
        # negative value the loop in ``step`` would never run and no frame
        # would be available to process.
        if frame_skip < 0:
            raise ValueError('frame_skip must be >= 0, got %r' % (frame_skip,))

        np.random.seed(random_seed)
        self.log = log
        self.display = display
        self.frame_skip = frame_skip

        # Load game environment
        self.game = gym.make(game)
        self.game.seed(random_seed)

        # Get minimal action set
        if game in self._REDUCED_ACTION_GAMES:
            self.action_size = 3
            # Shift action space from [0,1,2] --> [1,2,3]
            self.action_shift = 1
        else:
            # Tip: Rather than letting it pass to this case, see which
            # actions the game you want to run uses to speed up the training
            # significantly!
            self.action_size = self.game.action_space.n
            self.action_shift = 0

    def reset(self):
        """Reset the game environment and regenerate the internal state s_t.

        Returns:
            The new state ``s_t``: the first processed frame repeated four
            times along the last axis.
        """
        x_t_raw = self.game.reset()
        self.x_t = self.process_frame(x_t_raw)
        self.s_t = np.stack((self.x_t,) * self._STACK_DEPTH, axis=2)
        return self.s_t

    def process_frame(self, frame):
        """Process a raw image frame for network input.

        The frame is cropped, converted to grayscale and resized to 84x84.

        Args:
            frame: Raw observation as returned by the environment.

        Returns:
            The processed frame.  It is also stored on ``self.x_t``, so
            every call (including the one made by ``step``) overwrites that
            attribute.
        """
        frame_cut = frame[self._CROP_ROWS, self._CROP_COLS]
        self.x_t = resize(
            rgb2gray(frame_cut), (self._FRAME_SIZE, self._FRAME_SIZE))
        return self.x_t

    def step(self, action):
        """Apply an action and observe what the environment returns.

        The action is repeated ``frame_skip + 1`` times, stopping early if
        the environment reports a terminal state.  Only the last observed
        frame is processed and pushed onto the frame stack.

        This does not modify ``s_t``; call ``update_state`` afterwards to
        make the returned state the current one.

        Args:
            action: Action index in ``[0, action_size)``; ``action_shift``
                is added before it is handed to the environment.

        Returns:
            A tuple ``(s_t1, reward, terminal)`` where ``s_t1`` is the next
            stacked state, ``reward`` is the reward summed over the repeated
            steps and clipped to [-1, 1], and ``terminal`` is the flag
            returned by the last environment step.
        """
        if self.display:
            self.game.render()

        env_action = action + self.action_shift
        accum_reward = 0
        for _ in range(self.frame_skip + 1):
            x_t1_raw, r, terminal, info = self.game.step(env_action)
            accum_reward += r
            if terminal:
                break

        x_t1 = self.process_frame(x_t1_raw)

        if self.log:
            # Parenthesised form is valid on both Python 2 and Python 3.
            print(info)

        # Clip reward to [-1, 1]
        reward = np.clip(accum_reward, -1, 1)

        # Drop the oldest frame and append the newest one.
        newest = x_t1.reshape(self._FRAME_SIZE, self._FRAME_SIZE, 1)
        self.s_t1 = np.append(self.s_t[:, :, 1:], newest, axis=2)
        return self.s_t1, reward, terminal

    def update_state(self):
        """Update the internal game state s_t to s_t1."""
        self.s_t = self.s_t1

    def _save_debug_frame(self, frame):
        """Debugging aid: save a processed frame as a randomly named PNG.

        Not called during normal operation; invoke it by hand (for example
        from ``step``) when inspecting the preprocessing output.  The file
        name is drawn from NumPy's global RNG, so calling this perturbs the
        seeded random stream, and the call then sleeps for ten seconds.
        """
        plt.imshow(frame, cmap='gray')
        plt.savefig(str(np.random.randint(0, 10000)) + '.png')
        time.sleep(10)