-
Notifications
You must be signed in to change notification settings - Fork 35
/
fourrooms.py
129 lines (108 loc) · 4.06 KB
/
fourrooms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import logging
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
# Module-level logger named after this module; it is not referenced anywhere
# else in the visible code.
logger = logging.getLogger(__name__)
class Fourrooms(gym.Env):
    """Four-rooms gridworld with one-hot state observations.

    The world is a 13x13 grid in which ``'w'`` marks a wall and a space marks
    a free cell. Free cells are numbered in row-major order; an observation is
    a one-hot vector over those cells. There are four discrete actions whose
    (row, column) offsets are stored in ``self.directions`` in the order
    up, down, left, right.

    Reaching the goal state yields a reward of 1.0 and ends the episode;
    every other transition yields 0.0. An episode is also cut off (with
    reward 0.0) after ``MAX_EPISODE_STEPS`` steps.

    NOTE: all sampling done by the environment (start state, goal switching,
    transition noise) uses ``self.rng``, a ``RandomState`` created with the
    fixed seed 1234. ``seed()`` only sets ``self.np_random`` and therefore
    does not influence that sampling.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 50,
    }

    # Step budget after which ``step`` forces the episode to end.
    MAX_EPISODE_STEPS = 1000

    def __init__(self):
        # Every row must be exactly as wide as the others; the spaces inside
        # the literal are significant (they are the free cells).
        layout = """\
wwwwwwwwwwwww
w     w     w
w     w     w
w           w
w     w     w
w     w     w
ww wwww     w
w     www www
w     w     w
w     w     w
w           w
w     w     w
wwwwwwwwwwwww
"""
        rows = layout.splitlines()
        if len({len(row) for row in rows}) != 1:
            raise ValueError("layout rows must all have the same width")

        # 1 marks a wall, 0 marks a free cell.
        self.occupancy = np.array(
            [[1 if c == 'w' else 0 for c in row] for row in rows]
        )

        # From any state the agent can perform one of four actions:
        # up, down, left or right.
        self.action_space = spaces.Discrete(4)
        num_free = int(np.sum(self.occupancy == 0))
        self.observation_space = spaces.Box(low=0., high=1., shape=(num_free,))

        self.directions = [
            np.array((-1, 0)),
            np.array((1, 0)),
            np.array((0, -1)),
            np.array((0, 1)),
        ]
        self.rng = np.random.RandomState(1234)

        # Map each free (row, col) cell to a state index in row-major order,
        # and build the inverse mapping.
        self.tostate = {}
        n_rows, n_cols = self.occupancy.shape
        for i in range(n_rows):
            for j in range(n_cols):
                if self.occupancy[i, j] == 0:
                    self.tostate[(i, j)] = len(self.tostate)
        self.tocell = {v: k for k, v in self.tostate.items()}

        self.goal = 62  # East doorway
        # Episodes may start in any free cell except the goal.
        self.init_states = list(range(self.observation_space.shape[0]))
        self.init_states.remove(self.goal)
        self.ep_steps = 0

    def seed(self, seed=None):
        """Seed ``self.np_random`` and return ``[seed]``.

        NOTE: the environment samples from ``self.rng``, which this method
        does not touch.
        """
        return self._seed(seed)

    def _seed(self, seed=None):
        """Create ``self.np_random`` via ``seeding.np_random``; return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def empty_around(self, cell):
        """Return the free cells adjacent to ``cell``, in action order.

        ``cell`` is a (row, col) pair. Each returned neighbour is a
        (row, col) tuple that is not a wall.
        """
        avail = []
        for action in range(self.action_space.n):
            nextcell = tuple(cell + self.directions[action])
            if not self.occupancy[nextcell]:
                avail.append(nextcell)
        return avail

    def reset(self):
        """Start a new episode from a random non-goal cell.

        Returns the one-hot observation of the start state.
        """
        state = self.rng.choice(self.init_states)
        self.currentcell = self.tocell[state]
        self.ep_steps = 0
        return self.get_state(state)

    def switch_goal(self):
        """Move the goal to a randomly chosen state from ``self.init_states``.

        The previous goal becomes a valid start state again and the new goal
        is removed from the start states.
        """
        prev_goal = self.goal
        # ``rng.choice`` returns a NumPy integer; store a plain int.
        self.goal = int(self.rng.choice(self.init_states))
        self.init_states.append(prev_goal)
        self.init_states.remove(self.goal)
        assert prev_goal in self.init_states
        assert self.goal not in self.init_states

    def get_state(self, state):
        """Return the one-hot observation vector for state index ``state``."""
        s = np.zeros(self.observation_space.shape[0])
        s[state] = 1
        return s

    def render(self, show_goal=True):
        """Return a copy of the occupancy grid with the agent marked as -1.

        When ``show_goal`` is true the goal cell is marked with -1 as well.
        Requires ``reset()`` to have been called, since that is where
        ``self.currentcell`` is set.
        """
        current_grid = np.array(self.occupancy)
        current_grid[self.currentcell[0], self.currentcell[1]] = -1
        if show_goal:
            goal_cell = self.tocell[self.goal]
            current_grid[goal_cell[0], goal_cell[1]] = -1
        return current_grid

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``.

        Dynamics as implemented:

        * If the cell in the chosen direction is a wall, the agent stays put
          (no random number is drawn).
        * Otherwise, with probability 1/3 the agent moves to a uniformly
          chosen free neighbouring cell (which may be the intended one);
          with probability 2/3 it moves to the intended cell.

        The reward is 1.0 when the resulting state is the goal and 0.0
        otherwise. ``done`` is true at the goal, or once the episode has
        lasted ``MAX_EPISODE_STEPS`` steps (reward 0.0 in that case).
        ``info`` is an empty dict.
        """
        self.ep_steps += 1

        nextcell = tuple(self.currentcell + self.directions[action])
        if not self.occupancy[nextcell]:
            if self.rng.uniform() < 1/3.:
                empty_cells = self.empty_around(self.currentcell)
                self.currentcell = empty_cells[self.rng.randint(len(empty_cells))]
            else:
                self.currentcell = nextcell

        state = self.tostate[self.currentcell]
        done = bool(state == self.goal)
        reward = float(done)

        # Time limit: end the episode without reward.
        if not done and self.ep_steps >= self.MAX_EPISODE_STEPS:
            done = True
            reward = 0.0

        # Gym expects ``info`` to be a dict, not None.
        return self.get_state(state), reward, done, {}
if __name__ == "__main__":
    # Manual smoke check: build the environment and seed it.
    env = Fourrooms()
    env.seed(seed=3)