# fourrooms.py — Four Rooms gridworld environment (84 lines, 2.79 KB)
import numpy as np
from gym import core, spaces
from gym.envs.registration import register
class Fourrooms:
    """Four Rooms gridworld (Sutton, Precup & Singh's options domain).

    A 13x13 grid split into four rooms connected by single-cell doorways.
    Observations are discrete indices over the empty cells (row-major
    order); reaching the goal state yields reward 1 and ends the episode.
    """

    def __init__(self, goal=62):
        """Build the grid and state maps.

        Args:
            goal: state index of the terminal goal cell (default 62, the
                east doorway used in the original experiments).
        """
        # 'w' marks a wall cell, a space marks a walkable cell.  This is
        # the canonical four-rooms layout with one doorway per wall.
        layout = """\
wwwwwwwwwwwww
w     w     w
w     w     w
w           w
w     w     w
w     w     w
ww wwww     w
w     www www
w     w     w
w     w     w
w           w
w     w     w
wwwwwwwwwwwww
"""
        # occupancy[i, j] == 1 for walls, 0 for empty cells.
        self.occupancy = np.array(
            [[1 if c == 'w' else 0 for c in line] for line in layout.splitlines()])
        # Four actions: up, down, left, right.
        self.action_space = spaces.Discrete(4)
        # One discrete observation per empty cell.
        self.observation_space = spaces.Discrete(int(np.sum(self.occupancy == 0)))
        self.directions = [np.array((-1, 0)), np.array((1, 0)),
                           np.array((0, -1)), np.array((0, 1))]
        # Fixed seed for reproducible episodes.
        self.rng = np.random.RandomState(1234)

        # Map each empty (row, col) cell to a contiguous state index in
        # row-major scan order, plus the inverse map.  Bounds come from
        # the layout itself rather than a hard-coded 13.
        self.tostate = {}
        statenum = 0
        rows, cols = self.occupancy.shape
        for i in range(rows):
            for j in range(cols):
                if self.occupancy[i, j] == 0:
                    self.tostate[(i, j)] = statenum
                    statenum += 1
        self.tocell = {v: k for k, v in self.tostate.items()}

        self.goal = goal
        # Episodes may start from any non-goal state.
        self.init_states = list(range(self.observation_space.n))
        self.init_states.remove(self.goal)

    def empty_around(self, cell):
        """Return the walkable cells adjacent to `cell` (up/down/left/right)."""
        avail = []
        for action in range(self.action_space.n):
            nextcell = tuple(cell + self.directions[action])
            if not self.occupancy[nextcell]:
                avail.append(nextcell)
        return avail

    def reset(self):
        """Start a new episode in a uniformly random non-goal state."""
        state = self.rng.choice(self.init_states)
        self.currentcell = self.tocell[state]
        return state

    def step(self, action):
        """Advance one stochastic step.

        The intended move is applied first (a move into a wall leaves the
        agent in place); then, with probability 1/3, the agent is relocated
        to a uniformly random empty neighbour of its (possibly new) cell.
        Reward is 1 on reaching the goal and 0 on every other transition.

        Returns:
            (state, reward, done, info) per the gym step API.
        """
        nextcell = tuple(self.currentcell + self.directions[action])
        if not self.occupancy[nextcell]:
            self.currentcell = nextcell
        if self.rng.uniform() < 1 / 3.:
            empty_cells = self.empty_around(self.currentcell)
            self.currentcell = empty_cells[self.rng.randint(len(empty_cells))]
        state = self.tostate[self.currentcell]
        done = state == self.goal
        # info must be a dict per the gym API (was None).
        return state, float(done), done, {}
# Register the environment so `gym.make('Fourrooms-v0')` works.
# `timestep_limit` was the pre-0.9 gym keyword and was removed;
# `max_episode_steps` is the supported equivalent.
register(
    id='Fourrooms-v0',
    entry_point='fourrooms:Fourrooms',
    max_episode_steps=20000,
    reward_threshold=1,
)