Skip to content

Commit 8581633

Browse files
committed
added environment docs
1 parent 78eb48e commit 8581633

File tree

5 files changed

+83
-33
lines changed

5 files changed

+83
-33
lines changed

docs/environments.md

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
Environments
2+
============
3+
4+
A reinforcement learning environment provides the API to a simulated or real
5+
environment as the subject for optimization. It could be anything from
6+
video games (e.g. Atari) to robots or trading systems. The agent interacts
7+
with this environment and learns to act optimally under its dynamics.
8+
9+
> Environment <-> Runner <-> Agent <-> Model
10+
11+
```eval_rst
12+
.. autoclass:: tensorforce.environments.Environment
13+
:members:
14+
```
15+
16+
17+
Ready-to-use environments
18+
-------------------------
19+
20+
### OpenAI Gym
21+
22+
```eval_rst
23+
.. autoclass:: tensorforce.environments.openai_gym.OpenAIGym
24+
:show-inheritance:
25+
:members:
26+
:special-members: __init__
27+
```
28+
29+
### OpenAI Universe
30+
31+
```eval_rst
32+
.. autoclass:: tensorforce.environments.openai_universe.OpenAIUniverse
33+
:show-inheritance:
34+
:members:
35+
:special-members: __init__
36+
```
37+
38+
### DeepMind Lab
39+
40+
```eval_rst
41+
.. autoclass:: tensorforce.environments.deepmind_lab.DeepMindLab
42+
:show-inheritance:
43+
:members:
44+
:special-members: __init__
45+
```

tensorforce/environments/deepmind_lab.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,17 @@ def __init__(self, level_id, repeat_action=1, state_attribute='RGB_INTERLACED',
5454
"""
5555
Initialize DeepMind Lab environment.
5656
57-
:param level_id: string with id/descriptor of the level, e.g. 'seekavoid_arena_01'
58-
:param num_steps: number of frames the environment is advanced, executing the given action during every frame
59-
:param state_attribute: Attributes which represents the state for this environment, should adhere to the specification given in DeepMindLabEnvironment.state_spec(level_id)
60-
:param settings: dict specifying additional settings as key-value string pairs. The following options are recognized: 'width' (horizontal resolution of the observation frames), 'height' (vertical resolution of the observation frames), 'fps' (frames per second) and 'appendCommand' (commands for the internal Quake console).
57+
Args:
58+
level_id: string with id/descriptor of the level, e.g. 'seekavoid_arena_01'.
59+
repeat_action: number of frames the environment is advanced, executing the given action during every frame.
60+
state_attribute: Attribute which represents the state for this environment, should adhere to the
61+
specification given in DeepMindLabEnvironment.state_spec(level_id).
62+
settings: dict specifying additional settings as key-value string pairs. The following options are
63+
recognized: 'width' (horizontal resolution of the observation frames),
64+
'height' (vertical resolution of the observation frames),
65+
'fps' (frames per second) and
66+
'appendCommand' (commands for the internal Quake console).
67+
6168
"""
6269
self.level_id = level_id
6370
self.level = deepmind_lab.Lab(level=level_id, observations=[state_attribute], config=settings)

tensorforce/environments/environment.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
class Environment(object):
2323
"""
24-
Base environment class
24+
Base environment class.
2525
"""
2626

2727
def __str__(self):
@@ -37,24 +37,37 @@ def reset(self):
3737
"""
3838
Reset environment and setup for new episode.
3939
40-
:return: initial state
40+
Returns: initial state of the reset environment.
4141
"""
4242
raise NotImplementedError
4343

4444
def execute(self, action):
4545
"""
4646
Executes action, observes next state and reward.
4747
48-
:param action: Action to execute
48+
Args:
49+
action: Action to execute.
4950
50-
:return: dict containing at least next_state, reward, and terminal_state
51+
Returns: tuple of state (tuple), reward (float), and terminal_state (bool).
5152
"""
5253
raise NotImplementedError
5354

5455
@property
5556
def states(self):
57+
"""
58+
Return the state space. Might include subdicts if multiple states are available simultaneously.
59+
60+
Returns: dict of state properties (shape and type).
61+
62+
"""
5663
raise NotImplementedError
5764

5865
@property
5966
def actions(self):
67+
"""
68+
Return the action space. Might include subdicts if multiple actions are available simultaneously.
69+
70+
Returns: dict of action properties (continuous, number of actions).
71+
72+
"""
6073
raise NotImplementedError

tensorforce/environments/openai_gym.py

+7-14
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ class OpenAIGym(Environment):
3333

3434
def __init__(self, gym_id, monitor=None, monitor_safe=False, monitor_video=0):
3535
"""
36-
Initialize OpenAI gym environment.
36+
Initialize OpenAI Gym.
3737
38-
:param gym_id: OpenAI Gym environment ID. See https://gym.openai.com/envs
39-
:param monitor: Output directory. Setting this to None disables monitoring.
40-
:param monitor_safe: Setting this to True prevents existing log files to be overwritten. Default False.
41-
:param monitor_video: Save a video every monitor_video steps. Setting this to 0 disables recording of videos.
38+
Args:
39+
gym_id: OpenAI Gym environment ID. See https://gym.openai.com/envs
40+
monitor: Output directory. Setting this to None disables monitoring.
41+
monitor_safe: Setting this to True prevents existing log files from being overwritten. Default False.
42+
monitor_video: Save a video every monitor_video steps. Setting this to 0 disables recording of videos.
4243
"""
44+
4345
self.gym_id = gym_id
4446
self.gym = gym.make(gym_id) # Might raise gym.error.UnregisteredEnv or gym.error.DeprecatedEnv
4547

@@ -54,21 +56,12 @@ def __str__(self):
5456
return 'OpenAIGym({})'.format(self.gym_id)
5557

5658
def close(self):
57-
"""
58-
Close environment. No other method calls possible afterwards.
59-
"""
6059
self.gym = None
6160

6261
def reset(self):
63-
"""
64-
Pass reset function to gym.
65-
"""
6662
return self.gym.reset()
6763

6864
def execute(self, action):
69-
"""
70-
Pass action to gym, return reward, next step, terminal state and additional info.
71-
"""
7265
if isinstance(self.gym.action_space, gym.spaces.Box):
7366
action = [action] # some gym environments expect a list (e.g. Pendulum-v0)
7467
state, reward, terminal, _ = self.gym.step(action)

tensorforce/environments/openai_universe.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@
3434
class OpenAIUniverse(Environment):
3535
def __init__(self, env_id):
3636
"""
37-
Initialize open ai universe environment.
37+
Initialize OpenAI Universe environment.
3838
39-
:param env_id: string with id/descriptor of the universe environment, e.g. 'HarvestDay-v0'
39+
Args:
40+
env_id: string with id/descriptor of the universe environment, e.g. 'HarvestDay-v0'.
4041
"""
4142
self.env_id = env_id
4243
self.env = gym.make(env_id)
@@ -45,15 +46,9 @@ def __str__(self):
4546
return 'OpenAI-Universe({})'.format(self.env_id)
4647

4748
def close(self):
48-
"""
49-
Close environment. No other method calls possible afterwards.
50-
"""
5149
self.env = None
5250

5351
def reset(self):
54-
"""
55-
Pass reset function to universe environment.
56-
"""
5752
state = self.env.reset()
5853
if state == [None]:
5954
state, r, t = self._wait_state(state, None, None)
@@ -64,9 +59,6 @@ def reset(self):
6459
return state[0]
6560

6661
def execute(self, action):
67-
"""
68-
Pass action to universe environment, return reward, next step, terminal state and additional info.
69-
"""
7062
state, reward, terminal = self._execute(action)
7163
return self._wait_state(state, reward, terminal)
7264

0 commit comments

Comments
 (0)