added environment docs

krfricke · krfricke · commit 85816336b864 · 2017-07-09T14:55:21.000+02:00
diff --git a/docs/environments.md b/docs/environments.md
@@ -0,0 +1,45 @@
+Environments
+============
+
+A reinforcement learning environment provides the API to a simulated or real
+environment as the subject for optimization. It could be anything from
+video games (e.g. Atari) to robots or trading systems. The agent interacts
+with this environment and learns to act optimally given its dynamics.
+
+> Environment <-> Runner <-> Agent <-> Model
+
+```eval_rst
+    .. autoclass:: tensorforce.environments.Environment
+        :members:
+```
+
+
+Ready-to-use environments
+-------------------------
+
+### OpenAI Gym
+
+```eval_rst
+    .. autoclass:: tensorforce.environments.openai_gym.OpenAIGym
+        :show-inheritance:
+        :members:
+        :special-members: __init__
+```
+
+### OpenAI Universe
+
+```eval_rst
+    .. autoclass:: tensorforce.environments.openai_universe.OpenAIUniverse
+        :show-inheritance:
+        :members:
+        :special-members: __init__
+```
+
+### DeepMind Lab
+
+```eval_rst
+    .. autoclass:: tensorforce.environments.deepmind_lab.DeepMindLab
+        :show-inheritance:
+        :members:
+        :special-members: __init__
+```
diff --git a/tensorforce/environments/deepmind_lab.py b/tensorforce/environments/deepmind_lab.py
@@ -54,10 +54,17 @@ def __init__(self, level_id, repeat_action=1, state_attribute='RGB_INTERLACED',
         """
         Initialize DeepMind Lab environment.
 
-        :param level_id: string with id/descriptor of the level, e.g. 'seekavoid_arena_01'
-        :param num_steps: number of frames the environment is advanced, executing the given action during every frame
-        :param state_attribute: Attributes which represents the state for this environment, should adhere to the specification given in DeepMindLabEnvironment.state_spec(level_id)
-        :param settings: dict specifying additional settings as key-value string pairs. The following options are recognized: 'width' (horizontal resolution of the observation frames), 'height' (vertical resolution of the observation frames), 'fps' (frames per second) and 'appendCommand' (commands for the internal Quake console).
+        Args:
+            level_id: string with id/descriptor of the level, e.g. 'seekavoid_arena_01'.
+            repeat_action: number of frames the environment is advanced, executing the given action during every frame.
+            state_attribute: Attribute that represents the state for this environment; it should adhere to the
+                specification given in DeepMindLabEnvironment.state_spec(level_id).
+            settings: dict specifying additional settings as key-value string pairs. The following options are
+                recognized: 'width' (horizontal resolution of the observation frames),
+                'height' (vertical resolution of the observation frames),
+                'fps' (frames per second) and
+                'appendCommand' (commands for the internal Quake console).
+
         """
         self.level_id = level_id
         self.level = deepmind_lab.Lab(level=level_id, observations=[state_attribute], config=settings)
diff --git a/tensorforce/environments/environment.py b/tensorforce/environments/environment.py
@@ -21,7 +21,7 @@
 
 class Environment(object):
     """
-    Base environment class
+    Base environment class.
     """
 
     def __str__(self):
@@ -37,24 +37,37 @@ def reset(self):
         """
         Reset environment and setup for new episode.
 
-        :return: initial state
+        Returns: initial state of the reset environment.
         """
         raise NotImplementedError
 
     def execute(self, action):
         """
         Executes action, observes next state and reward.
 
-        :param action: Action to execute
+        Args:
+            action: Action to execute.
 
-        :return: dict containing at least next_state, reward, and terminal_state
+        Returns: tuple of state (tuple), reward (float), and terminal_state (bool).
         """
         raise NotImplementedError
 
     @property
     def states(self):
+        """
+        Return the state space. Might include subdicts if multiple states are available simultaneously.
+
+        Returns: dict of state properties (shape and type).
+
+        """
         raise NotImplementedError
 
     @property
     def actions(self):
+        """
+        Return the action space. Might include subdicts if multiple actions are available simultaneously.
+
+        Returns: dict of action properties (continuous, number of actions).
+
+        """
         raise NotImplementedError
diff --git a/tensorforce/environments/openai_gym.py b/tensorforce/environments/openai_gym.py
@@ -33,13 +33,15 @@ class OpenAIGym(Environment):
 
     def __init__(self, gym_id, monitor=None, monitor_safe=False, monitor_video=0):
         """
-        Initialize OpenAI gym environment.
+        Initialize OpenAI Gym.
 
-        :param gym_id: OpenAI Gym environment ID. See https://gym.openai.com/envs
-        :param monitor: Output directory. Setting this to None disables monitoring.
-        :param monitor_safe: Setting this to True prevents existing log files to be overwritten. Default False.
-        :param monitor_video: Save a video every monitor_video steps. Setting this to 0 disables recording of videos.
+        Args:
+            gym_id: OpenAI Gym environment ID. See https://gym.openai.com/envs
+            monitor: Output directory. Setting this to None disables monitoring.
+            monitor_safe: Setting this to True prevents existing log files from being overwritten. Default False.
+            monitor_video: Save a video every monitor_video steps. Setting this to 0 disables recording of videos.
         """
+
         self.gym_id = gym_id
         self.gym = gym.make(gym_id)  # Might raise gym.error.UnregisteredEnv or gym.error.DeprecatedEnv
 
@@ -54,21 +56,12 @@ def __str__(self):
         return 'OpenAIGym({})'.format(self.gym_id)
 
     def close(self):
-        """
-        Close environment. No other method calls possible afterwards.
-        """
         self.gym = None
 
     def reset(self):
-        """
-        Pass reset function to gym.
-        """
         return self.gym.reset()
 
     def execute(self, action):
-        """
-        Pass action to gym, return reward, next step, terminal state and additional info.
-        """
         if isinstance(self.gym.action_space, gym.spaces.Box):
             action = [action]  # some gym environments expect a list (f.i. Pendulum-v0)
         state, reward, terminal, _ = self.gym.step(action)
diff --git a/tensorforce/environments/openai_universe.py b/tensorforce/environments/openai_universe.py
@@ -34,9 +34,10 @@
 class OpenAIUniverse(Environment):
     def __init__(self, env_id):
         """
-        Initialize open ai universe environment.
+        Initialize OpenAI Universe environment.
 
-        :param env_id: string with id/descriptor of the universe environment, e.g. 'HarvestDay-v0'
+        Args:
+            env_id: string with id/descriptor of the universe environment, e.g. 'HarvestDay-v0'.
         """
         self.env_id = env_id
         self.env = gym.make(env_id)
@@ -45,15 +46,9 @@ def __str__(self):
         return 'OpenAI-Universe({})'.format(self.env_id)
 
     def close(self):
-        """
-        Close environment. No other method calls possible afterwards.
-        """
         self.env = None
 
     def reset(self):
-        """
-        Pass reset function to universe environment.
-        """
         state = self.env.reset()
         if state == [None]:
             state, r, t = self._wait_state(state, None, None)
@@ -64,9 +59,6 @@ def reset(self):
         return state[0]
 
     def execute(self, action):
-        """
-        Pass action to universe environment, return reward, next step, terminal state and additional info.
-        """
         state, reward, terminal = self._execute(action)
         return self._wait_state(state, reward, terminal)