
Commit 687fa59

Kai Fricke committed: updated runner docs
1 parent fd89b24 commit 687fa59

1 file changed (+20 -32 lines)

docs/runner.md (+20 -32)
@@ -25,8 +25,8 @@ initialization:
 from tensorforce.execution import Runner
 
 runner = Runner(
-    agent = agent, # tensorforce.agents.agent.Agent object
-    environment = env # tensorforce.environments.environment.Environment object
+    agent = agent, # Agent object
+    environment = env # Environment object
 )
 ```
 

@@ -66,53 +66,45 @@ def episode_finished(r):
 Here is some example code for using the runner (without preprocessing).
 
 ```python
-from tensorforce.config import Config
-from tensorforce.external.openai_gym import OpenAIGymEnvironment
+from tensorforce.config import Configuration
+from tensorforce.environments.openai_gym import OpenAIGym
+from tensorforce.agents import DQNAgent
 from tensorforce.execution import Runner
-from tensorforce.examples.simple_q_agent import SimpleQAgent
 
 def main():
     gym_id = 'CartPole-v0'
     max_episodes = 10000
     max_timesteps = 1000
 
-    env = OpenAIGymEnvironment(gym_id, monitor=False, monitor_video=False)
+    env = OpenAIGym(gym_id)
 
-    config = Config({
-        'repeat_actions': 1,
+    config = Configuration({
         'actions': env.actions,
-        'action_shape': env.action_shape,
-        'state_shape': env.state_shape,
-        'exploration': 'constant',
-        'exploration_args': [0.1]
+        'states': env.states
+        # ...
     })
 
-    agent = SimpleQAgent(config, "simpleq")
+    agent = DQNAgent(config)
 
     runner = Runner(agent, env)
 
     def episode_finished(r):
-        if r.episode % 10 == 0:
-            print("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
-            print("Episode reward: {}".format(r.episode_rewards[-1]))
-            print("Average of last 10 rewards: {}".format(np.mean(r.episode_rewards[-10:])))
+        if r.episode % report_episodes == 0:
+            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
+            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
+            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
         return True
 
     print("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
+
     runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
-    print("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))
+
+    print("Learning finished. Total episodes: {ep}".format(ep=runner.episode))
 
 if __name__ == '__main__':
     main()
 ```
 
-### ThreadRunner
-
-No description, yet.
-
-### DistributedRunner
-
-No description, yet.
 
 Building your own runner
 ------------------------
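
Note on the updated example: as committed, the snippet uses `logger` and `report_episodes` without defining them, and `episode_finished` returning True lets the run continue (returning False would presumably stop it early). A minimal sketch of the missing setup, with the value of `report_episodes` assumed for illustration rather than taken from this commit:

```python
import logging

# Assumed setup for the names the updated example uses; not part of this commit.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Report every 100 episodes, matching the 100-episode running average above.
report_episodes = 100
```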
@@ -123,13 +115,13 @@ resulting observation to the agent.
 
 ```python
 # Get action
-action = agent.get_action(state, self.episode)
+action = agent.act(state, self.episode)
 
 # Execute action in the environment
-result = environment.execute_action(action)
+state, reward, terminal_state = environment.execute(action)
 
 # Pass observation to the agent
-agent.add_observation(state, action, result['reward'], result['terminal_state'])
+agent.observe(state, action, reward, terminal_state)
 ```
 
 The key idea here is the separation of concerns. External code should
@@ -138,10 +130,6 @@ the agent is for. Conversely, an agent need not concern itself with how
 a model is implemented and the API should facilitate easy combination of
 different agents and models.
 
-There are other tasks a runner could implement, such as
-preprocessing <preprocessing>, repeating actions and storing
-episode rewards.
-
 If you would like to build your own runner, it is probably a good idea
 to take a look at the [source code of our Runner
 class](https://github.com/reinforceio/tensorforce/blob/master/tensorforce/execution/runner.py).
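
For context, the three calls in the updated snippet compose into a complete control loop. A rough sketch of a hand-rolled runner built on them, assuming the environment also exposes a `reset()` method returning the initial state and that `agent`, `environment`, `max_episodes`, and `max_timesteps` are set up as in the example above (the actual Runner class linked above handles more bookkeeping):

```python
# Sketch of a custom runner loop; environment.reset() is assumed, not shown in this diff.
for episode in range(max_episodes):
    state = environment.reset()
    for timestep in range(max_timesteps):
        # Get action from the agent for the current state
        action = agent.act(state, episode)

        # Execute action in the environment
        state, reward, terminal_state = environment.execute(action)

        # Pass the observation back to the agent so it can learn
        agent.observe(state, action, reward, terminal_state)

        # End the episode when the environment reports a terminal state
        if terminal_state:
            break
```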
