@@ -25,8 +25,8 @@ initialization:
from tensorforce.execution import Runner

runner = Runner(
-     agent = agent, # tensorforce.agents.agent.Agent object
-     environment = env # tensorforce.environments.environment.Environment object
+     agent = agent, # Agent object
+     environment = env # Environment object
)
```

@@ -66,53 +66,45 @@ def episode_finished(r):
Here is some example code for using the runner (without preprocessing).

``` python
- from tensorforce.config import Config
- from tensorforce.external.openai_gym import OpenAIGymEnvironment
+ from tensorforce.config import Configuration
+ from tensorforce.environments.openai_gym import OpenAIGym
+ from tensorforce.agents import DQNAgent
from tensorforce.execution import Runner
- from tensorforce.examples.simple_q_agent import SimpleQAgent

def main():
    gym_id = 'CartPole-v0'
    max_episodes = 10000
    max_timesteps = 1000

-     env = OpenAIGymEnvironment(gym_id, monitor=False, monitor_video=False)
+     env = OpenAIGym(gym_id)

-     config = Config({
-         'repeat_actions': 1,
+     config = Configuration({
        'actions': env.actions,
-         'action_shape': env.action_shape,
-         'state_shape': env.state_shape,
-         'exploration': 'constant',
-         'exploration_args': [0.1]
+         'states': env.states
+         # ...
    })

-     agent = SimpleQAgent(config, "simpleq")
+     agent = DQNAgent(config)

    runner = Runner(agent, env)

    def episode_finished(r):
-         if r.episode % 10 == 0:
-             print("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
-             print("Episode reward: {}".format(r.episode_rewards[-1]))
-             print("Average of last 10 rewards: {}".format(np.mean(r.episode_rewards[-10:])))
+         if r.episode % report_episodes == 0:
+             logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
+             logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
+             logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    print("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
+
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
-     print("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))
+
+     print("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

if __name__ == '__main__':
    main()
```
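
Note that the revised example references `logger` and `report_episodes` without defining them, so it is not runnable exactly as shown. A minimal setup along the following lines would make it self-contained (a sketch: the names mirror the snippet above, while the logging configuration and the reporting interval of 10 are illustrative assumptions):

``` python
import logging

# Route INFO-level messages to the console so the logger.info output is visible
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Report progress every 10 episodes (illustrative value)
report_episodes = 10
```

Note also the callback's `return True`: judging by the example, the return value tells the runner whether to continue, so returning `False` from `episode_finished` is the natural way to stop training early, for example once the average reward crosses a target.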

- ### ThreadRunner
-
- No description, yet.
-
- ### DistributedRunner
-
- No description, yet.

Building your own runner
------------------------
@@ -123,13 +115,13 @@ resulting observation to the agent.

``` python
# Get action
- action = agent.get_action(state, self.episode)
+ action = agent.act(state, self.episode)

# Execute action in the environment
- result = environment.execute_action(action)
+ state, reward, terminal_state = environment.execute(action)

# Pass observation to the agent
- agent.add_observation(state, action, result['reward'], result['terminal_state'])
+ agent.observe(state, action, reward, terminal_state)
```

The key idea here is the separation of concerns. External code should
@@ -138,10 +130,6 @@ the agent is for. Conversely, an agent need not concern itself with how
a model is implemented and the API should facilitate easy combination of
different agents and models.

- There are other tasks a runner could implement, such as
- preprocessing <preprocessing>, repeating actions and storing
- episode rewards.
-
If you would like to build your own runner, it is probably a good idea
to take a look at the [source code of our Runner
class](https://github.com/reinforceio/tensorforce/blob/master/tensorforce/execution/runner.py).
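
For orientation, here is what a minimal runner built from these three calls could look like. This is a sketch, not the library's `Runner`: it assumes, beyond the `act`/`execute`/`observe` API shown above, that the environment has a `reset()` method returning an initial state, and the episode bookkeeping is purely illustrative.

``` python
def run(agent, environment, max_episodes, max_timesteps):
    """Minimal episode loop over the act/execute/observe API (sketch)."""
    episode_rewards = []
    for episode in range(max_episodes):
        state = environment.reset()  # assumed: reset() returns the initial state
        episode_reward = 0.0
        for timestep in range(max_timesteps):
            # Get action
            action = agent.act(state, episode)
            # Execute action in the environment
            state, reward, terminal_state = environment.execute(action)
            # Pass observation to the agent
            agent.observe(state, action, reward, terminal_state)
            episode_reward += reward
            if terminal_state:
                break
        episode_rewards.append(episode_reward)  # keep per-episode rewards for reporting
    return episode_rewards
```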