-
Notifications
You must be signed in to change notification settings - Fork 7.2k
[wingman -> rllib] Remote and entangled environments #3968
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
242aeea
5d168c5
a3cb42d
e211849
2228d4c
c8752d3
04c1fbc
5d1c9e4
30e9228
95be509
865819f
30ef65b
d774d1e
f7b2e2f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,8 +2,13 @@ | |
| from __future__ import division | ||
| from __future__ import print_function | ||
|
|
||
| import logging | ||
|
|
||
| import ray | ||
| from ray.rllib.utils.annotations import override, PublicAPI | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| @PublicAPI | ||
| class VectorEnv(object): | ||
|
|
@@ -18,8 +23,17 @@ class VectorEnv(object): | |
| """ | ||
|
|
||
@staticmethod
def wrap(make_env=None,
         existing_envs=None,
         num_envs=1,
         remote_envs=False,
         action_space=None,
         observation_space=None):
    """Build a VectorEnv from a gym env factory.

    Only the final definition is kept: the earlier three-argument `wrap`
    shown alongside it was superseded, and leaving both in place would
    attach `@staticmethod` to the dead definition.

    Arguments:
        make_env (callable): Called with an env index to create one env.
        existing_envs (list): Already-created envs to reuse. Only used
            when `remote_envs` is false; the remote path does not
            receive it.
        num_envs (int): Total number of envs to keep.
        remote_envs (bool): If true, return a _RemoteVectorizedGymEnv
            instead of a _VectorizedGymEnv.
        action_space: Optional action space forwarded to the wrapper.
        observation_space: Optional observation space forwarded to the
            wrapper.

    Returns:
        A _RemoteVectorizedGymEnv if `remote_envs` is true, otherwise a
        _VectorizedGymEnv.
    """
    if remote_envs:
        return _RemoteVectorizedGymEnv(make_env, num_envs, action_space,
                                       observation_space)
    return _VectorizedGymEnv(make_env, existing_envs or [], num_envs,
                             action_space, observation_space)
|
|
||
| @PublicAPI | ||
| def vector_reset(self): | ||
|
|
@@ -70,14 +84,20 @@ class _VectorizedGymEnv(VectorEnv): | |
| num_envs (int): Desired num gym envs to keep total. | ||
| """ | ||
|
|
||
def __init__(self,
             make_env,
             existing_envs,
             num_envs,
             action_space=None,
             observation_space=None):
    """Collect `num_envs` envs, creating the missing ones with `make_env`.

    Arguments:
        make_env (callable): Called with the index of the env to create.
        existing_envs (list): Envs to reuse. New envs are appended to
            this same list object, so the caller's list is extended in
            place.
        num_envs (int): Desired number of envs to keep in total.
        action_space: Action space to expose. If None, it is read from
            the first env.
        observation_space: Observation space to expose. If None, it is
            read from the first env.
    """
    self.make_env = make_env
    self.envs = existing_envs
    self.num_envs = num_envs
    while len(self.envs) < self.num_envs:
        self.envs.append(self.make_env(len(self.envs)))

    # Compare against None rather than relying on truthiness: a space
    # object that happens to evaluate as false (e.g. one defining
    # __len__) must not be silently replaced by the first env's space.
    if action_space is None:
        action_space = self.envs[0].action_space
    if observation_space is None:
        observation_space = self.envs[0].observation_space
    self.action_space = action_space
    self.observation_space = observation_space
|
|
||
| @override(VectorEnv) | ||
| def vector_reset(self): | ||
|
|
@@ -101,3 +121,71 @@ def vector_step(self, actions): | |
| @override(VectorEnv) | ||
| def get_unwrapped(self): | ||
| return self.envs | ||
|
|
||
|
|
||
@ray.remote(num_cpus=0)
class _RemoteEnv(object):
    """Ray actor that owns a single gym env and forwards calls to it."""

    def __init__(self, make_env, i):
        # The env is constructed here, i.e. when the actor itself is
        # constructed, from the factory and index handed in by the caller.
        created = make_env(i)
        self.env = created

    def step(self, action):
        """Apply `action` to the wrapped env and return its step result."""
        outcome = self.env.step(action)
        return outcome

    def reset(self):
        """Reset the wrapped env and return what its reset() returns."""
        initial = self.env.reset()
        return initial
|
|
||
|
|
||
| class _RemoteVectorizedGymEnv(_VectorizedGymEnv): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider extending `VectorEnv` directly, since you don't seem to use much of the functionality of `_VectorizedGymEnv`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, I do reuse the constructor and `get_unwrapped`, so there is no need to copy those. I would leave it like this. |
||
| """Internal wrapper for gym envs to implement VectorEnv as remote workers. | ||
| """ | ||
|
|
||
def __init__(self,
             make_env,
             num_envs,
             action_space=None,
             observation_space=None):
    """Record the configuration only; no env is created here.

    The parent constructor is deliberately not called at this point. It
    is invoked later from `_initialize_if_needed`, which is where the
    remote envs get launched.

    Arguments:
        make_env (callable): Factory for a local env, taking the env
            index. Stored as `make_local_env`.
        num_envs (int): Number of remote envs to launch later.
        action_space: Action space to expose (may be None).
        observation_space: Observation space to expose (may be None).
    """
    # Nothing has been launched yet.
    self.initialized = False
    self.num_envs = num_envs
    self.make_local_env = make_env
    self.observation_space = observation_space
    self.action_space = action_space
|
|
||
def _initialize_if_needed(self):
    """Launch the remote env actors on first use; no-op afterwards.

    Runs the parent constructor with a factory that wraps each env in a
    `_RemoteEnv` actor, then checks that every entry of `self.envs` is
    an actor handle.

    The `initialized` flag is set only after all of that succeeded. If
    launching fails, the next call retries instead of leaving the object
    marked as initialized without a usable `self.envs`.
    """
    if self.initialized:
        return

    def make_remote_env(i):
        logger.info("Launching env {} in remote actor".format(i))
        return _RemoteEnv.remote(self.make_local_env, i)

    # NOTE(review): if action_space / observation_space were left as
    # None, the parent constructor reads them from self.envs[0], which is
    # an actor handle here rather than a gym env -- confirm callers
    # always pass both spaces when using remote envs.
    _VectorizedGymEnv.__init__(self, make_remote_env, [], self.num_envs,
                               self.action_space, self.observation_space)

    for env in self.envs:
        assert isinstance(env, ray.actor.ActorHandle), env

    self.initialized = True
|
|
||
@override(_VectorizedGymEnv)
def vector_reset(self):
    """Reset every remote env and return the results as a list.

    Launches the remote envs first if that has not happened yet. All
    reset calls are submitted before waiting on any of them.
    """
    self._initialize_if_needed()
    pending = [remote_env.reset.remote() for remote_env in self.envs]
    return ray.get(pending)
|
|
||
@override(_VectorizedGymEnv)
def reset_at(self, index):
    """Reset the remote env at `index` and return the result.

    Arguments:
        index (int): Position of the env in `self.envs`.
    """
    # The envs are created lazily, so make sure they exist even when this
    # is the first method called on the object; otherwise `self.envs`
    # would not be set yet.
    self._initialize_if_needed()
    return ray.get(self.envs[index].reset.remote())
|
|
||
@override(_VectorizedGymEnv)
def vector_step(self, actions):
    """Step every remote env with its action and return batched results.

    Arguments:
        actions (list): One action per env, paired with `self.envs` in
            order.

    Returns:
        Four lists (obs, rewards, dones, infos), each holding one entry
        per stepped env.
    """
    # The envs are created lazily, so make sure they exist even when this
    # is called before any reset; otherwise `self.envs` would not be set.
    self._initialize_if_needed()

    # Submit all step calls first, then wait for all of them together.
    step_outs = ray.get(
        [env.step.remote(act) for env, act in zip(self.envs, actions)])

    obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
    for obs, rew, done, info in step_outs:
        obs_batch.append(obs)
        rew_batch.append(rew)
        done_batch.append(done)
        info_batch.append(info)
    return obs_batch, rew_batch, done_batch, info_batch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to call close() on a remote env? SC2 environments start an SC2 server, which is a separate process, and I guess the correct way to stop it in these situations would be to call the close method (though I do see the servers dying after a keyboard interrupt).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's Python's `atexit`, which I think should work. If not, we can add close() hooks (but I don't know if this is as reliable in case of errors).