
Commit 20896ab: wip
sven1977 committed Jun 9, 2023
1 parent eb5194d
Showing 4 changed files with 15 additions and 24 deletions.
examples/atari_100k.yaml (11 additions & 10 deletions)
@@ -1,16 +1,17 @@
 atari-100k:
   # Run with --env ALE/[gym ID], e.g. ALE/Pong-v5.

-  # [2]: "We follow the evaluation protocol of Machado et al. (2018) with 200M
-  # environment steps, action repeat of 4, a time limit of 108,000 steps per
-  # episode that correspond to 30 minutes of game play, no access to life
-  # information, full action space, and sticky actions. Because the world model
-  # integrates information over time, DreamerV2 does not use frame stacking.
-  # The experiments use a single-task setup where a separate agent is trained
-  # for each game. Moreover, each agent uses only a single environment instance.
-  repeat_action_probability: 0.0 # "sticky actions" but not according to Danijar's 100k configs.
-  full_action_space: false # "full action space" but not according to Danijar's 100k configs.
-  frameskip: 1 # already done by MaxAndSkip wrapper: "action repeat" == 4
+  env_config:
+    # [2]: "We follow the evaluation protocol of Machado et al. (2018) with 200M
+    # environment steps, action repeat of 4, a time limit of 108,000 steps per
+    # episode that correspond to 30 minutes of game play, no access to life
+    # information, full action space, and sticky actions. Because the world model
+    # integrates information over time, DreamerV2 does not use frame stacking.
+    # The experiments use a single-task setup where a separate agent is trained
+    # for each game. Moreover, each agent uses only a single environment instance.
+    repeat_action_probability: 0.0 # "sticky actions" but not according to Danijar's 100k configs.
+    full_action_space: false # "full action space" but not according to Danijar's 100k configs.
+    frameskip: 1 # already done by MaxAndSkip wrapper: "action repeat" == 4

   # See Appendix A.
   model_dimension: S
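
The refactor only nests the three ALE settings under env_config; their values are unchanged. They map one-to-one onto make-time kwargs of ALE's v5 environments, so an env runner can forward the dict unchanged. A minimal sketch of that forwarding (not this repo's code; assumes gym 0.26 with ale-py installed, which auto-registers the ALE/ namespace):

    import gym

    # The env_config block above, as a plain dict.
    env_config = {
        "repeat_action_probability": 0.0,  # disable sticky actions
        "full_action_space": False,        # per-game minimal action set
        "frameskip": 1,                    # action repeat left to a MaxAndSkip wrapper
    }

    # ALE's "-v5" environments accept these keys directly as gym.make kwargs.
    env = gym.make("ALE/Pong-v5", **env_config)
    obs, info = env.reset(seed=0)
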
examples/atari_pong_xs.yaml (0 additions & 11 deletions)

This file was deleted.

examples/dm_control_suite_vision.yaml (2 additions & 1 deletion)
@@ -1,7 +1,8 @@
 dm-control-suite-vision:
   # Run with --env DMC/[domain]/[task], e.g. DMC/cartpole/swingup

-  from_pixels: true # use image observations
+  env_config:
+    from_pixels: true # use image observations

   num_envs_per_worker: 4
   remote_worker_envs: true # parallel envs
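
As a point of reference for what from_pixels toggles, here is a small dm_control sketch (not this repo's code; assumes dm_control is installed) contrasting state observations with rendered image observations. The 64x64 render size is an illustrative choice, not a dm_control default:

    from dm_control import suite

    # cartpole is the domain, swingup the task (cf. DMC/cartpole/swingup above).
    env = suite.load(domain_name="cartpole", task_name="swingup")
    time_step = env.reset()

    # State observations (from_pixels: false): a dict of low-dimensional arrays.
    print({k: v.shape for k, v in time_step.observation.items()})

    # Image observations (from_pixels: true): rendered frames, channels-last.
    pixels = env.physics.render(height=64, width=64, camera_id=0)
    print(pixels.shape)  # (64, 64, 3)
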
utils/env_runner_v2.py (2 additions & 2 deletions)
@@ -146,8 +146,8 @@ def __init__(
         )
         gym.register(
             "dmc_env-v0",
-            lambda : DMCEnv(
-                parts[1], parts[2], from_pixels=True, channels_first=False
+            lambda from_pixels=True: DMCEnv(
+                parts[1], parts[2], from_pixels=from_pixels, channels_first=False
             )
         )
         self.env = gym.vector.make(
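
The one functional change gives the entry-point lambda a from_pixels keyword with a default, so the flag can be overridden via make-time kwargs (e.g. from an env_config) instead of being hard-coded to True. A self-contained sketch of that mechanism (StubDMCEnv and stub_dmc-v0 are hypothetical stand-ins; assumes gym >= 0.26):

    import gym

    class StubDMCEnv(gym.Env):
        # Hypothetical stand-in for DMCEnv, only to make the sketch runnable.
        observation_space = gym.spaces.Box(-1.0, 1.0, (3,))
        action_space = gym.spaces.Box(-1.0, 1.0, (1,))

        def __init__(self, from_pixels=True):
            print(f"built with from_pixels={from_pixels}")

        def reset(self, *, seed=None, options=None):
            return self.observation_space.sample(), {}

        def step(self, action):
            return self.observation_space.sample(), 0.0, False, False, {}

    # Same pattern as the diff: the entry point's keyword has a default ...
    gym.register("stub_dmc-v0", entry_point=lambda from_pixels=True: StubDMCEnv(from_pixels))

    # ... which make-time kwargs can override; each call below constructs two
    # sub-envs and prints the from_pixels value it actually received.
    envs = gym.vector.make("stub_dmc-v0", num_envs=2, asynchronous=False)
    envs = gym.vector.make("stub_dmc-v0", num_envs=2, asynchronous=False, from_pixels=False)
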
