fix(xcy): fix render settings when using gymnasium (#173)
* polish(xcy): fix the render in gymnasium

* polish(xcy): change the arguments of save gif
HarryXuancy authored Dec 22, 2023
1 parent 27188cf commit 95e94b9
Showing 11 changed files with 59 additions and 39 deletions.
requirements.txt (3 changes: 2 additions & 1 deletion)
@@ -3,4 +3,5 @@ gymnasium[atari]
 numpy>=1.22.4
 pympler
 bsuite
-minigrid
+minigrid
+moviepy
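
moviepy joins the requirements because gymnasium's RecordVideo wrapper (adopted in zoo/atari/envs/atari_wrappers.py below) encodes its .mp4 output with it; the minigrid line is deleted and re-added only because the old file lacked a trailing newline. A minimal sanity check that the new dependency resolves:

import moviepy

# If this import fails, RecordVideo will error out when it tries to encode
# a recorded episode.
print(moviepy.__version__)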
zoo/atari/entry/atari_eval.py (2 changes: 1 addition & 1 deletion)
@@ -42,7 +42,7 @@
 # A boolean flag indicating whether to save the video of the environment.
 main_config.env.save_replay = True
 # The path where the recorded video will be saved.
-main_config.env.save_path = './video'
+main_config.env.replay_path = './video'
 # The maximum number of steps for each episode during evaluation. This may need to be adjusted based on the specific characteristics of the environment.
 main_config.env.eval_max_episode_steps = int(20)

zoo/atari/envs/atari_wrappers.py (22 changes: 11 additions & 11 deletions)
@@ -11,7 +11,7 @@
     ClipRewardWrapper, FrameStackWrapper
 from ding.utils.compression_helper import jpeg_data_compressor
 from easydict import EasyDict
-from gym.wrappers import RecordVideo
+from gymnasium.wrappers import RecordVideo
 
 
 # only for reference now
@@ -93,8 +93,17 @@ def wrap_lightzero(config: EasyDict, episode_life: bool, clip_rewards: bool) ->
     if config.render_mode_human:
         env = gymnasium.make(config.env_name, render_mode='human')
     else:
-        env = gymnasium.make(config.env_name)
+        env = gymnasium.make(config.env_name, render_mode='rgb_array')
     assert 'NoFrameskip' in env.spec.id
+    if config.save_replay:
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+        video_name = f'{env.spec.id}-video-{timestamp}'
+        env = RecordVideo(
+            env,
+            video_folder=config.replay_path,
+            episode_trigger=lambda episode_id: True,
+            name_prefix=video_name
+        )
     env = GymnasiumToGymWrapper(env)
     env = NoopResetWrapper(env, noop_max=30)
     env = MaxAndSkipWrapper(env, skip=config.frame_skip)
@@ -108,15 +117,6 @@ def wrap_lightzero(config: EasyDict, episode_life: bool, clip_rewards: bool) ->
         env = ScaledFloatFrameWrapper(env)
     if clip_rewards:
         env = ClipRewardWrapper(env)
-    if config.save_replay:
-        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-        video_name = f'{env.spec.id}-video-{timestamp}'
-        env = RecordVideo(
-            env,
-            video_folder=config.replay_path,
-            episode_trigger=lambda episode_id: True,
-            name_prefix=video_name
-        )
 
     env = JpegWrapper(env, transform2string=config.transform2string)
     if config.game_wrapper:
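
The substantive change in this file is ordering and API: RecordVideo now comes from gymnasium, wraps the raw gymnasium env before GymnasiumToGymWrapper downgrades everything to the old gym 4-tuple API that the ding wrappers expect, and the env is created with render_mode='rgb_array' so frames can actually be captured. This is also why atari_eval.py above now sets replay_path, the key this code reads. A minimal standalone sketch of the same recording setup, assuming the gymnasium[atari] extras and ROMs are installed; the env id and output folder are illustrative stand-ins for config.env_name and config.replay_path:

from datetime import datetime

import gymnasium
from gymnasium.wrappers import RecordVideo

# render_mode is fixed at construction; RecordVideo pulls frames through
# env.render(), which only returns arrays under 'rgb_array'.
env = gymnasium.make('PongNoFrameskip-v4', render_mode='rgb_array')
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
env = RecordVideo(
    env,
    video_folder='./video',                   # stand-in for config.replay_path
    episode_trigger=lambda episode_id: True,  # record every episode
    name_prefix=f'{env.spec.id}-video-{timestamp}',
)
obs, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()  # flushes the encoder and writes the .mp4 under ./video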
zoo/box2d/bipedalwalker/envs/bipedalwalker_cont_disc_env.py (4 changes: 2 additions & 2 deletions)
@@ -82,7 +82,7 @@ def reset(self) -> np.ndarray:
             - info_dict (:obj:`Dict[str, Any]`): Including observation, action_mask, and to_play label.
         """
         if not self._init_flag:
-            self._env = gym.make('BipedalWalker-v3', hardcore=True)
+            self._env = gym.make('BipedalWalker-v3', hardcore=True, render_mode="rgb_array")
             self._observation_space = self._env.observation_space
             self._action_space = self._env.action_space
             self._reward_space = gym.spaces.Box(
@@ -141,7 +141,7 @@ def step(self, action: np.ndarray) -> BaseEnvTimestep:
         if self._act_scale:
             action = affine_transform(action, min_val=self._raw_action_space.low, max_val=self._raw_action_space.high)
         if self._save_replay_gif:
-            self._frames.append(self._env.render(mode='rgb_array'))
+            self._frames.append(self._env.render())
         obs, rew, terminated, truncated, info = self._env.step(action)
         done = terminated or truncated
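
The box2d and classic_control files all repeat the same two-line pattern seen here: the render mode moves into gym.make(), and render() loses its mode argument. A standalone before/after sketch, assuming gymnasium[box2d] is installed:

import gymnasium as gym

# Old gym API, now a TypeError in gymnasium:
#   env = gym.make('BipedalWalker-v3', hardcore=True)
#   frame = env.render(mode='rgb_array')
# New API: declare the mode once, at construction.
env = gym.make('BipedalWalker-v3', hardcore=True, render_mode="rgb_array")
obs, info = env.reset(seed=0)
frame = env.render()  # RGB ndarray of shape (H, W, 3), ready to use as a GIF frame
print(frame.shape)
env.close()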
zoo/box2d/bipedalwalker/envs/bipedalwalker_env.py (6 changes: 3 additions & 3 deletions)
@@ -83,9 +83,9 @@ def reset(self) -> Dict[str, np.ndarray]:
         if not self._init_flag:
             assert self._cfg.env_type in ['normal', 'hardcore'], "env_type must be in ['normal', 'hardcore']"
             if self._cfg.env_type == 'normal':
-                self._env = gym.make('BipedalWalker-v3')
+                self._env = gym.make('BipedalWalker-v3', render_mode="rgb_array")
             elif self._cfg.env_type == 'hardcore':
-                self._env = gym.make('BipedalWalker-v3', hardcore=True)
+                self._env = gym.make('BipedalWalker-v3', hardcore=True, render_mode="rgb_array")
             self._observation_space = self._env.observation_space
             self._action_space = self._env.action_space
             self._reward_space = gym.spaces.Box(
@@ -134,7 +134,7 @@ def step(self, action: np.ndarray) -> BaseEnvTimestep:
         if self._act_scale:
             action = affine_transform(action, min_val=self.action_space.low, max_val=self.action_space.high)
         if self._save_replay_gif:
-            self._frames.append(self._env.render(mode='rgb_array'))
+            self._frames.append(self._env.render())
 
         obs, rew, terminated, truncated, info = self._env.step(action)
         done = terminated or truncated
zoo/box2d/lunarlander/envs/lunarlander_cont_disc_env.py (5 changes: 3 additions & 2 deletions)
@@ -5,6 +5,7 @@
 
 import gymnasium as gym
 import numpy as np
+from itertools import product
 from ding.envs import BaseEnvTimestep
 from ding.envs import ObsPlusPrevActRewWrapper
 from ding.envs.common import affine_transform
@@ -84,7 +85,7 @@ def reset(self) -> np.ndarray:
             - info_dict (:obj:`Dict[str, Any]`): Including observation, action_mask, and to_play label.
         """
         if not self._init_flag:
-            self._env = gym.make(self._cfg.env_name)
+            self._env = gym.make(self._cfg.env_name, render_mode="rgb_array")
             if self._replay_path is not None:
                 timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                 video_name = f'{self._env.spec.id}-video-{timestamp}'
@@ -147,7 +148,7 @@ def step(self, action: np.ndarray) -> BaseEnvTimestep:
         if self._act_scale:
             action = affine_transform(action, min_val=-1, max_val=1)
         if self._save_replay_gif:
-            self._frames.append(self._env.render(mode='rgb_array'))
+            self._frames.append(self._env.render())
         obs, rew, terminated, truncated, info = self._env.step(action)
         done = terminated or truncated
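
Besides the render fixes, this file gains from itertools import product. Its call site is outside the shown hunks; in a continuous-to-discrete env, product is the natural tool for enumerating the joint discretized action set, roughly as below (the bin count is an illustrative assumption, not taken from this diff):

from itertools import product

import numpy as np

bins_per_dim = 3   # assumption: K discrete bins per action dimension
action_dim = 2     # LunarLanderContinuous has a 2-dim action space
bin_values = np.linspace(-1.0, 1.0, bins_per_dim)
# Map each discrete action id to one joint assignment of per-dimension bins.
disc_to_cont = list(product(bin_values, repeat=action_dim))
assert len(disc_to_cont) == bins_per_dim ** action_dim  # 3 ** 2 = 9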
zoo/box2d/lunarlander/envs/lunarlander_env.py (4 changes: 2 additions & 2 deletions)
@@ -81,7 +81,7 @@ def reset(self) -> Dict[str, np.ndarray]:
             - obs (:obj:`np.ndarray`): The initial observation after resetting.
         """
         if not self._init_flag:
-            self._env = gym.make(self._cfg.env_name)
+            self._env = gym.make(self._cfg.env_name, render_mode="rgb_array")
             if self._replay_path is not None:
                 timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                 video_name = f'{self._env.spec.id}-video-{timestamp}'
@@ -133,7 +133,7 @@ def step(self, action: np.ndarray) -> BaseEnvTimestep:
         if self._act_scale:
             action = affine_transform(action, min_val=-1, max_val=1)
         if self._save_replay_gif:
-            self._frames.append(self._env.render(mode='rgb_array'))
+            self._frames.append(self._env.render())
 
         obs, rew, terminated, truncated, info = self._env.step(action)
         done = terminated or truncated
zoo/classic_control/cartpole/envs/cartpole_lightzero_env.py
@@ -58,7 +58,7 @@ def reset(self) -> Dict[str, np.ndarray]:
         if necessary. Returns the first observation.
         """
         if not self._init_flag:
-            self._env = gym.make('CartPole-v0')
+            self._env = gym.make('CartPole-v0', render_mode="rgb_array")
             if self._replay_path is not None:
                 timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                 video_name = f'{self._env.spec.id}-video-{timestamp}'
zoo/classic_control/pendulum/entry/pendulum_eval.py (39 changes: 28 additions & 11 deletions)
@@ -4,20 +4,37 @@
 import numpy as np
 
 if __name__ == "__main__":
     """
-    model_path (:obj:`Optional[str]`): The pretrained model path, which should
-    point to the ckpt file of the pretrained model, and an absolute path is recommended.
-    In LightZero, the path is usually something like ``exp_name/ckpt/ckpt_best.pth.tar``.
-    """
-    model_path = "./ckpt/ckpt_best.pth.tar"
-    seeds = [0]
-    num_episodes_each_seed = 1
-    main_config.env.evaluator_env_num = 1
-    main_config.env.n_evaluator_episode = 1
-    total_test_episodes = num_episodes_each_seed * len(seeds)
+    Entry point for the evaluation of the MuZero model on the Pendulum environment.
+    Variables:
+        - model_path (:obj:`Optional[str]`): The pretrained model path, which should point to the ckpt file of the
+          pretrained model. An absolute path is recommended. In LightZero, the path is usually something like
+          ``exp_name/ckpt/ckpt_best.pth.tar``.
+        - returns_mean_seeds (:obj:`List[float]`): List to store the mean returns for each seed.
+        - returns_seeds (:obj:`List[float]`): List to store the returns for each seed.
+        - seeds (:obj:`List[int]`): List of seeds for the environment.
+        - num_episodes_each_seed (:obj:`int`): Number of episodes to run for each seed.
+        - total_test_episodes (:obj:`int`): Total number of test episodes, computed as the product of the number of
+          seeds and the number of episodes per seed.
+    """
+    # model_path = "./ckpt/ckpt_best.pth.tar"
+    model_path = None
+    returns_mean_seeds = []
+    returns_seeds = []
+    seeds = [0]
+    num_episodes_each_seed = 2
+    total_test_episodes = num_episodes_each_seed * len(seeds)
+    create_config.env_manager.type = 'base'  # Visualization requires the 'type' to be set as base
+    main_config.env.evaluator_env_num = 1  # Visualization requires the 'env_num' to be set as 1
+    main_config.env.n_evaluator_episode = total_test_episodes
+    main_config.env.replay_path = './video'
 
     for seed in seeds:
+        """
+        - returns_mean (:obj:`float`): The mean return of the evaluation.
+        - returns (:obj:`List[float]`): The returns of the evaluation.
+        """
         returns_mean, returns = eval_muzero(
             [main_config, create_config],
             seed=seed,
@@ -36,4 +53,4 @@
     print(f"We evaluated a total of {len(seeds)} seeds. For each seed, we evaluated {num_episodes_each_seed} episode(s).")
     print(f"For seeds {seeds}, the mean returns are {returns_mean_seeds}, and the returns are {returns_seeds}.")
     print("Across all seeds, the mean reward is:", returns_mean_seeds.mean())
-    print("=" * 20)
+    print("=" * 20)
zoo/classic_control/pendulum/envs/pendulum_lightzero_env.py (8 changes: 4 additions & 4 deletions)
@@ -45,9 +45,9 @@ def __init__(self, cfg: dict) -> None:
         self._cfg = cfg
         self._act_scale = cfg.act_scale
         try:
-            self._env = gym.make('Pendulum-v1')
+            self._env = gym.make('Pendulum-v1', render_mode="rgb_array")
         except:
-            self._env = gym.make('Pendulum-v0')
+            self._env = gym.make('Pendulum-v0', render_mode="rgb_array")
         self._init_flag = False
         self._replay_path = cfg.replay_path
         self._continuous = cfg.get("continuous", True)
@@ -71,9 +71,9 @@ def reset(self) -> Dict[str, np.ndarray]:
         """
         if not self._init_flag:
             try:
-                self._env = gym.make('Pendulum-v1')
+                self._env = gym.make('Pendulum-v1', render_mode="rgb_array")
             except:
-                self._env = gym.make('Pendulum-v0')
+                self._env = gym.make('Pendulum-v0', render_mode="rgb_array")
             if self._replay_path is not None:
                 timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                 video_name = f'{self._env.spec.id}-video-{timestamp}'
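
The try/except pair is a version fallback: newer registries ship Pendulum-v1, older ones only Pendulum-v0. A standalone sketch of the pattern; catching gym.error.Error rather than using a bare except is our tightening, not what the diff does:

import gymnasium as gym

try:
    env = gym.make('Pendulum-v1', render_mode="rgb_array")
except gym.error.Error:  # Pendulum-v1 not registered in this install
    env = gym.make('Pendulum-v0', render_mode="rgb_array")
obs, info = env.reset(seed=0)
print(env.spec.id)
env.close()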
zoo/minigrid/envs/minigrid_lightzero_env.py (3 changes: 2 additions & 1 deletion)
@@ -171,7 +171,8 @@ def step(self, action: np.ndarray) -> BaseEnvTimestep:
         if self._save_replay_gif:
             self._frames.append(self._env.render())
         # using the step method of Gymnasium env, return is (observation, reward, terminated, truncated, info)
-        obs, rew, done, _, info = self._env.step(action)
+        obs, rew, terminated, truncated, info = self._env.step(action)
+        done = terminated or truncated
         rew = float(rew)
         self._eval_episode_return += rew
         self._current_step += 1
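
This hunk is the one behavioral bug fix beyond rendering: gymnasium's step() returns (obs, reward, terminated, truncated, info), and the old unpacking read the third element as done, silently discarding time-limit truncations, so an episode cut off by max_steps was never flagged as finished. A minimal reproduction of the corrected pattern, assuming the minigrid package from requirements.txt:

import gymnasium as gym
import minigrid  # noqa: F401  (importing registers the MiniGrid envs)

env = gym.make("MiniGrid-Empty-5x5-v0")
obs, info = env.reset(seed=0)
terminated = truncated = False
steps = 0
while not (terminated or truncated):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    steps += 1
done = terminated or truncated  # the correct episode-end signal
print(steps, terminated, truncated)
env.close()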
