[RLlib] Remove old gym monitor code and add new API #37922

Closed
Changes from 74 commits

Commits (76)
9dd8b29
bump to 0.28.1
May 24, 2023
5e860cd
Merge branch 'master' into gymnasium
May 24, 2023
3189e30
Merge branch 'master' into gymnasium
May 25, 2023
bd070e5
fix test
May 26, 2023
fbedf59
Merge branch 'master' into gymnasium
Jun 5, 2023
7402830
Atari is now supported by gymnasium
Rohan138 Jun 5, 2023
9d2e601
Merge branch 'gymnasium' of https://github.com/Rohan138/ray into gymn…
Rohan138 Jun 5, 2023
03e3c4b
Remove all import gym calls
Rohan138 Jun 5, 2023
98506e3
Fix env instantiation for env classes
Jun 5, 2023
0f39209
fix atari wrappers
Jun 5, 2023
0affa2a
Merge branch 'master' into gymnasium
Jun 5, 2023
c925e59
fix pong notebook
Jun 6, 2023
06e78e1
fix pong notebook
Jun 6, 2023
ecce074
Merge branch 'master' into gymnasium
Jun 6, 2023
65b2eec
Add comment
Jun 6, 2023
a5bba3c
Merge branch 'master' into gymnasium
Jun 8, 2023
68f3cb5
Empty commit
Jun 8, 2023
74d4461
Remove gym from requirements
Jun 8, 2023
635ab9c
Merge branch 'master' into gymnasium
Jun 9, 2023
4f5ef24
Merge branch 'master' of github.com:ray-project/ray into gymnasium
Jun 9, 2023
f5ecaa6
merge
sven1977 Jun 19, 2023
cbc22f3
test other version combination
sven1977 Jun 20, 2023
5833a90
merge
sven1977 Jun 20, 2023
acaa683
wip
sven1977 Jun 20, 2023
829d290
wip
sven1977 Jun 20, 2023
7842ff9
wip
sven1977 Jun 20, 2023
b95a0a5
wip
sven1977 Jun 20, 2023
f926316
wip
sven1977 Jun 20, 2023
721cfc9
merge
sven1977 Jun 22, 2023
58d7b11
wip
sven1977 Jun 22, 2023
78cd2b6
wip
sven1977 Jun 22, 2023
859d999
wip
sven1977 Jun 22, 2023
4d20fa2
wip
sven1977 Jun 22, 2023
87f38b7
wip
sven1977 Jun 22, 2023
f8c4c71
wip
sven1977 Jun 22, 2023
866fec3
wip
sven1977 Jun 22, 2023
c3ce9c9
wip
sven1977 Jun 23, 2023
7c41ef6
wip
sven1977 Jun 23, 2023
5ef2b8a
wip
sven1977 Jun 23, 2023
988980d
wip
sven1977 Jun 23, 2023
00aba6c
wip
sven1977 Jun 23, 2023
606c69c
wip
sven1977 Jun 23, 2023
3dda88e
wip
sven1977 Jun 23, 2023
37fbd31
wip
sven1977 Jun 23, 2023
05085c6
wip
sven1977 Jun 23, 2023
873e307
wip
sven1977 Jun 23, 2023
e984149
wip
sven1977 Jun 23, 2023
4caebe0
wip
sven1977 Jun 23, 2023
ed5928b
wip
sven1977 Jun 23, 2023
fe49364
Merge branch 'master' of https://github.com/ray-project/ray into gymn…
sven1977 Jun 23, 2023
b8f4fad
wip
sven1977 Jul 1, 2023
f24f94c
LINT
sven1977 Jul 1, 2023
f80ceee
merge
sven1977 Jul 5, 2023
ed2ccc2
LINT
sven1977 Jul 5, 2023
18baa94
wip
sven1977 Jul 5, 2023
2b8ebef
Merge branch 'master' of https://github.com/ray-project/ray into gymn…
sven1977 Jul 6, 2023
43fa608
wip
sven1977 Jul 6, 2023
10975a2
wip
sven1977 Jul 27, 2023
59fd88d
wip
sven1977 Jul 27, 2023
80ea98e
wip
sven1977 Jul 27, 2023
d113b64
wip
sven1977 Jul 27, 2023
b8183ca
wip
sven1977 Jul 27, 2023
f094723
wip
sven1977 Jul 27, 2023
70c71e5
wip
sven1977 Jul 28, 2023
264c836
wip
sven1977 Jul 28, 2023
ab7cab2
Merge branch 'master' of https://github.com/ray-project/ray into gymn…
sven1977 Jul 28, 2023
4ce8498
wip
sven1977 Jul 28, 2023
5536b4b
wip
sven1977 Jul 28, 2023
91112ce
add recording example etc
ArturNiederfahrenhorst Jul 29, 2023
ff77a3c
Add to CI deps file
ArturNiederfahrenhorst Jul 30, 2023
c3d7b2b
kick off CI again
ArturNiederfahrenhorst Jul 31, 2023
f817776
fix deps
ArturNiederfahrenhorst Jul 31, 2023
c1c34fb
Attempt to pin decorator
ArturNiederfahrenhorst Jul 31, 2023
f893c67
merge master
ArturNiederfahrenhorst Jul 31, 2023
2ffcacd
Merge branch 'master' into removeataristuffaddrecorder
ArturNiederfahrenhorst Aug 4, 2023
0439830
test ffmpeg upgrade
ArturNiederfahrenhorst Aug 4, 2023
4 changes: 4 additions & 0 deletions ci/env/install-dependencies.sh
@@ -354,6 +354,10 @@ install_pip_packages() {
requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/rllib-test-requirements.txt")
#TODO(amogkam): Add this back to rllib-requirements.txt once mlagents no longer pins torch<1.9.0 version.
pip install --no-dependencies mlagents==0.28.0
pip install moviepy
pip install decorator==4.0.2 # Moviepy 1.0.3 will error on decorator==4.4.2 so we have to pin
sudo apt install ffmpeg -y
export IMAGEIO_FFMPEG_EXE=/usr/bin/ffmpeg

# Install MuJoCo.
sudo apt install libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf -y
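For local runs outside CI, the same ffmpeg hint that the shell export above provides can be set from Python before moviepy is first imported. This is a hedged sketch only (the path lookup via `shutil.which` is an assumption about your system; `IMAGEIO_FFMPEG_EXE` is the variable the CI change exports):

```python
# Hedged sketch: point moviepy/imageio at the system ffmpeg from Python,
# mirroring the IMAGEIO_FFMPEG_EXE export in the CI script above.
import os
import shutil

ffmpeg_path = shutil.which("ffmpeg")  # e.g. /usr/bin/ffmpeg on the CI image
if ffmpeg_path:
    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path
```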
11 changes: 4 additions & 7 deletions doc/source/rllib/rllib-training.rst
@@ -528,18 +528,15 @@ Debugging RLlib Experiments
Gym Monitor
~~~~~~~~~~~

The ``"monitor": true`` config can be used to save Gym episode videos to the result dir. For example:
The ``"record": true`` config can be used to save videos of episodes to the result dir. For example:

.. code-block:: bash

rllib train --env=PongDeterministic-v4 \
--run=A2C --config '{"num_workers": 2, "monitor": true}'
--run=A2C --config '{"num_workers": 2, "record": true}'

Videos will be saved in the ``~/ray_results/<experiment>`` directory.

# videos will be saved in the ~/ray_results/<experiment> dir, for example
openaigym.video.0.31401.video000000.meta.json
openaigym.video.0.31401.video000000.mp4
openaigym.video.0.31403.video000000.meta.json
openaigym.video.0.31403.video000000.mp4

Eager Mode
~~~~~~~~~~
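For readers who prefer the Python API over the CLI shown in the doc change above, here is a hedged sketch of a comparable run. PPO is substituted for A2C for brevity, and the gymnasium Atari env id is an assumption; only the `record` setting itself comes from this PR:

```python
# Hedged sketch: rough Python-API analogue of the `rllib train ... "record": true` CLI call.
from ray.rllib.algorithms.ppo import PPOConfig  # PPO used in place of A2C for brevity

config = (
    PPOConfig()
    .environment(
        env="ALE/Pong-v5",                        # gymnasium Atari id (assumption)
        env_config={"render_mode": "rgb_array"},  # recording needs a renderable env
        record=True,                              # the new setting introduced in this PR
    )
    .rollouts(num_rollout_workers=2)              # analogous to "num_workers": 2
)
algo = config.build()
algo.train()
```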
8 changes: 8 additions & 0 deletions rllib/BUILD
@@ -4010,6 +4010,14 @@ py_test(
args = ["--stop-iters=2", "--num-steps-sampled-before-learning_starts=100", "--framework=tf2", "--use-tune", "--random-test-episodes=10", "--env-num-candidates=50", "--env-slate-size=2"],
)

py_test(
name = "examples/record_videos",
main = "examples/record_videos.py",
tags = ["team:rllib", "examples"],
size = "small",
srcs = ["examples/record_videos.py"],
)

py_test(
name = "examples/remote_envs_with_inference_done_on_main_node_tf",
main = "examples/remote_envs_with_inference_done_on_main_node.py",
23 changes: 20 additions & 3 deletions rllib/algorithms/algorithm_config.py
@@ -308,6 +308,9 @@ def __init__(self, algo_class=None):
self.disable_env_checking = False
self.auto_wrap_old_gym_envs = True
self.action_mask_key = "action_mask"
self.record = False
self.video_folder = os.path.expanduser("~/ray_results")
self.recording_interval = 10
# Whether this env is an atari env (for atari-specific preprocessing).
# If not specified, we will try to auto-detect this.
self._is_atari = None
@@ -455,7 +458,6 @@ def __init__(self, algo_class=None):
# have been removed.
# === Deprecated keys ===
self.simple_optimizer = DEPRECATED_VALUE
self.monitor = DEPRECATED_VALUE
self.evaluation_num_episodes = DEPRECATED_VALUE
self.metrics_smoothing_episodes = DEPRECATED_VALUE
self.timesteps_per_iteration = DEPRECATED_VALUE
@@ -533,7 +535,6 @@ def to_dict(self) -> AlgorithmConfigDict:
# Simplify: Remove all deprecated keys that have as value `DEPRECATED_VALUE`.
# These would be useless in the returned dict anyways.
for dep_k in [
"monitor",
"evaluation_num_episodes",
"metrics_smoothing_episodes",
"timesteps_per_iteration",
@@ -1334,6 +1335,9 @@ def environment(
is_atari: Optional[bool] = NotProvided,
auto_wrap_old_gym_envs: Optional[bool] = NotProvided,
action_mask_key: Optional[str] = NotProvided,
record: Optional[bool] = NotProvided,
video_folder: Optional[str] = NotProvided,
recording_interval: Optional[int] = NotProvided,
) -> "AlgorithmConfig":
"""Sets the config's RL-environment settings.

@@ -1385,9 +1389,16 @@
(gym.wrappers.EnvCompatibility). If False, RLlib will produce a
descriptive error on which steps to perform to upgrade to gymnasium
(or to switch this flag to True).
action_mask_key: If observation is a dictionary, expect the value by
the key `action_mask_key` to contain a valid actions mask (`numpy.int8`
array of zeros and ones). Defaults to "action_mask".
record: Whether to record videos of episodes. A video is recorded every
`recording_interval` episodes. Defaults to False.
video_folder: Path to the directory in which to save the recordings.
Defaults to "~/ray_results".
recording_interval: The interval (in episodes) between two recordings: a
video is recorded every `recording_interval`-th episode. Defaults to 10.

Returns:
This updated AlgorithmConfig object.
@@ -1422,6 +1433,12 @@
self.auto_wrap_old_gym_envs = auto_wrap_old_gym_envs
if action_mask_key is not NotProvided:
self.action_mask_key = action_mask_key
if record is not NotProvided:
self.record = record
if recording_interval is not NotProvided:
self.recording_interval = recording_interval
if video_folder is not NotProvided:
self.video_folder = video_folder

return self

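A minimal usage sketch of the new `environment()` settings documented above. The parameter names come from this PR; the env, folder path, and interval values are illustrative assumptions:

```python
# Hedged usage sketch of the new record/video_folder/recording_interval settings.
from ray.rllib.algorithms.ppo import PPOConfig

config = PPOConfig().environment(
    env="CartPole-v1",
    env_config={"render_mode": "rgb_array"},  # recording requires a renderable env
    record=True,                              # default: False
    video_folder="/tmp/rllib_videos",         # default: ~/ray_results
    recording_interval=25,                    # record every 25th episode; default: 10
)
```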
52 changes: 0 additions & 52 deletions rllib/env/wrappers/atari_wrappers.py
@@ -46,57 +46,6 @@ def get_wrapper_by_cls(env, cls):
return None


@PublicAPI
class MonitorEnv(gym.Wrapper):
def __init__(self, env=None):
"""Record episodes stats prior to EpisodicLifeEnv, etc."""
gym.Wrapper.__init__(self, env)
self._current_reward = None
self._num_steps = None
self._total_steps = None
self._episode_rewards = []
self._episode_lengths = []
self._num_episodes = 0
self._num_returned = 0

def reset(self, **kwargs):
obs, info = self.env.reset(**kwargs)

if self._total_steps is None:
self._total_steps = sum(self._episode_lengths)

if self._current_reward is not None:
self._episode_rewards.append(self._current_reward)
self._episode_lengths.append(self._num_steps)
self._num_episodes += 1

self._current_reward = 0
self._num_steps = 0

return obs, info

def step(self, action):
obs, rew, terminated, truncated, info = self.env.step(action)
self._current_reward += rew
self._num_steps += 1
self._total_steps += 1
return obs, rew, terminated, truncated, info

def get_episode_rewards(self):
return self._episode_rewards

def get_episode_lengths(self):
return self._episode_lengths

def get_total_steps(self):
return self._total_steps

def next_episode_results(self):
for i in range(self._num_returned, len(self._episode_rewards)):
yield (self._episode_rewards[i], self._episode_lengths[i])
self._num_returned = len(self._episode_rewards)


@PublicAPI
class NoopResetEnv(gym.Wrapper):
def __init__(self, env, noop_max=30):
@@ -328,7 +277,6 @@ def wrap_deepmind(env, dim=84, framestack=True, noframeskip=False):
dim: Dimension to resize observations to (dim x dim).
framestack: Whether to framestack observations.
"""
env = MonitorEnv(env)
env = NoopResetEnv(env, noop_max=30)
if env.spec is not None and noframeskip is True:
env = MaxAndSkipEnv(env, skip=4)
26 changes: 0 additions & 26 deletions rllib/evaluation/env_runner_v2.py
@@ -7,7 +7,6 @@

from ray.rllib.env.base_env import ASYNC_RESET_RETURN, BaseEnv
from ray.rllib.env.external_env import ExternalEnvWrapper
from ray.rllib.env.wrappers.atari_wrappers import MonitorEnv, get_wrapper_by_cls
from ray.rllib.evaluation.collectors.simple_list_collector import _PolicyCollectorGroup
from ray.rllib.policy.rnn_sequencing import pad_batch_to_sequences_of_same_size
from ray.rllib.evaluation.episode_v2 import EpisodeV2
@@ -408,13 +407,6 @@ def _get_rollout_metrics(
self, episode: EpisodeV2, policy_map: Dict[str, Policy]
) -> List[RolloutMetrics]:
"""Get rollout metrics from completed episode."""
# TODO(jungong) : why do we need to handle atari metrics differently?
# Can we unify atari and normal env metrics?
atari_metrics: List[RolloutMetrics] = _fetch_atari_metrics(self._base_env)
if atari_metrics is not None:
for m in atari_metrics:
m._replace(custom_metrics=episode.custom_metrics)
return atari_metrics
# Create connector metrics
connector_metrics = {}
active_agents = episode.get_agents()
@@ -1209,24 +1201,6 @@ def _maybe_render(self):
self._perf_stats.incr("env_render_time", time.time() - t5)


def _fetch_atari_metrics(base_env: BaseEnv) -> List[RolloutMetrics]:
"""Atari games have multiple logical episodes, one per life.

However, for metrics reporting we count full episodes, all lives included.
"""
sub_environments = base_env.get_sub_environments()
if not sub_environments:
return None
atari_out = []
for sub_env in sub_environments:
monitor = get_wrapper_by_cls(sub_env, MonitorEnv)
if not monitor:
return None
for eps_rew, eps_len in monitor.next_episode_results():
atari_out.append(RolloutMetrics(eps_len, eps_rew))
return atari_out


def _get_or_raise(
mapping: Dict[PolicyID, Union[Policy, Preprocessor, Filter]], policy_id: PolicyID
) -> Union[Policy, Preprocessor, Filter]:
18 changes: 18 additions & 0 deletions rllib/evaluation/rollout_worker.py
@@ -4,6 +4,7 @@
import os
import platform
import threading
import gymnasium as gym
from collections import defaultdict
from types import FunctionType
from typing import (
@@ -449,6 +450,23 @@ def wrap(env):

# Wrap env through the correct wrapper.
self.env: EnvType = wrap(self.env)

if self.config.record:
folder = (
self.config.video_folder
if self.config.video_folder is not None
else self.log_dir + "/videos"
)
logger.info(f"Recording videos to {folder}")

self.env = gym.wrappers.RecordVideo(
env=self.env,
video_folder=folder,
# Defines when to capture an episode based on episode ID.
episode_trigger=lambda e: e % self.config.recording_interval == 0,
name_prefix=f"RolloutWorker_{self.worker_index}_",
)

# Ideally, we would use the same make_sub_env() function below
# to create self.env, but wrap(env) and self.env has a cyclic
# dependency on each other right now, so we would settle on
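The RolloutWorker change above applies `gymnasium.wrappers.RecordVideo` internally. Below is a hedged sketch of the manual equivalent, for users who want the same behavior outside RLlib (requires moviepy and ffmpeg; the env id, folder, and interval are illustrative):

```python
# Hedged sketch: manually wrap a renderable gymnasium env the way RolloutWorker now does.
import gymnasium as gym

env = gym.make("CartPole-v1", render_mode="rgb_array")
env = gym.wrappers.RecordVideo(
    env=env,
    video_folder="/tmp/rllib_videos",          # any writable directory
    episode_trigger=lambda ep: ep % 10 == 0,   # record every 10th episode
    name_prefix="manual_recording",
)
obs, info = env.reset()
```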
31 changes: 10 additions & 21 deletions rllib/evaluation/sampler.py
@@ -26,7 +26,6 @@
from ray.rllib.evaluation.collectors.simple_list_collector import SimpleListCollector
from ray.rllib.evaluation.env_runner_v2 import (
EnvRunnerV2,
_fetch_atari_metrics,
_get_or_raise,
_PerfStats,
)
@@ -994,28 +993,18 @@ def _process_observations(
# Now that all callbacks are done and users had the chance to add custom
# metrics based on the last observation in the episode, finish up metrics
# object and append to `outputs`.
atari_metrics: List[RolloutMetrics] = _fetch_atari_metrics(base_env)
if not episode.is_faulty:
if atari_metrics is not None:
for m in atari_metrics:
outputs.append(
m._replace(
custom_metrics=episode.custom_metrics,
hist_data=episode.hist_data,
)
)
Review comment (Contributor Author):

This is not the original concern of this PR, but I believe we should clear our sampler of logic that distinguishes between environment types. We are not transparent about this, and users have to deep-dive into RLlib to understand or modify what is going on here.
else:
outputs.append(
RolloutMetrics(
episode.length,
episode.total_reward,
dict(episode.agent_rewards),
episode.custom_metrics,
{},
episode.hist_data,
episode.media,
)
outputs.append(
RolloutMetrics(
episode.length,
episode.total_reward,
dict(episode.agent_rewards),
episode.custom_metrics,
{},
episode.hist_data,
episode.media,
)
)
else:
# Add metrics about a faulty episode.
outputs.append(RolloutMetrics(episode_faulty=True))
35 changes: 35 additions & 0 deletions rllib/examples/record_videos.py
@@ -0,0 +1,35 @@
"""
The following example demonstrates how to record videos of your agent's behavior.

RLlib exposes the ability of the Gymnasium API to record videos.
This is done internally by wrapping the environment with the
gymnasium.wrappers.RecordVideo wrapper. You can also wrap your environment with this
wrapper manually to record videos of your agent's behavior if RLlib's built-in
video recording does not meet your needs.

In order to run this example, note the following:
- You must have moviepy installed (pip install moviepy).
- You must have ffmpeg installed (system dependent, e.g. brew install ffmpeg).
- moviepy must be able to find ffmpeg; see https://github.com/Zulko/moviepy/issues/1158.
- An environment can only be recorded if it can be rendered. For most environments,
this can be achieved by setting the render_mode to 'rgb_array' in the environment
config. See the gymnasium API for more information.
"""

# First, we create videos with default settings:
from ray.rllib.algorithms.ppo import PPOConfig

config = PPOConfig().environment(
env="CartPole-v1", record=True, env_config={"render_mode": "rgb_array"}
)

# By default, videos will be saved to your experiment logs directory under
# ~/ray_results.

algo = config.build()
algo.train()

# Secondly, we create videos every 100 episodes:
config.environment(recording_interval=100)
algo = config.build()
algo.train()