Update wandb to 0.13.11 (#385)
* Fix wandb video logging that was at the wrong indentation

* Add support for python 3.10 (and remove pybullet)

* Update wandb to 0.13.11 for gymnasium support and remove unnecessary code

* Revert pyproject.toml to just remove pybullet

* Update poetry and remove MacOS from PettingZoo CI

* Updated poetry lock

* Update requirements

* Update requirements

* poetry lock --no-update
pseudo-rnd-thoughts authored May 15, 2023
1 parent 5e49edd commit 7104666
Showing 6 changed files with 137 additions and 162 deletions.
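The substantive change, repeated in each of the four scripts below, is that wandb 0.13.11 understands Gymnasium, so monitor_gym=True can be re-enabled and the manual mp4-upload loop (with its video_filenames bookkeeping) can be deleted. The following is a minimal sketch of the resulting pattern, assuming the usual CleanRL layout in which gym.wrappers.RecordVideo writes to videos/<run_name>; the environment id, run name, and project name are illustrative and not taken from this diff.

# Minimal sketch (not part of this diff) of the post-commit setup.
# Assumes wandb>=0.13.11 and that RecordVideo writes mp4s under videos/<run_name>,
# which wandb can pick up automatically once monitor_gym=True.
import time

import gymnasium as gym
import wandb

run_name = f"Pendulum-v1__demo__{int(time.time())}"  # illustrative run name
wandb.init(
    project="cleanRL",  # illustrative project name
    sync_tensorboard=True,
    name=run_name,
    monitor_gym=True,  # re-enabled by this commit; needs wandb>=0.13.11 for gymnasium
    save_code=True,
)

env = gym.make("Pendulum-v1", render_mode="rgb_array")
env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")

obs, _ = env.reset(seed=1)
for _ in range(500):
    obs, reward, terminated, truncated, _ = env.step(env.action_space.sample())
    if terminated or truncated:
        obs, _ = env.reset()
env.close()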
cleanrl/ddpg_continuous_action.py (10 changes: 1 addition & 9 deletions)
@@ -138,7 +138,7 @@ def forward(self, x):
             sync_tensorboard=True,
             config=vars(args),
             name=run_name,
-            # monitor_gym=True, # no longer works for gymnasium
+            monitor_gym=True,
             save_code=True,
         )
     writer = SummaryWriter(f"runs/{run_name}")
@@ -180,8 +180,6 @@ def forward(self, x):
 
     # TRY NOT TO MODIFY: start the game
     obs, _ = envs.reset(seed=args.seed)
-    video_filenames = set()
-
     for global_step in range(args.total_timesteps):
         # ALGO LOGIC: put action logic here
         if global_step < args.learning_starts:
@@ -250,10 +248,4 @@ def forward(self, x):
                 writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
 
     envs.close()
-
-    if args.track and args.capture_video:
-        for filename in os.listdir(f"videos/{run_name}"):
-            if filename not in video_filenames and filename.endswith(".mp4"):
-                wandb.log({f"videos": wandb.Video(f"videos/{run_name}/{filename}")})
-                video_filenames.add(filename)
     writer.close()
cleanrl/ddpg_continuous_action_jax.py (10 changes: 2 additions & 8 deletions)
@@ -134,7 +134,7 @@ class TrainState(TrainState):
             sync_tensorboard=True,
             config=vars(args),
             name=run_name,
-            monitor_gym=True, # does not work on gymnasium
+            monitor_gym=True,
             save_code=True,
         )
     writer = SummaryWriter(f"runs/{run_name}")
@@ -165,7 +165,7 @@ class TrainState(TrainState):
 
     # TRY NOT TO MODIFY: start the game
     obs, _ = envs.reset()
-    video_filenames = set()
+
     action_scale = np.array((envs.action_space.high - envs.action_space.low) / 2.0)
     action_bias = np.array((envs.action_space.high + envs.action_space.low) / 2.0)
     actor = Actor(
@@ -293,10 +293,4 @@ def actor_loss(params):
                 writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
 
     envs.close()
-    if args.track and args.capture_video:
-        for filename in os.listdir(f"videos/{run_name}"):
-            if filename not in video_filenames and filename.endswith(".mp4"):
-                wandb.log({f"videos": wandb.Video(f"videos/{run_name}/{filename}")})
-                video_filenames.add(filename)
-
     writer.close()
cleanrl/ppo_continuous_action.py (9 changes: 1 addition & 8 deletions)
@@ -147,7 +147,7 @@ def get_action_and_value(self, x, action=None):
             sync_tensorboard=True,
             config=vars(args),
             name=run_name,
-            # monitor_gym=True, no longer works for gymnasium
+            monitor_gym=True,
             save_code=True,
         )
     writer = SummaryWriter(f"runs/{run_name}")
@@ -188,7 +188,6 @@ def get_action_and_value(self, x, action=None):
     next_obs = torch.Tensor(next_obs).to(device)
     next_done = torch.zeros(args.num_envs).to(device)
     num_updates = args.total_timesteps // args.batch_size
-    video_filenames = set()
 
     for update in range(1, num_updates + 1):
         # Annealing the rate if instructed to do so.
@@ -322,11 +321,5 @@ def get_action_and_value(self, x, action=None):
         print("SPS:", int(global_step / (time.time() - start_time)))
         writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
 
-        if args.track and args.capture_video:
-            for filename in os.listdir(f"videos/{run_name}"):
-                if filename not in video_filenames and filename.endswith(".mp4"):
-                    wandb.log({f"videos": wandb.Video(f"videos/{run_name}/{filename}")})
-                    video_filenames.add(filename)
-
     envs.close()
     writer.close()
cleanrl/rpo_continuous_action.py (9 changes: 1 addition & 8 deletions)
@@ -156,7 +156,7 @@ def get_action_and_value(self, x, action=None):
             sync_tensorboard=True,
             config=vars(args),
             name=run_name,
-            # monitor_gym=True, no longer works for gymnasium
+            monitor_gym=True,
             save_code=True,
         )
     writer = SummaryWriter(f"runs/{run_name}")
@@ -197,7 +197,6 @@ def get_action_and_value(self, x, action=None):
     next_obs = torch.Tensor(next_obs).to(device)
     next_done = torch.zeros(args.num_envs).to(device)
     num_updates = args.total_timesteps // args.batch_size
-    video_filenames = set()
 
     for update in range(1, num_updates + 1):
         # Annealing the rate if instructed to do so.
@@ -331,11 +330,5 @@ def get_action_and_value(self, x, action=None):
         print("SPS:", int(global_step / (time.time() - start_time)))
         writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)
 
-        if args.track and args.capture_video:
-            for filename in os.listdir(f"videos/{run_name}"):
-                if filename not in video_filenames and filename.endswith(".mp4"):
-                    wandb.log({f"videos": wandb.Video(f"videos/{run_name}/{filename}")})
-                    video_filenames.add(filename)
-
     envs.close()
     writer.close()
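For reference, the block deleted from all four scripts above uploaded the recorded videos by hand. Below is a self-contained sketch of that pattern, roughly as it appeared, useful only if a project is pinned to a wandb release older than 0.13.11; run_name and the videos directory are placeholders, and the args.track / args.capture_video guard from the scripts is omitted.

# Sketch of the removed manual upload loop (unnecessary on wandb>=0.13.11):
# scan the RecordVideo output directory and log any new mp4 to wandb by hand.
# Assumes wandb.init(...) has already been called.
import os

import wandb

run_name = "Pendulum-v1__demo__0"  # placeholder
video_filenames = set()

for filename in os.listdir(f"videos/{run_name}"):
    if filename not in video_filenames and filename.endswith(".mp4"):
        wandb.log({"videos": wandb.Video(f"videos/{run_name}/{filename}")})
        video_filenames.add(filename)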
(Diffs for the remaining two changed files did not load.)

1 comment on commit 7104666

vercel bot commented on 7104666, May 15, 2023


Successfully deployed to the following URLs:

cleanrl – ./

cleanrl-git-master-vwxyzjn.vercel.app
cleanrl.vercel.app
docs.cleanrl.dev
cleanrl-vwxyzjn.vercel.app
