oxwhirl · j3soon · Jul 3, 2020
diff --git a/src/runners/episode_runner.py b/src/runners/episode_runner.py
@@ -65,12 +65,13 @@ def run(self, test_mode=False):
             # Pass the entire batch of experiences up till now to the agents
             # Receive the actions for each agent at this timestep in a batch of size 1
             actions = self.mac.select_actions(self.batch, t_ep=self.t, t_env=self.t_env, test_mode=test_mode)
+            cpu_actions = actions.to("cpu").numpy()
 
-            reward, terminated, env_info = self.env.step(actions[0])
+            reward, terminated, env_info = self.env.step(cpu_actions[0])
             episode_return += reward
 
             post_transition_data = {
-                "actions": actions,
+                "actions": cpu_actions,
                 "reward": [(reward,)],
                 "terminated": [(terminated != env_info.get("episode_limit", False),)],
             }
@@ -88,7 +89,8 @@ def run(self, test_mode=False):
 
         # Select actions in the last stored state
         actions = self.mac.select_actions(self.batch, t_ep=self.t, t_env=self.t_env, test_mode=test_mode)
-        self.batch.update({"actions": actions}, ts=self.t)
+        cpu_actions = actions.to("cpu").numpy()
+        self.batch.update({"actions": cpu_actions}, ts=self.t)
 
         cur_stats = self.test_stats if test_mode else self.train_stats
         cur_returns = self.test_returns if test_mode else self.train_returns

diff --git a/src/utils/logging.py b/src/utils/logging.py
@@ -49,6 +49,8 @@ def print_recent_stats(self):
             log_str += "{:<25}{:>8}".format(k + ":", item)
             log_str += "\n" if i % 4 == 0 else "\t"
         self.console_logger.info(log_str)
+        # Reset stats to avoid accumulating logs in memory
+        self.stats = defaultdict(lambda: [])
 
 
 # set up a custom logger