Refactor optimizer step logic (#163)
Co-authored-by: Costa Huang <[email protected]>
vwxyzjn and Costa Huang authored May 23, 2022
1 parent a320613, commit 86f5e82
Showing 3 changed files with 11 additions and 13 deletions.
rl_games/algos_torch/a2c_continuous.py (2 changes: 1 addition & 1 deletion)
@@ -135,7 +135,7 @@ def calc_gradients(self, input_dict):
 
         self.scaler.scale(loss).backward()
         #TODO: Refactor this ugliest code of they year
-        self.trancate_gradients()
+        self.trancate_gradients_and_step()
 
         with torch.no_grad():
             reduce_kl = rnn_masks is None
rl_games/algos_torch/a2c_discrete.py (2 changes: 1 addition & 1 deletion)
@@ -159,7 +159,7 @@ def calc_gradients(self, input_dict):
                 param.grad = None
 
         self.scaler.scale(loss).backward()
-        self.trancate_gradients()
+        self.trancate_gradients_and_step()
 
         with torch.no_grad():
             kl_dist = 0.5 * ((old_action_log_probs_batch - action_log_probs)**2)
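Both call sites follow PyTorch's usual AMP recipe: scale the loss, backpropagate, unscale before clipping, then step and update through the GradScaler. For readers unfamiliar with that interplay, here is a minimal, self-contained sketch of the pattern in generic PyTorch; the toy model, optimizer, and grad_norm value are illustrative and not taken from rl_games.

import torch
import torch.nn as nn

# Toy setup (illustrative only): a tiny model trained with AMP when a GPU is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Linear(8, 1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))
grad_norm = 1.0
truncate_grads = True

def train_step(obs, target):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast(enabled=(device == "cuda")):
        loss = nn.functional.mse_loss(model(obs), target)
    # Scale the loss so small fp16 gradients do not underflow, then backprop.
    scaler.scale(loss).backward()
    if truncate_grads:
        # Gradients must be unscaled before clipping, otherwise the norm
        # threshold would be compared against scaled values.
        scaler.unscale_(optimizer)
        nn.utils.clip_grad_norm_(model.parameters(), grad_norm)
    # step() skips the update if any gradient overflowed; update() adapts the scale.
    scaler.step(optimizer)
    scaler.update()
    return loss.item()

loss = train_step(torch.randn(4, 8, device=device), torch.randn(4, 1, device=device))

The point of the commit is simply that the unscale/clip/step/update tail of this recipe now lives in one helper, trancate_gradients_and_step, instead of being split between the algorithm classes and a2c_common.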
rl_games/common/a2c_common.py (20 changes: 9 additions & 11 deletions)
@@ -254,18 +254,16 @@ def __init__(self, base_name, params):
         # soft augmentation not yet supported
         assert not self.has_soft_aug
 
-    def trancate_gradients(self):
+    def trancate_gradients_and_step(self):
+        if self.multi_gpu:
+            self.optimizer.synchronize()
 
         if self.truncate_grads:
-            if self.multi_gpu:
-                self.optimizer.synchronize()
-                self.scaler.unscale_(self.optimizer)
-                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
-                with self.optimizer.skip_synchronize():
-                    self.scaler.step(self.optimizer)
-                    self.scaler.update()
-            else:
-                self.scaler.unscale_(self.optimizer)
-                nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
+            self.scaler.unscale_(self.optimizer)
+            nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)
 
+        if self.multi_gpu:
+            with self.optimizer.skip_synchronize():
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
         else:
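Assembled from the hunk above, the refactored helper likely reads as follows. This is a sketch rather than the verbatim file: the rendering is cut off at the final else, so the assumption is that the non-multi-GPU branch simply calls scaler.step and scaler.update, and the synchronize()/skip_synchronize() calls suggest a Horovod-style DistributedOptimizer wrapper.

# Sketch of trancate_gradients_and_step after this commit; the body of the
# final else branch is an assumption, since the hunk above is truncated there.
def trancate_gradients_and_step(self):
    if self.multi_gpu:
        # Aggregate gradients across workers before touching them
        # (synchronize()/skip_synchronize() are Horovod-style optimizer APIs).
        self.optimizer.synchronize()

    if self.truncate_grads:
        # Unscale AMP gradients first so clipping sees real gradient norms.
        self.scaler.unscale_(self.optimizer)
        nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm)

    if self.multi_gpu:
        with self.optimizer.skip_synchronize():
            self.scaler.step(self.optimizer)
            self.scaler.update()
    else:
        self.scaler.step(self.optimizer)  # assumed tail, not shown in the hunk
        self.scaler.update()

Compared with the old trancate_gradients, clipping no longer branches on multi_gpu: synchronization happens once up front, clipping depends only on truncate_grads, and only the final step/update pair depends on whether a distributed optimizer is in use.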
