Denys88 · ViktorM · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024 · Nov 4, 2024
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,23 @@
+[flake8]
+show-source=True
+statistics=True
+per-file-ignores=*/__init__.py:F401
+# E402: Module level import not at top of file
+# E501: Line too long
+# W503: Line break before binary operator
+# E203: Whitespace before ':' -> conflicts with black
+# D401: First line should be in imperative mood
+# R504: Unnecessary variable assignment before return statement.
+# R505: Unnecessary elif after return statement
+# SIM102: Use a single if-statement instead of nested if-statements
+# SIM117: Merge with-statements for context managers that have the same scope.
+# SIM118: Use 'key in dict' instead of 'key in dict.keys()'.
+ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117,SIM118
+max-line-length = 120
+max-complexity = 30
+exclude=_*,.vscode,.git,docs/**
+# docstrings
+docstring-convention=google
+# annotations
+suppress-none-returning=True
+allow-star-arg-any=True
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ authors = [
 ]
 
 [tool.poetry.dependencies]
-python = ">=3.7.1,<3.11"
+python = ">=3.7.1,<3.14"
 gym = {version = "^0.23.0", extras = ["classic_control"]}
 tensorboard = "^2.8.0"
 tensorboardX = "^2.5"

diff --git a/rl_games/algos_torch/a2c_continuous.py b/rl_games/algos_torch/a2c_continuous.py
@@ -6,7 +6,7 @@
 from rl_games.common import datasets
 
 from torch import optim
-import torch 
+import torch
 
 
 class A2CAgent(a2c_common.ContinuousA2CBase):
@@ -30,11 +30,11 @@ def __init__(self, base_name, params):
             'actions_num' : self.actions_num,
             'input_shape' : obs_shape,
             'num_seqs' : self.num_actors * self.num_agents,
-            'value_size': self.env_info.get('value_size',1),
+            'value_size': self.env_info.get('value_size', 1),
             'normalize_value' : self.normalize_value,
             'normalize_input': self.normalize_input,
         }
-        
+
         self.model = self.network.build(build_config)
         self.model.to(self.ppo_device)
         self.states = None
@@ -74,7 +74,7 @@ def __init__(self, base_name, params):
     def update_epoch(self):
         self.epoch_num += 1
         return self.epoch_num
-        
+
     def save(self, fn):
         state = self.get_full_state_weights()
         torch_ext.save_checkpoint(fn, state)
@@ -114,7 +114,7 @@ def calc_gradients(self, input_dict):
 
         batch_dict = {
             'is_train': True,
-            'prev_actions': actions_batch, 
+            'prev_actions': actions_batch,
             'obs' : obs_batch,
         }
 
@@ -195,7 +195,7 @@ def train_actor_critic(self, input_dict):
 
     def reg_loss(self, mu):
         if self.bounds_loss_coef is not None:
-            reg_loss = (mu*mu).sum(axis=-1)
+            reg_loss = (mu * mu).sum(axis=-1)
         else:
             reg_loss = 0
         return reg_loss
@@ -209,5 +209,3 @@ def bound_loss(self, mu):
         else:
             b_loss = 0
         return b_loss
-
-