From 5483c96911b9ad84f05997c1c08f75f9fd79c3a8 Mon Sep 17 00:00:00 2001
From: ViktorM
Date: Tue, 19 Nov 2024 16:16:02 -0800
Subject: [PATCH] Minor fixes. Removed Ray and gymnasium imports when they are
 not needed.

---
 rl_games/algos_torch/a2c_continuous.py |  2 +-
 rl_games/common/player.py              |  4 +-
 rl_games/common/vecenv.py              | 28 +++++++-----
 rl_games/common/wrappers.py            | 60 ++++++++++++++++----------
 4 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/rl_games/algos_torch/a2c_continuous.py b/rl_games/algos_torch/a2c_continuous.py
index e93ea362..786fda61 100644
--- a/rl_games/algos_torch/a2c_continuous.py
+++ b/rl_games/algos_torch/a2c_continuous.py
@@ -41,7 +41,7 @@ def __init__(self, base_name, params):
         self.init_rnn_from_model(self.model)
         self.last_lr = float(self.last_lr)
         self.bound_loss_type = self.config.get('bound_loss_type', 'bound') # 'regularisation' or 'bound'
-        self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay)
+        self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay, fused=True)
 
         if self.has_central_value:
             cv_config = {
diff --git a/rl_games/common/player.py b/rl_games/common/player.py
index 33c848cc..f2181603 100644
--- a/rl_games/common/player.py
+++ b/rl_games/common/player.py
@@ -14,6 +14,7 @@
 import pandas as pd
 
 
+
 class BasePlayer(object):
 
     def __init__(self, params):
@@ -392,7 +393,8 @@ def run(self):
         else:
             print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps /
                   games_played * n_game_life)
-        
+
+        # save game data to parquet file
         df.to_parquet('game_data.parquet')
 
     def get_batch_size(self, obses, batch_size):
diff --git a/rl_games/common/vecenv.py b/rl_games/common/vecenv.py
index c29fd4be..275f9e37 100644
--- a/rl_games/common/vecenv.py
+++ b/rl_games/common/vecenv.py
@@ -7,6 +7,7 @@
 from time import sleep
 import torch
 
+
 class RayWorker:
     """Wrapper around a third-party (gym for example) environment class that enables parallel training.
 
@@ -47,7 +48,7 @@ def step(self, action):
 
         """
         next_state, reward, is_done, info = self.env.step(action)
-        
+
         if np.isscalar(is_done):
             episode_done = is_done
         else:
@@ -64,7 +65,7 @@ def seed(self, seed):
         np.random.seed(seed)
         random.seed(seed)
         self.env.seed(seed)
-        
+
     def render(self):
         self.env.render()
 
@@ -95,7 +96,7 @@ def get_env_info(self):
         info = {}
         observation_space = self.env.observation_space
 
-        #if isinstance(observation_space, gym.spaces.dict.Dict):
+        # if isinstance(observation_space, gym.spaces.dict.Dict):
         #    observation_space = observation_space['observations']
 
         info['action_space'] = self.env.action_space
@@ -115,12 +116,16 @@
 
 class RayVecEnv(IVecEnv):
     """Main env class that manages several `rl_games.common.vecenv.Rayworker` objects for parallel training
-    
+
     The RayVecEnv class manages a set of individual environments and wraps around the methods from RayWorker.
     Each worker is executed asynchronously.
 
     """
-    import ray
+    # To avoid import errors when Ray is not installed and this class is not used
+    try:
+        import ray
+    except ImportError:
+        pass
 
     def __init__(self, config_name, num_actors, **kwargs):
         """Initialise the class. Sets up the config for the environment and creates individual workers to manage.
@@ -136,7 +141,6 @@ def __init__(self, config_name, num_actors, **kwargs):
 
         self.use_torch = False
         self.seed = kwargs.pop('seed', None)
-
         self.remote_worker = self.ray.remote(RayWorker)
         self.workers = [self.remote_worker.remote(self.config_name, kwargs) for i in range(self.num_actors)]
 
@@ -162,7 +166,7 @@ def __init__(self, config_name, num_actors, **kwargs):
             self.concat_func = np.stack
         else:
             self.concat_func = np.concatenate
-    
+
     def step(self, actions):
         """Step all individual environments (using the created workers).
         Returns a concatenated array of observations, rewards, done states, and infos if the env allows concatenation.
@@ -201,7 +205,7 @@ def step(self, actions):
         if self.use_global_obs:
             newobsdict = {}
             newobsdict["obs"] = ret_obs
-            
+
             if self.state_type_dict:
                 newobsdict["states"] = dicts_to_dict_with_arrays(newstates, True)
             else:
@@ -231,7 +235,7 @@ def get_action_masks(self):
 
     def reset(self):
         res_obs = [worker.reset.remote() for worker in self.workers]
-        newobs, newstates = [],[]
+        newobs, newstates = [], []
         for res in res_obs:
             cobs = self.ray.get(res)
             if self.use_global_obs:
@@ -248,7 +252,7 @@ def reset(self):
         if self.use_global_obs:
             newobsdict = {}
             newobsdict["obs"] = ret_obs
-            
+
             if self.state_type_dict:
                 newobsdict["states"] = dicts_to_dict_with_arrays(newstates, True)
             else:
@@ -256,8 +260,10 @@ def reset(self):
             ret_obs = newobsdict
         return ret_obs
 
+
 vecenv_config = {}
 
+
 def register(config_name, func):
     """Add an environment type (for example RayVecEnv) to the list of available types `rl_games.common.vecenv.vecenv_config`
     Args:
@@ -267,10 +273,12 @@ def register(config_name, func):
     """
     vecenv_config[config_name] = func
 
+
 def create_vec_env(config_name, num_actors, **kwargs):
     vec_env_name = configurations[config_name]['vecenv_type']
     return vecenv_config[vec_env_name](config_name, num_actors, **kwargs)
 
+
 register('RAY', lambda config_name, num_actors, **kwargs: RayVecEnv(config_name, num_actors, **kwargs))
 
 from rl_games.envs.brax import BraxEnv
diff --git a/rl_games/common/wrappers.py b/rl_games/common/wrappers.py
index dab4a648..5c3b17c7 100644
--- a/rl_games/common/wrappers.py
+++ b/rl_games/common/wrappers.py
@@ -1,4 +1,3 @@
-import gymnasium
 import numpy as np
 from numpy.random import randint
 
@@ -11,12 +10,12 @@
 from copy import copy
 
 
-
 class InfoWrapper(gym.Wrapper):
     def __init__(self, env):
         gym.RewardWrapper.__init__(self, env)
-        
+
         self.reward = 0
+
     def reset(self, **kwargs):
         self.reward = 0
         return self.env.reset(**kwargs)
@@ -87,7 +86,7 @@ def __init__(self, env):
         """
         gym.Wrapper.__init__(self, env)
         self.lives = 0
-        self.was_real_done  = True
+        self.was_real_done = True
 
     def step(self, action):
         obs, reward, done, info = self.env.step(action)
@@ -122,7 +121,7 @@ def __init__(self, env):
 
         gym.Wrapper.__init__(self, env)
         self.max_stacked_steps = 1000
-        self.current_steps=0
+        self.current_steps = 0
 
     def step(self, action):
         obs, reward, done, info = self.env.step(action)
@@ -140,17 +139,17 @@
 
 
 class MaxAndSkipEnv(gym.Wrapper):
-    def __init__(self, env,skip=4, use_max = True):
+    def __init__(self, env, skip=4, use_max=True):
         """Return only every `skip`-th frame"""
         gym.Wrapper.__init__(self, env)
         self.use_max = use_max
         # most recent raw observations (for max pooling across time steps)
         if self.use_max:
-            self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.uint8)
+            self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8)
         else:
-            self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.float32)
-        self._skip       = skip
-    
+            self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.float32)
+        self._skip = skip
+
     def step(self, action):
         """Repeat action, sum reward, and max over last observations."""
         total_reward = 0.0
@@ -211,8 +210,9 @@ def observation(self, frame):
         frame = np.expand_dims(frame, -1)
         return frame
 
+
 class FrameStack(gym.Wrapper):
-    def __init__(self, env, k, flat = False):
+    def __init__(self, env, k, flat=False):
         """
         Stack k last frames.
         Returns lazy array, which is much more memory efficient.
@@ -262,7 +262,7 @@ def _get_ob(self):
 
 
 class BatchedFrameStack(gym.Wrapper):
-    def __init__(self, env, k, transpose = False, flatten = False):
+    def __init__(self, env, k, transpose=False, flatten=False):
         gym.Wrapper.__init__(self, env)
         self.k = k
         self.frames = deque([], maxlen=k)
@@ -303,8 +303,9 @@ def _get_ob(self):
             frames = np.transpose(self.frames, (1, 0, 2))
         return frames
 
+
 class BatchedFrameStackWithStates(gym.Wrapper):
-    def __init__(self, env, k, transpose = False, flatten = False):
+    def __init__(self, env, k, transpose=False, flatten=False):
         gym.Wrapper.__init__(self, env)
         self.k = k
         self.obses = deque([], maxlen=k)
@@ -363,6 +364,7 @@ def process_data(self, data):
             obses = np.transpose(data, (1, 0, 2))
         return obses
 
+
 class ProcgenStack(gym.Wrapper):
     def __init__(self, env, k = 2, greyscale=True):
         gym.Wrapper.__init__(self, env)
@@ -370,7 +372,7 @@ def __init__(self, env, k = 2, greyscale=True):
         self.curr_frame = 0
         self.frames = deque([], maxlen=k)
 
-        self.greyscale=greyscale
+        self.greyscale = greyscale
         self.prev_frame = None
         shp = env.observation_space.shape
         if greyscale:
@@ -421,6 +423,7 @@ def observation(self, observation):
         # with smaller replay buffers only.
         return np.array(observation).astype(np.float32) / 255.0
 
+
 class LazyFrames(object):
     def __init__(self, frames):
         """This object ensures that common frames between the observations are only stored once.
@@ -449,6 +452,7 @@ def __len__(self):
     def __getitem__(self, i):
         return self._force()[i]
 
+
 class ReallyDoneWrapper(gym.Wrapper):
     def __init__(self, env):
         """
@@ -457,7 +461,7 @@ def __init__(self, env):
         self.old_env = env
         gym.Wrapper.__init__(self, env)
         self.lives = 0
-        self.was_real_done  = True
+        self.was_real_done = True
 
     def step(self, action):
         old_lives = self.env.unwrapped.ale.lives()
@@ -471,6 +475,7 @@ def step(self, action):
         done = lives == 0
         return obs, reward, done, info
 
+
 class AllowBacktracking(gym.Wrapper):
     """
     Use deltas in max(X) as the reward, rather than deltas
@@ -506,6 +511,7 @@ def unwrap(env):
     else:
         return env
 
+
 class StickyActionEnv(gym.Wrapper):
     def __init__(self, env, p=0.25):
         super(StickyActionEnv, self).__init__(env)
@@ -591,7 +597,7 @@ def step(self, action):
         obs, reward, done, info = self.env.step(action)
         obs = {
             'observation': obs,
-            'reward':np.clip(reward, -1, 1),
+            'reward': np.clip(reward, -1, 1),
             'last_action': action
         }
         return obs, reward, done, info
@@ -625,10 +631,13 @@ def __init__(self, env, name):
             raise NotImplementedError
 
     def observation(self, observation):
-        return observation * self.mask
+        return observation * self.mask
+
 
 class OldGymWrapper(gym.Env):
     def __init__(self, env):
+        import gymnasium
+
         self.env = env
 
         # Convert Gymnasium spaces to Gym spaces
@@ -636,6 +645,8 @@ def __init__(self, env):
         self.action_space = self.convert_space(env.action_space)
 
     def convert_space(self, space):
         """Recursively convert Gymnasium spaces to Gym spaces."""
+        import gymnasium
+
         if isinstance(space, gymnasium.spaces.Box):
             return gym.spaces.Box(
@@ -691,6 +702,7 @@ def render(self, mode='human'):
     def close(self):
         return self.env.close()
 
+
 # Example usage:
 if __name__ == "__main__":
     # Create a MyoSuite environment
@@ -718,19 +730,21 @@ def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directo
         env = MontezumaInfoWrapper(env, room_address=3 if 'Montezuma' in env_id else 1)
         env = StickyActionEnv(env)
     env = InfoWrapper(env)
-    if directory != None:
-        env = gym.wrappers.Monitor(env,directory=directory,force=True)
+
+    if directory is not None:
+        env = gym.wrappers.Monitor(env, directory=directory, force=True)
     if sticky:
         env = StickyActionEnv(env)
     if not timelimit:
         env = env.env
-    #assert 'NoFrameskip' in env.spec.id
+    # assert 'NoFrameskip' in env.spec.id
    if noop_max > 0:
         env = NoopResetEnv(env, noop_max=noop_max)
     env = MaxAndSkipEnv(env, skip=skip)
-    #env = EpisodeStackedEnv(env)
+    # env = EpisodeStackedEnv(env)
     return env
 
+
 def wrap_deepmind(env, episode_life=False, clip_rewards=True, frame_stack=True, scale =False, wrap_impala=False):
     """Configure environment for DeepMind-style Atari.
     """
@@ -749,6 +763,7 @@ def wrap_deepmind(env, episode_life=False, clip_rewards=True, frame_stack=True,
         env = ImpalaEnvWrapper(env)
     return env
 
+
 def wrap_carracing(env, clip_rewards=True, frame_stack=True, scale=False):
     """Configure environment for DeepMind-style Atari.
     """
@@ -761,11 +776,12 @@ def wrap_carracing(env, clip_rewards=True, frame_stack=True, scale=False):
         env = FrameStack(env, 4)
     return env
 
+
 def make_car_racing(env_id, skip=4):
     env = make_atari(env_id, noop_max=0, skip=skip)
     return wrap_carracing(env, clip_rewards=False)
 
+
 def make_atari_deepmind(env_id, noop_max=30, skip=4, sticky=False, episode_life=True, wrap_impala=False, **kwargs):
     env = make_atari(env_id, noop_max=noop_max, skip=skip, sticky=sticky, **kwargs)
     return wrap_deepmind(env, episode_life=episode_life, clip_rewards=False, wrap_impala=wrap_impala)
-
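
Note on the fused=True change in rl_games/algos_torch/a2c_continuous.py: PyTorch's fused Adam kernel only exists in recent PyTorch releases and, on most released versions, requires floating-point parameters on a supported (CUDA) device, so the optimizer construction above will raise on CPU-only runs or older installs. A guarded construction along the following lines is one way to keep those setups working. This is a sketch, not part of the patch; build_adam and the small nn.Linear model are hypothetical names used only for illustration.

    import torch.nn as nn
    import torch.optim as optim

    def build_adam(parameters, lr, weight_decay=0.0):
        """Try the fused Adam kernel first and fall back to the default implementation."""
        params = list(parameters)  # materialize so a failed first attempt cannot exhaust the generator
        try:
            # fused=True is only understood by newer PyTorch and, depending on the
            # version, only for floating-point parameters on supported (CUDA) devices.
            return optim.Adam(params, lr, eps=1e-08, weight_decay=weight_decay, fused=True)
        except (TypeError, RuntimeError):
            # TypeError: PyTorch too old to know the fused keyword.
            # RuntimeError: parameters on an unsupported device or dtype.
            return optim.Adam(params, lr, eps=1e-08, weight_decay=weight_decay)

    # Hypothetical usage mirroring the call site changed above:
    model = nn.Linear(8, 2)
    optimizer = build_adam(model.parameters(), lr=3e-4, weight_decay=0.0)

The same guard could wrap the existing optim.Adam call site directly if a helper function is not wanted.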