
Commit

Minor fixes. Removed Ray and gymnasium imports when they are not needed.
ViktorM committed Nov 20, 2024
1 parent a28f62a commit 5483c96
Showing 4 changed files with 60 additions and 34 deletions.
2 changes: 1 addition & 1 deletion rl_games/algos_torch/a2c_continuous.py
@@ -41,7 +41,7 @@ def __init__(self, base_name, params):
self.init_rnn_from_model(self.model)
self.last_lr = float(self.last_lr)
self.bound_loss_type = self.config.get('bound_loss_type', 'bound') # 'regularisation' or 'bound'
self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay)
self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay, fused=True)

if self.has_central_value:
cv_config = {
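The only functional change in this file passes fused=True to torch.optim.Adam. As a hedged illustration (not rl_games code): the fused kernel requires the parameters to be CUDA tensors, so a build helper might guard it like this, where build_adam is a hypothetical name:

import torch
from torch import optim

def build_adam(model, lr, weight_decay=0.0):
    # fused=True runs the whole Adam step in a single CUDA kernel, but it only
    # works when the parameters live on a CUDA device; fall back otherwise.
    use_fused = torch.cuda.is_available() and next(model.parameters()).is_cuda
    return optim.Adam(model.parameters(), lr, eps=1e-08,
                      weight_decay=weight_decay, fused=use_fused)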
4 changes: 3 additions & 1 deletion rl_games/common/player.py
@@ -14,6 +14,7 @@

import pandas as pd


class BasePlayer(object):

def __init__(self, params):
@@ -392,7 +393,8 @@ def run(self):
else:
print('av reward:', sum_rewards / games_played * n_game_life,
'av steps:', sum_steps / games_played * n_game_life)


# save game data to parquet file
df.to_parquet('game_data.parquet')

def get_batch_size(self, obses, batch_size):
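The addition here writes the collected game statistics to a Parquet file at the end of the run. The exact columns built up in BasePlayer.run() are not visible in this hunk; a minimal sketch of the pattern, with hypothetical field names, is:

import pandas as pd

# Hypothetical rows; the real fields assembled in BasePlayer.run() are not
# shown in this diff. to_parquet() needs pyarrow or fastparquet installed.
rows = [
    {'game': 0, 'steps': 412, 'reward': 17.5},
    {'game': 1, 'steps': 388, 'reward': 21.0},
]
df = pd.DataFrame(rows)
df.to_parquet('game_data.parquet')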
28 changes: 18 additions & 10 deletions rl_games/common/vecenv.py
@@ -7,6 +7,7 @@
from time import sleep
import torch


class RayWorker:
"""Wrapper around a third-party (gym for example) environment class that enables parallel training.
@@ -47,7 +48,7 @@ def step(self, action):
"""
next_state, reward, is_done, info = self.env.step(action)

if np.isscalar(is_done):
episode_done = is_done
else:
@@ -64,7 +65,7 @@ def seed(self, seed):
np.random.seed(seed)
random.seed(seed)
self.env.seed(seed)

def render(self):
self.env.render()

@@ -95,7 +96,7 @@ def get_env_info(self):
info = {}
observation_space = self.env.observation_space

#if isinstance(observation_space, gym.spaces.dict.Dict):
# if isinstance(observation_space, gym.spaces.dict.Dict):
# observation_space = observation_space['observations']

info['action_space'] = self.env.action_space
@@ -115,12 +116,16 @@ def get_env_info(self):

class RayVecEnv(IVecEnv):
"""Main env class that manages several `rl_games.common.vecenv.Rayworker` objects for parallel training
The RayVecEnv class manages a set of individual environments and wraps around the methods from RayWorker.
Each worker is executed asynchronously.
"""
import ray
# To avoid import errors when Ray is not installed and this class is not used
try:
import ray
except ImportError:
pass

def __init__(self, config_name, num_actors, **kwargs):
"""Initialise the class. Sets up the config for the environment and creates individual workers to manage.
@@ -136,7 +141,6 @@ def __init__(self, config_name, num_actors, **kwargs):
self.use_torch = False
self.seed = kwargs.pop('seed', None)


self.remote_worker = self.ray.remote(RayWorker)
self.workers = [self.remote_worker.remote(self.config_name, kwargs) for i in range(self.num_actors)]

@@ -162,7 +166,7 @@ def __init__(self, config_name, num_actors, **kwargs):
self.concat_func = np.stack
else:
self.concat_func = np.concatenate

def step(self, actions):
"""Step all individual environments (using the created workers).
Returns a concatenated array of observations, rewards, done states, and infos if the env allows concatenation.
@@ -201,7 +205,7 @@ def step(self, actions):
if self.use_global_obs:
newobsdict = {}
newobsdict["obs"] = ret_obs

if self.state_type_dict:
newobsdict["states"] = dicts_to_dict_with_arrays(newstates, True)
else:
@@ -231,7 +235,7 @@ def get_action_masks(self):

def reset(self):
res_obs = [worker.reset.remote() for worker in self.workers]
newobs, newstates = [],[]
newobs, newstates = [], []
for res in res_obs:
cobs = self.ray.get(res)
if self.use_global_obs:
@@ -248,16 +252,18 @@ def reset(self):
if self.use_global_obs:
newobsdict = {}
newobsdict["obs"] = ret_obs

if self.state_type_dict:
newobsdict["states"] = dicts_to_dict_with_arrays(newstates, True)
else:
newobsdict["states"] = np.stack(newstates)
ret_obs = newobsdict
return ret_obs


vecenv_config = {}


def register(config_name, func):
"""Add an environment type (for example RayVecEnv) to the list of available types `rl_games.common.vecenv.vecenv_config`
Args:
@@ -267,10 +273,12 @@ def register(config_name, func):
"""
vecenv_config[config_name] = func


def create_vec_env(config_name, num_actors, **kwargs):
vec_env_name = configurations[config_name]['vecenv_type']
return vecenv_config[vec_env_name](config_name, num_actors, **kwargs)


register('RAY', lambda config_name, num_actors, **kwargs: RayVecEnv(config_name, num_actors, **kwargs))

from rl_games.envs.brax import BraxEnv
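The main change in vecenv.py wraps the class-level import ray in a try/except so that importing rl_games.common.vecenv no longer fails when Ray is not installed. A minimal sketch of the same optional-dependency idea at module level (illustrative helper, not rl_games API):

try:
    import ray
except ImportError:  # Ray is only needed for the Ray-backed vec env
    ray = None

def require_ray():
    # Fail with a clear message only when a Ray-backed env is actually requested.
    if ray is None:
        raise ImportError("RayVecEnv requires the 'ray' package: pip install ray")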
60 changes: 38 additions & 22 deletions rl_games/common/wrappers.py
@@ -1,4 +1,3 @@
import gymnasium
import numpy as np
from numpy.random import randint

@@ -11,12 +10,12 @@
from copy import copy



class InfoWrapper(gym.Wrapper):
def __init__(self, env):
gym.RewardWrapper.__init__(self, env)

self.reward = 0

def reset(self, **kwargs):
self.reward = 0
return self.env.reset(**kwargs)
@@ -87,7 +86,7 @@ def __init__(self, env):
"""
gym.Wrapper.__init__(self, env)
self.lives = 0
self.was_real_done = True
self.was_real_done = True

def step(self, action):
obs, reward, done, info = self.env.step(action)
@@ -122,7 +121,7 @@ def __init__(self, env):

gym.Wrapper.__init__(self, env)
self.max_stacked_steps = 1000
self.current_steps=0
self.current_steps = 0

def step(self, action):
obs, reward, done, info = self.env.step(action)
@@ -140,17 +139,17 @@ def step(self, action):


class MaxAndSkipEnv(gym.Wrapper):
def __init__(self, env,skip=4, use_max = True):
def __init__(self, env, skip=4, use_max=True):
"""Return only every `skip`-th frame"""
gym.Wrapper.__init__(self, env)
self.use_max = use_max
# most recent raw observations (for max pooling across time steps)
if self.use_max:
self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.uint8)
self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8)
else:
self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.float32)
self._skip = skip
self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.float32)
self._skip = skip

def step(self, action):
"""Repeat action, sum reward, and max over last observations."""
total_reward = 0.0
@@ -211,8 +210,9 @@ def observation(self, frame):
frame = np.expand_dims(frame, -1)
return frame


class FrameStack(gym.Wrapper):
def __init__(self, env, k, flat = False):
def __init__(self, env, k, flat=False):
"""
Stack k last frames.
Returns lazy array, which is much more memory efficient.
@@ -262,7 +262,7 @@ def _get_ob(self):


class BatchedFrameStack(gym.Wrapper):
def __init__(self, env, k, transpose = False, flatten = False):
def __init__(self, env, k, transpose=False, flatten=False):
gym.Wrapper.__init__(self, env)
self.k = k
self.frames = deque([], maxlen=k)
@@ -303,8 +303,9 @@ def _get_ob(self):
frames = np.transpose(self.frames, (1, 0, 2))
return frames


class BatchedFrameStackWithStates(gym.Wrapper):
def __init__(self, env, k, transpose = False, flatten = False):
def __init__(self, env, k, transpose=False, flatten=False):
gym.Wrapper.__init__(self, env)
self.k = k
self.obses = deque([], maxlen=k)
@@ -363,14 +364,15 @@ def process_data(self, data):
obses = np.transpose(data, (1, 0, 2))
return obses


class ProcgenStack(gym.Wrapper):
def __init__(self, env, k = 2, greyscale=True):
gym.Wrapper.__init__(self, env)
self.k = k
self.curr_frame = 0
self.frames = deque([], maxlen=k)

self.greyscale=greyscale
self.greyscale = greyscale
self.prev_frame = None
shp = env.observation_space.shape
if greyscale:
@@ -421,6 +423,7 @@ def observation(self, observation):
# with smaller replay buffers only.
return np.array(observation).astype(np.float32) / 255.0


class LazyFrames(object):
def __init__(self, frames):
"""This object ensures that common frames between the observations are only stored once.
@@ -449,6 +452,7 @@ def __len__(self):
def __getitem__(self, i):
return self._force()[i]


class ReallyDoneWrapper(gym.Wrapper):
def __init__(self, env):
"""
@@ -457,7 +461,7 @@ def __init__(self, env):
self.old_env = env
gym.Wrapper.__init__(self, env)
self.lives = 0
self.was_real_done = True
self.was_real_done = True

def step(self, action):
old_lives = self.env.unwrapped.ale.lives()
@@ -471,6 +475,7 @@ def step(self, action):
done = lives == 0
return obs, reward, done, info


class AllowBacktracking(gym.Wrapper):
"""
Use deltas in max(X) as the reward, rather than deltas
@@ -506,6 +511,7 @@ def unwrap(env):
else:
return env


class StickyActionEnv(gym.Wrapper):
def __init__(self, env, p=0.25):
super(StickyActionEnv, self).__init__(env)
@@ -591,7 +597,7 @@ def step(self, action):
obs, reward, done, info = self.env.step(action)
obs = {
'observation': obs,
'reward':np.clip(reward, -1, 1),
'reward': np.clip(reward, -1, 1),
'last_action': action
}
return obs, reward, done, info
@@ -625,17 +631,22 @@ def __init__(self, env, name):
raise NotImplementedError

def observation(self, observation):
return observation * self.mask
return observation * self.mask


class OldGymWrapper(gym.Env):
def __init__(self, env):
import gymnasium

self.env = env

# Convert Gymnasium spaces to Gym spaces
self.observation_space = self.convert_space(env.observation_space)
self.action_space = self.convert_space(env.action_space)

def convert_space(self, space):
import gymnasium

"""Recursively convert Gymnasium spaces to Gym spaces."""
if isinstance(space, gymnasium.spaces.Box):
return gym.spaces.Box(
@@ -691,6 +702,7 @@ def render(self, mode='human'):
def close(self):
return self.env.close()


# Example usage:
if __name__ == "__main__":
# Create a MyoSuite environment
Expand Down Expand Up @@ -718,19 +730,21 @@ def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directo
env = MontezumaInfoWrapper(env, room_address=3 if 'Montezuma' in env_id else 1)
env = StickyActionEnv(env)
env = InfoWrapper(env)
if directory != None:
env = gym.wrappers.Monitor(env,directory=directory,force=True)

if directory is not None:
env = gym.wrappers.Monitor(env, directory=directory, force=True)
if sticky:
env = StickyActionEnv(env)
if not timelimit:
env = env.env
#assert 'NoFrameskip' in env.spec.id
# assert 'NoFrameskip' in env.spec.id
if noop_max > 0:
env = NoopResetEnv(env, noop_max=noop_max)
env = MaxAndSkipEnv(env, skip=skip)
#env = EpisodeStackedEnv(env)
# env = EpisodeStackedEnv(env)
return env


def wrap_deepmind(env, episode_life=False, clip_rewards=True, frame_stack=True, scale =False, wrap_impala=False):
"""Configure environment for DeepMind-style Atari.
"""
@@ -749,6 +763,7 @@ def wrap_deepmind(env, episode_life=False, clip_rewards=True, frame_stack=True,
env = ImpalaEnvWrapper(env)
return env


def wrap_carracing(env, clip_rewards=True, frame_stack=True, scale=False):
"""Configure environment for DeepMind-style Atari.
"""
@@ -761,11 +776,12 @@ def wrap_carracing(env, clip_rewards=True, frame_stack=True, scale=False):
env = FrameStack(env, 4)
return env


def make_car_racing(env_id, skip=4):
env = make_atari(env_id, noop_max=0, skip=skip)
return wrap_carracing(env, clip_rewards=False)


def make_atari_deepmind(env_id, noop_max=30, skip=4, sticky=False, episode_life=True, wrap_impala=False, **kwargs):
env = make_atari(env_id, noop_max=noop_max, skip=skip, sticky=sticky, **kwargs)
return wrap_deepmind(env, episode_life=episode_life, clip_rewards=False, wrap_impala=wrap_impala)
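wrappers.py now defers import gymnasium into OldGymWrapper.__init__ and convert_space, so the module imports cleanly without Gymnasium installed. A hedged sketch of the Box case of that conversion, assuming only the standard low/high/shape/dtype attributes (not copied from the repo):

import gym

def box_to_gym(space):
    # Deferred import: Gymnasium is only touched when a conversion is requested.
    import gymnasium
    if isinstance(space, gymnasium.spaces.Box):
        return gym.spaces.Box(low=space.low, high=space.high,
                              shape=space.shape, dtype=space.dtype)
    raise NotImplementedError(f"Unhandled space type: {type(space)}")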
