From 0ed7c3a344f735d0e244beea7d2a733c8b92d35b Mon Sep 17 00:00:00 2001
From: Denys Makoviichuk
Date: Sat, 28 Sep 2024 15:53:35 -0700
Subject: [PATCH] Added myo suite support (#306)

* Added myosuite support with Ray

* Added training config example

---------

Co-authored-by: Denys Makoviichuk
---
 rl_games/common/env_configurations.py | 10 ++++
 rl_games/common/wrappers.py           | 85 +++++++++++++++++++++++++++++++
 rl_games/configs/ppo_myo.yaml         | 68 +++++++++++++++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 rl_games/configs/ppo_myo.yaml

diff --git a/rl_games/common/env_configurations.py b/rl_games/common/env_configurations.py
index 43c8ebe1..08170847 100644
--- a/rl_games/common/env_configurations.py
+++ b/rl_games/common/env_configurations.py
@@ -86,6 +86,12 @@ def create_slime_gym_env(**kwargs):
     env = gym.make(name, **kwargs)
     return env
 
+def create_myo(**kwargs):
+    from myosuite.utils import gym
+    name = kwargs.pop('name')
+    env = gym.make(name, **kwargs)
+    env = wrappers.OldGymWrapper(env)
+    return env
 
 def create_atari_gym_env(**kwargs):
     #frames = kwargs.pop('frames', 1)
@@ -427,6 +433,10 @@ def create_env(name, **kwargs):
         'env_creator': lambda **kwargs: create_cule(**kwargs),
         'vecenv_type': 'CULE'
     },
+    'myo_gym' : {
+        'env_creator' : lambda **kwargs : create_myo(**kwargs),
+        'vecenv_type' : 'RAY'
+    },
 }
 
 def get_env_info(env):
diff --git a/rl_games/common/wrappers.py b/rl_games/common/wrappers.py
index a62e0855..dab4a648 100644
--- a/rl_games/common/wrappers.py
+++ b/rl_games/common/wrappers.py
@@ -1,3 +1,4 @@
+import gymnasium
 import numpy as np
 from numpy.random import randint
 
@@ -626,6 +627,90 @@ def __init__(self, env, name):
     def observation(self, observation):
         return observation * self.mask
 
+class OldGymWrapper(gym.Env):
+    def __init__(self, env):
+        self.env = env
+
+        # Convert Gymnasium spaces to Gym spaces
+        self.observation_space = self.convert_space(env.observation_space)
+        self.action_space = self.convert_space(env.action_space)
+
+    def convert_space(self, space):
+        """Recursively convert Gymnasium spaces to Gym spaces."""
+        if isinstance(space, gymnasium.spaces.Box):
+            return gym.spaces.Box(
+                low=space.low,
+                high=space.high,
+                shape=space.shape,
+                dtype=space.dtype
+            )
+        elif isinstance(space, gymnasium.spaces.Discrete):
+            return gym.spaces.Discrete(n=space.n)
+        elif isinstance(space, gymnasium.spaces.MultiDiscrete):
+            return gym.spaces.MultiDiscrete(nvec=space.nvec)
+        elif isinstance(space, gymnasium.spaces.MultiBinary):
+            return gym.spaces.MultiBinary(n=space.n)
+        elif isinstance(space, gymnasium.spaces.Tuple):
+            return gym.spaces.Tuple([self.convert_space(s) for s in space.spaces])
+        elif isinstance(space, gymnasium.spaces.Dict):
+            return gym.spaces.Dict({k: self.convert_space(s) for k, s in space.spaces.items()})
+        else:
+            raise NotImplementedError(f"Space type {type(space)} is not supported.")
+
+    def reset(self):
+        result = self.env.reset()
+        if isinstance(result, tuple):
+            # Gymnasium returns (observation, info)
+            observation, _ = result
+        else:
+            observation = result
+        # Flatten the observation
+        observation = gym.spaces.flatten(self.observation_space, observation)
+        return observation  # Old Gym API returns only the observation
+
+    def step(self, action):
+        # Unflatten the action
+        action = gym.spaces.unflatten(self.action_space, action)
+        result = self.env.step(action)
+
+        if len(result) == 5:
+            # Gymnasium returns (obs, reward, terminated, truncated, info)
+            observation, reward, terminated, truncated, info = result
+            done = terminated or truncated  # Combine for old Gym API
+        else:
+            # Old Gym returns (obs, reward, done, info)
+            observation, reward, done, info = result
+
+        # Flatten the observation
+        observation = gym.spaces.flatten(self.observation_space, observation)
+        return observation, reward, done, info
+
+    def render(self, mode='human'):
+        return self.env.render(mode=mode)
+
+    def close(self):
+        return self.env.close()
+
+# Example usage:
+if __name__ == "__main__":
+    # Create a MyoSuite environment
+    env = myosuite.make('myoChallengeDieReorientP2-v0')
+
+    # Wrap it with the old Gym-style wrapper
+    env = OldGymWrapper(env)
+
+    # Use the environment as usual
+    observation = env.reset()
+    done = False
+    while not done:
+        # Sample a random action
+        action = env.action_space.sample()
+        # Step the environment
+        observation, reward, done, info = env.step(action)
+        # Optionally render the environment
+        env.render()
+    env.close()
+
 
 def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directory=None, **kwargs):
     env = gym.make(env_id, **kwargs)
diff --git a/rl_games/configs/ppo_myo.yaml b/rl_games/configs/ppo_myo.yaml
new file mode 100644
index 00000000..297a014b
--- /dev/null
+++ b/rl_games/configs/ppo_myo.yaml
@@ -0,0 +1,68 @@
+params:
+  seed: 8
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [256,128,64]
+      d2rl: False
+      activation: elu
+      initializer:
+        name: default
+        scale: 2
+  config:
+    env_name: myo_gym
+    name: myo
+    reward_shaper:
+      min_val: -1
+      scale_value: 0.1
+
+    normalize_advantage: True
+    gamma: 0.995
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    save_best_after: 10
+    score_to_win: 10000
+    grad_norm: 1.5
+    entropy_coef: 0
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: False
+    num_actors: 16
+    horizon_length: 128
+    minibatch_size: 1024
+    mini_epochs: 4
+    critic_coef: 2
+    normalize_input: True
+    bounds_loss_coef: 0.00
+    max_epochs: 10000
+    normalize_value: True
+    use_diagnostics: True
+    value_bootstrap: True
+    #weight_decay: 0.0001
+    use_smooth_clamp: True
+    env_config:
+      name: 'myoElbowPose1D6MRandom-v0'
+    player:
+
+      render: True
+      deterministic: True
+      games_num: 200
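
Note (not part of the patch): a minimal smoke-test sketch of the new wiring, assuming myosuite and rl_games are installed. It calls the create_myo factory added above (which imports myosuite internally and wraps the env in OldGymWrapper), reuses the myoElbowPose1D6MRandom-v0 task from ppo_myo.yaml, and the total_reward bookkeeping and print are illustrative only.

    # Sketch: exercise the old Gym API exposed by OldGymWrapper
    # (reset() -> obs, step() -> (obs, reward, done, info)).
    from rl_games.common.env_configurations import create_myo

    env = create_myo(name='myoElbowPose1D6MRandom-v0')  # task name taken from ppo_myo.yaml
    obs = env.reset()          # old Gym API: observation only, already flattened
    done = False
    total_reward = 0.0         # illustrative bookkeeping only
    while not done:
        action = env.action_space.sample()               # random policy, just to check shapes
        obs, reward, done, info = env.step(action)       # 4-tuple; terminated/truncated already merged
        total_reward += reward
    env.close()
    print('episode return:', total_reward)

Training with the new config would then go through the usual rl_games entry point, e.g. python runner.py --train --file rl_games/configs/ppo_myo.yaml, with the num_actors: 16 workers collected through the RAY vecenv backend registered for myo_gym.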