Added myo suite support #306

Merged · 4 commits · Sep 28, 2024
10 changes: 10 additions & 0 deletions rl_games/common/env_configurations.py
@@ -86,6 +86,12 @@ def create_slime_gym_env(**kwargs):
env = gym.make(name, **kwargs)
return env

def create_myo(**kwargs):
from myosuite.utils import gym
name = kwargs.pop('name')
env = gym.make(name, **kwargs)
env = wrappers.OldGymWrapper(env)
return env

def create_atari_gym_env(**kwargs):
#frames = kwargs.pop('frames', 1)
@@ -427,6 +433,10 @@ def create_env(name, **kwargs):
'env_creator': lambda **kwargs: create_cule(**kwargs),
'vecenv_type': 'CULE'
},
'myo_gym' : {
'env_creator' : lambda **kwargs : create_myo(**kwargs),
'vecenv_type' : 'RAY'
},
}

def get_env_info(env):
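With this registration in place, rl_games can build MyoSuite environments by name through its env_creator table. A minimal sketch of exercising the new 'myo_gym' entry directly, assuming MyoSuite is installed and using the task name from the config added later in this PR:

# Sketch: build a MyoSuite env through the new 'myo_gym' registration.
# Assumes the module-level `configurations` dict in env_configurations.py.
from rl_games.common import env_configurations

create_fn = env_configurations.configurations['myo_gym']['env_creator']
env = create_fn(name='myoElbowPose1D6MRandom-v0')

obs = env.reset()          # OldGymWrapper: reset() returns only the observation
print(env.observation_space, env.action_space, obs.shape)
env.close()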
85 changes: 85 additions & 0 deletions rl_games/common/wrappers.py
@@ -1,3 +1,4 @@
import gymnasium
import numpy as np
from numpy.random import randint

@@ -626,6 +627,90 @@ def __init__(self, env, name):
def observation(self, observation):
return observation * self.mask

class OldGymWrapper(gym.Env):
    """Adapter that exposes a Gymnasium environment through the old Gym API:
    reset() returns only the observation and step() returns (obs, reward, done, info)."""

    def __init__(self, env):
        self.env = env

# Convert Gymnasium spaces to Gym spaces
self.observation_space = self.convert_space(env.observation_space)
self.action_space = self.convert_space(env.action_space)

def convert_space(self, space):
"""Recursively convert Gymnasium spaces to Gym spaces."""
if isinstance(space, gymnasium.spaces.Box):
return gym.spaces.Box(
low=space.low,
high=space.high,
shape=space.shape,
dtype=space.dtype
)
elif isinstance(space, gymnasium.spaces.Discrete):
return gym.spaces.Discrete(n=space.n)
elif isinstance(space, gymnasium.spaces.MultiDiscrete):
return gym.spaces.MultiDiscrete(nvec=space.nvec)
elif isinstance(space, gymnasium.spaces.MultiBinary):
return gym.spaces.MultiBinary(n=space.n)
elif isinstance(space, gymnasium.spaces.Tuple):
return gym.spaces.Tuple([self.convert_space(s) for s in space.spaces])
elif isinstance(space, gymnasium.spaces.Dict):
return gym.spaces.Dict({k: self.convert_space(s) for k, s in space.spaces.items()})
else:
raise NotImplementedError(f"Space type {type(space)} is not supported.")

def reset(self):
result = self.env.reset()
if isinstance(result, tuple):
# Gymnasium returns (observation, info)
observation, _ = result
else:
observation = result
# Flatten the observation
observation = gym.spaces.flatten(self.observation_space, observation)
return observation # Old Gym API returns only the observation

def step(self, action):
# Unflatten the action
action = gym.spaces.unflatten(self.action_space, action)
result = self.env.step(action)

if len(result) == 5:
# Gymnasium returns (obs, reward, terminated, truncated, info)
observation, reward, terminated, truncated, info = result
done = terminated or truncated # Combine for old Gym API
else:
# Old Gym returns (obs, reward, done, info)
observation, reward, done, info = result

# Flatten the observation
observation = gym.spaces.flatten(self.observation_space, observation)
return observation, reward, done, info

def render(self, mode='human'):
return self.env.render(mode=mode)

def close(self):
return self.env.close()

# Example usage:
if __name__ == "__main__":
    # Create a MyoSuite environment (use MyoSuite's Gymnasium-compatible gym module,
    # as in create_myo() above)
    from myosuite.utils import gym as myo_gym
    env = myo_gym.make('myoChallengeDieReorientP2-v0')

# Wrap it with the old Gym-style wrapper
env = OldGymWrapper(env)

# Use the environment as usual
observation = env.reset()
done = False
while not done:
# Sample a random action
action = env.action_space.sample()
# Step the environment
observation, reward, done, info = env.step(action)
# Optionally render the environment
env.render()
env.close()


def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directory=None, **kwargs):
env = gym.make(env_id, **kwargs)
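Because the wrapper only depends on the Gymnasium reset/step signatures, it can be smoke-tested without MyoSuite. A minimal sketch using a stock Gymnasium task (Pendulum-v1 is an illustrative assumption; any Box-space Gymnasium env should behave the same):

# Sketch: check the Gymnasium -> old-Gym conversion on a standard environment.
import gymnasium
from rl_games.common.wrappers import OldGymWrapper

env = OldGymWrapper(gymnasium.make('Pendulum-v1'))

obs = env.reset()                                               # observation only, old Gym style
obs, reward, done, info = env.step(env.action_space.sample())   # 4-tuple, not 5-tuple
print(obs.shape, reward, done)
env.close()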
68 changes: 68 additions & 0 deletions rl_games/configs/ppo_myo.yaml
@@ -0,0 +1,68 @@
params:
seed: 8
algo:
name: a2c_continuous

model:
name: continuous_a2c_logstd

network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [256,128,64]
d2rl: False
activation: elu
initializer:
name: default
scale: 2
config:
env_name: myo_gym
name: myo
reward_shaper:
min_val: -1
scale_value: 0.1

normalize_advantage: True
gamma: 0.995
tau: 0.95
learning_rate: 3e-4
lr_schedule: adaptive
kl_threshold: 0.008
save_best_after: 10
score_to_win: 10000
grad_norm: 1.5
entropy_coef: 0
truncate_grads: True
e_clip: 0.2
clip_value: False
num_actors: 16
horizon_length: 128
minibatch_size: 1024
mini_epochs: 4
critic_coef: 2
normalize_input: True
bounds_loss_coef: 0.00
max_epochs: 10000
normalize_value: True
use_diagnostics: True
value_bootstrap: True
#weight_decay: 0.0001
use_smooth_clamp: True
env_config:
name: 'myoElbowPose1D6MRandom-v0'
player:

render: True
deterministic: True
games_num: 200
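To train with this config, the usual rl_games entry point can be pointed at the file, e.g. python runner.py --train --file rl_games/configs/ppo_myo.yaml from the repository root. A minimal Python sketch of the same launch via the Runner API (the exact args dict is an assumption based on the standard train/play flags):

# Sketch: launch PPO training on the MyoSuite elbow task with the new config.
# Assumes rl_games and MyoSuite are installed and the path is relative to the repo root.
import yaml
from rl_games.torch_runner import Runner

with open('rl_games/configs/ppo_myo.yaml') as f:
    config = yaml.safe_load(f)

runner = Runner()
runner.load(config)
runner.run({'train': True, 'play': False})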