From 0ed7c3a344f735d0e244beea7d2a733c8b92d35b Mon Sep 17 00:00:00 2001
From: Denys Makoviichuk
Date: Sat, 28 Sep 2024 15:53:35 -0700
Subject: [PATCH] Added myo suite support (#306)

* Added myosuite support with Ray

* Added training config example

---------

Co-authored-by: Denys Makoviichuk
---
 rl_games/common/env_configurations.py | 10 ++++
 rl_games/common/wrappers.py           | 85 +++++++++++++++++++++++++++++++
 rl_games/configs/ppo_myo.yaml         | 68 +++++++++++++++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 rl_games/configs/ppo_myo.yaml

diff --git a/rl_games/common/env_configurations.py b/rl_games/common/env_configurations.py
index 43c8ebe1..08170847 100644
--- a/rl_games/common/env_configurations.py
+++ b/rl_games/common/env_configurations.py
@@ -86,6 +86,12 @@ def create_slime_gym_env(**kwargs):
     env = gym.make(name, **kwargs)
     return env
 
+def create_myo(**kwargs):
+    from myosuite.utils import gym
+    name = kwargs.pop('name')
+    env = gym.make(name, **kwargs)
+    env = wrappers.OldGymWrapper(env)
+    return env
 
 def create_atari_gym_env(**kwargs):
     #frames = kwargs.pop('frames', 1)
@@ -427,6 +433,10 @@ def create_env(name, **kwargs):
         'env_creator': lambda **kwargs: create_cule(**kwargs),
         'vecenv_type': 'CULE'
     },
+    'myo_gym' : {
+        'env_creator' : lambda **kwargs : create_myo(**kwargs),
+        'vecenv_type' : 'RAY'
+    },
 }
 
 def get_env_info(env):
diff --git a/rl_games/common/wrappers.py b/rl_games/common/wrappers.py
index a62e0855..dab4a648 100644
--- a/rl_games/common/wrappers.py
+++ b/rl_games/common/wrappers.py
@@ -1,3 +1,4 @@
+import gymnasium
 import numpy as np
 from numpy.random import randint
 
@@ -626,6 +627,90 @@ def __init__(self, env, name):
     def observation(self, observation):
         return observation * self.mask
 
+class OldGymWrapper(gym.Env):
+    def __init__(self, env):
+        self.env = env
+
+        # Convert Gymnasium spaces to Gym spaces
+        self.observation_space = self.convert_space(env.observation_space)
+        self.action_space = self.convert_space(env.action_space)
+
+    def convert_space(self, space):
+        """Recursively convert Gymnasium spaces to Gym spaces."""
+        if isinstance(space, gymnasium.spaces.Box):
+            return gym.spaces.Box(
+                low=space.low,
+                high=space.high,
+                shape=space.shape,
+                dtype=space.dtype
+            )
+        elif isinstance(space, gymnasium.spaces.Discrete):
+            return gym.spaces.Discrete(n=space.n)
+        elif isinstance(space, gymnasium.spaces.MultiDiscrete):
+            return gym.spaces.MultiDiscrete(nvec=space.nvec)
+        elif isinstance(space, gymnasium.spaces.MultiBinary):
+            return gym.spaces.MultiBinary(n=space.n)
+        elif isinstance(space, gymnasium.spaces.Tuple):
+            return gym.spaces.Tuple([self.convert_space(s) for s in space.spaces])
+        elif isinstance(space, gymnasium.spaces.Dict):
+            return gym.spaces.Dict({k: self.convert_space(s) for k, s in space.spaces.items()})
+        else:
+            raise NotImplementedError(f"Space type {type(space)} is not supported.")
+
+    def reset(self):
+        result = self.env.reset()
+        if isinstance(result, tuple):
+            # Gymnasium returns (observation, info)
+            observation, _ = result
+        else:
+            observation = result
+        # Flatten the observation
+        observation = gym.spaces.flatten(self.observation_space, observation)
+        return observation  # Old Gym API returns only the observation
+
+    def step(self, action):
+        # Unflatten the action
+        action = gym.spaces.unflatten(self.action_space, action)
+        result = self.env.step(action)
+
+        if len(result) == 5:
+            # Gymnasium returns (obs, reward, terminated, truncated, info)
+            observation, reward, terminated, truncated, info = result
+            done = terminated or truncated  # Combine for old Gym API
+        else:
+            # Old Gym returns (obs, reward, done, info)
+            observation, reward, done, info = result
+
+        # Flatten the observation
+        observation = gym.spaces.flatten(self.observation_space, observation)
+        return observation, reward, done, info
+
+    def render(self, mode='human'):
+        return self.env.render(mode=mode)
+
+    def close(self):
+        return self.env.close()
+
+# Example usage:
+if __name__ == "__main__":
+    # Create a MyoSuite environment
+    env = myosuite.make('myoChallengeDieReorientP2-v0')
+
+    # Wrap it with the old Gym-style wrapper
+    env = OldGymWrapper(env)
+
+    # Use the environment as usual
+    observation = env.reset()
+    done = False
+    while not done:
+        # Sample a random action
+        action = env.action_space.sample()
+        # Step the environment
+        observation, reward, done, info = env.step(action)
+        # Optionally render the environment
+        env.render()
+    env.close()
+
 
 def make_atari(env_id, timelimit=True, noop_max=0, skip=4, sticky=False, directory=None, **kwargs):
     env = gym.make(env_id, **kwargs)
diff --git a/rl_games/configs/ppo_myo.yaml b/rl_games/configs/ppo_myo.yaml
new file mode 100644
index 00000000..297a014b
--- /dev/null
+++ b/rl_games/configs/ppo_myo.yaml
@@ -0,0 +1,68 @@
+params:
+  seed: 8
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [256,128,64]
+      d2rl: False
+      activation: elu
+      initializer:
+        name: default
+        scale: 2
+  config:
+    env_name: myo_gym
+    name: myo
+    reward_shaper:
+      min_val: -1
+      scale_value: 0.1
+
+    normalize_advantage: True
+    gamma: 0.995
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    save_best_after: 10
+    score_to_win: 10000
+    grad_norm: 1.5
+    entropy_coef: 0
+    truncate_grads: True
+    e_clip: 0.2
+    clip_value: False
+    num_actors: 16
+    horizon_length: 128
+    minibatch_size: 1024
+    mini_epochs: 4
+    critic_coef: 2
+    normalize_input: True
+    bounds_loss_coef: 0.00
+    max_epochs: 10000
+    normalize_value: True
+    use_diagnostics: True
+    value_bootstrap: True
+    #weight_decay: 0.0001
+    use_smooth_clamp: True
+    env_config:
+      name: 'myoElbowPose1D6MRandom-v0'
+    player:
+
+      render: True
+      deterministic: True
+      games_num: 200
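
Note (not part of the patch): a minimal smoke-test sketch of the new wiring, assuming myosuite and rl_games are installed. It calls the create_myo factory added above (which imports myosuite internally and wraps the env in OldGymWrapper), reuses the myoElbowPose1D6MRandom-v0 task from ppo_myo.yaml, and the total_reward bookkeeping and print are illustrative only.

    # Sketch: exercise the old Gym API exposed by OldGymWrapper
    # (reset() -> obs, step() -> (obs, reward, done, info)).
    from rl_games.common.env_configurations import create_myo

    env = create_myo(name='myoElbowPose1D6MRandom-v0')  # task name taken from ppo_myo.yaml
    obs = env.reset()          # old Gym API: observation only, already flattened
    done = False
    total_reward = 0.0         # illustrative bookkeeping only
    while not done:
        action = env.action_space.sample()               # random policy, just to check shapes
        obs, reward, done, info = env.step(action)       # 4-tuple; terminated/truncated already merged
        total_reward += reward
    env.close()
    print('episode return:', total_reward)

Training with the new config would then go through the usual rl_games entry point, e.g. python runner.py --train --file rl_games/configs/ppo_myo.yaml, with the num_actors: 16 workers collected through the RAY vecenv backend registered for myo_gym.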