-
Notifications
You must be signed in to change notification settings - Fork 155
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
159 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# rl_games training configuration for the Super Mario Bros environment
# (env_name: MarioEnv): discrete A2C with a ResNet actor-critic and an LSTM.
# NOTE(review): nesting below was reconstructed following the usual rl_games
# config layout (params -> algo / model / network / config) - confirm against
# the original file.
params:
  algo:
    name: a2c_discrete

  model:
    name: discrete_a2c

  network:
    name: resnet_actor_critic
    # Presumably these two flags make the network consume the 'reward' and
    # 'last_action' entries of the dict observation enabled by
    # env_config.use_dict_obs_space below - TODO confirm.
    require_rewards: True
    require_last_actions: True
    separate: False
    value_shape: 1
    space:
      discrete:

    cnn:
      permute_input: True
      conv_depths: [16, 32, 32]
      activation: relu
      initializer:
        name: default
      regularizer:
        name: 'None'

    mlp:
      units: [512]
      activation: relu
      regularizer:
        name: 'None'
      initializer:
        name: default
    rnn:
      name: lstm
      units: 256
      layers: 1
  config:
    # Presumably bounds the shaped reward to [-1, 1] - TODO confirm.
    reward_shaper:
      min_val: -1
      max_val: 1

    normalize_advantage: True
    gamma: 0.995
    tau: 0.95
    learning_rate: 3e-4
    name: mario_resnet
    score_to_win: 100000
    grad_norm: 1.5
    entropy_coef: 0.01
    truncate_grads: True
    env_name: MarioEnv
    e_clip: 0.2
    clip_value: True
    num_actors: 16
    horizon_length: 256
    minibatch_size: 2048
    mini_epochs: 2
    critic_coef: 1
    lr_schedule: None
    kl_threshold: 0.01
    normalize_input: False
    use_diagnostics: True
    seq_length: 32
    max_epochs: 200000

    # Keyword arguments for the environment; use_dict_obs_space is one of the
    # options MarioEnv.__init__ reads.
    env_config:
      use_dict_obs_space: True

    player:
      render: False
      games_num: 20
      n_game_life: 5
      deterministic: True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import gym | ||
import numpy as np | ||
from rl_games.common import wrappers | ||
|
||
class MarioEnv(gym.Env):
    """Gym environment wrapping a ``gym_super_mario_bros`` game.

    Keyword Args:
        env_name: Id passed to ``gym_super_mario_bros.make``
            (default ``'SuperMarioBros-v1'``).
        has_lives: Stored on the instance.
            NOTE(review): not read anywhere in this class - confirm intent.
        max_lives: Value written to the emulator RAM on every ``reset``
            (default 16).
        movement: ``'SIMPLE'`` selects ``SIMPLE_MOVEMENT``; any other value
            selects ``COMPLEX_MOVEMENT`` (default ``'SIMPLE'``).
        use_dict_obs_space: When true, observations are dicts with the keys
            ``'observation'``, ``'reward'`` and ``'last_action'`` instead of
            the bare frame observation (default ``False``).
    """

    def __init__(self, **kwargs):
        super().__init__()
        env_name = kwargs.pop('env_name', 'SuperMarioBros-v1')
        self.has_lives = kwargs.pop('has_lives', True)
        self.max_lives = kwargs.pop('max_lives', 16)
        # Must be set before _create_super_mario_env, which reads it.
        self.movement = kwargs.pop('movement', 'SIMPLE')
        self.use_dict_obs_space = kwargs.pop('use_dict_obs_space', False)
        self.env = self._create_super_mario_env(env_name)
        if self.use_dict_obs_space:
            self.observation_space = gym.spaces.Dict({
                'observation': self.env.observation_space,
                # step() clips the reward entry to [-1, 1], so the declared
                # bounds must include negative values.
                'reward': gym.spaces.Box(
                    low=-1, high=1, shape=(), dtype=np.float32),
                # Box bounds are inclusive; the largest valid action is n - 1.
                'last_action': gym.spaces.Box(
                    low=0, high=self.env.action_space.n - 1, shape=(),
                    dtype=int),
            })
        else:
            self.observation_space = self.env.observation_space

        self.action_space = self.env.action_space

    def _create_super_mario_env(self, name='SuperMarioBros-v1'):
        """Build the wrapped emulator environment for the game id ``name``.

        The joypad action set is chosen from ``self.movement``; the episodic
        life wrapper is chosen by whether ``name`` contains ``'Random'``.
        """
        # Imported lazily so the module can be imported without the Mario
        # packages installed.
        from nes_py.wrappers import JoypadSpace
        from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
        import gym_super_mario_bros

        # The action set depends on the configured movement option, not on
        # the environment id.
        movement = SIMPLE_MOVEMENT if self.movement == 'SIMPLE' else COMPLEX_MOVEMENT
        env = gym_super_mario_bros.make(name)
        env = JoypadSpace(env, movement)
        if 'Random' in name:
            env = wrappers.EpisodicLifeRandomMarioEnv(env)
        else:
            env = wrappers.EpisodicLifeMarioEnv(env)
        env = wrappers.MaxAndSkipEnv(env, skip=4)
        env = wrappers.wrap_deepmind(
            env, episode_life=False, clip_rewards=False, frame_stack=True, scale=True)
        return env

    def step(self, action):
        """Advance the game by one action.

        Returns:
            ``(obs, reward, is_done, info)``. ``reward`` is the value returned
            by the wrapped environment; only the ``'reward'`` entry of a dict
            observation is clipped to [-1, 1].
        """
        next_obs, reward, is_done, info = self.env.step(action)
        if self.use_dict_obs_space:
            next_obs = {
                'observation': next_obs,
                # float32 matches the dtype declared in observation_space.
                'reward': np.clip(np.array(reward, dtype=np.float32), -1, 1),
                'last_action': np.array(action, dtype=int),
            }
        return next_obs, reward, is_done, info

    def reset(self):
        """Reset the game and return the initial observation.

        For dict observations the ``'reward'`` and ``'last_action'`` entries
        start at zero.
        """
        obs = self.env.reset()
        # presumably 0x075a is the remaining-lives byte in emulator RAM
        # (inferred from the max_lives name) - TODO confirm.
        self.env.unwrapped.ram[0x075a] = self.max_lives
        if self.use_dict_obs_space:
            obs = {
                'observation': obs,
                'reward': np.array(0.0, dtype=np.float32),
                'last_action': np.array(0, dtype=int),
            }
        return obs

    def get_number_of_agents(self):
        """Return the number of agents in the environment (always 1)."""
        return 1