Walk config.
ViktorM committed Nov 4, 2024
1 parent 44ccb51 commit 86de35f
Showing 2 changed files with 121 additions and 46 deletions.
99 changes: 53 additions & 46 deletions rl_games/algos_torch/network_builder.py
@@ -44,16 +44,16 @@ def __init__(self, **kwargs):
self.activations_factory.register_builder('None', lambda **kwargs : nn.Identity())

self.init_factory = object_factory.ObjectFactory()
#self.init_factory.register_builder('normc_initializer', lambda **kwargs : normc_initializer(**kwargs))
self.init_factory.register_builder('const_initializer', lambda **kwargs : _create_initializer(nn.init.constant_,**kwargs))
self.init_factory.register_builder('orthogonal_initializer', lambda **kwargs : _create_initializer(nn.init.orthogonal_,**kwargs))
self.init_factory.register_builder('glorot_normal_initializer', lambda **kwargs : _create_initializer(nn.init.xavier_normal_,**kwargs))
self.init_factory.register_builder('glorot_uniform_initializer', lambda **kwargs : _create_initializer(nn.init.xavier_uniform_,**kwargs))
self.init_factory.register_builder('variance_scaling_initializer', lambda **kwargs : _create_initializer(torch_ext.variance_scaling_initializer,**kwargs))
self.init_factory.register_builder('random_uniform_initializer', lambda **kwargs : _create_initializer(nn.init.uniform_,**kwargs))
self.init_factory.register_builder('kaiming_normal', lambda **kwargs : _create_initializer(nn.init.kaiming_normal_,**kwargs))
self.init_factory.register_builder('orthogonal', lambda **kwargs : _create_initializer(nn.init.orthogonal_,**kwargs))
self.init_factory.register_builder('default', lambda **kwargs : nn.Identity() )
# self.init_factory.register_builder('normc_initializer', lambda **kwargs : normc_initializer(**kwargs))
self.init_factory.register_builder('const_initializer', lambda **kwargs : _create_initializer(nn.init.constant_, **kwargs))
self.init_factory.register_builder('orthogonal_initializer', lambda **kwargs : _create_initializer(nn.init.orthogonal_, **kwargs))
self.init_factory.register_builder('glorot_normal_initializer', lambda **kwargs : _create_initializer(nn.init.xavier_normal_, **kwargs))
self.init_factory.register_builder('glorot_uniform_initializer', lambda **kwargs : _create_initializer(nn.init.xavier_uniform_, **kwargs))
self.init_factory.register_builder('variance_scaling_initializer', lambda **kwargs : _create_initializer(torch_ext.variance_scaling_initializer, **kwargs))
self.init_factory.register_builder('random_uniform_initializer', lambda **kwargs : _create_initializer(nn.init.uniform_, **kwargs))
self.init_factory.register_builder('kaiming_normal', lambda **kwargs : _create_initializer(nn.init.kaiming_normal_, **kwargs))
self.init_factory.register_builder('orthogonal', lambda **kwargs : _create_initializer(nn.init.orthogonal_, **kwargs))
self.init_factory.register_builder('default', lambda **kwargs : nn.Identity())

def is_separate_critic(self):
return False
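
For context, the initializers registered above are consumed elsewhere in this file via self.init_factory.create(**self.initializer), which returns a one-argument callable applied to each layer's weight tensor. A minimal, self-contained sketch of that pattern (the gain value and layer size are hypothetical, and _create_initializer is re-declared here on the assumption that it simply binds its keyword arguments):

import torch.nn as nn

def _create_initializer(func, **kwargs):
    # assumed behaviour: bind kwargs and return a callable that takes a weight tensor
    return lambda v: func(v, **kwargs)

init_fn = _create_initializer(nn.init.orthogonal_, gain=1.41)  # hypothetical gain
layer = nn.Linear(64, 64)                                      # hypothetical layer
init_fn(layer.weight)  # equivalent to nn.init.orthogonal_(layer.weight, gain=1.41)
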
@@ -70,14 +70,15 @@ def get_default_rnn_state(self):
def get_aux_loss(self):
return None

def _calc_input_size(self, input_shape,cnn_layers=None):
def _calc_input_size(self, input_shape, cnn_layers=None):
if cnn_layers is None:
assert(len(input_shape) == 1)
return input_shape[0]
else:
return nn.Sequential(*cnn_layers)(torch.rand(1, *(input_shape))).flatten(1).data.size(1)

def _noisy_dense(self, inputs, units):
# TODO: to fix!
return layers.NoisyFactorizedLinear(inputs, units)

def _build_rnn(self, name, input, units, layers):
@@ -106,7 +107,7 @@ def _build_sequential_mlp(self,
if not need_norm:
continue
if norm_only_first_layer and norm_func_name is not None:
need_norm = False
need_norm = False
if norm_func_name == 'layer_norm':
layers.append(torch.nn.LayerNorm(unit))
elif norm_func_name == 'batch_norm':
@@ -127,7 +128,7 @@ def _build_mlp(self,
act_layers = [self.activations_factory.create(activation) for i in range(len(units))]
return D2RLNet(input_size, units, act_layers, norm_func_name)
else:
return self._build_sequential_mlp(input_size, units, activation, dense_func, norm_func_name = None,)
return self._build_sequential_mlp(input_size, units, activation, dense_func, norm_func_name=None,)

def _build_conv(self, ctype, **kwargs):
print('conv_name:', ctype)
@@ -148,18 +149,19 @@ def _build_cnn2d(self, input_shape, convs, activation, conv_func=torch.nn.Conv2d
in_channels = input_shape[0]
layers = []
for conv in convs:
layers.append(conv_func(in_channels=in_channels,
out_channels=conv['filters'],
kernel_size=conv['kernel_size'],
layers.append(conv_func(in_channels=in_channels,
out_channels=conv['filters'],
kernel_size=conv['kernel_size'],
stride=conv['strides'], padding=conv['padding']))
conv_func=torch.nn.Conv2d
conv_func = torch.nn.Conv2d
act = self.activations_factory.create(activation)
layers.append(act)
in_channels = conv['filters']
if norm_func_name == 'layer_norm':
layers.append(torch_ext.LayerNorm2d(in_channels))
elif norm_func_name == 'batch_norm':
layers.append(torch.nn.BatchNorm2d(in_channels))

if add_spatial_softmax:
layers.append(SpatialSoftArgmax(normalize=True))
if add_flatten:
@@ -178,21 +180,20 @@ def _build_cnn1d(self, input_shape, convs, activation, norm_func_name=None):
if norm_func_name == 'layer_norm':
layers.append(torch.nn.LayerNorm(in_channels))
elif norm_func_name == 'batch_norm':
layers.append(torch.nn.BatchNorm2d(in_channels))
layers.append(torch.nn.BatchNorm2d(in_channels))
return nn.Sequential(*layers)

def _build_value_layer(self, input_size, output_size, value_type='legacy'):
if value_type == 'legacy':
return torch.nn.Linear(input_size, output_size)
if value_type == 'default':
return DefaultValue(input_size, output_size)
return DefaultValue(input_size, output_size)
if value_type == 'twohot_encoded':
return TwoHotEncodedValue(input_size, output_size)

raise ValueError('value type is not "default", "legacy" or "two_hot_encoded"')



class A2CBuilder(NetworkBuilder):
def __init__(self, **kwargs):
NetworkBuilder.__init__(self)
@@ -218,10 +219,10 @@ def __init__(self, params, **kwargs):
if self.permute_input:
input_shape = torch_ext.shape_whc_to_cwh(input_shape)
cnn_args = {
'ctype' : self.cnn['type'],
'input_shape' : input_shape,
'convs' : self.cnn['convs'],
'activation' : self.cnn['activation'],
'ctype' : self.cnn['type'],
'input_shape' : input_shape,
'convs' : self.cnn['convs'],
'activation' : self.cnn['activation'],
'norm_func_name' : self.normalization,
}
self.actor_cnn = self._build_conv(**cnn_args)
@@ -266,8 +267,8 @@ def __init__(self, params, **kwargs):

mlp_args = {
'input_size' : mlp_input_size,
'units' : self.units,
'activation' : self.activation,
'units' : self.units,
'activation' : self.activation,
'norm_func_name' : self.normalization,
'dense_func' : torch.nn.Linear,
'd2rl' : self.is_d2rl,
@@ -311,7 +312,7 @@ def __init__(self, params, **kwargs):
if isinstance(m, nn.Linear):
mlp_init(m.weight)
if getattr(m, "bias", None) is not None:
torch.nn.init.zeros_(m.bias)
torch.nn.init.zeros_(m.bias)

if self.is_continuous:
mu_init(self.mu.weight)
@@ -491,16 +492,16 @@ def get_default_rnn_state(self):
rnn_units = self.rnn_units
if self.rnn_name == 'lstm':
if self.separate:
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),
torch.zeros((num_layers, self.num_seqs, rnn_units)),
torch.zeros((num_layers, self.num_seqs, rnn_units)),
torch.zeros((num_layers, self.num_seqs, rnn_units)),
torch.zeros((num_layers, self.num_seqs, rnn_units)))
else:
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),
torch.zeros((num_layers, self.num_seqs, rnn_units)))
else:
if self.separate:
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),
torch.zeros((num_layers, self.num_seqs, rnn_units)))
else:
return (torch.zeros((num_layers, self.num_seqs, rnn_units)),)
@@ -520,9 +521,9 @@ def load(self, params):
self.joint_obs_actions_config = params.get('joint_obs_actions', None)

if self.has_space:
self.is_multi_discrete = 'multi_discrete'in params['space']
self.is_multi_discrete = 'multi_discrete' in params['space']
self.is_discrete = 'discrete' in params['space']
self.is_continuous = 'continuous'in params['space']
self.is_continuous = 'continuous' in params['space']
if self.is_continuous:
self.space_config = params['space']['continuous']
self.fixed_sigma = self.space_config['fixed_sigma']
@@ -555,10 +556,12 @@ def build(self, name, **kwargs):
net = A2CBuilder.Network(self.params, **kwargs)
return net


class Conv2dAuto(nn.Conv2d):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.padding = (self.kernel_size[0] // 2, self.kernel_size[1] // 2) # dynamic add padding based on the kernel_size
# dynamic add padding based on the kernel_size
self.padding = (self.kernel_size[0] // 2, self.kernel_size[1] // 2)


class ConvBlock(nn.Module):
@@ -579,15 +582,18 @@ def forward(self, x):
class ResidualBlock(nn.Module):
def __init__(self, channels, activation='relu', use_bn=False, use_zero_init=False, use_attention=False):
super().__init__()
self.use_zero_init=use_zero_init
self.use_zero_init = use_zero_init
self.use_attention = use_attention

if use_zero_init:
self.alpha = nn.Parameter(torch.zeros(1))

self.activation = activation
self.conv1 = ConvBlock(channels, channels, use_bn)
self.conv2 = ConvBlock(channels, channels, use_bn)
self.activate1 = nn.ReLU()
self.activate2 = nn.ReLU()

if use_attention:
self.ca = ChannelAttention(channels)
self.sa = SpatialAttention()
@@ -623,6 +629,7 @@ def forward(self, x):
x = self.res_block2(x)
return x


class A2CResnetBuilder(NetworkBuilder):
def __init__(self, **kwargs):
NetworkBuilder.__init__(self)
@@ -740,7 +747,7 @@ def forward(self, obs_dict):
out = self.flatten_act(out)

if self.has_rnn:
#seq_length = obs_dict['seq_length']
# seq_length = obs_dict['seq_length']
seq_length = obs_dict.get('seq_length', 1)

out_in = out
@@ -799,7 +806,7 @@ def load(self, params):
self.initializer = params['mlp']['initializer']
self.is_discrete = 'discrete' in params['space']
self.is_continuous = 'continuous' in params['space']
self.is_multi_discrete = 'multi_discrete'in params['space']
self.is_multi_discrete = 'multi_discrete' in params['space']
self.value_activation = params.get('value_activation', 'None')
self.normalization = params.get('normalization', None)

@@ -842,8 +849,8 @@ def is_rnn(self):
def get_default_rnn_state(self):
num_layers = self.rnn_layers
if self.rnn_name == 'lstm':
return (torch.zeros((num_layers, self.num_seqs, self.rnn_units)),
torch.zeros((num_layers, self.num_seqs, self.rnn_units)))
return (torch.zeros((num_layers, self.num_seqs, self.rnn_units)),
torch.zeros((num_layers, self.num_seqs, self.rnn_units)))
else:
return (torch.zeros((num_layers, self.num_seqs, self.rnn_units)))

@@ -926,19 +933,19 @@ def __init__(self, params, **kwargs):
self.load(params)

actor_mlp_args = {
'input_size' : obs_dim,
'units' : self.units,
'activation' : self.activation,
'input_size' : obs_dim,
'units' : self.units,
'activation' : self.activation,
'norm_func_name' : self.normalization,
'dense_func' : torch.nn.Linear,
'd2rl' : self.is_d2rl,
'norm_only_first_layer' : self.norm_only_first_layer
}

critic_mlp_args = {
'input_size' : obs_dim + action_dim,
'units' : self.units,
'activation' : self.activation,
'input_size' : obs_dim + action_dim,
'units' : self.units,
'activation' : self.activation,
'norm_func_name' : self.normalization,
'dense_func' : torch.nn.Linear,
'd2rl' : self.is_d2rl,
@@ -952,7 +959,7 @@ def __init__(self, params, **kwargs):
self.critic = self._build_critic(1, **critic_mlp_args)
print("Building Critic Target")
self.critic_target = self._build_critic(1, **critic_mlp_args)
self.critic_target.load_state_dict(self.critic.state_dict())
self.critic_target.load_state_dict(self.critic.state_dict())

mlp_init = self.init_factory.create(**self.initializer)
for m in self.modules():
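Bridging to the new config below: the YAML's network block is what the A2CBuilder above consumes. A rough sketch of that hand-off, assuming the usual rl_games builder entry points (load() followed by build() with actions_num, input_shape, num_seqs, value_size) and using hypothetical observation/action sizes:

import yaml
from rl_games.algos_torch.network_builder import A2CBuilder

with open('rl_games/configs/myosuite/ppo_myo_walk.yaml') as f:
    network_params = yaml.safe_load(f)['params']['network']

builder = A2CBuilder()
builder.load(network_params)
# sizes below are hypothetical; the real ones come from the myoLegWalk-v0 spaces
net = builder.build('actor_critic', actions_num=20, input_shape=(100,),
                    num_seqs=1, value_size=1)
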
68 changes: 68 additions & 0 deletions rl_games/configs/myosuite/ppo_myo_walk.yaml
@@ -0,0 +1,68 @@
params:
seed: 8
algo:
name: a2c_continuous

model:
name: continuous_a2c_logstd

network:
name: actor_critic
separate: False
space:
continuous:
mu_activation: None
sigma_activation: None
mu_init:
name: default
sigma_init:
name: const_initializer
val: 0
fixed_sigma: True
mlp:
units: [512, 256, 128]
d2rl: False
activation: elu
initializer:
name: default
scale: 2
config:
env_name: myo_gym
name: MyoLegWalk
reward_shaper:
scale_value: 1.0
log_val: False
mixed_precision: True
normalize_input: True
normalize_value: True
value_bootstrap: True
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 3e-4
lr_schedule: adaptive
kl_threshold: 0.008
save_best_after: 20
save_frequency: 500
grad_norm: 1.0
entropy_coef: 0.0
truncate_grads: True
e_clip: 0.2
clip_value: False
num_actors: 32
horizon_length: 256
minibatch_size: 2048
mini_epochs: 6
critic_coef: 2
bounds_loss_coef: 0.001
max_epochs: 50000
use_diagnostics: True
weight_decay: 0.0
use_smooth_clamp: True
env_config:
name: myoLegWalk-v0
player:
render: True
deterministic: True
render_sleep: 0.01
games_num: 1000
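
For reference, one way this config might be launched, sketched on the assumption that rl_games' usual Runner.load()/run() entry points apply and that MyoSuite is installed so the myoLegWalk-v0 environment is registered (the CLI equivalent would be roughly: python runner.py --train --file rl_games/configs/myosuite/ppo_myo_walk.yaml):

import yaml
from rl_games.torch_runner import Runner

with open('rl_games/configs/myosuite/ppo_myo_walk.yaml') as f:
    config = yaml.safe_load(f)

runner = Runner()
runner.load(config)
runner.run({'train': True, 'play': False})  # for evaluation, pass {'train': False, 'play': True} to use the player section above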
