Skip to content

Commit

Permalink
Pacman training.
Browse files Browse the repository at this point in the history
  • Loading branch information
ViktorM committed Aug 22, 2024
1 parent c1eeeba commit 3531fb8
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 63 deletions.
47 changes: 40 additions & 7 deletions rl_games/algos_torch/network_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,6 +1098,8 @@ def __init__(self, params, **kwargs):
self.actions_num = kwargs.pop('actions_num')
full_input_shape = kwargs.pop('input_shape')

print("Observations shape: ", full_input_shape)

self.proprio_size = 0 # Number of proprioceptive features
if isinstance(full_input_shape, dict):
input_shape = full_input_shape['camera']
Expand All @@ -1116,7 +1118,6 @@ def __init__(self, params, **kwargs):

self.cnn, self.cnn_output_size = self._build_backbone(input_shape, params['backbone'])

self.resize_transform = transforms.Resize((224, 224))
mlp_input_size = self.cnn_output_size + self.proprio_size
if len(self.units) == 0:
out_size = self.cnn_output_size
Expand Down Expand Up @@ -1287,10 +1288,13 @@ def _build_backbone(self, input_shape, backbone_params):
pretrained = backbone_params.get('pretrained', False)
self.preprocess_image = backbone_params.get('preprocess_image', False)

if backbone_type == 'resnet18':
backbone = models.resnet18(pretrained=pretrained, zero_init_residual=True) # norm_layer=nn.LayerNorm
# Modify the first convolution layer to match input shape if needed
if backbone_type == 'resnet18' or backbone_type == 'resnet34':
if backbone_type == 'resnet18':
backbone = models.resnet18(pretrained=pretrained, zero_init_residual=True)
else:
backbone = models.resnet34(pretrained=pretrained, zero_init_residual=True)

# Modify the first convolution layer to match input shape if needed
# TODO: add low-res parameter
backbone.conv1 = nn.Conv2d(input_shape[0], 64, kernel_size=3, stride=1, padding=1, bias=False)
# backbone.maxpool = nn.Identity()
Expand All @@ -1314,26 +1318,55 @@ def _build_backbone(self, input_shape, backbone_params):
padding=1, # Add padding to preserve dimensions after convolution
bias=True # False
)

elif backbone_type == 'efficientnet_v2_s':
backbone = models.efficientnet_v2_s(pretrained=pretrained)
backbone.features[0][0] = nn.Conv2d(input_shape[0], 24, kernel_size=3, stride=1, padding=1, bias=False)
backbone_output_size = backbone.classifier[1].in_features
backbone.classifier = nn.Identity()
elif backbone_type == 'vit_b_16':
backbone = models.vision_transformer.vit_b_16(pretrained=pretrained)

# Add a resize layer to ensure the input is correctly sized for ViT
resize_layer = nn.Upsample(size=(224, 224), mode='bilinear', align_corners=False)

backbone_output_size = backbone.heads.head.in_features
backbone.heads.head = nn.Identity()

# Combine the resize layer and the backbone into a sequential model
backbone = nn.Sequential(resize_layer, backbone)
# # Assuming your input image is a tensor or PIL image, resize it to 224x224
# #obs = self.resize_transform(obs)
# backbone = models.vision_transformer.vit_b_16(pretrained=pretrained)

# backbone_output_size = backbone.heads.head.in_features
# backbone.heads.head = nn.Identity()
else:
raise ValueError(f'Unknown backbone type: {backbone_type}')

# Optionally freeze the follow-up layers, leaving the first convolutional layer unfrozen
if backbone_params.get('freeze', False):
print('Freezing backbone')
for name, param in backbone.named_parameters():
if 'conv1' not in name: # Ensure the first conv layer is not frozen
param.requires_grad = False

return backbone, backbone_output_size

def is_separate_critic(self):
    """Return ``False`` unconditionally.

    This network never reports a separate critic.
    """
    separate_critic = False
    return separate_critic

def is_rnn(self):
    """Return the value of the instance's ``has_rnn`` flag."""
    uses_rnn = self.has_rnn
    return uses_rnn

def get_default_rnn_state(self):
    """Build the zero-filled initial state(s) for the recurrent layer.

    Every tensor has shape ``(rnn_layers, num_seqs, rnn_units)``.

    Returns:
        tuple of torch.Tensor: two tensors when ``rnn_name`` is ``'lstm'``
        (presumably the hidden and cell states), otherwise a one-element
        tuple holding a single tensor.
    """
    state_shape = (self.rnn_layers, self.num_seqs, self.rnn_units)
    num_states = 2 if self.rnn_name == 'lstm' else 1
    # Always return a real tuple. The previous non-LSTM branch returned
    # ``(tensor)`` with no trailing comma, which is just the tensor itself,
    # so iterating over the result walked the layer dimension and yielded
    # tensors that had lost their layer axis.
    return tuple(torch.zeros(state_shape) for _ in range(num_states))

def build(self, name, **kwargs):
    """Log the stored parameters and construct the network.

    Args:
        name: Accepted but not used inside this method.
        **kwargs: Forwarded unchanged to ``VisionBackboneBuilder.Network``.

    Returns:
        The newly created ``VisionBackboneBuilder.Network`` instance.
    """
    print("Building Network")
    print(self.params)
    return VisionBackboneBuilder.Network(self.params, **kwargs)

Expand Down
80 changes: 44 additions & 36 deletions rl_games/configs/atari/ppo_pacman_envpool_resnet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,74 +6,82 @@ params:
name: discrete_a2c

network:
name: resnet_actor_critic
require_rewards: True
require_last_actions: True
name: e2e_vision_actor_critic
separate: False
value_shape: 1
space:
discrete:

cnn:

backbone:
type: resnet18 #efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
pretrained: True
permute_input: False
conv_depths: [16, 32, 32]
activation: relu
initializer:
name: default
regularizer:
name: 'None'
freeze: False
preprocess_image: True

args:
zero_init_residual: True
norm_layer: None

mlp:
units: [512]
activation: relu
units: [256]
activation: relu #elu
regularizer:
name: 'None'
name: None
initializer:
name: default
name: default

rnn:
name: lstm
units: 256
units: 512
layers: 1
before_mlp: True
config:
name: Pacman_resnet18_LSTM_before_MLP_rew_shaper_100
env_name: envpool
reward_shaper:
min_val: -1
max_val: 1
#scale_value: 0.01

min_val: -100
max_val: 100

mixed_precision: True
normalize_input: False
normalize_value: True
normalize_advantage: True
gamma: 0.995
gamma: 0.99
tau: 0.95
learning_rate: 3e-4
name: pacman_resnet
learning_rate: 2e-4

score_to_win: 100000
grad_norm: 1.5
grad_norm: 1.0
entropy_coef: 0.01
truncate_grads: True
env_name: envpool #'openai_gym' #'PongNoFrameskip-v4' #

e_clip: 0.2
clip_value: True
save_best_after: 25
save_frequency: 500
num_actors: 64
horizon_length: 128
minibatch_size: 2048
mini_epochs: 2
critic_coef: 1
lr_schedule: None
lr_schedule: linear
kl_threshold: 0.01
normalize_input: False
normalize_value: False
use_diagnostics: True
seq_length: 32
max_epochs: 200000
seq_length: 8
max_epochs: 10000
#weight_decay: 0.001

env_config:
env_name: MsPacman-v5
episodic_life: True
has_lives: True
use_dict_obs_space: True

use_dict_obs_space: False #True
stack_num: 1
gray_scale: False
player:
render: False
games_num: 20
n_game_life: 3
render: True
games_num: 10
n_game_life: 1
deterministic: True

2 changes: 0 additions & 2 deletions rl_games/configs/atari/ppo_pacman_envpool_rnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ params:
model:
name: discrete_a2c



network:
name: actor_critic
separate: False
Expand Down
87 changes: 87 additions & 0 deletions rl_games/configs/atari/ppo_pacman_envpool_vit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
params:
algo:
name: a2c_discrete

model:
name: discrete_a2c

network:
name: e2e_vision_actor_critic
separate: False
value_shape: 1
space:
discrete:

backbone:
type: vit_b_16 #efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
pretrained: True
permute_input: False
freeze: True
preprocess_image: True

args:
zero_init_residual: True
norm_layer: None

mlp:
units: [256]
activation: relu
regularizer:
name: None
initializer:
name: default

rnn:
name: lstm
units: 512
layers: 1
before_mlp: True
config:
name: Pacman_vit_b_16_frozen
env_name: envpool
reward_shaper:
min_val: -100
max_val: 100

mixed_precision: True
normalize_input: False
normalize_value: True
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 2e-4

score_to_win: 100000
grad_norm: 1.0
entropy_coef: 0.01
truncate_grads: True

e_clip: 0.2
clip_value: True
save_best_after: 25
save_frequency: 500
num_actors: 64
horizon_length: 128
minibatch_size: 1024 #2048
mini_epochs: 2
critic_coef: 1
lr_schedule: linear
kl_threshold: 0.01
use_diagnostics: True
seq_length: 8
max_epochs: 10000
#weight_decay: 0.001

env_config:
env_name: MsPacman-v5
episodic_life: True
has_lives: True
use_dict_obs_space: False #True
stack_num: 1
gray_scale: False
player:
render: True
games_num: 10
n_game_life: 1
deterministic: True

34 changes: 16 additions & 18 deletions rl_games/configs/atari/ppo_pong_envpool_backbone.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,39 @@ params:
name: discrete_a2c

network:
# name: resnet_actor_critic
# require_rewards: True
# require_last_actions: True
# separate: False
# value_shape: 1
name: e2e_vision_actor_critic
separate: False
value_shape: 1
space:
discrete:

backbone:
type: resnet18 #convnext_tiny #vit_b_16 #resnet18
type: vit_b_16 #efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
pretrained: True
permute_input: False
freeze: False
preprocess_image: False
freeze: True
preprocess_image: True

args:
zero_init_residual: True
norm_layer: None

mlp:
units: [512]
activation: relu
units: [256]
activation: relu #elu
regularizer:
name: None
initializer:
name: default

# rnn:
# name: lstm
# units: 256
# layers: 1
rnn:
name: lstm
units: 512
layers: 1
before_mlp: True
config:
name: pong_resnet18_maxpool
name: Pong_vit_b_16_color
#name: Pong_resnet18_maxpool_LSTM_before_MLP_ELU
env_name: envpool
reward_shaper:
min_val: -1
Expand All @@ -53,7 +50,7 @@ params:
normalize_advantage: True
gamma: 0.99
tau: 0.95
learning_rate: 1e-4
learning_rate: 2e-4

score_to_win: 100000
grad_norm: 1.0
Expand All @@ -73,14 +70,15 @@ params:
kl_threshold: 0.01
use_diagnostics: True
seq_length: 8
max_epochs: 2000
max_epochs: 10000
#weight_decay: 0.001

env_config:
env_name: Pong-v5
has_lives: False
use_dict_obs_space: False #True
stack_num: 3
stack_num: 1
gray_scale: False
player:
render: True
games_num: 10
Expand Down

0 comments on commit 3531fb8

Please sign in to comment.