diff --git a/rl_games/algos_torch/network_builder.py b/rl_games/algos_torch/network_builder.py
index 284b0305..896555ca 100644
--- a/rl_games/algos_torch/network_builder.py
+++ b/rl_games/algos_torch/network_builder.py
@@ -1098,6 +1098,8 @@ def __init__(self, params, **kwargs):
             self.actions_num = kwargs.pop('actions_num')
             full_input_shape = kwargs.pop('input_shape')
+            print("Observations shape: ", full_input_shape)
+
             self.proprio_size = 0 # Number of proprioceptive features
             if isinstance(full_input_shape, dict):
                 input_shape = full_input_shape['camera']
@@ -1116,7 +1118,6 @@ def __init__(self, params, **kwargs):
             self.cnn, self.cnn_output_size = self._build_backbone(input_shape, params['backbone'])
-            self.resize_transform = transforms.Resize((224, 224))
 
             mlp_input_size = self.cnn_output_size + self.proprio_size
             if len(self.units) == 0:
                 out_size = self.cnn_output_size
@@ -1287,10 +1288,13 @@ def _build_backbone(self, input_shape, backbone_params):
             pretrained = backbone_params.get('pretrained', False)
             self.preprocess_image = backbone_params.get('preprocess_image', False)
 
-            if backbone_type == 'resnet18':
-                backbone = models.resnet18(pretrained=pretrained, zero_init_residual=True) # norm_layer=nn.LayerNorm
-                # Modify the first convolution layer to match input shape if needed
+            if backbone_type == 'resnet18' or backbone_type == 'resnet34':
+                if backbone_type == 'resnet18':
+                    backbone = models.resnet18(pretrained=pretrained, zero_init_residual=True)
+                else:
+                    backbone = models.resnet34(pretrained=pretrained, zero_init_residual=True)
+                # Modify the first convolution layer to match input shape if needed
                 # TODO: add low-res parameter
                 backbone.conv1 = nn.Conv2d(input_shape[0], 64, kernel_size=3, stride=1, padding=1, bias=False)
                 # backbone.maxpool = nn.Identity()
@@ -1314,26 +1318,55 @@ def _build_backbone(self, input_shape, backbone_params):
                     padding=1, # Add padding to preserve dimensions after convolution
                     bias=True # False
                 )
-
+            elif backbone_type == 'efficientnet_v2_s':
+                backbone = models.efficientnet_v2_s(pretrained=pretrained)
+                backbone.features[0][0] = nn.Conv2d(input_shape[0], 24, kernel_size=3, stride=1, padding=1, bias=False)
+                backbone_output_size = backbone.classifier[1].in_features
+                backbone.classifier = nn.Identity()
             elif backbone_type == 'vit_b_16':
                 backbone = models.vision_transformer.vit_b_16(pretrained=pretrained)
+                # Add a resize layer to ensure the input is correctly sized for ViT
+                resize_layer = nn.Upsample(size=(224, 224), mode='bilinear', align_corners=False)
+
                 backbone_output_size = backbone.heads.head.in_features
                 backbone.heads.head = nn.Identity()
+
+                # Combine the resize layer and the backbone into a sequential model
+                backbone = nn.Sequential(resize_layer, backbone)
+                # # Assuming your input image is a tensor or PIL image, resize it to 224x224
+                # #obs = self.resize_transform(obs)
+                # backbone = models.vision_transformer.vit_b_16(pretrained=pretrained)
+
+                # backbone_output_size = backbone.heads.head.in_features
+                # backbone.heads.head = nn.Identity()
             else:
                 raise ValueError(f'Unknown backbone type: {backbone_type}')
 
             # Optionally freeze the follow-up layers, leaving the first convolutional layer unfrozen
             if backbone_params.get('freeze', False):
+                print('Freezing backbone')
                 for name, param in backbone.named_parameters():
                     if 'conv1' not in name: # Ensure the first conv layer is not frozen
                         param.requires_grad = False
 
             return backbone, backbone_output_size
 
+        def is_separate_critic(self):
+            return False
+
+        def is_rnn(self):
+            return self.has_rnn
+
+        def get_default_rnn_state(self):
+            num_layers = self.rnn_layers
+            if self.rnn_name == 'lstm':
+                return (torch.zeros((num_layers, self.num_seqs, self.rnn_units)),
+                        torch.zeros((num_layers, self.num_seqs, self.rnn_units)))
+            else:
+                return (torch.zeros((num_layers, self.num_seqs, self.rnn_units)))
+
     def build(self, name, **kwargs):
-        print("Building Network")
-        print(self.params)
         net = VisionBackboneBuilder.Network(self.params, **kwargs)
         return net
diff --git a/rl_games/configs/atari/ppo_pacman_envpool_resnet.yaml b/rl_games/configs/atari/ppo_pacman_envpool_resnet.yaml
index bd343342..c1cf0178 100644
--- a/rl_games/configs/atari/ppo_pacman_envpool_resnet.yaml
+++ b/rl_games/configs/atari/ppo_pacman_envpool_resnet.yaml
@@ -6,74 +6,82 @@ params:
     name: discrete_a2c
 
   network:
-    name: resnet_actor_critic
-    require_rewards: True
-    require_last_actions: True
+    name: e2e_vision_actor_critic
     separate: False
     value_shape: 1
     space:
       discrete:
-
-    cnn:
+
+    backbone:
+      type: resnet18 #efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
+      pretrained: True
       permute_input: False
-      conv_depths: [16, 32, 32]
-      activation: relu
-      initializer:
-        name: default
-      regularizer:
-        name: 'None'
-
+      freeze: False
+      preprocess_image: True
+
+      args:
+        zero_init_residual: True
+        norm_layer: None
+
     mlp:
-      units: [512]
-      activation: relu
+      units: [256]
+      activation: relu #elu
       regularizer:
-        name: 'None'
+        name: None
       initializer:
-        name: default
+        name: default
+
     rnn:
       name: lstm
-      units: 256
+      units: 512
       layers: 1
+      before_mlp: True
 
   config:
+    name: Pacman_resnet18_LSTM_before_MLP_rew_shaper_100
+    env_name: envpool
     reward_shaper:
-      min_val: -1
-      max_val: 1
-      #scale_value: 0.01
-
+      min_val: -100
+      max_val: 100
+
+    mixed_precision: True
+    normalize_input: False
+    normalize_value: True
     normalize_advantage: True
-    gamma: 0.995
+    gamma: 0.99
     tau: 0.95
-    learning_rate: 3e-4
-    name: pacman_resnet
+    learning_rate: 2e-4
+
     score_to_win: 100000
-    grad_norm: 1.5
+    grad_norm: 1.0
     entropy_coef: 0.01
     truncate_grads: True
-    env_name: envpool #'openai_gym' #'PongNoFrameskip-v4' #
+
     e_clip: 0.2
     clip_value: True
+    save_best_after: 25
+    save_frequency: 500
     num_actors: 64
     horizon_length: 128
     minibatch_size: 2048
     mini_epochs: 2
     critic_coef: 1
-    lr_schedule: None
+    lr_schedule: linear
     kl_threshold: 0.01
-    normalize_input: False
-    normalize_value: False
     use_diagnostics: True
-    seq_length: 32
-    max_epochs: 200000
+    seq_length: 8
+    max_epochs: 10000
+    #weight_decay: 0.001
 
     env_config:
       env_name: MsPacman-v5
       episodic_life: True
       has_lives: True
-      use_dict_obs_space: True
-
+      use_dict_obs_space: False #True
+      stack_num: 1
+      gray_scale: False
     player:
-      render: False
-      games_num: 20
-      n_game_life: 3
+      render: True
+      games_num: 10
+      n_game_life: 1
       deterministic: True
diff --git a/rl_games/configs/atari/ppo_pacman_envpool_rnn.yaml b/rl_games/configs/atari/ppo_pacman_envpool_rnn.yaml
index 7e255900..fbf77dca 100644
--- a/rl_games/configs/atari/ppo_pacman_envpool_rnn.yaml
+++ b/rl_games/configs/atari/ppo_pacman_envpool_rnn.yaml
@@ -5,8 +5,6 @@ params:
   model:
     name: discrete_a2c
 
-
-
   network:
     name: actor_critic
     separate: False
diff --git a/rl_games/configs/atari/ppo_pacman_envpool_vit.yaml b/rl_games/configs/atari/ppo_pacman_envpool_vit.yaml
new file mode 100644
index 00000000..b009f867
--- /dev/null
+++ b/rl_games/configs/atari/ppo_pacman_envpool_vit.yaml
@@ -0,0 +1,87 @@
+params:
+  algo:
+    name: a2c_discrete
+
+  model:
+    name: discrete_a2c
+
+  network:
+    name: e2e_vision_actor_critic
+    separate: False
+    value_shape: 1
+    space:
+      discrete:
+
+    backbone:
+      type: vit_b_16 #efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
+      pretrained: True
+      permute_input: False
+      freeze: True
+      preprocess_image: True
+
+      args:
+        zero_init_residual: True
+        norm_layer: None
+
+    mlp:
+      units: [256]
+      activation: relu
+      regularizer:
+        name: None
+      initializer:
+        name: default
+
+    rnn:
+      name: lstm
+      units: 512
+      layers: 1
+      before_mlp: True
+  config:
+    name: Pacman_vit_b_16_frozen
+    env_name: envpool
+    reward_shaper:
+      min_val: -100
+      max_val: 100
+
+    mixed_precision: True
+    normalize_input: False
+    normalize_value: True
+    normalize_advantage: True
+    gamma: 0.99
+    tau: 0.95
+    learning_rate: 2e-4
+
+    score_to_win: 100000
+    grad_norm: 1.0
+    entropy_coef: 0.01
+    truncate_grads: True
+
+    e_clip: 0.2
+    clip_value: True
+    save_best_after: 25
+    save_frequency: 500
+    num_actors: 64
+    horizon_length: 128
+    minibatch_size: 1024 #2048
+    mini_epochs: 2
+    critic_coef: 1
+    lr_schedule: linear
+    kl_threshold: 0.01
+    use_diagnostics: True
+    seq_length: 8
+    max_epochs: 10000
+    #weight_decay: 0.001
+
+    env_config:
+      env_name: MsPacman-v5
+      episodic_life: True
+      has_lives: True
+      use_dict_obs_space: False #True
+      stack_num: 1
+      gray_scale: False
+    player:
+      render: True
+      games_num: 10
+      n_game_life: 1
+      deterministic: True
+
diff --git a/rl_games/configs/atari/ppo_pong_envpool_backbone.yaml b/rl_games/configs/atari/ppo_pong_envpool_backbone.yaml
index 7afe311f..f43b00de 100644
--- a/rl_games/configs/atari/ppo_pong_envpool_backbone.yaml
+++ b/rl_games/configs/atari/ppo_pong_envpool_backbone.yaml
@@ -6,11 +6,6 @@ params:
     name: discrete_a2c
 
   network:
-    # name: resnet_actor_critic
-    # require_rewards: True
-    # require_last_actions: True
-    # separate: False
-    # value_shape: 1
     name: e2e_vision_actor_critic
     separate: False
     value_shape: 1
@@ -18,30 +13,32 @@ params:
       discrete:
 
     backbone:
-      type: resnet18 #convnext_tiny #vit_b_16 #resnet18
+      type: vit_b_16 #efficientnet_v2_s #convnext_tiny #vit_b_16 #resnet18 #resnet34
       pretrained: True
       permute_input: False
-      freeze: False
-      preprocess_image: False
+      freeze: True
+      preprocess_image: True
 
       args:
         zero_init_residual: True
         norm_layer: None
 
     mlp:
-      units: [512]
-      activation: relu
+      units: [256]
+      activation: relu #elu
       regularizer:
        name: None
       initializer:
        name: default
 
-    # rnn:
-    #   name: lstm
-    #   units: 256
-    #   layers: 1
+    rnn:
+      name: lstm
+      units: 512
+      layers: 1
+      before_mlp: True
 
   config:
-    name: pong_resnet18_maxpool
+    name: Pong_vit_b_16_color
+    #name: Pong_resnet18_maxpool_LSTM_before_MLP_ELU
     env_name: envpool
     reward_shaper:
       min_val: -1
@@ -53,7 +50,7 @@ params:
     normalize_advantage: True
     gamma: 0.99
     tau: 0.95
-    learning_rate: 1e-4
+    learning_rate: 2e-4
 
     score_to_win: 100000
     grad_norm: 1.0
@@ -73,14 +70,15 @@ params:
     kl_threshold: 0.01
     use_diagnostics: True
     seq_length: 8
-    max_epochs: 2000
+    max_epochs: 10000
     #weight_decay: 0.001
 
     env_config:
       env_name: Pong-v5
       has_lives: False
       use_dict_obs_space: False #True
-      stack_num: 3
+      stack_num: 1
+      gray_scale: False
     player:
       render: True
       games_num: 10
diff --git a/rl_games/configs/atari/ppo_pong_envpool_resnet.yaml b/rl_games/configs/atari/ppo_pong_envpool_impala.yaml
similarity index 100%
rename from rl_games/configs/atari/ppo_pong_envpool_resnet.yaml
rename to rl_games/configs/atari/ppo_pong_envpool_impala.yaml
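
Note: the vit_b_16 branch added above wraps torchvision's ViT behind a bilinear nn.Upsample so that small Atari frames reach the 224x224 resolution the transformer expects, and replaces the classification head with nn.Identity so the 768-dim features feed the MLP/LSTM head. Below is a minimal standalone sketch of that wiring, illustrative only and not part of the patch; the 84x84 RGB observation shape is an assumption based on the stack_num: 1 and gray_scale: False settings in the configs.

import torch
import torch.nn as nn
from torchvision import models

# Same construction as the 'vit_b_16' branch of _build_backbone above.
vit = models.vision_transformer.vit_b_16(pretrained=False)  # the configs use pretrained: True
backbone_output_size = vit.heads.head.in_features           # 768 for vit_b_16
vit.heads.head = nn.Identity()                               # strip the classification head

backbone = nn.Sequential(
    nn.Upsample(size=(224, 224), mode='bilinear', align_corners=False),  # resize to ViT input size
    vit,
)

obs = torch.rand(4, 3, 84, 84)   # assumed batch of RGB Atari frames
features = backbone(obs)
print(features.shape)            # torch.Size([4, 768])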