diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index ac8e6a772d3f..fdb4fc865c37 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -230,6 +230,7 @@ def forward(self, hidden_state: torch.Tensor, attention_mask: Optional[torch.Ten return hidden_states +# Copied from transformers.models.clip.modeling_clip.CLIPVisionEmbeddings with CLIP->BridgeTower class BridgeTowerVisionEmbeddings(nn.Module): def __init__(self, config: BridgeTowerVisionConfig): super().__init__() @@ -960,8 +961,8 @@ def _init_weights(self, module): nn.init.normal_(block.mlp.c_fc.weight, std=fc_std * self.config.initializer_factor) nn.init.normal_(block.mlp.c_proj.weight, std=proj_std * self.config.initializer_factor) - nn.init.normal_(module.visual.class_embedding, std=attn_std * self.config.initializer_factor) - nn.init.normal_(module.visual.positional_embedding, std=attn_std * self.config.initializer_factor) + nn.init.normal_(module.visual.embeddings.class_embedding, std=attn_std * self.config.initializer_factor) + nn.init.normal_(module.visual.embeddings.position_embedding, std=attn_std * self.config.initializer_factor) elif isinstance(module, (nn.Linear, nn.Conv2d, nn.Embedding)): module.weight.data.normal_(mean=0.0, std=0.05 * self.config.initializer_factor) elif isinstance(module, nn.LayerNorm):