diff --git a/optimum/habana/transformers/models/bert/modeling_bert.py b/optimum/habana/transformers/models/bert/modeling_bert.py index 549cb670c2..178793d240 100644 --- a/optimum/habana/transformers/models/bert/modeling_bert.py +++ b/optimum/habana/transformers/models/bert/modeling_bert.py @@ -73,8 +73,7 @@ def gaudi_BertModel_forward( # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] # ourselves in which case we just need to make it broadcastable to all heads. - dtype = torch.hpu.get_autocast_hpu_dtype() if torch.hpu.is_autocast_hpu_enabled() else self.dtype - extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, dtype=dtype) + extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, dtype=self.dtype) # If a 2D or 3D attention mask is provided for the cross-attention # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]