From b035ef32bb7a002cbbe500de3cba8f6de2e735b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20=C5=81api=C5=84ski?= Date: Fri, 20 Sep 2024 13:37:12 +0300 Subject: [PATCH] [SW-200913] Removed workaround for NaN bug causing graph break. Problem: graph break is introduced by get_autocast_hpu_dtype call Solution: this line is no longer needed as autocast logic will be executed in GaudiTrainer, therefore can be removed without breaking anything. Change-Id: I959a93ba5a2f0473b7aea63f0ba73d7ac8d95c4b --- optimum/habana/transformers/models/bert/modeling_bert.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/optimum/habana/transformers/models/bert/modeling_bert.py b/optimum/habana/transformers/models/bert/modeling_bert.py index 549cb670c2..178793d240 100644 --- a/optimum/habana/transformers/models/bert/modeling_bert.py +++ b/optimum/habana/transformers/models/bert/modeling_bert.py @@ -73,8 +73,7 @@ def gaudi_BertModel_forward( # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] # ourselves in which case we just need to make it broadcastable to all heads. - dtype = torch.hpu.get_autocast_hpu_dtype() if torch.hpu.is_autocast_hpu_enabled() else self.dtype - extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, dtype=dtype) + extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, dtype=self.dtype) # If a 2D or 3D attention mask is provided for the cross-attention # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]