From b035ef32bb7a002cbbe500de3cba8f6de2e735b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20=C5=81api=C5=84ski?= <mlapinskix@habana.ai>
Date: Fri, 20 Sep 2024 13:37:12 +0300
Subject: [PATCH] [SW-200913] Removed workaround for NaN bug causing graph
 break.

Problem: a graph break is introduced by the get_autocast_hpu_dtype call.

Solution: this line is no longer needed because the autocast logic will
be executed in GaudiTrainer, so it can be removed without breaking
anything.

Change-Id: I959a93ba5a2f0473b7aea63f0ba73d7ac8d95c4b
---
 optimum/habana/transformers/models/bert/modeling_bert.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/optimum/habana/transformers/models/bert/modeling_bert.py b/optimum/habana/transformers/models/bert/modeling_bert.py
index 549cb670c2..178793d240 100644
--- a/optimum/habana/transformers/models/bert/modeling_bert.py
+++ b/optimum/habana/transformers/models/bert/modeling_bert.py
@@ -73,8 +73,7 @@ def gaudi_BertModel_forward(
 
     # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
     # ourselves in which case we just need to make it broadcastable to all heads.
-    dtype = torch.hpu.get_autocast_hpu_dtype() if torch.hpu.is_autocast_hpu_enabled() else self.dtype
-    extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, dtype=dtype)
+    extended_attention_mask = self.get_extended_attention_mask(attention_mask, input_shape, dtype=self.dtype)
 
     # If a 2D or 3D attention mask is provided for the cross-attention
     # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]