From 5d6b7c30ad82eeb25f78c5708087c574e938d64a Mon Sep 17 00:00:00 2001 From: Artur Kloniecki Date: Mon, 22 Sep 2025 12:04:52 +0300 Subject: [PATCH] Properly apply dropout in GPTBigCode only when in training. Signed-off-by: Artur Kloniecki --- .../transformers/models/gpt_bigcode/modeling_gpt_bigcode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py index e9063530d4..10f9889b3b 100644 --- a/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +++ b/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py @@ -142,7 +142,7 @@ def _attn(self, query, key, value, attention_mask=None, head_mask=None): attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1) - attn_weights = torch.nn.functional.dropout(attn_weights, p=self.attn_dropout) + attn_weights = torch.nn.functional.dropout(attn_weights, p=self.attn_dropout, training=self.training) # Mask heads if we want to if head_mask is not None: