From 5d6b7c30ad82eeb25f78c5708087c574e938d64a Mon Sep 17 00:00:00 2001
From: Artur Kloniecki <arturx.kloniecki@intel.com>
Date: Mon, 22 Sep 2025 12:04:52 +0300
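Subject: [PATCH] Apply attention dropout in GPTBigCode only during training

torch.nn.functional.dropout defaults to training=True, so calling it
without the `training` argument applied dropout to the attention weights
in eval/inference mode as well. Pass training=self.training so dropout
is active only while the module is in training mode.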

Signed-off-by: Artur Kloniecki <arturx.kloniecki@intel.com>
---
 .../transformers/models/gpt_bigcode/modeling_gpt_bigcode.py     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
index e9063530d4..10f9889b3b 100644
--- a/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
+++ b/optimum/habana/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
@@ -142,7 +142,7 @@ def _attn(self, query, key, value, attention_mask=None, head_mask=None):
 
             attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
 
-        attn_weights = torch.nn.functional.dropout(attn_weights, p=self.attn_dropout)
+        attn_weights = torch.nn.functional.dropout(attn_weights, p=self.attn_dropout, training=self.training)
 
         # Mask heads if we want to
         if head_mask is not None: