From 9c7842127d28134112eb4972dccf7a18f7a00eb2 Mon Sep 17 00:00:00 2001
From: Chetan Kumar Verma <39086835+ckvermaAI@users.noreply.github.com>
Date: Wed, 16 Apr 2025 21:24:52 +0530
Subject: [PATCH] [SW-226132] Pad the examples for QLoRA finetuning test (#252)

* [SW-226132] Pad the examples to a fixed max_seq_length (1024)

* Rename the test to test_nf4_quantization_finetuning

---------

Co-authored-by: Vivek Goel <vgoel@habana.ai>
---
 setup.py                                      |  1 +
 .../fixture/tests/test_bnb_inference.json     |  2 +-
 .../fixture/tests/test_bnb_qlora.json         |  6 ++---
 tests/test_bnb_inference.py                   |  2 --
 tests/test_bnb_qlora.py                       | 22 +++++++++++--------
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/setup.py b/setup.py
index ee22ddfae6..e2087d75e6 100644
--- a/setup.py
+++ b/setup.py
@@ -52,6 +52,7 @@
     "torchsde",
     "timm",
     "peft",
+    "bitsandbytes @ git+https://github.com/bitsandbytes-foundation/bitsandbytes.git@multi-backend-refactor",
 ]
 
 QUALITY_REQUIRES = [
diff --git a/tests/baselines/fixture/tests/test_bnb_inference.json b/tests/baselines/fixture/tests/test_bnb_inference.json
index 8768031ddd..2056f34d94 100644
--- a/tests/baselines/fixture/tests/test_bnb_inference.json
+++ b/tests/baselines/fixture/tests/test_bnb_inference.json
@@ -1,5 +1,5 @@
 {
   "tests/test_bnb_inference.py::test_nf4_quantization_inference": {
-    "output": "Hello my name is Marlene and I am 36 years old. I am a very happy person, I love to"
+    "output": "Hello my name is Kelsey and I am a 16 year old girl who loves to draw and paint. I have"
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_bnb_qlora.json b/tests/baselines/fixture/tests/test_bnb_qlora.json
index ddaaec170d..6e448772de 100644
--- a/tests/baselines/fixture/tests/test_bnb_qlora.json
+++ b/tests/baselines/fixture/tests/test_bnb_qlora.json
@@ -1,10 +1,10 @@
 {
-  "tests/test_bnb_qlora.py::test_nf4_quantization_inference": {
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning": {
     "gaudi2": {
-      "eval_loss": 1.638
+      "eval_loss": 1.225
     },
     "gaudi3": {
-      "eval_loss": 1.638
+      "eval_loss": 1.225
     }
   }
 }
\ No newline at end of file
diff --git a/tests/test_bnb_inference.py b/tests/test_bnb_inference.py
index 64d5bd9214..8898e3bd32 100644
--- a/tests/test_bnb_inference.py
+++ b/tests/test_bnb_inference.py
@@ -56,8 +56,6 @@ def test_nf4_quantization_inference(token: str, baseline):
     generation_config.use_cache = True
     generation_config.use_flash_attention = True
 
-    model.model = torch.compile(model.model, backend="hpu_backend")
-
     input_text = "Hello my name is"
     inputs = tokenizer(input_text, return_tensors="pt").to(device="hpu")
 
diff --git a/tests/test_bnb_qlora.py b/tests/test_bnb_qlora.py
index 60fa1972a3..d05f01cbc6 100644
--- a/tests/test_bnb_qlora.py
+++ b/tests/test_bnb_qlora.py
@@ -53,10 +53,12 @@ def print_trainable_parameters(model):
     )
 
 
-def get_data(tokenizer, dataset_name):
+def get_data(tokenizer, dataset_name, max_seq_length=1024):
     dataset = load_dataset(dataset_name)
     dataset = dataset.shuffle(seed=42)
-    data = dataset.map(lambda example: tokenizer(example["text"]), batched=True)
+    data = dataset.map(
+        lambda example: tokenizer(example["text"], max_length=max_seq_length, padding="max_length"), batched=True
+    )
     split_data = data["train"].train_test_split(test_size=0.1, seed=42)
 
     return split_data
@@ -77,7 +79,7 @@ def get_model(token: str):
 
 
 @pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="execution not supported on gaudi1")
-def test_nf4_quantization_inference(token: str, baseline):
+def test_nf4_quantization_finetuning(token: str, baseline):
     try:
         import sys
 
@@ -91,7 +93,7 @@ def test_nf4_quantization_inference(token: str, baseline):
 
     modeling_utils.adapt_transformers_to_gaudi()
 
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value, padding_side="right")
     # needed for llama tokenizer
     tokenizer.pad_token = tokenizer.eos_token
 
@@ -113,7 +115,9 @@ def test_nf4_quantization_inference(token: str, baseline):
     model = get_peft_model(model, config)
     print_trainable_parameters(model)
 
-    data = get_data(tokenizer, dataset_name="tatsu-lab/alpaca")
+    max_seq_length = 1024
+    print(f"max_seq_len {max_seq_length}")
+    data = get_data(tokenizer, dataset_name="tatsu-lab/alpaca", max_seq_length=max_seq_length)
 
     gaudi_config = GaudiConfig(
         use_fused_adam=True,
@@ -126,8 +130,8 @@ def test_nf4_quantization_inference(token: str, baseline):
         per_device_train_batch_size=8,
         per_device_eval_batch_size=8,
         gradient_accumulation_steps=2,
-        max_steps=5,
-        eval_steps=3,
+        max_steps=50,
+        eval_steps=10,
         warmup_steps=3,
         learning_rate=2e-4,
         logging_steps=1,
@@ -136,8 +140,8 @@ def test_nf4_quantization_inference(token: str, baseline):
         use_habana=True,
         use_lazy_mode=False,
         pipelining_fwd_bwd=True,
-        torch_compile=True,
-        torch_compile_backend="hpu_backend",
+        adjust_throughput=True,
+        throughput_warmup_steps=2,
     )
 
     trainer = GaudiTrainer(