From 9c7842127d28134112eb4972dccf7a18f7a00eb2 Mon Sep 17 00:00:00 2001 From: Chetan Kumar Verma <39086835+ckvermaAI@users.noreply.github.com> Date: Wed, 16 Apr 2025 21:24:52 +0530 Subject: [PATCH] [SW-226132] Pad the examples for QLoRa finetuning test (#252) * [SW-226132] Pad the examples * update test name --------- Co-authored-by: Vivek Goel --- setup.py | 1 + .../fixture/tests/test_bnb_inference.json | 2 +- .../fixture/tests/test_bnb_qlora.json | 6 ++--- tests/test_bnb_inference.py | 2 -- tests/test_bnb_qlora.py | 22 +++++++++++-------- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/setup.py b/setup.py index ee22ddfae6..e2087d75e6 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ "torchsde", "timm", "peft", + "bitsandbytes @ git+https://github.com/bitsandbytes-foundation/bitsandbytes.git@multi-backend-refactor", ] QUALITY_REQUIRES = [ diff --git a/tests/baselines/fixture/tests/test_bnb_inference.json b/tests/baselines/fixture/tests/test_bnb_inference.json index 8768031ddd..2056f34d94 100644 --- a/tests/baselines/fixture/tests/test_bnb_inference.json +++ b/tests/baselines/fixture/tests/test_bnb_inference.json @@ -1,5 +1,5 @@ { "tests/test_bnb_inference.py::test_nf4_quantization_inference": { - "output": "Hello my name is Marlene and I am 36 years old. I am a very happy person, I love to" + "output": "Hello my name is Kelsey and I am a 16 year old girl who loves to draw and paint. I have" } } \ No newline at end of file diff --git a/tests/baselines/fixture/tests/test_bnb_qlora.json b/tests/baselines/fixture/tests/test_bnb_qlora.json index ddaaec170d..6e448772de 100644 --- a/tests/baselines/fixture/tests/test_bnb_qlora.json +++ b/tests/baselines/fixture/tests/test_bnb_qlora.json @@ -1,10 +1,10 @@ { - "tests/test_bnb_qlora.py::test_nf4_quantization_inference": { + "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning": { "gaudi2": { - "eval_loss": 1.638 + "eval_loss": 1.225 }, "gaudi3": { - "eval_loss": 1.638 + "eval_loss": 1.225 } } } \ No newline at end of file diff --git a/tests/test_bnb_inference.py b/tests/test_bnb_inference.py index 64d5bd9214..8898e3bd32 100644 --- a/tests/test_bnb_inference.py +++ b/tests/test_bnb_inference.py @@ -56,8 +56,6 @@ def test_nf4_quantization_inference(token: str, baseline): generation_config.use_cache = True generation_config.use_flash_attention = True - model.model = torch.compile(model.model, backend="hpu_backend") - input_text = "Hello my name is" inputs = tokenizer(input_text, return_tensors="pt").to(device="hpu") diff --git a/tests/test_bnb_qlora.py b/tests/test_bnb_qlora.py index 60fa1972a3..d05f01cbc6 100644 --- a/tests/test_bnb_qlora.py +++ b/tests/test_bnb_qlora.py @@ -53,10 +53,12 @@ def print_trainable_parameters(model): ) -def get_data(tokenizer, dataset_name): +def get_data(tokenizer, dataset_name, max_seq_length=1024): dataset = load_dataset(dataset_name) dataset = dataset.shuffle(seed=42) - data = dataset.map(lambda example: tokenizer(example["text"]), batched=True) + data = dataset.map( + lambda example: tokenizer(example["text"], max_length=max_seq_length, padding="max_length"), batched=True + ) split_data = data["train"].train_test_split(test_size=0.1, seed=42) return split_data @@ -77,7 +79,7 @@ def get_model(token: str): @pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="execution not supported on gaudi1") -def test_nf4_quantization_inference(token: str, baseline): +def test_nf4_quantization_finetuning(token: str, baseline): try: import sys @@ -91,7 +93,7 @@ def test_nf4_quantization_inference(token: str, baseline): modeling_utils.adapt_transformers_to_gaudi() - tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value) + tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value, padding_side="right") # needed for llama tokenizer tokenizer.pad_token = tokenizer.eos_token @@ -113,7 +115,9 @@ def test_nf4_quantization_inference(token: str, baseline): model = get_peft_model(model, config) print_trainable_parameters(model) - data = get_data(tokenizer, dataset_name="tatsu-lab/alpaca") + max_seq_length = 1024 + print(f"max_seq_len {max_seq_length}") + data = get_data(tokenizer, dataset_name="tatsu-lab/alpaca", max_seq_length=max_seq_length) gaudi_config = GaudiConfig( use_fused_adam=True, @@ -126,8 +130,8 @@ def test_nf4_quantization_inference(token: str, baseline): per_device_train_batch_size=8, per_device_eval_batch_size=8, gradient_accumulation_steps=2, - max_steps=5, - eval_steps=3, + max_steps=50, + eval_steps=10, warmup_steps=3, learning_rate=2e-4, logging_steps=1, @@ -136,8 +140,8 @@ def test_nf4_quantization_inference(token: str, baseline): use_habana=True, use_lazy_mode=False, pipelining_fwd_bwd=True, - torch_compile=True, - torch_compile_backend="hpu_backend", + adjust_throughput=True, + throughput_warmup_steps=2, ) trainer = GaudiTrainer(