From d49bfdb8900ed24b3c8eb35691dea66e04870350 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20=C5=81api=C5=84ski?=
Date: Fri, 20 Sep 2024 16:27:20 +0300
Subject: [PATCH] [SW-198498] pass "lazy_mode" arg to GaudiLlamaModel GaudiTrainer

Problem: TrainingArgs.use_lazy_mode is not used by GaudiLlamaModel
Cause: lazy_mode argument was not passed by GaudiTrainer
Solution: Added missing argument to inputs in GaudiTrainer._inner_training_loop

Change-Id: I956023956af3d7962b24be53ec74d20e6bb56bd6
---
 optimum/habana/transformers/trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py
index f4b231285e..3bf3babcbb 100644
--- a/optimum/habana/transformers/trainer.py
+++ b/optimum/habana/transformers/trainer.py
@@ -978,7 +978,9 @@ def hpu_deepspeed_checkpointing(function, *checkpoint_args, use_reentrant: Optio
                             inputs["flash_attention_recompute"] = True
                         if self.model.generation_config.flash_attention_causal_mask:
                             inputs["flash_attention_causal_mask"] = True
-
+                if self.model.config is not None:
+                    if self.model.config.model_type in ["llama", "qwen2", "mistral", "starcoder2"]:
+                        inputs["lazy_mode"] = args.use_lazy_mode
                 # TODO: keep syncs for fast DDP?
                 with self.accelerator.accumulate(model):
                     tr_loss_step = self.training_step(model, inputs)