From 34482d238f5cf7afd7414f8292dec4a5e31b775e Mon Sep 17 00:00:00 2001
From: Himangshu Lahkar <49579433+hlahkar@users.noreply.github.com>
Date: Mon, 26 Feb 2024 20:44:24 +0530
Subject: [PATCH] Fix Llama-70B-FSDP model loading issue (#63)

---
 examples/language-modeling/run_lora_clm.py   | 6 ++++--
 optimum/habana/transformers/training_args.py | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/language-modeling/run_lora_clm.py b/examples/language-modeling/run_lora_clm.py
index c7ba0ebeae..8fc43f6280 100644
--- a/examples/language-modeling/run_lora_clm.py
+++ b/examples/language-modeling/run_lora_clm.py
@@ -42,7 +42,6 @@
 from transformers.trainer_utils import is_main_process
 
 from optimum.habana import GaudiConfig, GaudiTrainer, GaudiTrainingArguments
-from optimum.habana.peft.layer import GaudiLoraLayerLinearForward
 from optimum.habana.utils import set_seed
 
 
@@ -675,7 +674,10 @@ def compute_metrics(eval_preds):
         )
         if training_args.gradient_checkpointing:
             model.enable_input_require_grads()
-        tuners.lora.layer.Linear.forward = GaudiLoraLayerLinearForward
+        if training_args.torch_compile:
+            from optimum.habana.peft.layer import GaudiLoraLayerLinearForward
+
+            tuners.lora.layer.Linear.forward = GaudiLoraLayerLinearForward
         lora_model = get_peft_model(model, peft_config)
         if training_args.bf16:
             lora_model = lora_model.to(torch.bfloat16)
diff --git a/optimum/habana/transformers/training_args.py b/optimum/habana/transformers/training_args.py
index 5979e00243..4c265d1357 100644
--- a/optimum/habana/transformers/training_args.py
+++ b/optimum/habana/transformers/training_args.py
@@ -568,6 +568,7 @@ def __post_init__(self):
         # accelerate integration for FSDP
         if len(self.fsdp) > 0 and not self.fsdp_config["xla"]:
             os.environ["ACCELERATE_USE_FSDP"] = "true"
+            os.environ["FSDP_CPU_RAM_EFFICIENT_LOADING"] = "true"
             from accelerate.utils.constants import (
                 FSDP_AUTO_WRAP_POLICY,
                 FSDP_SHARDING_STRATEGY,
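
Note (not part of the patch): a minimal Python sketch of what the two changes do, assuming Accelerate honors the FSDP_CPU_RAM_EFFICIENT_LOADING environment variable for its FSDP plugin; the helper name maybe_patch_lora_forward is hypothetical and only wraps the conditional import shown in the diff.

    import os

    # training_args.py side: alongside ACCELERATE_USE_FSDP, the added flag asks
    # Accelerate's FSDP integration to load checkpoint weights RAM-efficiently
    # (materialize on one rank and broadcast) rather than loading the full
    # Llama-70B state dict on every process.
    os.environ["ACCELERATE_USE_FSDP"] = "true"
    os.environ["FSDP_CPU_RAM_EFFICIENT_LOADING"] = "true"

    # run_lora_clm.py side: the Gaudi-specific LoRA forward is swapped in only
    # when torch.compile is requested, instead of unconditionally at import time.
    def maybe_patch_lora_forward(training_args):  # hypothetical helper, not in the diff
        if training_args.torch_compile:
            from peft import tuners
            from optimum.habana.peft.layer import GaudiLoraLayerLinearForward

            tuners.lora.layer.Linear.forward = GaudiLoraLayerLinearForward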