diff --git a/src/peft/tuners/lora/model.py b/src/peft/tuners/lora/model.py index b0ea3869b5..d642282787 100644 --- a/src/peft/tuners/lora/model.py +++ b/src/peft/tuners/lora/model.py @@ -209,6 +209,7 @@ def _create_and_replace( "target_name": current_key, "loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False), "loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False), + "ephemeral_gpu_offload": lora_config.runtime_config.ephemeral_gpu_offload, "parameter_name": parameter_name, } diff --git a/tests/test_common_gpu.py b/tests/test_common_gpu.py index cac1a32128..a8a4c58c4d 100644 --- a/tests/test_common_gpu.py +++ b/tests/test_common_gpu.py @@ -64,6 +64,7 @@ load_cat_image, require_bitsandbytes, require_deterministic_for_xpu, + require_gptqmodel, require_non_cpu, require_torch_multi_accelerator, ) @@ -519,6 +520,7 @@ def test_ia3_bnb_quantization_from_pretrained_safetensors(self, quantization): assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l + @require_gptqmodel @pytest.mark.single_gpu_tests def test_lora_gptq_quantization_from_pretrained_safetensors(self): r""" diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 53a7ae5bd1..cf431f7f69 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -1365,6 +1365,10 @@ def __init__(self, emb_size=100): super().__init__() self.emb = nn.Embedding(emb_size, 5) self.conv1d = Conv1D(1, 5) + # make sure that we have a good signal-to-noise ratio + # since apparently CUDA ReLU clips the gradient at a + # certain point. 
+ self.conv1d.weight.data += 10 self.relu = nn.ReLU() self.flat = nn.Flatten() self.lin0 = nn.Linear(10, 2) diff --git a/tests/test_gpu_examples.py b/tests/test_gpu_examples.py index 909942c9e6..71ccc928a8 100644 --- a/tests/test_gpu_examples.py +++ b/tests/test_gpu_examples.py @@ -592,7 +592,8 @@ def test_seq2seq_lm_training_single_gpu(self): device_map={"": 0}, ) - assert set(model.hf_device_map.values()) == {0} + # note: transformers v5 doesn't set the device map if there's only one device + assert not hasattr(model, "hf_device_map") or set(model.hf_device_map.values()) == {0} tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id) model = prepare_model_for_kbit_training(model)