From 270611258688c8a72f48240d00524fb2aacdcb4d Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Wed, 6 May 2026 10:37:27 +0200
Subject: [PATCH 1/3] Make DPO fail fast for peft + liger-kernel

---
 trl/trainer/dpo_trainer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index 3797b5bd184..38d54427f81 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -709,6 +709,8 @@ def __init__(
                     "Liger DPO loss does not support precomputing reference log probabilities. Either disable "
                     "`precompute_ref_log_probs` or set `use_liger_kernel` to False."
                 )
+            if is_peft_model(model):
+                raise NotImplementedError("Liger DPO loss is not implemented for PEFT models.")

         # Dataset
         # Skip dataset preparation if it's a VLM, where preprocessing (e.g., image-to-pixel conversion) is too costly

From 62a66af5d4c04a90e2d7a05eba6e22d585107bc6 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Wed, 6 May 2026 10:37:56 +0200
Subject: [PATCH 2/3] Test DPO fail fast for peft + liger-kernel

---
 tests/test_dpo_trainer.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py
index d569b0cb6ce..6f2b8f4e61f 100644
--- a/tests/test_dpo_trainer.py
+++ b/tests/test_dpo_trainer.py
@@ -662,6 +662,25 @@ def test_train_with_liger(self):
             new_param = trainer.model.get_parameter(n)
             assert not torch.equal(param, new_param), f"Parameter {n} has not changed."

+    @require_liger_kernel
+    @require_peft
+    def test_init_fails_with_peft_and_liger(self):
+        dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
+
+        training_args = DPOConfig(
+            output_dir=self.tmp_dir,
+            use_liger_kernel=True,
+            report_to="none",
+        )
+
+        with pytest.raises(NotImplementedError, match="Liger DPO loss is not implemented for PEFT models."):
+            DPOTrainer(
+                model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+                args=training_args,
+                train_dataset=dataset,
+                peft_config=LoraConfig(),
+            )
+
     def test_train_with_iterable_dataset(self):
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train", streaming=True)


From 70540c1fc63556e190a1ec7363af2bb3694a8b08 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Wed, 6 May 2026 15:39:23 +0200
Subject: [PATCH 3/3] Remove error raising from _compute_loss_liger

---
 trl/trainer/dpo_trainer.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index 38d54427f81..9caf8858b61 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -1098,16 +1098,13 @@ def _compute_loss_liger(self, model, inputs, return_outputs):
         weight = lm_head.weight
         bias = lm_head.bias

-        if is_peft_model(model):
-            raise NotImplementedError("Liger DPO loss is not implemented for PEFT models.")
-        else:
-            with torch.no_grad(), disable_gradient_checkpointing(self.model, self.args.gradient_checkpointing_kwargs):
-                ref_decoder = self.ref_model.get_decoder()
-                ref_outputs = ref_decoder(input_ids, attention_mask=attention_mask, use_cache=False)
-                ref_lm_head = self.ref_model.get_output_embeddings()
-                ref_hidden_states = ref_outputs.last_hidden_state[:, :-1].contiguous()
-                ref_weight = ref_lm_head.weight
-                ref_bias = ref_lm_head.bias
+        with torch.no_grad(), disable_gradient_checkpointing(self.model, self.args.gradient_checkpointing_kwargs):
+            ref_decoder = self.ref_model.get_decoder()
+            ref_outputs = ref_decoder(input_ids, attention_mask=attention_mask, use_cache=False)
+            ref_lm_head = self.ref_model.get_output_embeddings()
+            ref_hidden_states = ref_outputs.last_hidden_state[:, :-1].contiguous()
+            ref_weight = ref_lm_head.weight
+            ref_bias = ref_lm_head.bias

         shift_completion_mask = completion_mask[:, 1:].contiguous()
         labels = input_ids[:, 1:].clone()