From b154d6aa71698f0fb3367e4b827f3fa31026c2ea Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 12 Feb 2026 12:41:59 +0100 Subject: [PATCH 1/4] Validate reward model has 1 num_labels --- trl/trainer/reward_trainer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index 2677901726b..e40a5e6ffa1 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -354,6 +354,13 @@ def __init__( "You passed `model_init_kwargs` to the `RewardConfig`, but your model is already instantiated. " "The `model_init_kwargs` will be ignored." ) + # Validate that the model has num_labels = 1 (required for reward models) + if getattr(model.config, "num_labels", None) != 1: + raise ValueError( + f"The model has `num_labels={model.config.num_labels}`, but reward models require `num_labels=1` " + "`num_labels=1` to output a single scalar reward per sequence. Please instantiate your model with " + "`num_labels=1` or pass a model name as a string to have it configured automatically." + ) # Processing class if processing_class is None: From b1c13694bf7ef51c9ea57945f92bf3ac5e161af6 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 12 Feb 2026 18:12:46 +0100 Subject: [PATCH 2/4] Fix error message --- trl/trainer/reward_trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py index e40a5e6ffa1..9c309060e3c 100644 --- a/trl/trainer/reward_trainer.py +++ b/trl/trainer/reward_trainer.py @@ -358,8 +358,8 @@ def __init__( if getattr(model.config, "num_labels", None) != 1: raise ValueError( f"The model has `num_labels={model.config.num_labels}`, but reward models require `num_labels=1` " - "`num_labels=1` to output a single scalar reward per sequence. Please instantiate your model with " - "`num_labels=1` or pass a model name as a string to have it configured automatically." + "to output a single scalar reward per sequence. Please instantiate your model with `num_labels=1` " + "or pass a model name as a string to have it configured automatically." ) # Processing class From 3b68e19fdd1963385dc3c10a7fc9eb5d9bee7335 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 12 Feb 2026 18:23:02 +0100 Subject: [PATCH 3/4] Fix tests by setting num_labels=1 --- tests/test_reward_trainer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py index 90be24348eb..cdd81721408 100644 --- a/tests/test_reward_trainer.py +++ b/tests/test_reward_trainer.py @@ -176,6 +176,7 @@ def test_train_model(self): # Instantiate the model model = AutoModelForSequenceClassification.from_pretrained( "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + num_labels=1, # required for reward models dtype="float32", ) @@ -341,7 +342,11 @@ def test_train_moe_with_peft_config(self): def test_train_peft_model(self): # Get the base model model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" - model = AutoModelForSequenceClassification.from_pretrained(model_id, dtype="float32") + model = AutoModelForSequenceClassification.from_pretrained( + model_id, + num_labels=1, # required for reward models + dtype="float32", + ) # Get the base model parameter names base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()] From 320dbdb2d450823ae4cdea9756d1fb49094121e2 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 12 Feb 2026 18:23:47 +0100 Subject: [PATCH 4/4] Add test to check ValueError --- tests/test_reward_trainer.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py index cdd81721408..c1bcd97b713 100644 --- a/tests/test_reward_trainer.py +++ b/tests/test_reward_trainer.py @@ -107,6 +107,18 @@ def test_collate_with_margin(self): class TestRewardTrainer(TrlTestCase): + def test_raises_error_when_model_num_labels_not_one(self): + """Test that RewardTrainer raises ValueError when model doesn't have num_labels=1.""" + model = AutoModelForSequenceClassification.from_pretrained( + "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", + dtype="float32", + # num_labels=2, # Defaults to 2 num_labels for causal models + ) + + training_args = RewardConfig(output_dir=self.tmp_dir, report_to="none") + with pytest.raises(ValueError, match=r"reward models require `num_labels=1`"): + RewardTrainer(model=model, args=training_args) + @pytest.mark.parametrize( "model_id", [