From f673037e09bf408740b9250e8d64307d715fb48f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Tue, 10 Feb 2026 20:27:48 +0000
Subject: [PATCH 1/2] Set num_labels to 1 in model initialization for RewardTrainer

---
 trl/trainer/reward_trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index a0f27a173d6..167b7312ed1 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -310,6 +310,7 @@ def __init__(
             # Distributed training requires device_map=None ("auto" fails)
             if args.distributed_state.distributed_type in ["MULTI_GPU", "DEEPSPEED"]:
                 model_init_kwargs["device_map"] = None
+            model_init_kwargs["num_labels"] = 1
             model = create_model_from_path(model, AutoModelForSequenceClassification, **model_init_kwargs)
         else:
             if args.model_init_kwargs is not None:

From cc622b2e44a0be64347a8cccaf273824c0f5dacf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Wed, 11 Feb 2026 14:52:28 +0000
Subject: [PATCH 2/2] add comment

---
 trl/trainer/reward_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 167b7312ed1..3a1043cd676 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -310,7 +310,7 @@ def __init__(
             # Distributed training requires device_map=None ("auto" fails)
             if args.distributed_state.distributed_type in ["MULTI_GPU", "DEEPSPEED"]:
                 model_init_kwargs["device_map"] = None
-            model_init_kwargs["num_labels"] = 1
+            model_init_kwargs["num_labels"] = 1  # the only output of the model is the reward score
             model = create_model_from_path(model, AutoModelForSequenceClassification, **model_init_kwargs)
         else:
             if args.model_init_kwargs is not None: