From 2143f434e7b5d7820a8c761585c0fa712f376cdb Mon Sep 17 00:00:00 2001
From: Daniel Han
Date: Tue, 3 Feb 2026 10:36:04 +0000
Subject: [PATCH] Fix num_train_epochs=None causing TypeError in GRPOConfig

When users pass `num_train_epochs=None` to GRPOConfig (relying on max_steps
to control training duration), Trainer.__init__ fails with:

    TypeError: '>' not supported between instances of 'NoneType' and 'int'

This happens because transformers.Trainer does `args.num_train_epochs > 0`
in its __init__ which fails when the value is None.

This fix converts None to 3.0 (the default) before Trainer initialization.
The actual training duration is still controlled by max_steps since it
takes precedence when both are set.

Example that now works:
```python
config = GRPOConfig(
    num_train_epochs=None, # Previously caused TypeError
    max_steps=500, # This controls actual duration
    ...
)
```
---
 unsloth/models/rl.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py
index 6fa07bdd18..647c7e5f08 100644
--- a/unsloth/models/rl.py
+++ b/unsloth/models/rl.py
@@ -891,6 +891,15 @@ def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
         )
         extra_args += learning_rate_check
 
+    # Fix num_train_epochs = None causing TypeError in Trainer.__init__
+    # Trainer does `args.num_train_epochs > 0` which fails when None
+    if "num_train_epochs" in call_args:
+        num_train_epochs_check = (
+            "if num_train_epochs is None:\n"
+            " num_train_epochs = 3.0 # Default to 3 epochs if None, max_steps will override\n"
+        )
+        extra_args += num_train_epochs_check
+
     # Check if max_seq_length is NOT defined (max_length is now default)
     if "max_seq_length" not in call_args and "max_length" in call_args:
         max_seq_length_pre = """max_seq_length : Optional[int] = field(