From 68b0dc03e09bc0384ccd656ee3ce3759e74771bf Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 16 Dec 2025 13:03:16 +0100
Subject: [PATCH 1/2] Align use of vllm_max_model_length

---
 trl/trainer/rloo_config.py  | 10 ++++++++++
 trl/trainer/rloo_trainer.py |  6 +-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py
index 921afa697a1..61042e013b1 100644
--- a/trl/trainer/rloo_config.py
+++ b/trl/trainer/rloo_config.py
@@ -144,6 +144,9 @@ class RLOOConfig(TrainingArguments):
             Control the GPU memory utilization for vLLM. This setting only applies when `vllm_mode` is set to
             `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
             launching the vLLM server via the `--vllm_gpu_memory_utilization` flag.
+        vllm_max_model_length (`int`, *optional*, defaults to `None`):
+            Context window for vLLM. Set it to at least the maximum prompt length in the dataset plus
+            `max_completion_length`; if omitted, it is inferred from the model config.
         vllm_tensor_parallel_size (`int`, *optional*, defaults to `1`):
             Control the tensor parallel size for vLLM. This setting only applies when `vllm_mode` is set to
             `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
@@ -458,6 +461,13 @@ class RLOOConfig(TrainingArguments):
                 "launching the vLLM server via the `--vllm_gpu_memory_utilization` flag."
             },
         )
+    vllm_max_model_length: int | None = field(
+        default=None,
+        metadata={
+            "help": "Context window for vLLM. Set it to at least the maximum prompt length in the dataset plus "
+            "`max_completion_length`; if omitted, it is inferred from the model config."
+        },
+    )
     vllm_tensor_parallel_size: int = field(
         default=1,
         metadata={
diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
index bf4e2a97648..fedb8d6081a 100644
--- a/trl/trainer/rloo_trainer.py
+++ b/trl/trainer/rloo_trainer.py
@@ -508,10 +508,6 @@ def __init__(
                 # Ensure distributed rendezvous variables are set without colliding across concurrent runs
                 ensure_master_addr_port()
 
-                if self.max_prompt_length is not None and self.max_completion_length is not None:
-                    max_model_len = self.max_prompt_length + self.max_completion_length
-                else:
-                    max_model_len = None
                 vllm_quantization = None
                 if is_bitsandbytes_available():
                     for _, module in model.named_modules():
@@ -527,7 +523,7 @@ def __init__(
                     max_num_seqs=self.args.per_device_train_batch_size
                     * self.vllm_tensor_parallel_size
                     * self.args.steps_per_generation,
-                    max_model_len=max_model_len,
+                    max_model_len=self.args.vllm_max_model_length,
                     distributed_executor_backend="external_launcher",
                     # Feed identical seed for tp groups to ensure sampling results are the same across workers
                     seed=self.accelerator.process_index // self.vllm_tensor_parallel_size,

From 00d631dae95c1c4a5b4220422b447c972c71d5c6 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 16 Dec 2025 19:53:27 +0100
Subject: [PATCH 2/2] Remove redundant type hint

---
 trl/trainer/grpo_config.py | 2 +-
 trl/trainer/rloo_config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
index 40af62a57f6..115937af2d2 100644
--- a/trl/trainer/grpo_config.py
+++ b/trl/trainer/grpo_config.py
@@ -147,7 +147,7 @@ class GRPOConfig(TrainingArguments):
             Control the GPU memory utilization for vLLM. This setting only applies when `vllm_mode` is set to
             `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
             launching the vLLM server via the `--vllm_gpu_memory_utilization` flag.
-        vllm_max_model_length (`int`, *optional*, defaults to `None`):
+        vllm_max_model_length (`int`, *optional*):
             Context window for vLLM. Set it to at least the maximum prompt length in the dataset plus
             `max_completion_length`; if omitted, it is inferred from the model config.
         vllm_tensor_parallel_size (`int`, *optional*, defaults to `1`):
diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py
index 61042e013b1..fd81338ce2a 100644
--- a/trl/trainer/rloo_config.py
+++ b/trl/trainer/rloo_config.py
@@ -144,7 +144,7 @@ class RLOOConfig(TrainingArguments):
             Control the GPU memory utilization for vLLM. This setting only applies when `vllm_mode` is set to
             `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
             launching the vLLM server via the `--vllm_gpu_memory_utilization` flag.
-        vllm_max_model_length (`int`, *optional*, defaults to `None`):
+        vllm_max_model_length (`int`, *optional*):
            Context window for vLLM. Set it to at least the maximum prompt length in the dataset plus
             `max_completion_length`; if omitted, it is inferred from the model config.
         vllm_tensor_parallel_size (`int`, *optional*, defaults to `1`):
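
For context, a minimal sketch (not part of the patch) of how the new option would be set on the training config. It relies on the surrounding TRL API that the patch only references indirectly (fields such as `use_vllm`, `vllm_mode`, and `max_prompt_length`); the token budgets and `output_dir` value are illustrative placeholders.

from trl import RLOOConfig

# Suppose the longest prompt in the dataset is ~512 tokens and completions are
# capped at 256 tokens. Per the new docstring, the vLLM context window should
# be at least the maximum prompt length plus `max_completion_length`.
config = RLOOConfig(
    output_dir="rloo-vllm-demo",      # placeholder path
    use_vllm=True,                    # assumed field from the wider TRL config
    vllm_mode="colocate",             # mode in which this setting applies
    max_prompt_length=512,
    max_completion_length=256,
    vllm_max_model_length=512 + 256,  # new option introduced by this patch
)

Note that with this change the colocate path no longer derives `max_model_len` from `max_prompt_length + max_completion_length` automatically; it passes `vllm_max_model_length` straight through to the `LLM` constructor, and leaving it as `None` lets vLLM infer the context window from the model config.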