From 5935d78a79bf24fd965a37ac36d5c4df62ee5389 Mon Sep 17 00:00:00 2001 From: Falcon Dai Date: Tue, 17 Mar 2026 22:02:47 -0700 Subject: [PATCH] Fix GRPO trainer attribute chain: in vLLM colocate mode, the engine must be accessed as `self.vllm_generation.llm...`, not `self.llm...` --- trl/trainer/grpo_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py index bafd348b33e..5dd7571acd0 100644 --- a/trl/trainer/grpo_trainer.py +++ b/trl/trainer/grpo_trainer.py @@ -1424,7 +1424,7 @@ async def _run_async_tools(async_coros): **self.chat_template_kwargs, ) if self.use_vllm and self.vllm_mode == "colocate": - max_model_len = self.llm.llm_engine.model_config.max_model_len + max_model_len = self.vllm_generation.llm.llm_engine.model_config.max_model_len elif not self.use_vllm: max_model_len = self.model.config.max_position_embeddings else: