From 03499b26edfabe3fbc1c2fac48812c9502a0314b Mon Sep 17 00:00:00 2001
From: karanb192
Date: Wed, 18 Mar 2026 13:57:41 +0530
Subject: [PATCH] [Bugfix] Decode prompt text from token IDs upstream in
 renderer

Move the prompt-text decode fix upstream into
BaseRenderer._process_tokens, as suggested by @qandrew in PR review.

When models such as gpt-oss-20b, or models using the Mistral tokenizer,
render chat prompts directly to token IDs, the engine prompt dict
contains only prompt_token_ids, with no text prompt field. This caused
"prompt: None" in RequestLogger debug output and left the prompt text
unavailable to any downstream consumer.

Fix this by decoding prompt_token_ids back to text in _process_tokens
when the prompt text is not already present and a tokenizer is
available. This ensures the prompt text is populated in engine inputs
for all consumers (logging, the responses API, etc.), not just the
debug logger. When skip_tokenizer_init=True, self.tokenizer is None,
so the decode is safely skipped.

Fixes #37253

Signed-off-by: karanb192
---
 vllm/renderers/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/renderers/base.py b/vllm/renderers/base.py
index b468712adb0..8816d1cbe65 100644
--- a/vllm/renderers/base.py
+++ b/vllm/renderers/base.py
@@ -648,6 +648,8 @@ def _process_tokens(
 
         if prompt_text := prompt.get("prompt"):
             inputs["prompt"] = prompt_text
+        elif self.tokenizer is not None:
+            inputs["prompt"] = self.tokenizer.decode(prompt_token_ids)
 
         if cache_salt := prompt.get("cache_salt"):
             inputs["cache_salt"] = cache_salt
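--
Reviewer note (not part of the patch): a minimal standalone sketch of
the fallback this hunk adds, under simplified assumptions. The
DummyTokenizer class and process_tokens function below are illustrative
stand-ins, not vLLM's actual renderer or tokenizer APIs; only the
branching logic mirrors the patched code.

class DummyTokenizer:
    """Toy tokenizer: maps token IDs back to words (assumption)."""

    _vocab = {1: "Hello", 2: "world"}

    def decode(self, token_ids):
        return " ".join(self._vocab.get(t, "<unk>") for t in token_ids)


def process_tokens(prompt: dict, tokenizer) -> dict:
    """Mirrors the patched branch of BaseRenderer._process_tokens."""
    prompt_token_ids = prompt["prompt_token_ids"]
    inputs = {"prompt_token_ids": prompt_token_ids}

    if prompt_text := prompt.get("prompt"):
        inputs["prompt"] = prompt_text
    elif tokenizer is not None:
        # Fallback added by this patch: recover the prompt text from
        # token IDs so downstream consumers never see a None prompt.
        inputs["prompt"] = tokenizer.decode(prompt_token_ids)

    return inputs


# Token-ID-only prompt, as produced when chat rendering emits IDs
# directly (the gpt-oss-20b / Mistral tokenizer case):
print(process_tokens({"prompt_token_ids": [1, 2]}, DummyTokenizer()))
# -> {'prompt_token_ids': [1, 2], 'prompt': 'Hello world'}

# With skip_tokenizer_init=True the tokenizer is None, so the decode
# is skipped and the prompt field is simply absent:
print(process_tokens({"prompt_token_ids": [1, 2]}, None))
# -> {'prompt_token_ids': [1, 2]}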