diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py
index 0ff67373a853..4a4c32cc6e68 100644
--- a/vllm/entrypoints/openai/responses/protocol.py
+++ b/vllm/entrypoints/openai/responses/protocol.py
@@ -171,6 +171,7 @@ class ResponsesRequest(OpenAIBaseModel):
     top_k: int | None = None
     truncation: Literal["auto", "disabled"] | None = "disabled"
     user: str | None = None
+    skip_special_tokens: bool = True

     # --8<-- [start:responses-extra-params]
     request_id: str = Field(
@@ -279,6 +280,7 @@ def to_sampling_params(
             structured_outputs=structured_outputs,
             logit_bias=self.logit_bias,
             skip_clone=True,  # Created fresh per request, safe to skip clone
+            skip_special_tokens=self.skip_special_tokens,
         )

     def is_include_output_logprobs(self) -> bool: