From e58f7715a523e82e6ad8adb5c79ce968cdf756fb Mon Sep 17 00:00:00 2001 From: Ace Eldeib Date: Sun, 31 May 2026 13:00:00 -0400 Subject: [PATCH] [Bugfix] Clear conflicting structured outputs in strict tool calling ToolParser.adjust_request's strict structural-tag path (added in #40894, gated by VLLM_ENFORCE_STRICT_TOOL_CALLING) installs structural_tag on a pre-existing StructuredOutputsParams via in-place attribute assignment and returns without nulling response_format. The in-place set bypasses StructuredOutputsParams.__post_init__, so the params keep a prior mutually-exclusive constraint (json/regex/choice/grammar/json_object, or one lowered from response_format) next to the new structural_tag. On the next re-validation this trips the one-constraint invariant, so a strict-mode request that also carries a structured-output constraint or a response_format fails with: ValueError: You can only use one kind of structured outputs constraint but multiple are specified This affects any parser that installs a structural tag -- currently DeepSeek-V4 and Qwen3-Coder via get_structural_tag. The env var is off by default, and a request with no pre-existing constraint is unaffected. Fix: rebuild structured_outputs with only the structural tag (preserving the whitespace / additional-properties knobs) and null response_format, mirroring Step 2 of the same method. This "tool constraint wins, response_format dropped" resolution already exists in Step 2 and the DeepSeek-V3.2 override (#41178), and is the intent of the open auto-path fix #39969; the in-place-vs-rebuild trade-off was discussed on #40894 and #43155 (whose Kimi path already rebuilds). Repro / regression test (CPU, no model required): pytest tests/tool_use/test_strict_tool_calling_adjust_request.py The added tests enable strict mode, give a parser a structural tag, and send tools together with a response_format or a structured_outputs.json constraint (tool_choice auto and required). On the pre-fix code adjust_request leaves two constraints, and to_sampling_params raises the ValueError above; with this change structured_outputs holds only the structural tag, response_format is None, and the user's whitespace knobs are preserved. The conflict tests fail without this patch and pass with it; the no-pre-existing-constraint case passes either way. Equivalently over HTTP: with strict mode on, a tool_choice="auto" request that also sets response_format returns HTTP 400 (the error above) before this change and a normal tool call after; a required-tool request is unaffected because that path already rebuilds. Signed-off-by: Ace Eldeib --- ...test_strict_tool_calling_adjust_request.py | 147 ++++++++++++++++++ vllm/tool_parsers/abstract_tool_parser.py | 18 ++- 2 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 tests/tool_use/test_strict_tool_calling_adjust_request.py diff --git a/tests/tool_use/test_strict_tool_calling_adjust_request.py b/tests/tool_use/test_strict_tool_calling_adjust_request.py new file mode 100644 index 000000000000..95e3afa684ff --- /dev/null +++ b/tests/tool_use/test_strict_tool_calling_adjust_request.py @@ -0,0 +1,147 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Regression tests for structured-output handling in the strict tool-calling +path of :meth:`ToolParser.adjust_request`. + +When ``VLLM_ENFORCE_STRICT_TOOL_CALLING`` is set, ``adjust_request`` installs a +model structural tag for ``auto``/``required``/named tool choice. If the request +already carries a structured-output constraint (``json``/``regex``/...) or a +``response_format``, the original code set ``.structural_tag`` IN PLACE and +returned without nulling ``response_format`` -- leaving two mutually-exclusive +constraints on the same :class:`StructuredOutputsParams`. Because the in-place +assignment bypasses ``__post_init__``, the conflict is created silently and then +rejected on re-validation (e.g. in ``to_sampling_params``) with +``"You can only use one kind of structured outputs constraint but multiple are +specified"`` (HTTP 400). + +``adjust_request`` must instead rebuild ``structured_outputs`` to hold only the +structural tag (preserving the whitespace / additional-properties knobs) and +null ``response_format`` -- mirroring what Step 2 of the same method already does +for the JSON-schema path. +""" + +from __future__ import annotations + +import pytest + +from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from vllm.sampling_params import StructuredOutputsParams +from vllm.tool_parsers.abstract_tool_parser import ToolParser + +_STRICT_FLAG = "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING" +_EXCLUSIVE = ("json", "regex", "choice", "grammar", "json_object", "structural_tag") + + +class _StubStructuralTag: + """Stand-in for the structural tag returned by a model's + ``get_structural_tag`` (only ``model_dump`` is used by ``adjust_request``).""" + + def model_dump(self) -> dict: + return {"type": "structural_tag", "format": {"type": "any_text"}} + + +class _StructuralTagParser(ToolParser): + """A parser that produces a structural tag, like deepseek_v4 / qwen3coder.""" + + def get_structural_tag(self, request): # type: ignore[override] + return _StubStructuralTag() + + +def _parser() -> ToolParser: + parser = _StructuralTagParser.__new__(_StructuralTagParser) + parser.model_tokenizer = None + return parser + + +def _tool() -> dict: + return { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather for a city", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + + +def _request(*, tool_choice: str) -> ChatCompletionRequest: + return ChatCompletionRequest( + model="strict-test", + messages=[{"role": "user", "content": "What is the weather in Hanoi?"}], + tools=[_tool()], + tool_choice=tool_choice, + ) + + +def _active_constraints(so: StructuredOutputsParams) -> list[str]: + return [f for f in _EXCLUSIVE if getattr(so, f) is not None] + + +@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +def test_strict_structural_tag_replaces_conflicting_constraint( + monkeypatch, tool_choice +) -> None: + """A pre-existing ``json`` constraint must be replaced by the structural + tag (not left coexisting), and user whitespace knobs must be preserved.""" + monkeypatch.setattr(_STRICT_FLAG, True) + + request = _request(tool_choice=tool_choice) + request.structured_outputs = StructuredOutputsParams( + json={"type": "object", "properties": {"a": {"type": "string"}}}, + disable_any_whitespace=True, + ) + + result = _parser().adjust_request(request) + so = result.structured_outputs + + assert so.structural_tag is not None + assert so.json is None, "the conflicting json constraint must be dropped" + assert _active_constraints(so) == ["structural_tag"], ( + "exactly one structured-output constraint must remain" + ) + # The #40894 review's intent (preserve user-set sub-fields) is still honored. + assert so.disable_any_whitespace is True + + +@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +def test_strict_structural_tag_nulls_response_format(monkeypatch, tool_choice) -> None: + """``response_format`` must be nulled so a later ``to_sampling_params`` does + not re-derive a JSON schema next to the structural tag (the 400).""" + monkeypatch.setattr(_STRICT_FLAG, True) + + request = _request(tool_choice=tool_choice) + request.response_format = { + "type": "json_schema", + "json_schema": { + "name": "answer", + "schema": { + "type": "object", + "properties": {"answer": {"type": "string"}}, + "required": ["answer"], + }, + }, + } + + result = _parser().adjust_request(request) + so = result.structured_outputs + + assert result.response_format is None + assert so.structural_tag is not None + assert _active_constraints(so) == ["structural_tag"] + + +def test_strict_structural_tag_no_preexisting_constraint(monkeypatch) -> None: + """Sanity: with no pre-existing constraint, the structural tag is installed + and nothing else changes (the common path is unaffected).""" + monkeypatch.setattr(_STRICT_FLAG, True) + + request = _request(tool_choice="auto") + result = _parser().adjust_request(request) + so = result.structured_outputs + + assert _active_constraints(so) == ["structural_tag"] + assert result.response_format is None diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index c3438082a72d..d873e7d87f61 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -112,9 +112,23 @@ def adjust_request( structural_tag=json.dumps(structure_tag.model_dump()), ) else: - request.structured_outputs.structural_tag = json.dumps( - structure_tag.model_dump() + # Rebuild instead of mutating .structural_tag in place: + # the mutually-exclusive constraints (json/regex/choice/ + # grammar/json_object) must be dropped, else constraint + # precedence keeps e.g. .json and the structural tag is + # silently ignored. Preserve the whitespace knobs. + prev = request.structured_outputs + request.structured_outputs = StructuredOutputsParams( + structural_tag=json.dumps(structure_tag.model_dump()), + disable_any_whitespace=prev.disable_any_whitespace, + disable_additional_properties=( + prev.disable_additional_properties + ), + whitespace_pattern=prev.whitespace_pattern, ) + # Null response_format so a later to_sampling_params() does + # not re-introduce a JSON schema (mutual-exclusivity). + request.response_format = None return request # Step 2: set structured output params when tool constraints are