From e58f7715a523e82e6ad8adb5c79ce968cdf756fb Mon Sep 17 00:00:00 2001
From: Ace Eldeib <aeldeib@coreweave.com>
Date: Sun, 31 May 2026 13:00:00 -0400
Subject: [PATCH] [Bugfix] Clear conflicting structured outputs in strict tool
 calling

ToolParser.adjust_request's strict structural-tag path (added in #40894, gated by
VLLM_ENFORCE_STRICT_TOOL_CALLING) installs structural_tag on a pre-existing
StructuredOutputsParams via in-place attribute assignment and returns without
nulling response_format. The in-place set bypasses
StructuredOutputsParams.__post_init__, so the params keep a prior
mutually-exclusive constraint (json/regex/choice/grammar/json_object, or one
lowered from response_format) next to the new structural_tag. On the next
re-validation this trips the one-constraint invariant, so a strict-mode request
that also carries a structured-output constraint or a response_format fails with:

    ValueError: You can only use one kind of structured outputs constraint
    but multiple are specified

This affects any parser that installs a structural tag -- currently DeepSeek-V4
and Qwen3-Coder via get_structural_tag. The env var is off by default, and a
request with no pre-existing constraint is unaffected.

Fix: rebuild structured_outputs with only the structural tag (preserving the
whitespace / additional-properties knobs) and null response_format, mirroring
Step 2 of the same method. This "tool constraint wins, response_format dropped"
resolution already exists in Step 2 and the DeepSeek-V3.2 override (#41178), and
is the intent of the open auto-path fix #39969; the in-place-vs-rebuild trade-off
was discussed on #40894 and #43155 (whose Kimi path already rebuilds).

Repro / regression test (CPU, no model required):

    pytest tests/tool_use/test_strict_tool_calling_adjust_request.py

The added tests enable strict mode, give a parser a structural tag, and send
tools together with a response_format or a structured_outputs.json constraint
(tool_choice auto and required). On the pre-fix code adjust_request leaves two
constraints, and to_sampling_params raises the ValueError above; with this change
structured_outputs holds only the structural tag, response_format is None, and
the user's whitespace knobs are preserved. The conflict tests fail without this
patch and pass with it; the no-pre-existing-constraint case passes either way.

Equivalently over HTTP: with strict mode on, a tool_choice="auto" request that
also sets response_format returns HTTP 400 (the error above) before this change
and a normal tool call after; a required-tool request is unaffected because that
path already rebuilds.

Signed-off-by: Ace Eldeib <aeldeib@coreweave.com>
---
 ...test_strict_tool_calling_adjust_request.py | 147 ++++++++++++++++++
 vllm/tool_parsers/abstract_tool_parser.py     |  18 ++-
 2 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 tests/tool_use/test_strict_tool_calling_adjust_request.py

diff --git a/tests/tool_use/test_strict_tool_calling_adjust_request.py b/tests/tool_use/test_strict_tool_calling_adjust_request.py
new file mode 100644
index 000000000000..95e3afa684ff
--- /dev/null
+++ b/tests/tool_use/test_strict_tool_calling_adjust_request.py
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Regression tests for structured-output handling in the strict tool-calling
+path of :meth:`ToolParser.adjust_request`.
+
+When ``VLLM_ENFORCE_STRICT_TOOL_CALLING`` is set, ``adjust_request`` installs a
+model structural tag for ``auto``/``required``/named tool choice. If the request
+already carries a structured-output constraint (``json``/``regex``/...) or a
+``response_format``, the original code set ``.structural_tag`` IN PLACE and
+returned without nulling ``response_format`` -- leaving two mutually-exclusive
+constraints on the same :class:`StructuredOutputsParams`. Because the in-place
+assignment bypasses ``__post_init__``, the conflict is created silently and then
+rejected on re-validation (e.g. in ``to_sampling_params``) with
+``"You can only use one kind of structured outputs constraint but multiple are
+specified"`` (HTTP 400).
+
+``adjust_request`` must instead rebuild ``structured_outputs`` to hold only the
+structural tag (preserving the whitespace / additional-properties knobs) and
+null ``response_format`` -- mirroring what Step 2 of the same method already does
+for the JSON-schema path.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.sampling_params import StructuredOutputsParams
+from vllm.tool_parsers.abstract_tool_parser import ToolParser
+
+_STRICT_FLAG = "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING"
+_EXCLUSIVE = ("json", "regex", "choice", "grammar", "json_object", "structural_tag")
+
+
+class _StubStructuralTag:
+    """Stand-in for the structural tag returned by a model's
+    ``get_structural_tag`` (only ``model_dump`` is used by ``adjust_request``)."""
+
+    def model_dump(self) -> dict:
+        return {"type": "structural_tag", "format": {"type": "any_text"}}
+
+
+class _StructuralTagParser(ToolParser):
+    """A parser that produces a structural tag, like deepseek_v4 / qwen3coder."""
+
+    def get_structural_tag(self, request):  # type: ignore[override]
+        return _StubStructuralTag()
+
+
+def _parser() -> ToolParser:
+    parser = _StructuralTagParser.__new__(_StructuralTagParser)
+    parser.model_tokenizer = None
+    return parser
+
+
+def _tool() -> dict:
+    return {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get current weather for a city",
+            "parameters": {
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+                "required": ["city"],
+            },
+        },
+    }
+
+
+def _request(*, tool_choice: str) -> ChatCompletionRequest:
+    return ChatCompletionRequest(
+        model="strict-test",
+        messages=[{"role": "user", "content": "What is the weather in Hanoi?"}],
+        tools=[_tool()],
+        tool_choice=tool_choice,
+    )
+
+
+def _active_constraints(so: StructuredOutputsParams) -> list[str]:
+    return [f for f in _EXCLUSIVE if getattr(so, f) is not None]
+
+
+@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+def test_strict_structural_tag_replaces_conflicting_constraint(
+    monkeypatch, tool_choice
+) -> None:
+    """A pre-existing ``json`` constraint must be replaced by the structural
+    tag (not left coexisting), and user whitespace knobs must be preserved."""
+    monkeypatch.setattr(_STRICT_FLAG, True)
+
+    request = _request(tool_choice=tool_choice)
+    request.structured_outputs = StructuredOutputsParams(
+        json={"type": "object", "properties": {"a": {"type": "string"}}},
+        disable_any_whitespace=True,
+    )
+
+    result = _parser().adjust_request(request)
+    so = result.structured_outputs
+
+    assert so.structural_tag is not None
+    assert so.json is None, "the conflicting json constraint must be dropped"
+    assert _active_constraints(so) == ["structural_tag"], (
+        "exactly one structured-output constraint must remain"
+    )
+    # The #40894 review's intent (preserve user-set sub-fields) is still honored.
+    assert so.disable_any_whitespace is True
+
+
+@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+def test_strict_structural_tag_nulls_response_format(monkeypatch, tool_choice) -> None:
+    """``response_format`` must be nulled so a later ``to_sampling_params`` does
+    not re-derive a JSON schema next to the structural tag (the 400)."""
+    monkeypatch.setattr(_STRICT_FLAG, True)
+
+    request = _request(tool_choice=tool_choice)
+    request.response_format = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "answer",
+            "schema": {
+                "type": "object",
+                "properties": {"answer": {"type": "string"}},
+                "required": ["answer"],
+            },
+        },
+    }
+
+    result = _parser().adjust_request(request)
+    so = result.structured_outputs
+
+    assert result.response_format is None
+    assert so.structural_tag is not None
+    assert _active_constraints(so) == ["structural_tag"]
+
+
+def test_strict_structural_tag_no_preexisting_constraint(monkeypatch) -> None:
+    """Sanity: with no pre-existing constraint, the structural tag is installed
+    and nothing else changes (the common path is unaffected)."""
+    monkeypatch.setattr(_STRICT_FLAG, True)
+
+    request = _request(tool_choice="auto")
+    result = _parser().adjust_request(request)
+    so = result.structured_outputs
+
+    assert _active_constraints(so) == ["structural_tag"]
+    assert result.response_format is None
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index c3438082a72d..d873e7d87f61 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -112,9 +112,23 @@ def adjust_request(
                             structural_tag=json.dumps(structure_tag.model_dump()),
                         )
                     else:
-                        request.structured_outputs.structural_tag = json.dumps(
-                            structure_tag.model_dump()
+                        # Rebuild instead of mutating .structural_tag in place:
+                        # the mutually-exclusive constraints (json/regex/choice/
+                        # grammar/json_object) must be dropped, else constraint
+                        # precedence keeps e.g. .json and the structural tag is
+                        # silently ignored. Preserve the whitespace knobs.
+                        prev = request.structured_outputs
+                        request.structured_outputs = StructuredOutputsParams(
+                            structural_tag=json.dumps(structure_tag.model_dump()),
+                            disable_any_whitespace=prev.disable_any_whitespace,
+                            disable_additional_properties=(
+                                prev.disable_additional_properties
+                            ),
+                            whitespace_pattern=prev.whitespace_pattern,
                         )
+                    # Null response_format so a later to_sampling_params() does
+                    # not re-introduce a JSON schema (mutual-exclusivity).
+                    request.response_format = None
                     return request
 
         # Step 2: set structured output params when tool constraints are