vllm-project · pst2154 · Jun 3, 2026
@@ -19,6 +19,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.parser.parser_manager import ParserManager
 from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
 from vllm.tool_parsers.qwen3coder_tool_parser import (
@@ -270,6 +271,63 @@ def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized):
     assert extracted_tool_calls.content == model_output
 
 
+def test_required_tool_choice_uses_qwen3_xml_after_reasoning(
+    qwen3_tokenizer, sample_tools
+):
+    """Regression: qwen3 required tool choice must parse XML after </think>.
+
+    The generic required-tool parser expects JSON. Qwen3-Coder emits XML tool
+    calls, so required/named tool choice must route through this parser's XML
+    extraction path after reasoning extraction.
+    """
+    model_output = (
+        "The user asked for weather. I should call get_current_weather.</think>\n"
+        "<tool_call>\n"
+        "<function=get_current_weather>\n"
+        "<parameter=city>\n"
+        "Dallas\n"
+        "</parameter>\n"
+        "<parameter=state>\n"
+        "TX\n"
+        "</parameter>\n"
+        "<parameter=unit>\n"
+        "fahrenheit\n"
+        "</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(
+        model=MODEL,
+        messages=[],
+        tools=_as_chat_completion_tools(sample_tools),
+        tool_choice="required",  # the code path under regression
+    )
+
+    parser_cls = ParserManager.get_parser(
+        tool_parser_name="qwen3_coder",
+        reasoning_parser_name="nemotron_v3",
+        enable_auto_tools=True,
+        model_name=MODEL,
+    )
+    assert parser_cls is not None
+    parser = parser_cls(qwen3_tokenizer, sample_tools)
+
+    reasoning, content, tool_calls = parser.parse(
+        model_output, request, enable_auto_tools=True
+    )
+
+    assert reasoning == "The user asked for weather. I should call get_current_weather."
+    assert content is None  # the XML block must not be left over as content
+    assert tool_calls is not None
+    assert len(tool_calls) == 1  # exactly the one <tool_call> in model_output
+    assert tool_calls[0].name == "get_current_weather"
+    assert json.loads(tool_calls[0].arguments) == {
+        "city": "Dallas",
+        "state": "TX",
+        "unit": "fahrenheit",
+    }
+
+
 @pytest.mark.parametrize(
     ids=[
         "single_tool",

@@ -1244,6 +1244,7 @@ async def chat_completion_full_generator(
             is_finish_reason_tool_calls = auto_tools_called or (
                 request.tool_choice
                 and request.tool_choice == "required"
+                and message.tool_calls
                 and output.finish_reason == "stop"
             )
 

@@ -18,7 +18,6 @@
     FunctionCall,
     ToolCall,
 )
-from vllm.envs import VLLM_ENFORCE_STRICT_TOOL_CALLING
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
@@ -39,7 +38,7 @@
 
 
 class Qwen3CoderToolParser(ToolParser):
-    supports_required_and_named: bool = not VLLM_ENFORCE_STRICT_TOOL_CALLING
+    supports_required_and_named: bool = False
 
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)