diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index cec531ca07f7..815cd5515e67 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -19,6 +19,7 @@
     FunctionCall,
     ToolCall,
 )
+from vllm.parser.parser_manager import ParserManager
 from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
 from vllm.tool_parsers.qwen3coder_tool_parser import (
@@ -270,6 +271,63 @@ def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized):
     assert extracted_tool_calls.content == model_output
 
 
+def test_required_tool_choice_uses_qwen3_xml_after_reasoning(
+    qwen3_tokenizer, sample_tools
+):
+    """Regression: qwen3 required tool choice must parse XML after </think>.
+
+    The generic required-tool parser expects JSON. Qwen3-Coder emits XML tool
+    calls, so required/named tool choice must route through this parser's XML
+    extraction path after reasoning extraction.
+    """
+    model_output = (
+        'The user asked for weather. I should call get_current_weather.</think>\n'
+        "<tool_call>\n"
+        "<function=get_current_weather>\n"
+        "<parameter=city>\n"
+        "Dallas\n"
+        "</parameter>\n"
+        "<parameter=state>\n"
+        "TX\n"
+        "</parameter>\n"
+        "<parameter=unit>\n"
+        "fahrenheit\n"
+        "</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(
+        model=MODEL,
+        messages=[],
+        tools=_as_chat_completion_tools(sample_tools),
+        tool_choice="required",
+    )
+
+    parser_cls = ParserManager.get_parser(
+        tool_parser_name="qwen3_coder",
+        reasoning_parser_name="nemotron_v3",
+        enable_auto_tools=True,
+        model_name=MODEL,
+    )
+    assert parser_cls is not None
+    parser = parser_cls(qwen3_tokenizer, sample_tools)
+
+    reasoning, content, tool_calls = parser.parse(
+        model_output, request, enable_auto_tools=True
+    )
+
+    assert reasoning == "The user asked for weather. I should call get_current_weather."
+    assert content is None
+    assert tool_calls is not None
+    assert len(tool_calls) == 1
+    assert tool_calls[0].name == "get_current_weather"
+    assert json.loads(tool_calls[0].arguments) == {
+        "city": "Dallas",
+        "state": "TX",
+        "unit": "fahrenheit",
+    }
+
+
 @pytest.mark.parametrize(
     ids=[
         "single_tool",
diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index a378fb79d3bc..91cdd18965b2 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1244,6 +1244,7 @@ async def chat_completion_full_generator(
             is_finish_reason_tool_calls = auto_tools_called or (
                 request.tool_choice
                 and request.tool_choice == "required"
+                and message.tool_calls
                 and output.finish_reason == "stop"
             )
 
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 7457590c5ac0..8c1319133398 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -18,7 +18,6 @@
     FunctionCall,
     ToolCall,
 )
-from vllm.envs import VLLM_ENFORCE_STRICT_TOOL_CALLING
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
@@ -39,7 +38,7 @@
 
 
 class Qwen3CoderToolParser(ToolParser):
-    supports_required_and_named: bool = not VLLM_ENFORCE_STRICT_TOOL_CALLING
+    supports_required_and_named: bool = False
 
     def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
         super().__init__(tokenizer, tools)