diff --git a/tests/tool_parsers/test_qwen3_xml_coder_shared.py b/tests/tool_parsers/test_qwen3_xml_coder_shared.py
new file mode 100644
index 000000000000..6c56c6d47063
--- /dev/null
+++ b/tests/tool_parsers/test_qwen3_xml_coder_shared.py
@@ -0,0 +1,2230 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Shared tests for the Qwen3 XML and Coder tool parsers.
+
+These tests cover behaviour that BOTH parsers must implement identically.
+Each test runs twice — once against ``Qwen3XMLToolParser`` and once against
+``Qwen3CoderToolParser`` — via the ``parser_cls`` fixture.  Tests that
+target streaming-mode-specific quirks of one parser only stay in their
+parser-specific file (``test_qwen3xml_tool_parser.py`` or
+``test_qwen3coder_tool_parser.py``).
+"""
+
+import json
+from collections.abc import Generator
+
+import pytest
+from openai.types.responses.function_tool import FunctionTool
+from xgrammar import StructuralTag
+
+from tests.tool_parsers.utils import run_tool_extraction_streaming
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionNamedFunction,
+    ChatCompletionNamedToolChoiceParam,
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
+from vllm.entrypoints.openai.engine.protocol import (
+    DeltaMessage,
+    FunctionCall,
+    ToolCall,
+)
+from vllm.tokenizers import TokenizerLike, get_tokenizer
+from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
+from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser
+from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
+
+MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"
+
+
+@pytest.fixture(scope="module")
+def qwen3_tokenizer():  # module scope: one tokenizer shared by all tests here
+    return get_tokenizer(tokenizer_name=MODEL)
+
+
+@pytest.fixture(
+    params=[Qwen3XMLToolParser, Qwen3CoderToolParser],
+    ids=["xml", "coder"],
+)
+def parser_cls(request):  # each dependent test runs once per parser class
+    return request.param
+
+
+WEATHER_PARAMS = {  # parameter schema of the "get_current_weather" tool
+    "type": "object",
+    "properties": {
+        "city": {"type": "string", "description": "The city name"},
+        "state": {"type": "string", "description": "The state code"},
+        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
+    },
+    "required": ["city", "state"],  # "unit" is optional
+}
+
+AREA_PARAMS = {  # parameter schema of the "calculate_area" tool
+    "type": "object",
+    "properties": {
+        "shape": {"type": "string"},
+        "dimensions": {"type": "object"},
+        "precision": {"type": "integer"},
+    },
+}
+
+
+@pytest.fixture(params=["chat_completion", "responses_api"])
+def sample_tools(request):  # the same two tools, built with either tool type
+    if request.param == "chat_completion":
+        return [
+            ChatCompletionToolsParam(
+                type="function",
+                function={
+                    "name": "get_current_weather",
+                    "description": "Get the current weather",
+                    "parameters": WEATHER_PARAMS,
+                },
+            ),
+            ChatCompletionToolsParam(
+                type="function",
+                function={
+                    "name": "calculate_area",
+                    "description": "Calculate area of a shape",
+                    "parameters": AREA_PARAMS,
+                },
+            ),
+        ]
+    else:  # "responses_api"
+        return [
+            FunctionTool(
+                type="function",
+                name="get_current_weather",
+                description="Get the current weather",
+                parameters=WEATHER_PARAMS,
+            ),
+            FunctionTool(
+                type="function",
+                name="calculate_area",
+                description="Calculate area of a shape",
+                parameters=AREA_PARAMS,
+            ),
+        ]
+
+
+@pytest.fixture
+def parser(parser_cls, qwen3_tokenizer, sample_tools):  # fresh instance per test
+    return parser_cls(qwen3_tokenizer, tools=sample_tools)
+
+
+def _as_chat_completion_tools(
+    tools: list[ChatCompletionToolsParam | FunctionTool],
+) -> list[ChatCompletionToolsParam]:
+    normalized: list[ChatCompletionToolsParam] = []
+    for tool in tools:
+        if isinstance(tool, ChatCompletionToolsParam):
+            normalized.append(tool)
+        else:
+            normalized.append(
+                ChatCompletionToolsParam(
+                    type="function",
+                    function={
+                        "name": tool.name,
+                        "description": tool.description,
+                        "parameters": tool.parameters,
+                    },
+                )
+            )
+    return normalized
+
+
+def assert_tool_calls(
+    actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall]
+):
+    assert len(actual_tool_calls) == len(expected_tool_calls)
+    for actual_tool_call, expected_tool_call in zip(
+        actual_tool_calls, expected_tool_calls
+    ):
+        assert actual_tool_call.type == "function"
+        assert actual_tool_call.function.name == expected_tool_call.function.name
+        assert json.loads(actual_tool_call.function.arguments) == json.loads(
+            expected_tool_call.function.arguments
+        )
+
+
+def stream_delta_message_generator(  # replay model_output one token at a time
+    parser,
+    tokenizer: TokenizerLike,
+    model_output: str,
+    request: ChatCompletionRequest | None = None,
+) -> Generator[DeltaMessage, None, None]:
+    all_token_ids = tokenizer.encode(model_output, add_special_tokens=False)
+
+    previous_text = ""
+    previous_tokens = None  # detokenizer state carried between iterations
+    prefix_offset = 0
+    read_offset = 0
+    for i, delta_token in enumerate(all_token_ids):  # one parser call per token
+        delta_token_ids = [delta_token]
+        previous_token_ids = all_token_ids[:i]
+        current_token_ids = all_token_ids[: i + 1]
+
+        (new_tokens, delta_text, new_prefix_offset, new_read_offset) = (
+            detokenize_incrementally(
+                tokenizer=tokenizer,
+                all_input_ids=current_token_ids,
+                prev_tokens=previous_tokens,
+                prefix_offset=prefix_offset,
+                read_offset=read_offset,
+                skip_special_tokens=False,
+                spaces_between_special_tokens=True,
+            )
+        )
+
+        current_text = previous_text + delta_text
+
+        delta_message = parser.extract_tool_calls_streaming(
+            previous_text,
+            current_text,
+            delta_text,
+            previous_token_ids,
+            current_token_ids,
+            delta_token_ids,
+            request=request,
+        )
+        if delta_message:  # falsy results (e.g. None) are not yielded
+            yield delta_message
+
+        previous_text = current_text
+        previous_tokens = (
+            previous_tokens + new_tokens if previous_tokens else new_tokens
+        )
+        prefix_offset = new_prefix_offset
+        read_offset = new_read_offset
+
+
+# ---------------------------------------------------------------------------
+# Basic extraction
+# ---------------------------------------------------------------------------
+
+
+def test_extract_tool_calls_no_tools(parser):
+    model_output = "This is a test response without any tool calls"
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=None)
+    assert not extracted_tool_calls.tools_called
+    assert extracted_tool_calls.tool_calls == []
+    assert extracted_tool_calls.content == model_output
+
+
+_EXTRACT_CASES = [  # (model_output, expected_tool_calls, expected_content)
+    (  # single_tool
+        """<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>""",
+        [
+            ToolCall(
+                function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps(
+                        {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
+                    ),
+                )
+            )
+        ],
+        None,
+    ),
+    (  # single_tool_with_content
+        """Sure! Let me check the weather for you.<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>""",
+        [
+            ToolCall(
+                function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps(
+                        {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
+                    ),
+                )
+            )
+        ],
+        "Sure! Let me check the weather for you.",
+    ),
+    (  # single_tool_multiline_param
+        """<tool_call>
+<function=calculate_area>
+<parameter=shape>
+rectangle
+</parameter>
+<parameter=dimensions>
+{"width": 10,
+ "height": 20}
+</parameter>
+<parameter=precision>
+2
+</parameter>
+</function>
+</tool_call>""",
+        [
+            ToolCall(
+                function=FunctionCall(
+                    name="calculate_area",
+                    arguments=json.dumps(
+                        {
+                            "shape": "rectangle",
+                            "dimensions": {"width": 10, "height": 20},
+                            "precision": 2,
+                        }
+                    ),
+                )
+            )
+        ],
+        None,
+    ),
+    (  # parallel_tools
+        """<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>
+<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Orlando
+</parameter>
+<parameter=state>
+FL
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>""",
+        [
+            ToolCall(
+                function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps(
+                        {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
+                    ),
+                )
+            ),
+            ToolCall(
+                function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps(
+                        {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}
+                    ),
+                )
+            ),
+        ],
+        "\n",  # the newline between the two <tool_call> blocks
+    ),
+    (  # tool_with_typed_params
+        """Let me calculate that area for you.<tool_call>
+<function=calculate_area>
+<parameter=shape>
+circle
+</parameter>
+<parameter=dimensions>
+{"radius": 15.5}
+</parameter>
+<parameter=precision>
+3
+</parameter>
+</function>
+</tool_call>""",
+        [
+            ToolCall(
+                function=FunctionCall(
+                    name="calculate_area",
+                    arguments=json.dumps(
+                        {
+                            "shape": "circle",
+                            "dimensions": {"radius": 15.5},
+                            "precision": 3,
+                        }
+                    ),
+                )
+            )
+        ],
+        "Let me calculate that area for you.",
+    ),
+]
+
+_EXTRACT_IDS = [  # pytest ids, matched to _EXTRACT_CASES by position
+    "single_tool",
+    "single_tool_with_content",
+    "single_tool_multiline_param",
+    "parallel_tools",
+    "tool_with_typed_params",
+]
+
+
+@pytest.mark.parametrize(
+    ids=_EXTRACT_IDS,
+    argnames=["model_output", "expected_tool_calls", "expected_content"],
+    argvalues=_EXTRACT_CASES,
+)
+def test_extract_tool_calls(
+    parser, model_output, expected_tool_calls, expected_content
+):
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
+    assert extracted_tool_calls.tools_called
+    assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
+    # Both ``None`` and ``""`` are acceptable when the expected content is
+    # only whitespace — the two parsers differ on whether they preserve the
+    # newline that separates parallel tool-call blocks.
+    actual_content = extracted_tool_calls.content
+    if expected_content and expected_content.strip():
+        assert actual_content == expected_content
+    else:
+        assert (actual_content or "").strip() == (expected_content or "").strip()
+
+
+def test_extract_tool_calls_fallback_no_tags(parser):
+    """A bare <function=...> block without <tool_call> tags is still extracted."""
+    model_output = """<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+</function>"""
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
+    assert extracted_tool_calls.tools_called
+    assert len(extracted_tool_calls.tool_calls) == 1
+    assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"
+
+
+# ---------------------------------------------------------------------------
+# Type conversion
+# ---------------------------------------------------------------------------
+
+
+def test_extract_tool_calls_type_conversion(qwen3_tokenizer, parser_cls):
+    """Raw parameter text is converted to the type its schema entry declares."""
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "test_types",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "int_param": {"type": "integer"},
+                        "float_param": {"type": "float"},
+                        "bool_param": {"type": "boolean"},
+                        "str_param": {"type": "string"},
+                        "obj_param": {"type": "object"},
+                    },
+                },
+            },
+        )
+    ]
+
+    model_output = """<tool_call>
+<function=test_types>
+<parameter=int_param>
+42
+</parameter>
+<parameter=float_param>
+3.14
+</parameter>
+<parameter=bool_param>
+true
+</parameter>
+<parameter=str_param>
+hello world
+</parameter>
+<parameter=obj_param>
+{"key": "value"}
+</parameter>
+</function>
+</tool_call>"""
+
+    parser_inst = parser_cls(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    extracted_tool_calls = parser_inst.extract_tool_calls(model_output, request=request)
+
+    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
+    assert args["int_param"] == 42
+    assert args["float_param"] == 3.14
+    assert args["bool_param"] is True  # "true" must become a real JSON boolean
+    assert args["str_param"] == "hello world"
+    assert args["obj_param"] == {"key": "value"}
+
+
+def test_extract_tool_calls_complex_type_with_single_quote(qwen3_tokenizer, parser_cls):
+    """An object parameter written as a Python repr (single quotes) becomes a dict."""
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "test_types",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "int_param": {"type": "integer"},
+                        "float_param": {"type": "float"},
+                        "bool_param": {"type": "boolean"},
+                        "str_param": {"type": "string"},
+                        "obj_param": {"type": "object"},
+                    },
+                },
+            },
+        )
+    ]
+
+    model_output = """<tool_call>
+<function=test_types>
+<parameter=obj_param>
+{'key': 'value'}
+</parameter>
+</function>
+</tool_call>"""
+
+    parser_inst = parser_cls(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    extracted_tool_calls = parser_inst.extract_tool_calls(model_output, request=request)
+
+    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
+    assert args["obj_param"] == {"key": "value"}
+
+
+# ---------------------------------------------------------------------------
+# Streaming extraction
+# ---------------------------------------------------------------------------
+
+
+_STREAMING_CASES = [  # one plain-text case, then every non-streaming case
+    ("This is a test without tools", [], "This is a test without tools"),
+] + _EXTRACT_CASES
+
+_STREAMING_IDS = ["no_tools"] + _EXTRACT_IDS  # same order as _STREAMING_CASES
+
+
+@pytest.mark.parametrize(
+    ids=_STREAMING_IDS,
+    argnames=["model_output", "expected_tool_calls", "expected_content"],
+    argvalues=_STREAMING_CASES,
+)
+def test_extract_tool_calls_streaming(
+    parser,
+    qwen3_tokenizer,
+    model_output,
+    expected_tool_calls,
+    expected_content,
+):
+    """Test incremental streaming behavior including typed parameters."""
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+
+    other_content = ""
+    tool_states = {}
+
+    for delta_message in stream_delta_message_generator(
+        parser, qwen3_tokenizer, model_output, request
+    ):
+        assert not delta_message.role
+
+        if delta_message.content:
+            other_content += delta_message.content
+
+        if delta_message.tool_calls:
+            for tool_call in delta_message.tool_calls:
+                idx = tool_call.index
+
+                if idx not in tool_states:
+                    tool_states[idx] = {
+                        "id": None,
+                        "name": None,
+                        "arguments": "",
+                        "type": None,
+                    }
+
+                if tool_call.id:
+                    tool_states[idx]["id"] = tool_call.id
+
+                if tool_call.type:
+                    assert tool_call.type == "function"
+                    tool_states[idx]["type"] = tool_call.type
+
+                if tool_call.function:
+                    if tool_call.function.name:
+                        assert tool_states[idx]["name"] is None
+                        tool_states[idx]["name"] = tool_call.function.name
+
+                    if tool_call.function.arguments is not None:
+                        tool_states[idx]["arguments"] += tool_call.function.arguments
+
+    # Be tolerant about whitespace-only deltas between parallel tool calls;
+    # see ``test_extract_tool_calls`` for the same reasoning.
+    if expected_content and expected_content.strip():
+        assert other_content == expected_content
+    else:
+        assert other_content.strip() == (expected_content or "").strip()
+    assert len(tool_states) == len(expected_tool_calls)
+    assert len(parser.prev_tool_call_arr) == len(expected_tool_calls)
+
+    for idx, expected_tool in enumerate(expected_tool_calls):
+        state = tool_states[idx]
+        assert state["id"] is not None
+        assert state["type"] == "function"
+        assert state["name"] == expected_tool.function.name
+
+        arguments_str = state["arguments"]
+        assert arguments_str is not None
+        actual_args = json.loads(arguments_str)
+        expected_args = json.loads(expected_tool.function.arguments)
+        assert actual_args == expected_args
+
+
+def test_extract_tool_calls_missing_closing_parameter_tag(parser):
+    """A <parameter> left unclosed before the next <parameter> still parses."""
+    model_output = """Let me check the weather for you:
+<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>"""
+
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
+
+    assert extracted_tool_calls.tools_called
+    assert len(extracted_tool_calls.tool_calls) == 1
+    assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"
+    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
+    assert "city" in args
+    assert args["city"] == "Dallas"  # the parameter whose closing tag is missing
+    assert args["state"] == "TX"
+    assert args["unit"] == "fahrenheit"
+    assert "Let me check the weather for you:" in extracted_tool_calls.content
+
+
+def test_extract_tool_calls_streaming_missing_closing_tag(parser, qwen3_tokenizer):
+    """Streaming still yields all three arguments when one </parameter> is missing."""
+    model_output = """Let me check the weather for you:
+<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>"""
+
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    other_content = ""
+    tool_states = {}  # tool-call index -> fields accumulated from the deltas
+
+    for delta_message in stream_delta_message_generator(
+        parser, qwen3_tokenizer, model_output, request
+    ):
+        if delta_message.content:
+            other_content += delta_message.content
+
+        if delta_message.tool_calls:
+            for tool_call in delta_message.tool_calls:
+                idx = tool_call.index
+                if idx not in tool_states:
+                    tool_states[idx] = {
+                        "id": None,
+                        "name": None,
+                        "arguments": "",
+                        "type": None,
+                    }
+                if tool_call.id:
+                    tool_states[idx]["id"] = tool_call.id
+                if tool_call.type:
+                    assert tool_call.type == "function"
+                    tool_states[idx]["type"] = tool_call.type
+                if tool_call.function:
+                    if tool_call.function.name:
+                        tool_states[idx]["name"] = tool_call.function.name
+                    if tool_call.function.arguments is not None:
+                        tool_states[idx]["arguments"] += tool_call.function.arguments
+
+    assert "Let me check the weather for you:" in other_content
+    assert len(tool_states) == 1
+    assert len(parser.prev_tool_call_arr) == 1
+
+    state = tool_states[0]
+    assert state["id"] is not None
+    assert state["type"] == "function"
+    assert state["name"] == "get_current_weather"
+    args = json.loads(state["arguments"])
+    assert args["city"] == "Dallas"  # the parameter whose closing tag is missing
+    assert args["state"] == "TX"
+    assert args["unit"] == "fahrenheit"
+
+
+def test_extract_tool_calls_streaming_incremental(parser, qwen3_tokenizer):
+    """Test that streaming is truly incremental."""
+    model_output = """I'll check the weather.<tool_call>
+<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+</function>
+</tool_call>"""
+
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    chunks = []
+    for delta_message in stream_delta_message_generator(
+        parser, qwen3_tokenizer, model_output, request
+    ):
+        chunks.append(delta_message)
+
+    assert len(chunks) > 3
+    assert chunks[0].content is not None
+    assert chunks[0].tool_calls is None or chunks[0].tool_calls == []
+
+    header_found = False
+    for chunk in chunks:
+        if chunk.tool_calls and chunk.tool_calls[0].id:
+            header_found = True
+            assert chunk.tool_calls[0].function.name == "get_current_weather"
+            assert chunk.tool_calls[0].type == "function"
+            # XML emits an empty arguments string with the header; Coder
+            # emits the opening "{" with the header.  Both are valid.
+            assert chunk.tool_calls[0].function.arguments in ("", "{")
+            break
+    assert header_found
+
+    arg_chunks = []
+    for chunk in chunks:
+        if chunk.tool_calls and chunk.tool_calls[0].function.arguments:
+            arg_chunks.append(chunk.tool_calls[0].function.arguments)
+
+    assert len(arg_chunks) > 1
+    full_args = "".join(arg_chunks)
+    parsed_args = json.loads(full_args)
+    assert parsed_args["city"] == "Dallas"
+    assert parsed_args["state"] == "TX"
+
+
+# ---------------------------------------------------------------------------
+# Robustness regressions
+# ---------------------------------------------------------------------------
+
+
+def test_malformed_xml_no_gt_delimiter(parser):
+    """Regression: malformed XML without '>' must not crash (PR #36774)."""
+    model_output = (
+        "<tool_call>\n"
+        "<function=get_current_weather\n"  # malformed: the closing ">" is missing
+        "<parameter=city>Dallas</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    result = parser.extract_tool_calls(model_output, request=request)
+    assert result is not None  # reaching this line means nothing was raised
+    assert isinstance(result.tool_calls, list)
+    assert all(tc is not None for tc in result.tool_calls)
+
+
+def test_none_tool_calls_filtered(parser):
+    """Regression: None tool calls filtered from output (PR #36774)."""
+    model_output = (
+        "<tool_call>\n"
+        "<function=bad_func_no_gt\n"  # malformed: the closing ">" is missing
+        "</function>\n"
+        "</tool_call>\n"
+        "<tool_call>\n"
+        "<function=get_current_weather>\n"
+        "<parameter=city>Dallas</parameter>\n"
+        "<parameter=state>TX</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    result = parser.extract_tool_calls(model_output, request=request)
+    assert all(tc is not None for tc in result.tool_calls)
+    assert result.tools_called
+    assert len(result.tool_calls) == 1  # only the well-formed call is returned
+    assert result.tool_calls[0].function.name == "get_current_weather"
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["city"] == "Dallas"
+    assert args["state"] == "TX"
+
+
+def test_streaming_multi_param_single_chunk(parser):
+    """Regression: speculative decode delivering multiple params at once
+    (PR #35615)."""
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+
+    deltas = [  # hand-written chunks rather than per-token tokenizer output
+        "<tool_call>",
+        "\n<function=get_current_weather>",
+        "\n",
+        # This single delta delivers all three parameters at once
+        "<parameter=city>\nDallas\n</parameter>"
+        "\n<parameter=state>\nTX\n</parameter>"
+        "\n<parameter=unit>\nfahrenheit\n</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        deltas,
+        request,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert args["city"] == "Dallas"
+    assert args["state"] == "TX"
+    assert args["unit"] == "fahrenheit"
+
+
+def test_no_double_serialization_string_args(qwen3_tokenizer, parser_cls):
+    """Regression: string arguments must not be double-serialized
+    (PR #35615)."""
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "greet",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "message": {"type": "string"},
+                    },
+                },
+            },
+        )
+    ]
+
+    model_output = (
+        "<tool_call>\n"
+        "<function=greet>\n"
+        "<parameter=message>hello world</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+
+    parser_inst = parser_cls(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser_inst.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    raw_arguments = result.tool_calls[0].function.arguments
+    args = json.loads(raw_arguments)
+    assert args["message"] == "hello world"
+    assert '\\"hello world\\"' not in raw_arguments  # no escaped inner quotes
+
+
+def test_extract_tool_calls_streaming_speculative_decode_loss(parser):
+    """If the parser hasn't started JSON yet and the delta contains the
+    parameters AND the end of the tool call, the parser should not just
+    return '{' and lose the parameters.
+    """
+    request = ChatCompletionRequest(model="test", messages=[])
+
+    text1 = "<tool_call>\n<function=test>\n"
+    parser.extract_tool_calls_streaming("", text1, text1, [], [1], [1], request)
+
+    delta_str = "<parameter=city>\nParis\n</parameter>\n</function>\n</tool_call>"
+    text2 = text1 + delta_str
+    delta2 = parser.extract_tool_calls_streaming(
+        text1, text2, delta_str, [1], [1, 2], [2], request
+    )
+
+    assert delta2 is not None
+    assert delta2.tool_calls is not None
+    assert len(delta2.tool_calls) == 1
+    args = delta2.tool_calls[0].function.arguments
+    assert "Paris" in args, f"Arguments lost! Got: {args}"
+
+
+# ---------------------------------------------------------------------------
+# Value conversion: string "null" must NOT become JSON null
+# ---------------------------------------------------------------------------
+
+
+def test_string_null_value_preserved(qwen3_tokenizer, parser_cls):
+    """A string-typed parameter with literal value "null" must be preserved
+    as the string "null" (not converted to Python None / JSON null).
+
+    Root cause: _convert_param_value must check the schema's ``string``
+    type BEFORE the "null" shortcut — otherwise any param whose raw text
+    is "null" becomes None regardless of declared type.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "search",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"query": {"type": "string"}},
+                },
+            },
+        )
+    ]
+    parser = parser_cls(qwen3_tokenizer, tools=tools)
+    model_output = (
+        "<tool_call>\n"
+        "<function=search>\n"
+        "<parameter=query>null</parameter>\n"
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["query"] == "null", (
+        f"String parameter 'null' was converted incorrectly. Got: {args.get('query')!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# anyOf nullable schema — type detection
+# ---------------------------------------------------------------------------
+
+
+def test_anyof_string_null_keeps_value_as_string(qwen3_tokenizer, parser_cls):
+    """anyOf [{type: string}, {type: null}] with a numeric-looking value
+    must keep the value as a string (the schema declares ``string``).
+
+    Root cause: anyOf was previously treated as ``object`` (for the Coder
+    parser) or fell back to ``string`` only when no object/array option
+    was present (for the XML parser).  The correct behaviour is to pick
+    the FIRST non-null type from the anyOf list.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "set_code",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "code": {
+                            "anyOf": [{"type": "string"}, {"type": "null"}],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+    parser = parser_cls(qwen3_tokenizer, tools=tools)
+    model_output = (
+        "<tool_call>\n"
+        "<function=set_code>\n"
+        "<parameter=code>42</parameter>\n"  # digits only, but declared string
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["code"] == "42", (  # must stay the str "42", not the int 42
+        f"anyOf string|null param '42' was parsed as "
+        f"{type(args['code']).__name__}: {args['code']!r}"
+    )
+
+
+def test_anyof_integer_null_parses_as_int(qwen3_tokenizer, parser_cls):
+    """anyOf [{type: integer}, {type: null}] must parse a numeric value as
+    an int.  Previously the XML parser ignored anyOf for non-container
+    types and silently treated the param as ``string``.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "set_count",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "count": {
+                            "anyOf": [{"type": "integer"}, {"type": "null"}],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+    parser = parser_cls(qwen3_tokenizer, tools=tools)
+    model_output = (
+        "<tool_call>\n"
+        "<function=set_count>\n"
+        "<parameter=count>42</parameter>\n"  # raw text "42" must become int 42
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert isinstance(args["count"], int) and args["count"] == 42, (  # 42.0 fails
+        f"anyOf integer|null: expected int 42, got {args['count']!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# anyOf object schema — value not double-encoded
+# ---------------------------------------------------------------------------
+
+_ANYOF_OBJECT_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "update_record",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "data": {  # nullable object: anyOf[object, null]
+                        "anyOf": [{"type": "object"}, {"type": "null"}],
+                    },
+                },
+            },
+        },
+    )
+]
+
+_ANYOF_OBJECT_OUTPUT = (
+    "<tool_call>\n"
+    "<function=update_record>\n"
+    '<parameter=data>{"key": "value", "count": 42}</parameter>\n'  # raw JSON text
+    "</function>\n"
+    "</tool_call>"
+)
+
+
+def test_anyof_object_param_not_double_encoded_nonstreaming(
+    qwen3_tokenizer, parser_cls
+):
+    parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_OBJECT_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS)
+    result = parser.extract_tool_calls(_ANYOF_OBJECT_OUTPUT, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert isinstance(args["data"], dict), (  # a str here means it was re-encoded
+        f"anyOf object param was double-encoded: data={args['data']!r}"
+    )
+    assert args["data"] == {"key": "value", "count": 42}
+
+
+def test_anyof_object_param_not_double_encoded_streaming(qwen3_tokenizer, parser_cls):
+    parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_OBJECT_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ANYOF_OBJECT_TOOLS)
+    deltas = [  # streaming twin of the non-streaming test; value in one delta
+        "<tool_call>",
+        "\n<function=update_record>",
+        '\n<parameter=data>{"key": "value", "count": 42}</parameter>',
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert args["data"] == {"key": "value", "count": 42}, (  # dict AND exact content
+        f"anyOf object param was double-encoded in streaming: data={args['data']!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# anyOf / nullable (Pydantic v2 Optional[T]) type resolution.
+# Both parsers extract the first non-null type from the anyOf union.
+# ---------------------------------------------------------------------------
+
+_ANYOF_TYPES_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "test_anyof",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "anyof_int": {
+                        "anyOf": [{"type": "integer"}, {"type": "null"}],
+                        "default": 5,
+                    },
+                    "anyof_str": {
+                        "anyOf": [{"type": "string"}, {"type": "null"}],
+                    },
+                    "anyof_array": {
+                        "anyOf": [
+                            {"type": "array", "items": {"type": "string"}},
+                            {"type": "null"},
+                        ],
+                    },
+                    "anyof_obj": {
+                        "anyOf": [{"type": "object"}, {"type": "null"}],
+                    },
+                    "type_as_array": {  # list-form "type" instead of anyOf
+                        "type": ["integer", "null"],
+                    },
+                    "multi_non_null": {  # string precedes integer in the union
+                        "anyOf": [
+                            {"type": "string"},
+                            {"type": "integer"},
+                            {"type": "null"},
+                        ],
+                    },
+                },
+            },
+        },
+    )
+]
+
+_ANYOF_TYPES_OUTPUT = (  # one parameter per schema property, in schema order
+    "<tool_call>\n"
+    "<function=test_anyof>\n"
+    "<parameter=anyof_int>5</parameter>\n"
+    "<parameter=anyof_str>hello</parameter>\n"
+    '<parameter=anyof_array>["a", "b", "c"]</parameter>\n'
+    '<parameter=anyof_obj>{"key": "value"}</parameter>\n'
+    "<parameter=type_as_array>42</parameter>\n"
+    "<parameter=multi_non_null>some text</parameter>\n"
+    "</function>\n"
+    "</tool_call>"
+)
+
+
+def test_extract_tool_calls_anyof_type_conversion(qwen3_tokenizer, parser_cls):
+    """anyOf nullable schemas (Pydantic v2 ``Optional[T]``) must resolve to
+    the first non-null type and apply the matching conversion: int(),
+    list/dict via json, string passthrough.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_TYPES_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ANYOF_TYPES_TOOLS)
+    result = parser.extract_tool_calls(_ANYOF_TYPES_OUTPUT, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["anyof_int"] == 5  # raw text "5" converted to a number
+    assert isinstance(args["anyof_int"], int)
+    assert args["anyof_str"] == "hello"  # string passes through unchanged
+    assert isinstance(args["anyof_str"], str)
+    assert args["anyof_array"] == ["a", "b", "c"]  # parsed from JSON text
+    assert isinstance(args["anyof_array"], list)
+    assert args["anyof_obj"] == {"key": "value"}  # parsed from JSON text
+    assert isinstance(args["anyof_obj"], dict)
+    # JSON-Schema list-form type {"type": ["integer", "null"]} → int
+    assert args["type_as_array"] == 42
+    assert isinstance(args["type_as_array"], int)
+    # anyOf[string, integer, null] → first non-null type is string
+    assert args["multi_non_null"] == "some text"
+    assert isinstance(args["multi_non_null"], str)
+
+
+_ANYOF_STREAMING_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "search_web",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "anyOf": [{"type": "string"}, {"type": "null"}],
+                    },
+                    "count": {
+                        "anyOf": [{"type": "integer"}, {"type": "null"}],
+                        "default": 5,  # the output below sends 10, not this
+                    },
+                    "verbose": {
+                        "anyOf": [{"type": "boolean"}, {"type": "null"}],
+                    },
+                },
+            },
+        },
+    )
+]
+
+_ANYOF_STREAMING_OUTPUT = (
+    "<tool_call>\n"
+    "<function=search_web>\n"
+    "<parameter=query>vllm tool parser</parameter>\n"
+    "<parameter=count>10</parameter>\n"
+    "<parameter=verbose>true</parameter>\n"  # JSON-style lowercase boolean
+    "</function>\n"
+    "</tool_call>"
+)
+
+
+def test_extract_tool_calls_anyof_type_conversion_streaming(
+    qwen3_tokenizer, parser_cls
+):
+    """Streaming e2e for anyOf nullable schemas: string/int/bool types must
+    be resolved through the incremental pipeline for both parsers.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_ANYOF_STREAMING_TOOLS)
+    request = ChatCompletionRequest(
+        model=MODEL, messages=[], tools=_ANYOF_STREAMING_TOOLS
+    )
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        _ANYOF_STREAMING_OUTPUT,  # full str; presumably split by the helper
+        request,
+        assert_one_tool_per_delta=False,
+    )
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function.name == "search_web"
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert args["query"] == "vllm tool parser"
+    assert isinstance(args["query"], str)
+    assert args["count"] == 10  # the emitted value, not the schema default 5
+    assert isinstance(args["count"], int)
+    assert args["verbose"] is True  # text "true" became a real boolean
+    assert isinstance(args["verbose"], bool)
+
+
+# ---------------------------------------------------------------------------
+# Object param double-encoded as JSON-encoded Python repr
+# ---------------------------------------------------------------------------
+
+_DOUBLE_ENCODED_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "process",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string"},
+                    "data": {"type": "object"},  # object, yet sent as a quoted repr
+                },
+            },
+        },
+    )
+]
+
+_DOUBLE_ENCODED_OUTPUT = (
+    "<tool_call>\n"
+    "<function=process>\n"
+    "<parameter=name>\nhello\n</parameter>\n"
+    "<parameter=data>\n\"{'key': 'value', 'n': 1}\"\n</parameter>\n"  # quoted repr
+    "</function>\n"
+    "</tool_call>\n"
+)
+
+
+def test_double_encoded_object_param_nonstreaming(qwen3_tokenizer, parser_cls):
+    """A model trained with a buggy template (json.dumps(str(dict))) emits
+    object args as a JSON-encoded Python repr string.  The parser must
+    double-decode it back to a dict.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_DOUBLE_ENCODED_TOOLS)
+    request = ChatCompletionRequest(
+        model=MODEL, messages=[], tools=_DOUBLE_ENCODED_TOOLS
+    )
+    result = parser.extract_tool_calls(_DOUBLE_ENCODED_OUTPUT, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["name"] == "hello"  # newlines around the value are stripped
+    assert isinstance(args["data"], dict), (
+        f"Expected dict, got {type(args['data'])}: {args['data']!r}"
+    )
+    assert args["data"] == {"key": "value", "n": 1}  # quoted repr -> real dict
+
+
+def test_double_encoded_object_param_streaming(qwen3_tokenizer, parser_cls):
+    parser = parser_cls(qwen3_tokenizer, tools=_DOUBLE_ENCODED_TOOLS)
+    request = ChatCompletionRequest(
+        model=MODEL, messages=[], tools=_DOUBLE_ENCODED_TOOLS
+    )
+    reconstructor = run_tool_extraction_streaming(
+        parser, _DOUBLE_ENCODED_OUTPUT, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert args["name"] == "hello"  # newlines around the value are stripped
+    assert isinstance(args["data"], dict), (
+        f"Expected dict, got {type(args['data'])}: {args['data']!r}"
+    )
+    assert args["data"] == {"key": "value", "n": 1}  # quoted repr -> real dict
+
+
+# ---------------------------------------------------------------------------
+# Parameter value containing XML structural tags as literal text.
+# Expected: the value is preserved intact, no spurious extra parameters
+# are created from the embedded tags.
+# ---------------------------------------------------------------------------
+
+_WRITE_FILE_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "write_file",
+            "parameters": {
+                "type": "object",
+                "properties": {  # the only two real parameters of write_file
+                    "path": {"type": "string"},
+                    "content": {"type": "string"},
+                },
+            },
+        },
+    )
+]
+
+_XML_TAGS_IN_CONTENT = (  # file body: Python code quoting parser tags
+    "char_deltas = [\n"
+    '    "<tool_call>\\n",\n'
+    '    "<parameter=query>\\n",\n'
+    '    "\\n</parameter>\\n",\n'
+    '    "</function>\\n",\n'
+    "]\n"
+)
+
+_WRITE_FILE_XML_TAGS_OUTPUT = (
+    "<tool_call>\n"
+    "<function=write_file>\n"
+    "<parameter=path>\ntest.py\n</parameter>\n"
+    f"<parameter=content>\n{_XML_TAGS_IN_CONTENT}</parameter>\n"
+    "</function>\n"
+    "</tool_call>\n"
+)
+
+
+def test_content_with_xml_structural_tags_nonstreaming(qwen3_tokenizer, parser_cls):
+    """Non-streaming: a string param whose value embeds <tool_call>,
+    <parameter=...>, </parameter>, </function> as literal text must be
+    extracted intact, with no spurious extra params being created from
+    the embedded tags.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    result = parser.extract_tool_calls(_WRITE_FILE_XML_TAGS_OUTPUT, request=request)
+
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    assert result.tool_calls[0].function.name == "write_file"
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"], (  # names and their order
+        f"Spurious params from embedded tags: {list(args.keys())}"
+    )
+    assert args["path"] == "test.py"
+    expected = _XML_TAGS_IN_CONTENT.rstrip("\n")  # final \n is tag framing
+    assert args["content"] == expected, (
+        f"content was truncated/corrupted. Got: {args.get('content')!r}"
+    )
+
+
+def test_content_with_xml_structural_tags_streaming(qwen3_tokenizer, parser_cls):
+    """Streaming variant: pre-formed chunks, full content in one delta."""
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    char_deltas = [  # whole chunks, despite the name: not single characters
+        "<tool_call>\n",
+        "<function=write_file>\n",
+        "<parameter=path>\ntest.py\n</parameter>\n",
+        f"<parameter=content>\n{_XML_TAGS_IN_CONTENT}</parameter>\n",
+        "</function>\n",
+        "</tool_call>\n",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, char_deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1
+    assert reconstructor.tool_calls[0].function.name == "write_file"
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"], (  # names and their order
+        f"Spurious params from embedded tags: {list(args.keys())}"
+    )
+    assert args["path"] == "test.py"
+    expected = _XML_TAGS_IN_CONTENT.rstrip("\n")  # final \n is tag framing
+    assert args["content"] == expected
+
+
+# ---------------------------------------------------------------------------
+# Parameter value containing </parameter> and <parameter=NAME> on their
+# OWN lines (Jinja2 templates, parser fixtures, etc.).  Schema filtering
+# must prevent the unknown name from being treated as structural.
+# ---------------------------------------------------------------------------
+
+_CONTENT_WITH_PARAM_LIKE_LINES = (
+    'TOOL_CALL_TEMPLATE = """\n'
+    "</parameter>\n"  # literal close on its own line
+    "<parameter=new_string>\n"  # new_string is not in the write_file schema
+    "#!/usr/bin/env python3\n"
+    "</parameter>\n"  # second literal close
+    '"""\n'
+)
+
+_WRITE_FILE_PARAM_LIKE_LINES_OUTPUT = (
+    "<tool_call>\n"
+    "<function=write_file>\n"
+    "<parameter=path>\ntest_template.py\n</parameter>\n"
+    f"<parameter=content>\n{_CONTENT_WITH_PARAM_LIKE_LINES}</parameter>\n"
+    "</function>\n"
+    "</tool_call>\n"
+)
+
+
+def test_content_with_param_like_lines_nonstreaming(qwen3_tokenizer, parser_cls):
+    """Non-streaming: ``</parameter>`` and ``<parameter=NAME>`` on their
+    own lines inside a string value must not terminate the parameter
+    early.  Requires schema-based filtering so that ``new_string`` (not a
+    real parameter of write_file) is treated as literal text.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    result = parser.extract_tool_calls(
+        _WRITE_FILE_PARAM_LIKE_LINES_OUTPUT, request=request
+    )
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"], (  # no "new_string" key
+        f"Spurious params: {list(args.keys())}"
+    )
+    assert args["path"] == "test_template.py"
+    expected = _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n")  # framing \n dropped
+    assert args["content"] == expected, (
+        f"content truncated/wrong: {args.get('content')!r}"
+    )
+
+
+def test_content_with_param_like_lines_streaming(qwen3_tokenizer, parser_cls):
+    """Streaming variant: each structural-looking literal line arrives in
+    its own delta — the critical case is when ``</parameter>\\n`` appears
+    alone with empty lookahead, which must NOT be treated as a real
+    structural close.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    char_deltas = [  # line-sized chunks, despite the name
+        "<tool_call>\n",
+        "<function=write_file>\n",
+        "<parameter=path>\ntest_template.py\n</parameter>\n",
+        '<parameter=content>\nTOOL_CALL_TEMPLATE = """\n',
+        "</parameter>\n",  # literal close — alone in its delta
+        "<parameter=new_string>\n",  # literal new-param line
+        "#!/usr/bin/env python3\n",
+        "</parameter>\n",  # second literal close
+        '"""\n',
+        "</parameter>\n",  # REAL close of content
+        "</function>\n",
+        "</tool_call>\n",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, char_deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"], (  # no "new_string" key
+        f"Spurious params: {list(args.keys())}"
+    )
+    assert args["path"] == "test_template.py"
+    expected = _CONTENT_WITH_PARAM_LIKE_LINES.rstrip("\n")  # framing \n dropped
+    assert args["content"] == expected
+
+
+# ---------------------------------------------------------------------------
+# Array param containing JSON true/false/null
+# ---------------------------------------------------------------------------
+
+_ARRAY_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "pick",
+            "parameters": {
+                "type": "object",
+                "properties": {"items": {"type": "array"}},  # untyped elements
+            },
+        },
+    )
+]
+
+_ARRAY_WITH_JSON_BOOL_OUTPUT = (
+    "<tool_call>\n<function=pick>\n"
+    '<parameter=items>\n["a", "b", 1, true]\n</parameter>\n'  # JSON true
+    "</function>\n</tool_call>"
+)
+
+
+def test_array_with_json_bool(qwen3_tokenizer, parser_cls):
+    """An array param containing a JSON literal (``true``/``false``/``null``)
+    must be parsed as a real Python list, not wrapped as a string.
+
+    Root cause for the XML parser: the deferred path used
+    ``ast.literal_eval`` first, which doesn't understand JSON tokens.
+    Both parsers must try ``json.loads`` before falling back to
+    ``ast.literal_eval``.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_ARRAY_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_ARRAY_TOOLS)
+    result = parser.extract_tool_calls(_ARRAY_WITH_JSON_BOOL_OUTPUT, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert isinstance(args["items"], list), (  # a str means the parse fell back
+        f"Array with JSON bool was not parsed as list: "
+        f"{type(args['items']).__name__} = {args['items']!r}"
+    )
+    assert args["items"] == ["a", "b", 1, True]  # JSON true -> Python True
+
+
+# ---------------------------------------------------------------------------
+# Speculative decoding: two complete tool calls in a single streaming delta.
+# Both parsers must emit both tool calls, not drop the second.
+# ---------------------------------------------------------------------------
+
+_WEATHER_TOOLS = [
+    ChatCompletionToolsParam(
+        type="function",
+        function={
+            "name": "get_weather",
+            "parameters": {
+                "type": "object",
+                "properties": {"city": {"type": "string"}},
+            },
+        },
+    )
+]
+
+_TWO_TOOL_CALLS_IN_ONE_CHUNK = (  # two back-to-back calls: Paris, then London
+    "<tool_call>\n<function=get_weather>\n"
+    "<parameter=city>\nParis\n</parameter>\n"
+    "</function>\n</tool_call>\n"
+    "<tool_call>\n<function=get_weather>\n"
+    "<parameter=city>\nLondon\n</parameter>\n"
+    "</function>\n</tool_call>"
+)
+
+
+def test_two_tool_calls_in_one_streaming_chunk(qwen3_tokenizer, parser_cls):
+    """Speculative decoding flushes can deliver several full
+    ``<tool_call>...</tool_call>`` blocks in a single delta. Both must be
+    emitted; dropping the second one is a regression.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS)
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        [_TWO_TOOL_CALLS_IN_ONE_CHUNK],  # a single delta carrying both calls
+        request,
+        assert_one_tool_per_delta=False,
+    )
+    assert len(reconstructor.tool_calls) == 2, (
+        f"Expected 2 tool calls in one delta, got {len(reconstructor.tool_calls)}"
+    )
+    args0 = json.loads(reconstructor.tool_calls[0].function.arguments)
+    args1 = json.loads(reconstructor.tool_calls[1].function.arguments)
+    assert args0 == {"city": "Paris"}  # calls come back in output order
+    assert args1 == {"city": "London"}
+
+
+def test_python_none_value_for_nullable_int(qwen3_tokenizer, parser_cls):
+    """A Qwen3.5-trained model emits Python ``None`` (not ``null``) for a
+    nullable non-string parameter, because the Qwen3.5 chat template
+    renders ``args_value | string`` for non-container types — turning a
+    null arg from a previous tool call into the literal "None" in the
+    prompt. The model then learns to generate the same "None" verbatim.
+
+    The parser must recognise this and convert "None" to JSON null,
+    just like it already does for the literal "null" emitted by
+    Qwen3.6-trained models.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "set_count",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "count": {
+                            "anyOf": [
+                                {"type": "integer"},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+    parser = parser_cls(qwen3_tokenizer, tools=tools)
+    model_output = (
+        "<tool_call>\n"
+        "<function=set_count>\n"
+        "<parameter=count>None</parameter>\n"  # Python spelling, not JSON null
+        "</function>\n"
+        "</tool_call>"
+    )
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser.extract_tool_calls(model_output, request=request)
+
+    assert result.tools_called
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert args["count"] is None, (
+        f"Python repr None was not converted to JSON null. Got: {args['count']!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Trailing free text after the LAST </tool_call> in the SAME delta (MTP /
+# speculative decoding). The text must be emitted as content; dropping it
+# silently is a regression.
+# ---------------------------------------------------------------------------
+
+
+def test_streaming_two_tool_calls_plus_trailing_text_one_delta(
+    qwen3_tokenizer, parser_cls
+):
+    """MTP: a single delta delivers tool 1 + tool 2 + trailing free text.
+    Both tool calls must be emitted AND the trailing text must surface as
+    content in the same delta — not be silently dropped.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS)
+    deltas = [
+        _TWO_TOOL_CALLS_IN_ONE_CHUNK + "\nAll done!",  # free text after last call
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 2, (
+        f"Expected 2 tool calls, got {len(reconstructor.tool_calls)}"
+    )
+    assert "All done!" in reconstructor.other_content, (  # substring check only
+        f"Trailing text after the second tool call was dropped. "
+        f"Got content: {reconstructor.other_content!r}"
+    )
+
+
+def test_streaming_trailing_text_with_final_close_in_same_delta(
+    qwen3_tokenizer, parser_cls
+):
+    """MTP / speculative decoding can deliver the closing ``</tool_call>``
+    together with trailing free text in a single delta.  The text after
+    the close must be emitted as content rather than being silently
+    consumed by the parser's "advance to next tool" logic.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WEATHER_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WEATHER_TOOLS)
+    deltas = [
+        # Build up the tool call up to and including </function>.
+        "<tool_call>\n<function=get_weather>\n"
+        "<parameter=city>Paris</parameter>\n</function>",
+        # Then deliver </tool_call> + trailing text in ONE delta.
+        "\n</tool_call>\nI hope this helps!",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1  # trailing text is not a call
+    assert "I hope this helps!" in reconstructor.other_content, (  # substring only
+        f"Trailing text after </tool_call> was dropped. "
+        f"Got content: {reconstructor.other_content!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Parameter value containing a literal ``<parameter=NAME>`` whose NAME IS
+# itself a real parameter of the same tool.  The schema-based filter cannot
+# rule the literal out by name, so a stronger heuristic is required (e.g.
+# the literal does not pair with a structural ``</parameter>`` followed by
+# another structural delimiter).  This is the exact pattern that breaks
+# qwen-code WriteFile when the file being written is itself a parser test
+# fixture.
+# ---------------------------------------------------------------------------
+
+_CONTENT_WITH_REAL_PARAM_NAME_LITERAL = (  # embeds a literal <parameter=path>
+    'doc = """\n<parameter=path>\nliteral/value\n</parameter>\n"""\n'
+)
+
+_REAL_PARAM_NAME_LITERAL_OUTPUT = (
+    "<tool_call>\n"
+    "<function=write_file>\n"
+    "<parameter=path>\nfixture.py\n</parameter>\n"  # the real ``path`` value
+    f"<parameter=content>\n{_CONTENT_WITH_REAL_PARAM_NAME_LITERAL}</parameter>\n"
+    "</function>\n"
+    "</tool_call>"
+)
+
+
+def test_content_with_real_param_name_literal_nonstreaming(qwen3_tokenizer, parser_cls):
+    """Non-streaming: parameter ``content`` value embeds
+    ``<parameter=path>...</parameter>`` where ``path`` IS the other real
+    parameter of the same ``write_file`` tool.  Schema name filtering alone
+    cannot disambiguate — the parser must use a stronger rule (e.g. the
+    embedded ``</parameter>`` must be followed by a structural delimiter
+    that closes the OUTER param, not the inner literal).
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    result = parser.extract_tool_calls(_REAL_PARAM_NAME_LITERAL_OUTPUT, request=request)
+
+    assert result.tools_called
+    assert len(result.tool_calls) == 1
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"], (  # names and their order
+        f"Spurious params from embedded same-name literal: {list(args.keys())}"
+    )
+    assert args["path"] == "fixture.py", (  # not the embedded "literal/value"
+        f"Outer ``path`` was overwritten by embedded literal: {args.get('path')!r}"
+    )
+    expected = _CONTENT_WITH_REAL_PARAM_NAME_LITERAL.rstrip("\n")
+    assert args["content"] == expected, (
+        f"content was truncated at the embedded <parameter=path>. "
+        f"Got: {args.get('content')!r}"
+    )
+
+
+def test_content_with_real_param_name_literal_streaming(qwen3_tokenizer, parser_cls):
+    """Streaming variant of the same case.  Each meaningful structural-
+    looking line arrives in its own delta — the parser cannot wait for the
+    full text to disambiguate.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    char_deltas = [  # line-sized chunks, despite the name
+        "<tool_call>\n",
+        "<function=write_file>\n",
+        "<parameter=path>\nfixture.py\n</parameter>\n",
+        '<parameter=content>\ndoc = """\n',
+        "<parameter=path>\n",  # literal text reusing a real parameter name
+        "literal/value\n",
+        "</parameter>\n",  # literal close
+        '"""\n',
+        "</parameter>\n",  # real close of content
+        "</function>\n",
+        "</tool_call>",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, char_deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"], (
+        f"Spurious params from embedded same-name literal: {list(args.keys())}"
+    )
+    assert args["path"] == "fixture.py"  # not the embedded "literal/value"
+    expected = _CONTENT_WITH_REAL_PARAM_NAME_LITERAL.rstrip("\n")
+    assert args["content"] == expected, (
+        f"content was truncated at the embedded <parameter=path>. "
+        f"Got: {args.get('content')!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Parameter value containing a COMPLETE nested tool_call (all six structural
+# tags: <tool_call>, <function=...>, <parameter=...>, </parameter>,
+# </function>, </tool_call>) — the qwen-code WriteFile pattern when the
+# file being written is itself a parser fixture or a chat-template
+# example. Every literal must stay inside the value; no spurious extra
+# tool calls or params should be generated.
+# ---------------------------------------------------------------------------
+
+_CONTENT_WITH_FULL_NESTED_CALL = (  # Python source embedding a whole call
+    'doc = """\n'
+    "<tool_call>\n"
+    "<function=write_file>\n"  # same function name as the outer call
+    "<parameter=path>\n"
+    "literal/value.txt\n"
+    "</parameter>\n"
+    "<parameter=content>\n"
+    "hello\n"
+    "</parameter>\n"
+    "</function>\n"
+    "</tool_call>\n"
+    '"""\n'
+)
+
+_FULL_NESTED_CALL_OUTPUT = (
+    "<tool_call>\n"
+    "<function=write_file>\n"
+    "<parameter=path>\nfixture.py\n</parameter>\n"
+    f"<parameter=content>\n{_CONTENT_WITH_FULL_NESTED_CALL}</parameter>\n"
+    "</function>\n"
+    "</tool_call>"
+)
+
+
+def test_content_with_full_nested_tool_call_nonstreaming(qwen3_tokenizer, parser_cls):
+    """Non-streaming: parameter ``content`` contains a complete literal
+    ``<tool_call>...</tool_call>`` whose function/parameter names match
+    the OUTER tool's schema.  Every literal must stay inside the value;
+    no extra tool call must be generated.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    result = parser.extract_tool_calls(_FULL_NESTED_CALL_OUTPUT, request=request)
+
+    assert result.tools_called
+    assert len(result.tool_calls) == 1, (
+        f"Expected 1 tool call (the outer one), got "
+        f"{len(result.tool_calls)} — embedded literal tool_call was "
+        f"incorrectly promoted to a real call."
+    )
+    args = json.loads(result.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"]  # names and their order
+    assert args["path"] == "fixture.py"  # not the nested "literal/value.txt"
+    expected = _CONTENT_WITH_FULL_NESTED_CALL.rstrip("\n")
+    assert args["content"] == expected, (
+        f"content truncated/corrupted: {args.get('content')!r}"
+    )
+
+
+def test_content_with_full_nested_tool_call_streaming(qwen3_tokenizer, parser_cls):
+    """Streaming variant: the literal nested ``<tool_call>...</tool_call>``
+    arrives split over many tag/line-sized deltas; the parser must not
+    start a second tool call.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    char_deltas = [
+        "<tool_call>\n",
+        "<function=write_file>\n",
+        "<parameter=path>\nfixture.py\n</parameter>\n",
+        '<parameter=content>\ndoc = """\n',  # outer content value opens
+        "<tool_call>\n",  # literal text from here on, not structure
+        "<function=write_file>\n",
+        "<parameter=path>\n",
+        "literal/value.txt\n",
+        "</parameter>\n",
+        "<parameter=content>\n",
+        "hello\n",
+        "</parameter>\n",
+        "</function>\n",
+        "</tool_call>\n",  # end of the literal nested call
+        '"""\n',
+        "</parameter>\n",  # structural close of outer content
+        "</function>\n",
+        "</tool_call>",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, char_deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1, (
+        f"Expected 1 tool call, got {len(reconstructor.tool_calls)} — "
+        f"a literal nested <tool_call> was promoted to a real call."
+    )
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+    assert list(args.keys()) == ["path", "content"]  # no spurious extra params
+    assert args["path"] == "fixture.py"  # outer value, not "literal/value.txt"
+    expected = _CONTENT_WITH_FULL_NESTED_CALL.rstrip("\n")  # final newline dropped
+    assert args["content"] == expected, (
+        f"content truncated/corrupted: {args.get('content')!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Two consecutive tool calls, where the SECOND embeds a literal nested
+# tool_call whose ``<parameter=NAME>`` uses a NAME that is NOT in the
+# OUTER tool's schema (e.g. a description of a different tool's format).
+# Reproduces the qwen-code Qwen 3.6 freeze scenario: the depth tracker
+# in ``_find_true_param_end`` filters opens by schema, so the literal
+# ``</parameter>`` that closes the unknown-NAME literal open appears
+# unmatched and matches the structural lookahead of the trailing
+# ``</function>``, truncating the OUTER content value.
+# ---------------------------------------------------------------------------
+
+_OUT_OF_SCHEMA_NESTED_CONTENT = (
+    'template = """\n'
+    "<tool_call>\n<function=foo>\n"
+    "<parameter=bar>baz</parameter>\n"  # "bar" is not a write_file parameter
+    "</function>\n</tool_call>\n"
+    '"""\n'
+)
+
+_TWO_TOOLS_OUT_OF_SCHEMA_NESTED_OUTPUT = (
+    "<tool_call>\n<function=foo>\n"  # first structural call: foo(bar="baz")
+    "<parameter=bar>baz</parameter>\n"
+    "</function>\n</tool_call>"
+    "\n\n"
+    "<tool_call>\n<function=write_file>\n"  # second structural call
+    "<parameter=path>\nfixture.py\n</parameter>\n"
+    f"<parameter=content>\n{_OUT_OF_SCHEMA_NESTED_CONTENT}</parameter>\n"
+    "</function>\n</tool_call>"
+)
+
+
+def test_two_tools_second_with_out_of_schema_nested_literal_nonstreaming(
+    qwen3_tokenizer, parser_cls
+):
+    """Two structural tool calls; the second's ``content`` value embeds a
+    literal nested ``<tool_call>`` block whose inner ``<parameter=bar>``
+    uses a NAME not in the outer tool's schema (``write_file`` only knows
+    ``path`` and ``content``).
+
+    The parser must still end ``content`` at its own outer ``</parameter>``,
+    not at the literal ``</parameter>`` of the unknown-NAME nested open.
+    """
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    result = parser.extract_tool_calls(
+        _TWO_TOOLS_OUT_OF_SCHEMA_NESTED_OUTPUT, request=request
+    )
+    assert result.tools_called
+    assert len(result.tool_calls) == 2, (
+        f"Expected 2 tool calls, got {len(result.tool_calls)}: "
+        f"{[tc.function.name for tc in result.tool_calls]}"
+    )
+    args0 = json.loads(result.tool_calls[0].function.arguments)
+    args1 = json.loads(result.tool_calls[1].function.arguments)
+    assert args0 == {"bar": "baz"}, f"first tool args wrong: {args0!r}"
+    assert result.tool_calls[1].function.name == "write_file"
+    assert list(args1.keys()) == ["path", "content"], (
+        f"Spurious params on outer tool: {list(args1.keys())}"
+    )
+    assert args1["path"] == "fixture.py"
+    expected = _OUT_OF_SCHEMA_NESTED_CONTENT.rstrip("\n")  # final newline dropped
+    assert args1["content"] == expected, (
+        f"outer content truncated at literal </parameter>: {args1.get('content')!r}"
+    )
+
+
+def test_two_tools_second_with_out_of_schema_nested_literal_streaming(
+    qwen3_tokenizer, parser_cls
+):
+    """Streaming variant: two calls, the second embeds an out-of-schema literal."""
+    parser = parser_cls(qwen3_tokenizer, tools=_WRITE_FILE_TOOLS)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=_WRITE_FILE_TOOLS)
+    char_deltas = [
+        "<tool_call>\n<function=foo>\n",  # first structural call
+        "<parameter=bar>baz</parameter>\n",
+        "</function>\n</tool_call>",
+        "\n\n",
+        "<tool_call>\n<function=write_file>\n",  # second structural call
+        "<parameter=path>\nfixture.py\n</parameter>\n",
+        '<parameter=content>\ntemplate = """\n',  # outer content opens
+        "<tool_call>\n<function=foo>\n",  # literal text, not a third call
+        "<parameter=bar>baz</parameter>\n",  # literal; "bar" is out of schema
+        "</function>\n</tool_call>\n",
+        '"""\n',
+        "</parameter>\n",  # structural close of outer content
+        "</function>\n",
+        "</tool_call>",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, char_deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 2, (
+        f"Expected 2 tool calls, got {len(reconstructor.tool_calls)}"
+    )
+    args0 = json.loads(reconstructor.tool_calls[0].function.arguments)
+    args1 = json.loads(reconstructor.tool_calls[1].function.arguments)
+    assert args0 == {"bar": "baz"}
+    assert reconstructor.tool_calls[1].function.name == "write_file"
+    assert list(args1.keys()) == ["path", "content"]  # no spurious extra params
+    assert args1["path"] == "fixture.py"
+    expected = _OUT_OF_SCHEMA_NESTED_CONTENT.rstrip("\n")  # final newline dropped
+    assert args1["content"] == expected, (
+        f"outer content truncated/corrupted: {args1.get('content')!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Phantom tool calls produced when the model writes an UNRENDERED Jinja
+# template literally in its response: ``<tool_call>\n<function={{ x }}>\n
+# <parameter={{ k }}>...``.  The function name ``{{ x }}`` contains
+# template-syntax characters and CANNOT be a real function — the parser
+# must reject these tool calls (or render them as content) rather than
+# emit them as real ones, since the client will then raise "tool not
+# found" errors and cause the agent to loop.
+# ---------------------------------------------------------------------------
+
+_JINJA_PHANTOM_OUTPUT = (
+    "<tool_call>\n<function={{ tc.name }}>\n"  # phantom: unrendered Jinja name
+    "<parameter={{ k }}>\n{{ v }}\n</parameter>\n"
+    "</function>\n</tool_call>"
+    "\n\n"
+    "<tool_call>\n<function=write_file>\n"  # real call that must be kept
+    "<parameter=path>\nout.txt\n</parameter>\n"
+    "<parameter=content>\nhello\n</parameter>\n"
+    "</function>\n</tool_call>"
+)
+
+
+def test_jinja_template_phantom_tool_call_is_rejected_nonstreaming(
+    qwen3_tokenizer, parser_cls
+):
+    """A ``<function={{ tc.name }}>`` block (unrendered Jinja) carries a
+    function name that is not a valid identifier.  It must NOT be
+    surfaced as a real tool call — the client would fail with "tool not
+    found" and the agent would loop.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "write_file",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "path": {"type": "string"},
+                        "content": {"type": "string"},
+                    },
+                },
+            },
+        )
+    ]
+    parser = parser_cls(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    result = parser.extract_tool_calls(_JINJA_PHANTOM_OUTPUT, request=request)
+    assert result.tools_called  # the real write_file call must still be found
+    names = [tc.function.name for tc in result.tool_calls]
+    assert "{{ tc.name }}" not in names, (
+        f"Phantom Jinja-template tool call surfaced as real: {names}"
+    )
+    assert names == ["write_file"], (
+        f"Expected only the real ``write_file`` tool call, got: {names}"
+    )
+
+
+# NOTE: a streaming counterpart of the above test is intentionally not
+# added.  Filtering phantoms in streaming requires a separate
+# "client-visible index" counter (the existing ``current_tool_index`` is
+# also used for internal position bookkeeping).  Until that refactor
+# lands, the streaming path may still surface phantoms and the client
+# is expected to drop unknown function names.  The non-streaming path
+# is the one consumed by the offline tools-extraction code and by the
+# ``_parse_xml_function_call`` helper invoked at function-end during
+# streaming, so production users still see the filtered result for
+# completed tool calls.
+
+
+# ---------------------------------------------------------------------------
+# Inline empty ``<tool_call>...</tool_call>`` (no ``<function=>``) before a
+# real tool call: the content text BETWEEN the inline literal and the real
+# tool call must be preserved.  Previously the content was truncated at the
+# position of the FIRST ``<tool_call>`` token regardless of whether that
+# block contained a real ``<function=>``.
+# ---------------------------------------------------------------------------
+
+
+def test_inline_empty_tool_call_preserves_content_before_real_call(
+    qwen3_tokenizer, parser_cls
+):
+    """A bare ``<tool_call>example</tool_call>`` in the model's narrative
+    text (no ``<function=>`` inside) must NOT consume the surrounding
+    content; only the real ``<tool_call>`` block that contains a valid
+    function call should anchor ``content_index``.
+
+    The XML parser's SAX-based pipeline consumes the inline empty
+    block's body as XML text (so ``example`` is dropped), but the
+    surrounding narrative ("I'll show:" and "Now real:") must still be
+    preserved — both parsers are checked.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "log",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"msg": {"type": "string"}},
+                },
+            },
+        )
+    ]
+    parser = parser_cls(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    text = (
+        "I'll show: <tool_call>example</tool_call>. Now real:\n"  # no <function=>
+        "<tool_call>\n<function=log>\n<parameter=msg>\nhi\n</parameter>\n"
+        "</function>\n</tool_call>"
+    )
+    result = parser.extract_tool_calls(text, request=request)
+    assert result.tools_called
+    assert len(result.tool_calls) == 1  # the inline literal is not a call
+    assert result.tool_calls[0].function.name == "log"
+    # Content between the inline empty tool_call and the real one MUST be
+    # preserved — dropping it loses the model's contextual narrative.
+    assert result.content is not None
+    assert "I'll show:" in result.content, (
+        f"Pre-inline narrative lost from content: {result.content!r}"
+    )
+    assert "Now real:" in result.content, (
+        f"Content between inline literal and real tool_call lost: {result.content!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# anyOf [{type: string}, {type: null}] with the literal "null" or "None"
+# value must convert to JSON null, NOT preserve as the string "null"/"None".
+# Observed against a real Qwen 3.6 server: the model emits ``None`` for a
+# nullable optional parameter and the parser kept it as the string "None",
+# breaking nullable-typed clients.
+# ---------------------------------------------------------------------------
+
+
+def test_anyof_string_null_with_null_literal_returns_none(qwen3_tokenizer, parser_cls):
+    """anyOf [{type: string}, {type: null}] with value "null" or "None"
+    must convert to JSON null.  String-typed paths preserve the literal,
+    but a nullable schema MUST recognise the null sentinel — otherwise
+    the client receives the literal "null" / "None" string and downstream
+    type checks fail.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "set_value",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "optional": {
+                            "anyOf": [{"type": "string"}, {"type": "null"}],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+    for literal in ("null", "None"):  # JSON-style and Python-style spellings
+        parser = parser_cls(qwen3_tokenizer, tools=tools)  # fresh parser per literal
+        model_output = (
+            "<tool_call>\n"
+            "<function=set_value>\n"
+            f"<parameter=optional>{literal}</parameter>\n"
+            "</function>\n"
+            "</tool_call>"
+        )
+        result = parser.extract_tool_calls(model_output, request=request)
+        assert result.tools_called
+        args = json.loads(result.tool_calls[0].function.arguments)
+        assert args["optional"] is None, (
+            f"anyOf string|null with value {literal!r} was kept as "
+            f"{type(args['optional']).__name__}: {args['optional']!r}"
+        )
+
+
+def test_get_vllm_registry_structural_tag_returns_structural_tag(
+    parser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    request_tools = _as_chat_completion_tools(sample_tools)
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="auto",
+    )
+    tag = parser.get_structural_tag(req)  # case 1: tool_choice="auto"
+    assert isinstance(tag, StructuralTag)
+
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="required",
+    )
+    tag = parser.get_structural_tag(req)  # case 2: tool_choice="required"
+    assert isinstance(tag, StructuralTag)
+
+    if request_tools:  # a named choice needs at least one tool to name
+        tool = request_tools[0]
+        req = ChatCompletionRequest(
+            messages=[],
+            model="m",
+            tools=request_tools,
+        )
+        req.tool_choice = ChatCompletionNamedToolChoiceParam(  # set after init
+            function=ChatCompletionNamedFunction(name=tool.function.name)
+        )
+        tag = parser.get_structural_tag(req)  # case 3: named tool choice
+        assert isinstance(tag, StructuralTag)
+
+
+@pytest.mark.parametrize("include_reasoning", [True, False])
+def test_adjust_request_auto_uses_vllm_registry_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
+    parser,
+    sample_tools: list[ChatCompletionToolsParam],
+    include_reasoning: bool,
+) -> None:
+    monkeypatch.setattr(  # force the strict-tool-calling flag on for this test
+        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
+        True,
+    )
+    request_tools = _as_chat_completion_tools(sample_tools)
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="auto",
+        include_reasoning=include_reasoning,
+    )
+    out = parser.adjust_request(req)
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
+    assert isinstance(out.structured_outputs.structural_tag, str)  # serialized
+    loaded = json.loads(out.structured_outputs.structural_tag)  # must be JSON
+    assert isinstance(loaded, dict)
+
+
+def test_adjust_request_required_prefers_structural_tag(
+    monkeypatch: pytest.MonkeyPatch,
+    parser,
+    sample_tools: list[ChatCompletionToolsParam],
+) -> None:
+    monkeypatch.setattr(  # force the strict-tool-calling flag on for this test
+        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
+        True,
+    )
+    request_tools = _as_chat_completion_tools(sample_tools)
+    req = ChatCompletionRequest(
+        messages=[],
+        model="m",
+        tools=request_tools,
+        tool_choice="required",
+    )
+    out = parser.adjust_request(req)  # tool_choice="required" path
+    assert out.structured_outputs is not None
+    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
index defc6d23eff4..9ff5a933a515 100644
--- a/tests/tool_parsers/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -1,30 +1,24 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+"""
+Coder-parser-specific tests.
+
+Tests that exercise behaviour shared with the XML parser live in
+``tests/tool_parsers/test_qwen3_xml_coder_shared.py``.  Only tests that
+depend on Coder-only API (e.g. ``is_tool_call_started``) or on Coder-only
+streaming behaviour (e.g. character-by-character chunking) belong here.
+"""
+
 import json
-from collections.abc import Generator
 
 import pytest
-from openai.types.responses.function_tool import FunctionTool
-from xgrammar import StructuralTag
 
 from vllm.entrypoints.openai.chat_completion.protocol import (
-    ChatCompletionNamedFunction,
-    ChatCompletionNamedToolChoiceParam,
     ChatCompletionRequest,
-    ChatCompletionToolsParam,
-)
-from vllm.entrypoints.openai.engine.protocol import (
-    DeltaMessage,
-    FunctionCall,
-    ToolCall,
-)
-from vllm.tokenizers import TokenizerLike, get_tokenizer
-from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
-from vllm.tool_parsers.qwen3coder_tool_parser import (
-    Qwen3CoderToolParser,
 )
-from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
+from vllm.tokenizers import get_tokenizer
+from vllm.tool_parsers.qwen3coder_tool_parser import Qwen3CoderToolParser
 
 MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"
 
@@ -35,1407 +29,432 @@ def qwen3_tokenizer():
 
 
 @pytest.fixture
-def qwen3_tool_parser(qwen3_tokenizer, sample_tools):
-    return Qwen3CoderToolParser(qwen3_tokenizer, tools=sample_tools)
-
-
-@pytest.fixture
-def qwen3_xml_tool_parser(qwen3_tokenizer, sample_tools):
-    return Qwen3XMLToolParser(qwen3_tokenizer, tools=sample_tools)
-
-
-@pytest.fixture(params=["xml"])
-def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser, request):
-    """Parameterized fixture that provides both parser types for testing"""
-    if request.param == "original":
-        return qwen3_tool_parser
-    else:
-        return qwen3_xml_tool_parser
-
-
-WEATHER_PARAMS = {
-    "type": "object",
-    "properties": {
-        "city": {"type": "string", "description": "The city name"},
-        "state": {"type": "string", "description": "The state code"},
-        "unit": {"type": "string", "enum": ["fahrenheit", "celsius"]},
-    },
-    "required": ["city", "state"],
-}
-
-AREA_PARAMS = {
-    "type": "object",
-    "properties": {
-        "shape": {"type": "string"},
-        "dimensions": {"type": "object"},
-        "precision": {"type": "integer"},
-    },
-}
-
-
-@pytest.fixture(params=["chat_completion", "responses_api"])
-def sample_tools(request):
-    if request.param == "chat_completion":
-        return [
-            ChatCompletionToolsParam(
-                type="function",
-                function={
-                    "name": "get_current_weather",
-                    "description": "Get the current weather",
-                    "parameters": WEATHER_PARAMS,
-                },
-            ),
-            ChatCompletionToolsParam(
-                type="function",
-                function={
-                    "name": "calculate_area",
-                    "description": "Calculate area of a shape",
-                    "parameters": AREA_PARAMS,
-                },
-            ),
-        ]
-    else:
-        return [
-            FunctionTool(
-                type="function",
-                name="get_current_weather",
-                description="Get the current weather",
-                parameters=WEATHER_PARAMS,
-            ),
-            FunctionTool(
-                type="function",
-                name="calculate_area",
-                description="Calculate area of a shape",
-                parameters=AREA_PARAMS,
-            ),
-        ]
-
-
-def _as_chat_completion_tools(
-    tools: list[ChatCompletionToolsParam | FunctionTool],
-) -> list[ChatCompletionToolsParam]:
-    normalized: list[ChatCompletionToolsParam] = []
-    for tool in tools:
-        if isinstance(tool, ChatCompletionToolsParam):
-            normalized.append(tool)
-        else:
-            normalized.append(
-                ChatCompletionToolsParam(
-                    type="function",
-                    function={
-                        "name": tool.name,
-                        "description": tool.description,
-                        "parameters": tool.parameters,
-                    },
-                )
-            )
-    return normalized
-
-
-def assert_tool_calls(
-    actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall]
-):
-    assert len(actual_tool_calls) == len(expected_tool_calls)
-
-    for actual_tool_call, expected_tool_call in zip(
-        actual_tool_calls, expected_tool_calls
-    ):
-        # Qwen3 parser doesn't generate IDs during extraction
-        assert actual_tool_call.type == "function"
-        assert actual_tool_call.function.name == expected_tool_call.function.name
-        assert json.loads(actual_tool_call.function.arguments) == json.loads(
-            expected_tool_call.function.arguments
-        )
-
+def qwen3_tool_parser(qwen3_tokenizer):
+    return Qwen3CoderToolParser(qwen3_tokenizer, tools=None)
 
-def stream_delta_message_generator(
-    qwen3_tool_parser,
-    qwen3_tokenizer: TokenizerLike,
-    model_output: str,
-    request: ChatCompletionRequest | None = None,
-) -> Generator[DeltaMessage, None, None]:
-    all_token_ids = qwen3_tokenizer.encode(model_output, add_special_tokens=False)
 
-    previous_text = ""
-    previous_tokens = None
-    prefix_offset = 0
-    read_offset = 0
-    for i, delta_token in enumerate(all_token_ids):
-        delta_token_ids = [delta_token]
-        previous_token_ids = all_token_ids[:i]
-        current_token_ids = all_token_ids[: i + 1]
-
-        (new_tokens, delta_text, new_prefix_offset, new_read_offset) = (
-            detokenize_incrementally(
-                tokenizer=qwen3_tokenizer,
-                all_input_ids=current_token_ids,
-                prev_tokens=previous_tokens,
-                prefix_offset=prefix_offset,
-                read_offset=read_offset,
-                skip_special_tokens=False,
-                spaces_between_special_tokens=True,
-            )
-        )
-
-        current_text = previous_text + delta_text
-
-        delta_message = qwen3_tool_parser.extract_tool_calls_streaming(
-            previous_text,
-            current_text,
-            delta_text,
-            previous_token_ids,
-            current_token_ids,
-            delta_token_ids,
-            request=request,
-        )
-        if delta_message:
-            yield delta_message
-
-        previous_text = current_text
-        previous_tokens = (
-            previous_tokens + new_tokens if previous_tokens else new_tokens
-        )
-        prefix_offset = new_prefix_offset
-        read_offset = new_read_offset
-
-
-def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized):
-    model_output = "This is a test response without any tool calls"
-    extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
-        model_output, request=None
-    )  # type: ignore[arg-type]
-    assert not extracted_tool_calls.tools_called
-    assert extracted_tool_calls.tool_calls == []
-    assert extracted_tool_calls.content == model_output
-
-
-@pytest.mark.parametrize(
-    ids=[
-        "single_tool",
-        "single_tool_with_content",
-        "single_tool_multiline_param",
-        "parallel_tools",
-        "tool_with_typed_params",
-    ],
-    argnames=["model_output", "expected_tool_calls", "expected_content"],
-    argvalues=[
-        (
-            """<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
-                        ),
-                    )
-                )
-            ],
-            None,
-        ),
-        (
-            """Sure! Let me check the weather for you.<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
-                        ),
-                    )
-                )
-            ],
-            "Sure! Let me check the weather for you.",
-        ),
-        (
-            """<tool_call>
-<function=calculate_area>
-<parameter=shape>
-rectangle
-</parameter>
-<parameter=dimensions>
-{"width": 10, 
- "height": 20}
-</parameter>
-<parameter=precision>
-2
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="calculate_area",
-                        arguments=json.dumps(
-                            {
-                                "shape": "rectangle",
-                                "dimensions": {"width": 10, "height": 20},
-                                "precision": 2,
-                            }
-                        ),
-                    )
-                )
-            ],
-            None,
-        ),
-        (
-            """<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>
-<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Orlando
-</parameter>
-<parameter=state>
-FL
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
-                        ),
-                    )
-                ),
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}
-                        ),
-                    )
-                ),
-            ],
-            None,
-        ),
-        (
-            """Let me calculate that area for you.<tool_call>
-<function=calculate_area>
-<parameter=shape>
-circle
-</parameter>
-<parameter=dimensions>
-{"radius": 15.5}
-</parameter>
-<parameter=precision>
-3
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="calculate_area",
-                        arguments=json.dumps(
-                            {
-                                "shape": "circle",
-                                "dimensions": {"radius": 15.5},
-                                "precision": 3,
-                            }
-                        ),
-                    )
-                )
-            ],
-            "Let me calculate that area for you.",
-        ),
-    ],
-)
-def test_extract_tool_calls(
-    qwen3_tool_parser_parametrized,
-    model_output,
-    expected_tool_calls,
-    expected_content,
-):
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-    extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
-        model_output, request=request
-    )
-    assert extracted_tool_calls.tools_called
-
-    assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
-
-    assert extracted_tool_calls.content == expected_content
-
-
-def test_extract_tool_calls_fallback_no_tags(
-    qwen3_tool_parser_parametrized,
+def test_streaming_trailing_text_after_tool_with_literal_close_tag_in_value(
+    qwen3_tokenizer,
 ):
-    """Test fallback parsing when XML tags are missing"""
-    model_output = """<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-</function>"""
-
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-    extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
-        model_output, request=request
+    """A tool call's parameter value contains a literal ``</tool_call>``
+    string.  After the real tool call closes, trailing free text must
+    still be emitted as content.
+
+    The naive ``current_text.count(</tool_call>)`` and
+    ``current_text.find(</tool_call>)`` used by the early-advance and
+    ``_advance_to_next_tool`` logic don't distinguish literal text from
+    structural delimiters.  This can cause ``_sent_content_idx`` to land
+    INSIDE the tool's parameter value, after which the trailing text
+    fails to be emitted.
+    """
+    from vllm.entrypoints.openai.chat_completion.protocol import (
+        ChatCompletionToolsParam,
     )
 
-    assert extracted_tool_calls.tools_called
-    assert len(extracted_tool_calls.tool_calls) == 1
-    assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"
-
-
-def test_extract_tool_calls_type_conversion(qwen3_tokenizer):
-    """Test parameter type conversion based on tool schema"""
     tools = [
         ChatCompletionToolsParam(
             type="function",
             function={
-                "name": "test_types",
+                "name": "write_file",
                 "parameters": {
                     "type": "object",
                     "properties": {
-                        "int_param": {"type": "integer"},
-                        "float_param": {"type": "float"},
-                        "bool_param": {"type": "boolean"},
-                        "str_param": {"type": "string"},
-                        "obj_param": {"type": "object"},
+                        "path": {"type": "string"},
+                        "content": {"type": "string"},
                     },
                 },
             },
         )
     ]
-
-    model_output = """<tool_call>
-<function=test_types>
-<parameter=int_param>
-42
-</parameter>
-<parameter=float_param>
-3.14
-</parameter>
-<parameter=bool_param>
-true
-</parameter>
-<parameter=str_param>
-hello world
-</parameter>
-<parameter=obj_param>
-{"key": "value"}
-</parameter>
-</function>
-</tool_call>"""
-
-    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
 
-    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
-    assert args["int_param"] == 42
-    assert args["float_param"] == 3.14
-    assert args["bool_param"] is True
-    assert args["str_param"] == "hello world"
-    assert args["obj_param"] == {"key": "value"}
+    # The parameter value contains a literal ``</tool_call>`` string.
+    # The real ``</tool_call>`` follows after ``</function>``.
+    delta_1 = (
+        "<tool_call>\n<function=write_file>\n"
+        "<parameter=path>foo.py</parameter>\n"
+        "<parameter=content>\n"
+        "doc = '<tool_call>example</tool_call>'\n"
+        "</parameter>\n</function>\n</tool_call>"
+    )
+    parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=delta_1,
+        delta_text=delta_1,
+        previous_token_ids=[],
+        current_token_ids=[1],
+        delta_token_ids=[1],
+        request=request,
+    )
 
+    delta_2 = "\nDone, file written!"
+    text2 = delta_1 + delta_2
+    msg2 = parser.extract_tool_calls_streaming(
+        previous_text=delta_1,
+        current_text=text2,
+        delta_text=delta_2,
+        previous_token_ids=[1],
+        current_token_ids=[1, 2],
+        delta_token_ids=[2],
+        request=request,
+    )
+    contents = []
+    if msg2 and msg2.content:
+        contents.append(msg2.content)
+    # EOS-style step: new token id but empty delta text, to flush buffered content
+    msg3 = parser.extract_tool_calls_streaming(
+        previous_text=text2,
+        current_text=text2,
+        delta_text="",
+        previous_token_ids=[1, 2],
+        current_token_ids=[1, 2, 3],
+        delta_token_ids=[3],
+        request=request,
+    )
+    if msg3 and msg3.content:
+        contents.append(msg3.content)
+
+    full = "".join(contents)
+    assert "Done, file written!" in full, (
+        f"Trailing text after a tool call whose parameter value contains "
+        f"a literal </tool_call> was dropped. Got content: {full!r}"
+    )
 
-def test_extract_tool_calls_anyof_type_conversion(qwen3_tokenizer):
-    """Test type conversion for anyOf/oneOf nullable schemas (Pydantic v2).
 
-    Pydantic v2 emits anyOf for Optional[T] fields, e.g.:
-        Optional[int] -> {"anyOf": [{"type": "integer"}, {"type": "null"}]}
-    The parser must extract the non-null type and apply the correct
-    conversion (int(), float(), etc.) instead of returning a raw string.
+def test_streaming_second_tool_after_first_with_literal_close_tag_in_value(
+    qwen3_tokenizer,
+):
+    """A first tool call's parameter value contains a literal
+    ``</tool_call>``.  A SECOND structural tool call follows after the
+    real ``</tool_call>``.  Both tool calls and any inter-call content
+    must be emitted correctly.
     """
+    from vllm.entrypoints.openai.chat_completion.protocol import (
+        ChatCompletionToolsParam,
+    )
+
     tools = [
         ChatCompletionToolsParam(
             type="function",
             function={
-                "name": "test_anyof",
+                "name": "write_file",
                 "parameters": {
                     "type": "object",
                     "properties": {
-                        "anyof_int": {
-                            "anyOf": [
-                                {"type": "integer"},
-                                {"type": "null"},
-                            ],
-                            "default": 5,
-                        },
-                        "anyof_str": {
-                            "anyOf": [
-                                {"type": "string"},
-                                {"type": "null"},
-                            ],
-                        },
-                        "anyof_array": {
-                            "anyOf": [
-                                {"type": "array", "items": {"type": "string"}},
-                                {"type": "null"},
-                            ],
-                        },
-                        "anyof_obj": {
-                            "anyOf": [
-                                {"type": "object"},
-                                {"type": "null"},
-                            ],
-                        },
-                        "type_as_array": {
-                            "type": ["integer", "null"],
-                        },
-                        "multi_non_null": {
-                            "anyOf": [
-                                {"type": "string"},
-                                {"type": "integer"},
-                                {"type": "null"},
-                            ],
-                        },
+                        "path": {"type": "string"},
+                        "content": {"type": "string"},
                     },
                 },
             },
-        )
-    ]
-
-    model_output = """<tool_call>
-<function=test_anyof>
-<parameter=anyof_int>
-5
-</parameter>
-<parameter=anyof_str>
-hello
-</parameter>
-<parameter=anyof_array>
-["a", "b", "c"]
-</parameter>
-<parameter=anyof_obj>
-{"key": "value"}
-</parameter>
-<parameter=type_as_array>
-42
-</parameter>
-<parameter=multi_non_null>
-some text
-</parameter>
-</function>
-</tool_call>"""
-
-    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted = parser.extract_tool_calls(model_output, request=request)
-
-    args = json.loads(extracted.tool_calls[0].function.arguments)
-    assert args["anyof_int"] == 5
-    assert isinstance(args["anyof_int"], int)
-    assert args["anyof_str"] == "hello"
-    assert isinstance(args["anyof_str"], str)
-    assert args["anyof_array"] == ["a", "b", "c"]
-    assert isinstance(args["anyof_array"], list)
-    assert args["anyof_obj"] == {"key": "value"}
-    assert isinstance(args["anyof_obj"], dict)
-    assert args["type_as_array"] == 42
-    assert isinstance(args["type_as_array"], int)
-    # Multi non-null: anyOf[string, integer, null] → first non-null is string
-    assert args["multi_non_null"] == "some text"
-    assert isinstance(args["multi_non_null"], str)
-
-
-def test_extract_tool_calls_anyof_type_conversion_streaming(qwen3_tokenizer):
-    """Test streaming e2e for anyOf/oneOf nullable schemas (Pydantic v2).
-
-    Verifies that the full streaming pipeline — tokenize, incrementally
-    decode, extract_tool_calls_streaming — correctly resolves types from
-    anyOf schemas and produces valid JSON with properly typed values.
-    """
-    tools = [
+        ),
         ChatCompletionToolsParam(
             type="function",
             function={
-                "name": "search_web",
+                "name": "log",
                 "parameters": {
                     "type": "object",
-                    "properties": {
-                        "query": {
-                            "anyOf": [
-                                {"type": "string"},
-                                {"type": "null"},
-                            ],
-                        },
-                        "count": {
-                            "anyOf": [
-                                {"type": "integer"},
-                                {"type": "null"},
-                            ],
-                            "default": 5,
-                        },
-                        "verbose": {
-                            "anyOf": [
-                                {"type": "boolean"},
-                                {"type": "null"},
-                            ],
-                        },
-                    },
+                    "properties": {"msg": {"type": "string"}},
                 },
             },
-        )
+        ),
     ]
-
-    model_output = """<tool_call>
-<function=search_web>
-<parameter=query>
-vllm tool parser
-</parameter>
-<parameter=count>
-10
-</parameter>
-<parameter=verbose>
-true
-</parameter>
-</function>
-</tool_call>"""
-
     parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
 
-    tool_states = {}
-    for delta_message in stream_delta_message_generator(
-        parser, qwen3_tokenizer, model_output, request
-    ):
-        if delta_message.tool_calls:
-            for tool_call in delta_message.tool_calls:
-                idx = tool_call.index
-                if idx not in tool_states:
-                    tool_states[idx] = {"name": None, "arguments": ""}
-                if tool_call.function:
-                    if tool_call.function.name:
-                        tool_states[idx]["name"] = tool_call.function.name
-                    if tool_call.function.arguments is not None:
-                        tool_states[idx]["arguments"] += tool_call.function.arguments
-
-    assert len(tool_states) == 1
-    assert tool_states[0]["name"] == "search_web"
-    assert tool_states[0]["arguments"] is not None
-    args = json.loads(tool_states[0]["arguments"])
-    assert args["query"] == "vllm tool parser"
-    assert isinstance(args["query"], str)
-    assert args["count"] == 10
-    assert isinstance(args["count"], int)
-    assert args["verbose"] is True
-    assert isinstance(args["verbose"], bool)
-
-
-@pytest.mark.parametrize(
-    ids=[
-        "no_tools",
-        "single_tool",
-        "single_tool_with_content",
-        "single_tool_multiline_param",
-        "parallel_tools",
-        "tool_with_typed_params",  # Added this test case
-    ],
-    argnames=["model_output", "expected_tool_calls", "expected_content"],
-    argvalues=[
-        ("This is a test without tools", [], "This is a test without tools"),
-        (
-            """<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
-                        ),
-                    )
-                )
-            ],
-            None,
-        ),
-        (
-            """Sure! Let me check the weather for you.<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
-                        ),
-                    )
-                )
-            ],
-            "Sure! Let me check the weather for you.",
-        ),
-        (
-            """<tool_call>
-<function=calculate_area>
-<parameter=shape>
-rectangle
-</parameter>
-<parameter=dimensions>
-{"width": 10, 
- "height": 20}
-</parameter>
-<parameter=precision>
-2
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="calculate_area",
-                        arguments=json.dumps(
-                            {
-                                "shape": "rectangle",
-                                "dimensions": {"width": 10, "height": 20},
-                                "precision": 2,
-                            }
-                        ),
-                    )
-                )
-            ],
-            None,
-        ),
-        (
-            """<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>
-<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Orlando
-</parameter>
-<parameter=state>
-FL
-</parameter>
-<parameter=unit>
-celsius
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}
-                        ),
-                    )
-                ),
-                ToolCall(
-                    function=FunctionCall(
-                        name="get_current_weather",
-                        arguments=json.dumps(
-                            {"city": "Orlando", "state": "FL", "unit": "celsius"}
-                        ),
-                    )
-                ),
-            ],
-            None,
-        ),
-        # Added tool_with_typed_params test case
-        (
-            """Let me calculate that area for you.<tool_call>
-<function=calculate_area>
-<parameter=shape>
-circle
-</parameter>
-<parameter=dimensions>
-{"radius": 15.5}
-</parameter>
-<parameter=precision>
-3
-</parameter>
-</function>
-</tool_call>""",
-            [
-                ToolCall(
-                    function=FunctionCall(
-                        name="calculate_area",
-                        arguments=json.dumps(
-                            {
-                                "shape": "circle",
-                                "dimensions": {"radius": 15.5},
-                                "precision": 3,
-                            }
-                        ),
-                    )
-                )
-            ],
-            "Let me calculate that area for you.",
-        ),
-    ],
-)
-def test_extract_tool_calls_streaming(
-    qwen3_tool_parser_parametrized,
-    qwen3_tokenizer,
-    model_output,
-    expected_tool_calls,
-    expected_content,
-):
-    """Test incremental streaming behavior including typed parameters"""
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-
-    other_content = ""
-    tool_states = {}  # Track state per tool index
-
-    for delta_message in stream_delta_message_generator(
-        qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request
-    ):
-        # role should never be streamed from tool parser
-        assert not delta_message.role
-
-        if delta_message.content:
-            other_content += delta_message.content
-
-        if delta_message.tool_calls:
-            for tool_call in delta_message.tool_calls:
-                idx = tool_call.index
-
-                # Initialize state for new tool
-                if idx not in tool_states:
-                    tool_states[idx] = {
-                        "id": None,
-                        "name": None,
-                        "arguments": "",
-                        "type": None,
-                    }
-
-                # First chunk should have id, name, and type
-                if tool_call.id:
-                    tool_states[idx]["id"] = tool_call.id
-
-                if tool_call.type:
-                    assert tool_call.type == "function"
-                    tool_states[idx]["type"] = tool_call.type
-
-                if tool_call.function:
-                    if tool_call.function.name:
-                        # Should only be set once
-                        assert tool_states[idx]["name"] is None
-                        tool_states[idx]["name"] = tool_call.function.name
-
-                    if tool_call.function.arguments is not None:
-                        # Accumulate arguments incrementally
-                        tool_states[idx]["arguments"] += tool_call.function.arguments
-
-    # Verify final content
-    assert other_content == (expected_content or "")  # Handle None case
-
-    # Verify we got all expected tool calls
-    assert len(tool_states) == len(expected_tool_calls)
-    assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == len(
-        expected_tool_calls
+    full = (
+        "<tool_call>\n<function=write_file>\n"
+        "<parameter=path>foo.py</parameter>\n"
+        "<parameter=content>\n"
+        "doc = '<tool_call>example</tool_call>'\n"
+        "</parameter>\n</function>\n</tool_call>"
+        "\n"
+        "<tool_call>\n<function=log>\n"
+        "<parameter=msg>done</parameter>\n"
+        "</function>\n</tool_call>"
     )
 
-    # Verify each tool call
-    for idx, expected_tool in enumerate(expected_tool_calls):
-        state = tool_states[idx]
-        assert state["id"] is not None
-        assert state["type"] == "function"
-        assert state["name"] == expected_tool.function.name
-
-        # Parse accumulated arguments
-        arguments_str = state["arguments"]
-        assert arguments_str is not None
-        actual_args = json.loads(arguments_str)
-        expected_args = json.loads(expected_tool.function.arguments)
-        assert actual_args == expected_args
-
-
-def test_extract_tool_calls_missing_closing_parameter_tag(
-    qwen3_tool_parser_parametrized,
-):
-    """Test handling of missing closing </parameter> tag"""
-    # Using get_current_weather from sample_tools but with malformed XML
-    model_output = """Let me check the weather for you:
-<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>"""
-
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-    extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
-        model_output, request=request
+    msg = parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=full,
+        delta_text=full,
+        previous_token_ids=[],
+        current_token_ids=[1],
+        delta_token_ids=[1],
+        request=request,
     )
-
-    # The parser should handle the malformed XML gracefully
-    assert extracted_tool_calls.tools_called
-    assert len(extracted_tool_calls.tool_calls) == 1
-
-    # Verify the function name is correct
-    assert extracted_tool_calls.tool_calls[0].function.name == "get_current_weather"
-
-    # Verify the arguments are parsed despite the missing closing tag
-    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
-    assert "city" in args
-    assert args["city"] == "Dallas"
-    assert args["state"] == "TX"
-    assert args["unit"] == "fahrenheit"
-
-    # Check that content before the tool call is preserved
-    assert "Let me check the weather for you:" in extracted_tool_calls.content
+    assert msg is not None
+    assert msg.tool_calls is not None
+    assert len(msg.tool_calls) == 2, (
+        f"Expected 2 tool calls, got {len(msg.tool_calls)}: {msg.tool_calls}"
+    )
+    names = [tc.function.name for tc in msg.tool_calls]
+    assert names == ["write_file", "log"], f"Wrong tool names: {names}"
 
 
-def test_extract_tool_calls_streaming_missing_closing_tag(
-    qwen3_tool_parser_parametrized, qwen3_tokenizer
+def test_streaming_content_before_and_between_two_tool_calls_one_delta(
+    qwen3_tool_parser,
 ):
-    """Test streaming with missing closing </parameter> tag"""
-    # Using get_current_weather from sample_tools but with malformed XML
-    model_output = """Let me check the weather for you:
-<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>"""
-
+    """MTP / spec-decode: a single delta delivers free text BEFORE tool 1
+    AND free text BETWEEN tool 1 and tool 2.  Both content fragments must
+    be emitted; the recursion path used to drop the second one because of a
+    ``not result.content`` guard that discarded the recursion's content
+    when the outer call already had content of its own.
+    """
     request = ChatCompletionRequest(model=MODEL, messages=[])
-
-    other_content = ""
-    tool_states = {}
-
-    for delta_message in stream_delta_message_generator(
-        qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request
-    ):
-        if delta_message.content:
-            other_content += delta_message.content
-
-        if delta_message.tool_calls:
-            for tool_call in delta_message.tool_calls:
-                idx = tool_call.index
-
-                if idx not in tool_states:
-                    tool_states[idx] = {
-                        "id": None,
-                        "name": None,
-                        "arguments": "",
-                        "type": None,
-                    }
-
-                if tool_call.id:
-                    tool_states[idx]["id"] = tool_call.id
-
-                if tool_call.type:
-                    assert tool_call.type == "function"
-                    tool_states[idx]["type"] = tool_call.type
-
-                if tool_call.function:
-                    if tool_call.function.name:
-                        tool_states[idx]["name"] = tool_call.function.name
-
-                    if tool_call.function.arguments is not None:
-                        tool_states[idx]["arguments"] += tool_call.function.arguments
-
-    # Verify content was streamed
-    assert "Let me check the weather for you:" in other_content
-    # Verify we got the tool call
-    assert len(tool_states) == 1
-    assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == 1
-
-    state = tool_states[0]
-    assert state["id"] is not None
-    assert state["type"] == "function"
-    assert state["name"] == "get_current_weather"
-
-    # Verify arguments were parsed correctly despite missing closing tag
-    assert state["arguments"] is not None
-    args = json.loads(state["arguments"])
-    assert args["city"] == "Dallas"
-    assert args["state"] == "TX"
-    assert args["unit"] == "fahrenheit"
+    delta = (
+        "before text "
+        "<tool_call>\n<function=foo>\n"
+        "<parameter=a>\n1\n</parameter>\n"
+        "</function>\n</tool_call>"
+        "between text "
+        "<tool_call>\n<function=bar>\n"
+        "<parameter=b>\n2\n</parameter>\n"
+        "</function>\n</tool_call>"
+    )
+    msg = qwen3_tool_parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=delta,
+        delta_text=delta,
+        previous_token_ids=[],
+        current_token_ids=[1],
+        delta_token_ids=[1],
+        request=request,
+    )
+    assert msg is not None
+    assert msg.content is not None, "outer content lost"
+    assert "before text " in msg.content, (
+        f"missing 'before text' content: {msg.content!r}"
+    )
+    assert "between text " in msg.content, (
+        f"recursion content 'between text' was dropped because the outer "
+        f"already had content. Got: {msg.content!r}"
+    )
 
 
-def test_extract_tool_calls_streaming_incremental(
-    qwen3_tool_parser_parametrized, qwen3_tokenizer
-):
-    """Test that streaming is truly incremental"""
-    model_output = """I'll check the weather.<tool_call>
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-</function>
-</tool_call>"""
+def test_extract_tool_calls_streaming_split_tag(qwen3_tool_parser):
+    """``<tool_call>`` arrives split across two deltas (``<tool`` then
+    ``_call>``).  ``is_tool_call_started`` must flip to ``True`` once the
+    full tag exists in ``current_text``, and the partial tag must not leak
+    into ``DeltaMessage.content``.
 
+    This relies on the Coder parser's ``is_tool_call_started`` attribute,
+    which has no equivalent on the XML parser.
+    """
     request = ChatCompletionRequest(model=MODEL, messages=[])
 
-    chunks = []
-    for delta_message in stream_delta_message_generator(
-        qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request
-    ):
-        chunks.append(delta_message)
-
-    # Should have multiple chunks
-    assert len(chunks) > 3
+    prev_text_1 = "I will use a tool."
+    delta_text_1 = "<tool"
+    curr_text_1 = prev_text_1 + delta_text_1
+
+    msg1 = qwen3_tool_parser.extract_tool_calls_streaming(
+        previous_text=prev_text_1,
+        current_text=curr_text_1,
+        delta_text=delta_text_1,
+        previous_token_ids=[1, 2, 3],
+        current_token_ids=[1, 2, 3, 4],
+        delta_token_ids=[4],
+        request=request,
+    )
 
-    # First chunk(s) should be content
-    assert chunks[0].content is not None
-    assert chunks[0].tool_calls is None or chunks[0].tool_calls == []
+    prev_text_2 = curr_text_1
+    delta_text_2 = "_call>"
+    curr_text_2 = prev_text_2 + delta_text_2
+
+    msg2 = qwen3_tool_parser.extract_tool_calls_streaming(
+        previous_text=prev_text_2,
+        current_text=curr_text_2,
+        delta_text=delta_text_2,
+        previous_token_ids=[1, 2, 3, 4],
+        current_token_ids=[1, 2, 3, 4, 5],
+        delta_token_ids=[5],
+        request=request,
+    )
 
-    # Should have a chunk with tool header (id, name, type)
-    header_found = False
-    for chunk in chunks:
-        if chunk.tool_calls and chunk.tool_calls[0].id:
-            header_found = True
-            assert chunk.tool_calls[0].function.name == "get_current_weather"
-            assert chunk.tool_calls[0].type == "function"
-            # Empty initially
-            assert chunk.tool_calls[0].function.arguments == ""
-            break
-    assert header_found
+    assert qwen3_tool_parser.is_tool_call_started is True
 
-    # Should have chunks with incremental arguments
-    arg_chunks = []
-    for chunk in chunks:
-        if chunk.tool_calls and chunk.tool_calls[0].function.arguments:
-            arg_chunks.append(chunk.tool_calls[0].function.arguments)
+    if msg1 and msg1.content:
+        assert "<tool" not in msg1.content
+    if msg2 and msg2.content:
+        assert "_call>" not in msg2.content
 
-    # Arguments should be streamed incrementally
-    assert len(arg_chunks) > 1
 
-    # Concatenated arguments should form valid JSON
-    full_args = "".join(arg_chunks)
-    parsed_args = json.loads(full_args)
-    assert parsed_args["city"] == "Dallas"
-    assert parsed_args["state"] == "TX"
+def test_streaming_char_by_char_literal_balises_in_value(qwen3_tokenizer):
+    """Stress test: a ``write_file`` tool call whose ``content`` value embeds a
+    complete literal ``<tool_call>...</tool_call>`` block — including
+    ``<parameter=path>...</parameter>`` and ``<parameter=content>...
+    </parameter>`` with names that match the OUTER tool's schema —
+    streamed one character at a time.
 
+    Reproduces the qwen-code scenario where the model writes a parser
+    fixture file: every literal ``<tool_call>``, ``<function=...>``,
+    ``<parameter=NAME>``, ``</parameter>``, ``</function>`` and
+    ``</tool_call>`` inside the ``content`` value must stay inside the
+    value; no spurious second tool call, no value truncation.
+    """
+    from vllm.entrypoints.openai.chat_completion.protocol import (
+        ChatCompletionToolsParam,
+    )
 
-def test_extract_tool_calls_complex_type_with_single_quote(
-    qwen3_tokenizer,
-):
-    """Test parameter type conversion based on tool schema"""
     tools = [
         ChatCompletionToolsParam(
             type="function",
             function={
-                "name": "test_types",
+                "name": "write_file",
                 "parameters": {
                     "type": "object",
                     "properties": {
-                        "int_param": {"type": "integer"},
-                        "float_param": {"type": "float"},
-                        "bool_param": {"type": "boolean"},
-                        "str_param": {"type": "string"},
-                        "obj_param": {"type": "object"},
+                        "path": {"type": "string"},
+                        "content": {"type": "string"},
                     },
                 },
             },
         )
     ]
-
-    model_output = """<tool_call>
-<function=test_types>
-<parameter=obj_param>
-{'key': 'value'}
-</parameter>
-</function>
-</tool_call>"""
-
-    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
     request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    extracted_tool_calls = parser.extract_tool_calls(model_output, request=request)
-
-    args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
-    assert args["obj_param"] == {"key": "value"}
-
-
-def test_extract_tool_calls_streaming_missing_opening_tag(
-    qwen3_tool_parser_parametrized, qwen3_tokenizer
-):
-    """Test streaming with missing opening <tool_call> tag
-
-    This tests that the streaming parser correctly handles
-    tool calls that start directly with <function=...>
-    """
-    model_output = """I'll check the weather for you.
-
-<function=get_current_weather>
-<parameter=city>
-Dallas
-</parameter>
-<parameter=state>
-TX
-</parameter>
-<parameter=unit>
-fahrenheit
-</parameter>
-</function>
-</tool_call>"""
-
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-
-    other_content = ""
-    tool_states = {}
-
-    for delta_message in stream_delta_message_generator(
-        qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request
-    ):
-        if delta_message.content:
-            other_content += delta_message.content
-
-        if delta_message.tool_calls:
-            for tool_call in delta_message.tool_calls:
-                idx = tool_call.index
-
-                if idx not in tool_states:
-                    tool_states[idx] = {
-                        "id": None,
-                        "name": None,
-                        "arguments": "",
-                        "type": None,
-                    }
-
-                if tool_call.id:
-                    tool_states[idx]["id"] = tool_call.id
-
-                if tool_call.type:
-                    assert tool_call.type == "function"
-                    tool_states[idx]["type"] = tool_call.type
-
-                if tool_call.function:
-                    if tool_call.function.name:
-                        tool_states[idx]["name"] = tool_call.function.name
 
-                    if tool_call.function.arguments is not None:
-                        tool_states[idx]["arguments"] += tool_call.function.arguments
-
-    # Verify content was streamed
-    assert "I'll check the weather for you." in other_content
-
-    # Verify we got the tool call
-    assert len(tool_states) == 1
-    assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == 1
-
-    state = tool_states[0]
-    assert state["id"] is not None
-    assert state["type"] == "function"
-    assert state["name"] == "get_current_weather"
-
-    # Verify arguments were parsed correctly despite missing opening tag
-    assert state["arguments"] is not None
-    args = json.loads(state["arguments"])
-    assert args["city"] == "Dallas"
-    assert args["state"] == "TX"
-    assert args["unit"] == "fahrenheit"
-
-
-def test_malformed_xml_no_gt_delimiter(qwen3_tool_parser):
-    """Regression: malformed XML without '>' must not crash (PR #36774)."""
-    model_output = (
+    nested_content = (
+        'doc = """\n'
         "<tool_call>\n"
-        "<function=get_current_weather\n"
-        "<parameter=city>Dallas</parameter>\n"
-        "</function>\n"
-        "</tool_call>"
-    )
-
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-    result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
-    assert result is not None
-    assert isinstance(result.tool_calls, list)
-    assert all(tc is not None for tc in result.tool_calls)
-
-
-def test_none_tool_calls_filtered(qwen3_tool_parser):
-    """Regression: None tool calls filtered from output (PR #36774)."""
-    model_output = (
-        "<tool_call>\n"
-        "<function=bad_func_no_gt\n"
+        "<function=write_file>\n"
+        "<parameter=path>\nliteral/value.txt\n</parameter>\n"
+        "<parameter=content>\nhello\n</parameter>\n"
         "</function>\n"
         "</tool_call>\n"
-        "<tool_call>\n"
-        "<function=get_current_weather>\n"
-        "<parameter=city>Dallas</parameter>\n"
-        "<parameter=state>TX</parameter>\n"
-        "</function>\n"
-        "</tool_call>"
+        '"""\n'
     )
 
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-    result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
-    assert all(tc is not None for tc in result.tool_calls)
-    assert result.tools_called
-    assert len(result.tool_calls) == 1
-    assert result.tool_calls[0].function.name == "get_current_weather"
-    args = json.loads(result.tool_calls[0].function.arguments)
-    assert args["city"] == "Dallas"
-    assert args["state"] == "TX"
-
-
-def test_anyof_parameter_not_double_encoded(qwen3_tokenizer):
-    """Regression: anyOf parameters must not be double-encoded (PR #36032)."""
-    tools = [
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "update_record",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "data": {
-                            "anyOf": [{"type": "object"}, {"type": "null"}],
-                        },
-                    },
-                },
-            },
-        )
-    ]
-
-    parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=tools)
-
-    model_output = (
+    full_output = (
         "<tool_call>\n"
-        "<function=update_record>\n"
-        '<parameter=data>{"key": "value", "count": 42}</parameter>\n'
+        "<function=write_file>\n"
+        "<parameter=path>\nfixture.py\n</parameter>\n"
+        f"<parameter=content>\n{nested_content}</parameter>\n"
         "</function>\n"
         "</tool_call>"
     )
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    result = parser.extract_tool_calls(model_output, request=request)
-
-    assert result.tools_called
-    assert len(result.tool_calls) == 1
-    args = json.loads(result.tool_calls[0].function.arguments)
-    assert isinstance(args["data"], dict)
-    assert args["data"] == {"key": "value", "count": 42}
-
-
-def test_streaming_multi_param_single_chunk(qwen3_tool_parser, qwen3_tokenizer):
-    """Regression: speculative decode delivering multiple params at once (PR #35615)."""
-    request = ChatCompletionRequest(model=MODEL, messages=[])
-
-    deltas = [
-        "<tool_call>",
-        "\n<function=get_current_weather>",
-        "\n",  # triggers json_started -> sends "{"
-        # This single delta delivers all three parameters at once
-        "<parameter=city>\nDallas\n</parameter>"
-        "\n<parameter=state>\nTX\n</parameter>"
-        "\n<parameter=unit>\nfahrenheit\n</parameter>",
-        "\n</function>",
-        "\n</tool_call>",
-    ]
+    tool_states: dict[int, dict] = {}
+    current_text = ""
+    previous_text = ""
+    for ch in full_output:
+        previous_text = current_text
+        current_text += ch
+        delta_message = parser.extract_tool_calls_streaming(
+            previous_text=previous_text,
+            current_text=current_text,
+            delta_text=ch,
+            previous_token_ids=[],
+            current_token_ids=[],
+            delta_token_ids=[],
+            request=request,
+        )
+        if delta_message and delta_message.tool_calls:
+            for tool_call in delta_message.tool_calls:
+                idx = tool_call.index
+                state = tool_states.setdefault(
+                    idx, {"id": None, "name": None, "arguments": ""}
+                )
+                if tool_call.id:
+                    state["id"] = tool_call.id
+                if tool_call.function:
+                    if tool_call.function.name:
+                        state["name"] = tool_call.function.name
+                    if tool_call.function.arguments is not None:
+                        state["arguments"] += tool_call.function.arguments
 
-    from tests.tool_parsers.utils import (
-        run_tool_extraction_streaming,
+    assert list(tool_states.keys()) == [0], (
+        f"Expected exactly one tool call; got indices "
+        f"{list(tool_states.keys())} — a literal nested <tool_call> "
+        f"was promoted to a real call."
     )
-
-    reconstructor = run_tool_extraction_streaming(
-        qwen3_tool_parser,
-        deltas,
-        request,
-        assert_one_tool_per_delta=False,
+    state = tool_states[0]
+    assert state["name"] == "write_file"
+    args = json.loads(state["arguments"])
+    assert list(args.keys()) == ["path", "content"], (
+        f"Spurious params from embedded literals: {list(args.keys())}"
     )
-
-    assert len(reconstructor.tool_calls) == 1
-    args = json.loads(reconstructor.tool_calls[0].function.arguments)
-    assert args["city"] == "Dallas"
-    assert args["state"] == "TX"
-    assert args["unit"] == "fahrenheit"
-
-
-def test_no_double_serialization_string_args(qwen3_tool_parser):
-    """Regression: string arguments must not be double-serialized (PR #35615)."""
-    tools = [
-        ChatCompletionToolsParam(
-            type="function",
-            function={
-                "name": "greet",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "message": {"type": "string"},
-                    },
-                },
-            },
-        )
-    ]
-
-    model_output = (
-        "<tool_call>\n"
-        "<function=greet>\n"
-        "<parameter=message>hello world</parameter>\n"
-        "</function>\n"
-        "</tool_call>"
+    assert args["path"] == "fixture.py"
+    assert args["content"] == nested_content.rstrip("\n"), (
+        f"content was truncated/corrupted: {args.get('content')!r}"
     )
 
-    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
-    result = qwen3_tool_parser.extract_tool_calls(model_output, request=request)
 
-    assert result.tools_called
-    assert len(result.tool_calls) == 1
-    raw_arguments = result.tool_calls[0].function.arguments
-    args = json.loads(raw_arguments)
-    assert args["message"] == "hello world"
-    assert '\\"hello world\\"' not in raw_arguments
+def test_extract_tool_calls_streaming_various_chunk_sizes(
+    qwen3_tokenizer,
+):
+    """Coder streaming must reconstruct arguments correctly even when the
+    deltas arrive a single character at a time.
 
+    The XML parser's SAX-based streaming cannot tolerate ``chunk_size=1``
+    by design (an XML tag is not parseable until ``>`` arrives), so this
+    robustness test stays Coder-only.
+    """
+    request = ChatCompletionRequest(model="test", messages=[])
 
-def test_get_vllm_registry_structural_tag_returns_structural_tag(
-    qwen3_tool_parser: Qwen3CoderToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    request_tools = _as_chat_completion_tools(sample_tools)
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=request_tools,
-        tool_choice="auto",
-    )
-    tag = qwen3_tool_parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
+    template_text = """<tool_call>
+<function=example_function_name>
+<parameter=example_parameter_1>
+value_1
+</parameter>
+<parameter=example_parameter_2>
+This is the value for the second parameter
+that can span
+multiple lines
+</parameter>
+</function>
+</tool_call>"""
 
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=request_tools,
-        tool_choice="required",
-    )
-    tag = qwen3_tool_parser.get_structural_tag(req)
-    assert isinstance(tag, StructuralTag)
+    for chunk_size in [1, 3, 15, len(template_text)]:
+        parser = Qwen3CoderToolParser(qwen3_tokenizer, tools=None)
+
+        tool_states = {}
+        current_text = ""
+        previous_text = ""
+        ptr = 0
+
+        while ptr < len(template_text):
+            delta = template_text[ptr : ptr + chunk_size]
+            previous_text = current_text
+            current_text += delta
+            ptr += chunk_size
+
+            delta_message = parser.extract_tool_calls_streaming(
+                previous_text=previous_text,
+                current_text=current_text,
+                delta_text=delta,
+                previous_token_ids=[],
+                current_token_ids=[],
+                delta_token_ids=[],
+                request=request,
+            )
 
-    if request_tools:
-        tool = request_tools[0]
-        req = ChatCompletionRequest(
-            messages=[],
-            model="m",
-            tools=request_tools,
+            if delta_message and delta_message.tool_calls:
+                for tool_call in delta_message.tool_calls:
+                    idx = tool_call.index
+                    if idx not in tool_states:
+                        tool_states[idx] = {
+                            "id": None,
+                            "name": None,
+                            "arguments": "",
+                            "type": None,
+                        }
+                    if tool_call.id:
+                        tool_states[idx]["id"] = tool_call.id
+                    if tool_call.type:
+                        tool_states[idx]["type"] = tool_call.type
+                    if tool_call.function:
+                        if tool_call.function.name:
+                            tool_states[idx]["name"] = tool_call.function.name
+                        if tool_call.function.arguments is not None:
+                            tool_states[idx]["arguments"] += (
+                                tool_call.function.arguments
+                            )
+
+        assert 0 in tool_states, f"chunk_size={chunk_size}"
+        assert tool_states[0]["name"] == "example_function_name"
+        args = json.loads(tool_states[0]["arguments"])
+        assert args["example_parameter_1"] == "value_1"
+        assert args["example_parameter_2"] == (
+            "This is the value for the second parameter\nthat can span\nmultiple lines"
         )
-        req.tool_choice = ChatCompletionNamedToolChoiceParam(
-            function=ChatCompletionNamedFunction(name=tool.function.name)
-        )
-        tag = qwen3_tool_parser.get_structural_tag(req)
-        assert isinstance(tag, StructuralTag)
-
-
-@pytest.mark.parametrize("include_reasoning", [True, False])
-def test_adjust_request_auto_uses_vllm_registry_structural_tag(
-    monkeypatch: pytest.MonkeyPatch,
-    qwen3_tool_parser: Qwen3CoderToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-    include_reasoning: bool,
-) -> None:
-    monkeypatch.setattr(
-        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
-        True,
-    )
-    request_tools = _as_chat_completion_tools(sample_tools)
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=request_tools,
-        tool_choice="auto",
-        include_reasoning=include_reasoning,
-    )
-    out = qwen3_tool_parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
-    assert isinstance(out.structured_outputs.structural_tag, str)
-    loaded = json.loads(out.structured_outputs.structural_tag)
-    assert isinstance(loaded, dict)
-
-
-def test_adjust_request_required_prefers_structural_tag(
-    monkeypatch: pytest.MonkeyPatch,
-    qwen3_tool_parser: Qwen3CoderToolParser,
-    sample_tools: list[ChatCompletionToolsParam],
-) -> None:
-    monkeypatch.setattr(
-        "vllm.tool_parsers.abstract_tool_parser.VLLM_ENFORCE_STRICT_TOOL_CALLING",
-        True,
-    )
-    request_tools = _as_chat_completion_tools(sample_tools)
-    req = ChatCompletionRequest(
-        messages=[],
-        model="m",
-        tools=request_tools,
-        tool_choice="required",
-    )
-    out = qwen3_tool_parser.adjust_request(req)
-    assert out.structured_outputs is not None
-    assert out.structured_outputs.structural_tag is not None
diff --git a/tests/tool_parsers/test_qwen3xml_tool_parser.py b/tests/tool_parsers/test_qwen3xml_tool_parser.py
index 1ea9a1d65c04..c38268c62ec9 100644
--- a/tests/tool_parsers/test_qwen3xml_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3xml_tool_parser.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import json
 
 import pytest
 
@@ -8,6 +9,23 @@
     ToolParserTestConfig,
     ToolParserTests,
 )
+from tests.tool_parsers.test_qwen3_xml_coder_shared import (
+    stream_delta_message_generator,
+)
+from tests.tool_parsers.utils import run_tool_extraction_streaming
+from vllm.entrypoints.openai.chat_completion.protocol import (
+    ChatCompletionRequest,
+    ChatCompletionToolsParam,
+)
+from vllm.tokenizers import get_tokenizer
+from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
+
+MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"
+
+
+@pytest.fixture(scope="module")
+def qwen3_tokenizer():
+    return get_tokenizer(tokenizer_name=MODEL)
 
 
 class TestQwen3xmlToolParser(ToolParserTests):
@@ -54,19 +72,508 @@ def test_config(self) -> ToolParserTestConfig:
             single_tool_call_expected_args={"city": "Tokyo"},
             parallel_tool_calls_count=2,
             parallel_tool_calls_names=["get_weather", "get_time"],
-            # xfail markers - Qwen3XML has systematic streaming issues
-            xfail_streaming={
-                "test_single_tool_call_simple_args": (
-                    "Qwen3XML streaming has systematic issues"
-                ),
-                "test_parallel_tool_calls": "Qwen3XML streaming has systematic issues",
-                "test_various_data_types": "Qwen3XML streaming has systematic issues",
-                "test_empty_arguments": "Qwen3XML streaming has systematic issues",
-                "test_surrounding_text": "Qwen3XML streaming has systematic issues",
-                "test_escaped_strings": "Qwen3XML streaming has systematic issues",
-                "test_streaming_reconstruction": (
-                    "Qwen3XML streaming reconstruction has known issues"
-                ),
-            },
             supports_typed_arguments=False,
         )
+
+    def test_qwen3xml_async_streaming_free_text(self, qwen3_tokenizer):
+        parser = Qwen3XMLToolParser(qwen3_tokenizer)
+
+        # 1. First tool call
+        # 2. Free text
+        # 3. Second tool call
+        text_to_stream = (
+            "<tool_call>\n<function=get_weather>\n<parameter=city>Paris</parameter>\n</function>\n</tool_call>"
+            "\nNext, I will check the weather for London:\n"
+            "<tool_call>\n<function=get_weather>\n<parameter=city>London</parameter>\n</function>\n</tool_call>"
+        )
+
+        request = ChatCompletionRequest(messages=[], model="test")
+        emitted_messages = []
+        previous_text = ""
+        previous_tokens = []
+        token_ids = qwen3_tokenizer.encode(text_to_stream, add_special_tokens=False)
+
+        for i in range(1, len(token_ids) + 1):
+            current_token_ids = token_ids[:i]
+            current_text = qwen3_tokenizer.decode(current_token_ids)
+            delta_text = current_text[len(previous_text) :]
+            token_delta = current_token_ids[len(previous_tokens) :]
+
+            delta = parser.extract_tool_calls_streaming(
+                previous_text,
+                current_text,
+                delta_text,
+                previous_tokens,
+                current_token_ids,
+                token_delta,
+                request,
+            )
+            if delta is not None:
+                emitted_messages.append(delta)
+
+            previous_text = current_text
+            previous_tokens = current_token_ids
+
+        # Check that the free text is emitted BEFORE London's arguments are emitted.
+        found_early = False
+        accumulated_content = ""
+        for i, msg in enumerate(emitted_messages):
+            if msg.content:
+                accumulated_content += msg.content
+
+            if "Next, I will check the weather for London" in accumulated_content:
+                # Check if we already saw "London" in any previous or
+                # current tool call arguments
+                is_london_emitted = any(
+                    tc.function.arguments and "London" in tc.function.arguments
+                    for m in emitted_messages[: i + 1]
+                    if m.tool_calls
+                    for tc in m.tool_calls
+                )
+                if not is_london_emitted:
+                    found_early = True
+                break
+
+        assert found_early, (
+            "Free text between tool calls should be emitted as soon as the "
+            "second tool call starts, not delayed."
+        )
+
+    def test_qwen3xml_streaming_text_after_tool_call(self, qwen3_tokenizer):
+        parser = Qwen3XMLToolParser(qwen3_tokenizer)
+
+        # Tool call followed by free text
+        text_to_stream = (
+            "<tool_call>\n<function=get_weather>\n<parameter=city>Paris</parameter>\n</function>\n</tool_call>"
+            "\nI hope this helps!"
+        )
+
+        request = ChatCompletionRequest(messages=[], model="test")
+        emitted_messages = []
+        previous_text = ""
+        previous_tokens = []
+        token_ids = qwen3_tokenizer.encode(text_to_stream, add_special_tokens=False)
+
+        for i in range(1, len(token_ids) + 1):
+            current_token_ids = token_ids[:i]
+            current_text = qwen3_tokenizer.decode(current_token_ids)
+            delta_text = current_text[len(previous_text) :]
+            token_delta = current_token_ids[len(previous_tokens) :]
+
+            delta = parser.extract_tool_calls_streaming(
+                previous_text,
+                current_text,
+                delta_text,
+                previous_tokens,
+                current_token_ids,
+                token_delta,
+                request,
+            )
+            if delta is not None:
+                emitted_messages.append(delta)
+
+            previous_text = current_text
+            previous_tokens = current_token_ids
+
+        # Aggregate all emitted content
+        all_content = "".join([m.content for m in emitted_messages if m.content])
+
+        assert "I hope this helps!" in all_content, (
+            "Free text after the last tool call should be emitted."
+        )
+
+
+def test_qwen3xml_streaming_trailing_text_after_literal_close_in_value(
+    qwen3_tokenizer,
+):
+    """XML parser: a tool_call's parameter value contains a literal
+    ``</tool_call>``.  After the real ``</tool_call>``, trailing free
+    text must still be emitted.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "write_file",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "path": {"type": "string"},
+                        "content": {"type": "string"},
+                    },
+                },
+            },
+        )
+    ]
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    deltas = [
+        # Tool 1 with literal </tool_call> embedded in 'content'.
+        "<tool_call>\n<function=write_file>\n"
+        "<parameter=path>foo.py</parameter>\n"
+        "<parameter=content>\n"
+        "doc = '<tool_call>example</tool_call>'\n"
+        "</parameter>\n</function>\n</tool_call>",
+        # Trailing text in a separate delta.
+        "\nDone, file written!",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        parser, deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1, (
+        f"Expected 1 tool call, got {len(reconstructor.tool_calls)}"
+    )
+    assert "Done, file written!" in reconstructor.other_content, (
+        f"Trailing text after a tool with literal </tool_call> in its "
+        f"value was dropped. Got content: {reconstructor.other_content!r}"
+    )
+
+
+def test_qwen3xml_streaming_python_none_int_char_by_char(qwen3_tokenizer):
+    """Streaming a nullable INTEGER param value of "None" (Qwen3.5 style)
+    char-by-char must produce VALID JSON.  The XML parser's incremental
+    char path used to emit "Non" then a "l" delta computed from the diff
+    between "Non" and "null", giving the cumulative invalid string
+    "Nonl".  The fix defers int/float conversion just like bool/object
+    so the full value is parsed at </parameter> close.
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "set_count",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "count": {
+                            "anyOf": [
+                                {"type": "integer"},
+                                {"type": "null"},
+                            ],
+                        },
+                    },
+                },
+            },
+        )
+    ]
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    # The value arrives char-by-char (tags stay whole): worst-case slow streaming.
+    char_deltas = [
+        "<tool_call>\n",
+        "<function=set_count>\n",
+        "<parameter=count>",
+        "\n",
+        "N",
+        "o",
+        "n",
+        "e",
+        "\n",
+        "</parameter>\n",
+        "</function>\n",
+        "</tool_call>",
+    ]
+    reconstructor = run_tool_extraction_streaming(
+        parser, char_deltas, request, assert_one_tool_per_delta=False
+    )
+    assert len(reconstructor.tool_calls) == 1
+    raw = reconstructor.tool_calls[0].function.arguments
+    args = json.loads(raw)  # must be valid JSON
+    assert args["count"] is None, (
+        f"streaming nullable int 'None' produced invalid JSON or wrong "
+        f"value. Raw: {raw!r}"
+    )
+
+
+def test_qwen36_xml_streaming_double_close_brace(qwen3_tokenizer):
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            },
+        )
+    ]
+
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    deltas = [
+        "<tool_call>",
+        "\n<function=get_weather>",
+        "\n<parameter=city>\nDallas\n</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        deltas,
+        request,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert len(reconstructor.tool_calls) == 1
+    full_args = reconstructor.tool_calls[0].function.arguments
+
+    assert not full_args.endswith("}}"), (
+        f"XML streaming parser emitted double closing brace: {full_args!r}. "
+        "parse_single_streaming_chunks fallback called _end_element('function') twice."
+    )
+    args = json.loads(full_args)
+    assert args == {"city": "Dallas"}
+
+
+def test_xml_streaming_parallel_tool_calls_preformed_chunks(qwen3_tokenizer):
+    """
+    Two back-to-back tool calls arriving as pre-formed, tag-aligned deltas
+    must both be parsed.  Token-by-token streaming rarely produces such deltas
+    (the tokenizer splits XML tags); speculative-decoding flushes CAN.
+    """
+
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "get_weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            },
+        )
+    ]
+
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    deltas = [
+        "<tool_call>",
+        "\n<function=get_weather>",
+        "\n<parameter=city>Paris</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+        "<tool_call>",
+        "\n<function=get_weather>",
+        "\n<parameter=city>London</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        deltas,
+        request,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert len(reconstructor.tool_calls) == 2, (
+        f"Expected 2 tool calls, got {len(reconstructor.tool_calls)}"
+    )
+
+    args0 = json.loads(reconstructor.tool_calls[0].function.arguments)
+    args1 = json.loads(reconstructor.tool_calls[1].function.arguments)
+
+    assert reconstructor.tool_calls[0].function.name == "get_weather"
+    assert reconstructor.tool_calls[1].function.name == "get_weather"
+    assert args0 == {"city": "Paris"}, f"First call args wrong: {args0!r}"
+    assert args1 == {"city": "London"}, f"Second call args wrong: {args1!r}"
+
+
+# ---------------------------------------------------------------------------
+# XML-specific streaming bugs (Coder parser is not affected)
+# ---------------------------------------------------------------------------
+
+
+def test_xml_streaming_boolean_true_not_false(qwen3_tokenizer):
+    """
+    Bug B: In streaming mode, a boolean parameter with value "true" is
+    streamed as "false".
+
+    Root cause: When "true" arrives character by character:
+      - 't' → _convert_param_value("t", "boolean") = False → emits "false"
+      - 'r','u','e' → no new delta (output_data[len("false"):] = "")
+    Final accumulated arguments contain "false" instead of "true".
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "set_flag",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "enabled": {"type": "boolean"},
+                    },
+                },
+            },
+        )
+    ]
+
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    # Feed the value character-by-character to trigger the accumulation bug;
+    # each value chunk simulates a single-character token (tags arrive whole).
+    char_deltas = [
+        "<tool_call>",
+        "\n<function=set_flag>",
+        "\n<parameter=enabled>",
+        "t",  # ← first char triggers False → emits "false"
+        "r",
+        "u",
+        "e",  # ← full "true" but delta = "true"[5:] = ""
+        "</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        char_deltas,
+        request,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+
+    assert args["enabled"] is True, (
+        f"Boolean streaming bug: expected True, got {args['enabled']!r}. "
+        f"First char 't' emits 'false'; subsequent chars emit nothing; "
+        f"final value is 'false' even though the model said 'true'."
+    )
+
+
+def test_xml_streaming_string_null_last_char_not_dropped(qwen3_tokenizer):
+    """
+    Bug A (streaming variant): String parameter with value "null" loses
+    the last character 'l' when tokens arrive one by one.
+
+    Root cause: Accumulating 'n','u','l' emits correctly, but on the
+    fourth char 'l' the full value is "null" →
+    _convert_param_value("null", "string") → None →
+    _convert_for_json_streaming(None, "string") → "" → delta = ""[3:] = "".
+    The closing quote is then emitted, yielding "nul" not "null".
+    """
+    tools = [
+        ChatCompletionToolsParam(
+            type="function",
+            function={
+                "name": "search",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {"type": "string"},
+                    },
+                },
+            },
+        )
+    ]
+
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=tools)
+    request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
+
+    char_deltas = [
+        "<tool_call>",
+        "\n<function=search>",
+        "\n<parameter=query>",
+        "n",
+        "u",
+        "l",
+        "l",  # ← triggers _convert_param_value("null",…) = None → nothing emitted
+        "</parameter>",
+        "\n</function>",
+        "\n</tool_call>",
+    ]
+
+    reconstructor = run_tool_extraction_streaming(
+        parser,
+        char_deltas,
+        request,
+        assert_one_tool_per_delta=False,
+    )
+
+    assert len(reconstructor.tool_calls) == 1
+    args = json.loads(reconstructor.tool_calls[0].function.arguments)
+
+    assert "query" in args
+    assert args["query"] == "null", (
+        f"String 'null' streaming bug: last 'l' was dropped. "
+        f"Got: {args['query']!r}. "
+        f"When full value reaches 'null', _convert_param_value returns None "
+        f"and _convert_for_json_streaming(None, 'string') returns '', "
+        f"so the final delta is empty and the 'l' is never emitted."
+    )
+
+
+def test_xml_streaming_missing_opening_tool_call_tag(qwen3_tokenizer):
+    """The XML streaming parser must recover when the model emits a tool
+    call without the leading ``<tool_call>`` tag — i.e. directly with
+    ``<function=...>``.  The Coder parser does not support this in
+    streaming mode, so this regression stays XML-specific.
+    """
+    parser = Qwen3XMLToolParser(qwen3_tokenizer, tools=None)
+
+    model_output = """I'll check the weather for you.
+
+<function=get_current_weather>
+<parameter=city>
+Dallas
+</parameter>
+<parameter=state>
+TX
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>"""
+
+    request = ChatCompletionRequest(model=MODEL, messages=[])
+    other_content = ""
+    tool_states: dict = {}
+
+    for delta_message in stream_delta_message_generator(
+        parser, qwen3_tokenizer, model_output, request
+    ):
+        if delta_message.content:
+            other_content += delta_message.content
+        if delta_message.tool_calls:
+            for tool_call in delta_message.tool_calls:
+                idx = tool_call.index
+                if idx not in tool_states:
+                    tool_states[idx] = {
+                        "id": None,
+                        "name": None,
+                        "arguments": "",
+                        "type": None,
+                    }
+                if tool_call.id:
+                    tool_states[idx]["id"] = tool_call.id
+                if tool_call.type:
+                    assert tool_call.type == "function"
+                    tool_states[idx]["type"] = tool_call.type
+                if tool_call.function:
+                    if tool_call.function.name:
+                        tool_states[idx]["name"] = tool_call.function.name
+                    if tool_call.function.arguments is not None:
+                        tool_states[idx]["arguments"] += tool_call.function.arguments
+
+    assert "I'll check the weather for you." in other_content
+    assert len(tool_states) == 1
+    state = tool_states[0]
+    assert state["id"] is not None
+    assert state["type"] == "function"
+    assert state["name"] == "get_current_weather"
+    args = json.loads(state["arguments"])
+    assert args["city"] == "Dallas"
+    assert args["state"] == "TX"
+    assert args["unit"] == "fahrenheit"
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 7457590c5ac0..a3875118861d 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import ast
+import contextlib
 import json
 import uuid
 from collections.abc import Sequence
@@ -29,11 +31,7 @@
     get_enable_structured_outputs_in_reasoning,
     get_model_structural_tag,
 )
-from vllm.tool_parsers.utils import (
-    coerce_to_schema_type,
-    extract_types_from_schema,
-    find_tool_properties,
-)
+from vllm.tool_parsers.utils import find_tool_properties, partial_tag_overlap
 
 logger = init_logger(__name__)
 
@@ -119,16 +117,464 @@ def _reset_streaming_state(self):
         # Store accumulated parameters for type conversion
         self.accumulated_params = {}
         self.streaming_request = None
+        self._sent_content_idx = 0
 
     def _convert_param_value(
         self, param_value: str, param_name: str, param_config: dict, func_name: str
     ) -> Any:
         """Convert parameter value based on its type in the schema."""
-        if not isinstance(param_value, str):
+        # Undeclared parameters are returned unchanged; a value that cannot
+        # be converted degrades to the raw string (``False`` for booleans).
+        if param_name not in param_config:
+            if param_config != {}:
+                logger.debug(
+                    "Parsed parameter '%s' is not defined in the tool "
+                    "parameters for tool '%s', directly returning the "
+                    "string value.",
+                    param_name,
+                    func_name,
+                )
+            return param_value
+
+        # ``allows_null`` is True when the schema explicitly admits a
+        # null value (either via ``"type": "null"`` or in an ``anyOf``
+        # union).  A nullable parameter must convert the literal
+        # ``"null"`` / ``"None"`` to JSON null even when the primary
+        # type is ``string`` — otherwise a Qwen3.5-trained model that
+        # emits the Python ``None`` literal leaves the client with the
+        # string ``"None"`` for a nullable optional.
+        allows_null = False
+        if (
+            isinstance(param_config[param_name], dict)
+            and "type" in param_config[param_name]
+        ):
+            param_type = str(param_config[param_name]["type"]).strip().lower()
+            allows_null = param_type == "null"
+        elif (
+            isinstance(param_config[param_name], dict)
+            and "anyOf" in param_config[param_name]
+        ):
+            # Extract the first non-null type from the anyOf list so that
+            # nullable schemas like {"anyOf": [{"type": "string"},
+            # {"type": "null"}]} behave as "string", not "object".
+            param_type = "string"
+            picked = False
+            for option in param_config[param_name]["anyOf"]:
+                if isinstance(option, dict) and "type" in option:
+                    opt_type = str(option["type"]).strip().lower()
+                    if opt_type == "null":
+                        allows_null = True
+                    elif not picked:
+                        param_type = opt_type
+                        picked = True
+        else:
+            param_type = "string"
+        # Nullable schemas: recognise "null" / "None" up front so a
+        # string-typed nullable still maps to JSON null.
+        if allows_null and param_value.lower() in ("null", "none"):
+            return None
+        # String type takes precedence: preserve the raw value (including
+        # the literal "null") rather than converting it to Python None.
+        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
             return param_value
-        param_schema = param_config.get(param_name, {})
-        param_types = extract_types_from_schema(param_schema)
-        return coerce_to_schema_type(param_value, param_types)
+        # For non-string types, "null" maps to JSON null.  Also accept
+        # the Python literal "None" so that Qwen3.5-trained models — whose
+        # chat template renders null args via ``| string`` (yielding the
+        # literal "None" in the prompt) — round-trip nullable values
+        # correctly.
+        if param_value.lower() in ("null", "none"):
+            return None
+        if param_type.startswith(("int", "uint", "long", "short", "unsigned")):
+            try:
+                return int(param_value)
+            except (ValueError, TypeError):
+                logger.debug(
+                    "Parsed value '%s' of parameter '%s' is not an "
+                    "integer in tool '%s', degenerating to string.",
+                    param_value,
+                    param_name,
+                    func_name,
+                )
+                return param_value
+        elif param_type.startswith("num") or param_type.startswith("float"):
+            # ``float`` accepts "inf"/"nan"/"1e999", but ``int()`` of those
+            # raises OverflowError (inf) or ValueError (nan).  Catch both so
+            # a non-finite number degrades to the raw string like any other
+            # bad value instead of escaping to the caller.
+            try:
+                float_param_value = float(param_value)
+                return (
+                    float_param_value
+                    if float_param_value - int(float_param_value) != 0
+                    else int(float_param_value)
+                )
+            except (ValueError, TypeError, OverflowError):
+                logger.debug(
+                    "Parsed value '%s' of parameter '%s' is not a float "
+                    "in tool '%s', degenerating to string.",
+                    param_value,
+                    param_name,
+                    func_name,
+                )
+                return param_value
+        elif param_type in ["boolean", "bool", "binary"]:
+            param_value = param_value.lower()
+            if param_value not in ["true", "false"]:
+                logger.debug(
+                    "Parsed value '%s' of parameter '%s' is not a boolean "
+                    "(`true` or `false`) in tool '%s', degenerating to "
+                    "false.",
+                    param_value,
+                    param_name,
+                    func_name,
+                )
+            return param_value == "true"
+        else:
+            is_container_type = (
+                param_type in ["object", "array", "arr"]
+                or param_type.startswith("dict")
+                or param_type.startswith("list")
+            )
+            if is_container_type:
+                try:
+                    parsed = json.loads(param_value)
+                    # A model trained with a buggy template
+                    # (json.dumps(str(dict))) may output a JSON-encoded
+                    # Python repr like "{'k': 'v'}". json.loads returns a
+                    # string in that case — try one more parse.
+                    if isinstance(parsed, str):
+                        with contextlib.suppress(ValueError, SyntaxError, TypeError):
+                            parsed = ast.literal_eval(parsed)
+                    return parsed
+                except (json.JSONDecodeError, TypeError, ValueError):
+                    logger.debug(
+                        "Parsed value '%s' of parameter '%s' cannot be "
+                        "parsed with json.loads in tool '%s', will try "
+                        "other methods to parse it.",
+                        param_value,
+                        param_name,
+                        func_name,
+                    )
+            try:
+                param_value = ast.literal_eval(param_value)  # safer
+                # Same double-decode for container types whose raw text
+                # had no JSON outer layer (e.g. bare Python repr
+                # "{'k': 'v'}").
+                if is_container_type and isinstance(param_value, str):
+                    with contextlib.suppress(ValueError, SyntaxError, TypeError):
+                        param_value = ast.literal_eval(param_value)
+            except (ValueError, SyntaxError, TypeError):
+                logger.debug(
+                    "Parsed value '%s' of parameter '%s' cannot be "
+                    "converted via Python `ast.literal_eval()` in tool "
+                    "'%s', degenerating to string.",
+                    param_value,
+                    param_name,
+                    func_name,
+                )
+            return param_value
+
+    def _next_structural_param_start(
+        self,
+        text: str,
+        start_pos: int = 0,
+        valid_param_names: set[str] | None = None,
+    ) -> int:
+        """Return the index in ``text`` of the next structural
+        ``<parameter=NAME>`` at or after ``start_pos``, or -1 if none.
+        Structural means at index 0 of ``text`` or right after ``\\n``.
+        If valid_param_names is given, NAME must also be in that set.
+        """
+        ni = start_pos
+        prefix_len = len(self.parameter_prefix)
+        while True:
+            ni = text.find(self.parameter_prefix, ni)
+            if ni == -1:
+                return -1
+            if ni == 0 or text[ni - 1] == "\n":
+                if valid_param_names is not None:
+                    name_end = text.find(">", ni + prefix_len)
+                    if (
+                        name_end != -1
+                        and text[ni + prefix_len : name_end] in valid_param_names
+                    ):
+                        return ni
+                    ni += 1
+                    continue
+                return ni
+            ni += 1
+
+    def _find_true_function_end(self, text: str) -> int:
+        """Return the index of the first ``</function>`` in text that is
+        followed, after optional whitespace, by ``</tool_call>`` or end of
+        string, or -1 if none.  Heuristic: a literal ``</function>`` in a
+        parameter value is skipped unless it also passes that lookahead.
+        """
+        search_pos = 0
+        while True:
+            idx = text.find(self.function_end_token, search_pos)
+            if idx == -1:
+                return -1
+            after = text[idx + len(self.function_end_token) :]
+            stripped = after.lstrip()
+            if stripped == "" or stripped.startswith(self.tool_call_end_token):
+                return idx
+            search_pos = idx + len(self.function_end_token)
+
+    def _scan_to_structural_function_end(
+        self,
+        after_func_open: str,
+        valid_param_names: set[str] | None = None,
+    ) -> int:
+        """Scan a function body — text immediately following the closing
+        ``>`` of ``<function=NAME>`` — by walking through structural
+        ``<parameter=NAME>...</parameter>`` blocks and return the index of
+        the structural ``</function>`` in ``after_func_open``.
+
+        This is more robust than ``_find_true_function_end`` when the
+        parameter value embeds a complete literal ``<tool_call>...
+        </function>\\n</tool_call>`` block: that nested ``</function>``
+        is followed by ``</tool_call>`` and would pass the lookahead
+        heuristic, but it is INSIDE a parameter and must be skipped.
+
+        Handles a "missing </parameter>" malformation by treating the
+        next structural ``<parameter=NAME>`` as an implicit end; with a
+        schema, NAME must be a schema name not yet seen in this body.
+
+        Returns -1 if no structural ``</function>`` is found (yet).
+        """
+        pos = 0
+        n = len(after_func_open)
+        seen: set[str] = set()
+        while pos < n:
+            # Skip whitespace between params
+            while pos < n and after_func_open[pos] in " \t\n\r":
+                pos += 1
+            if pos >= n:
+                return -1
+            if after_func_open[pos:].startswith(self.function_end_token):
+                return pos
+            if not after_func_open[pos:].startswith(self.parameter_prefix):
+                # Unexpected token before </function>; fall back to the
+                # legacy heuristic on the rest of the text.
+                rest_offset = self._find_true_function_end(after_func_open[pos:])
+                return pos + rest_offset if rest_offset != -1 else -1
+            name_end = after_func_open.find(">", pos + len(self.parameter_prefix))
+            if name_end == -1:
+                return -1
+            param_name = after_func_open[pos + len(self.parameter_prefix) : name_end]
+            value_start = name_end + 1
+            if value_start < n and after_func_open[value_start] == "\n":
+                value_start += 1
+            param_end = self._find_true_param_end(
+                after_func_open[value_start:],
+                valid_param_names,
+                require_lookahead=True,
+            )
+            if param_end == -1:
+                # Missing </parameter> malformation: try the next
+                # structural <parameter=NAME> with NAME unseen so far
+                # as the implicit end.
+                unseen: set[str] | None = (
+                    (valid_param_names - seen - {param_name})
+                    if valid_param_names is not None
+                    else None
+                )
+                implicit_end = self._next_structural_param_start(
+                    after_func_open[value_start:], 0, unseen
+                )
+                if implicit_end == -1:
+                    return -1
+                pos = value_start + implicit_end
+                seen.add(param_name)
+                continue
+            seen.add(param_name)
+            pos = value_start + param_end + len(self.parameter_end_token)
+        return -1
+
+    def _advance_to_next_tool(self, current_text: str) -> None:
+        """Advance streaming state to the next tool call.
+
+        Moves _sent_content_idx past the current tool's closing tag when
+        one is found (never backwards), then resets the per-tool state.
+        Called both on normal delta boundaries and during speculative-
+        decoding recursion when multiple complete tool calls arrive in one
+        delta.
+
+        Uses STRUCTURAL ``</tool_call>`` positions so a literal
+        ``</tool_call>`` embedded in a parameter value (e.g. a code
+        snippet) does not move ``_sent_content_idx`` to the wrong place.
+        """
+        end_positions = self._structural_tool_call_end_positions(current_text)
+        target = self.current_tool_index
+        if target < len(end_positions):
+            self._sent_content_idx = max(
+                self._sent_content_idx,
+                end_positions[target] + len(self.tool_call_end_token),
+            )
+
+        self.current_tool_index += 1
+        self.header_sent = False
+        self.param_count = 0
+        self.json_started = False
+        self.json_closed = False
+        self.accumulated_params = {}
+        self.is_tool_call_started = False
+
+    def _find_true_tool_call_end(self, text: str) -> int:
+        """Return the index of the first ``</tool_call>`` in text that is
+        followed, after optional whitespace, by another ``<tool_call>``
+        or by end of string; return -1 if there is none.
+        """
+        search_pos = 0
+        while True:
+            idx = text.find(self.tool_call_end_token, search_pos)
+            if idx == -1:
+                return -1
+            after = text[idx + len(self.tool_call_end_token) :]
+            stripped = after.lstrip()
+            if stripped == "" or stripped.startswith(self.tool_call_start_token):
+                return idx
+            search_pos = idx + len(self.tool_call_end_token)
+
+    def _structural_tool_call_end_positions(self, text: str) -> list[int]:
+        """Return positions of every STRUCTURAL ``</tool_call>`` in text.
+
+        Walks each ``<tool_call>...</tool_call>`` top-level block by
+        following ``<function=NAME>``, scanning the body via
+        ``_scan_to_structural_function_end`` (which steps over parameter
+        values that may contain literal ``<tool_call>``, ``<function=...>``,
+        ``</function>`` or ``</tool_call>`` strings), then matching the
+        trailing ``</tool_call>``.
+
+        Stops and returns the positions found so far when the walker
+        cannot determine a structural close (incomplete or malformed).
+        """
+        positions: list[int] = []
+        pos = 0
+        n = len(text)
+        while pos < n:
+            tc_start = text.find(self.tool_call_start_token, pos)
+            if tc_start == -1:
+                break
+            body_start = tc_start + len(self.tool_call_start_token)
+            func_open = text.find(self.tool_call_prefix, body_start)
+            if func_open == -1:
+                break
+            name_end = text.find(">", func_open + len(self.tool_call_prefix))
+            if name_end == -1:
+                break
+            func_name = text[func_open + len(self.tool_call_prefix) : name_end]
+            valid_params: set[str] | None = None
+            if self.tools:
+                cfg = find_tool_properties(self.tools, func_name)
+                if cfg:
+                    valid_params = set(cfg.keys())
+            body_after_name = text[name_end + 1 :]
+            func_end_rel = self._scan_to_structural_function_end(
+                body_after_name, valid_params
+            )
+            if func_end_rel == -1:
+                # Body incomplete; the structural </tool_call> is not
+                # yet known.  Stop walking — DO NOT fall back to the
+                # legacy heuristic for the rest of the text, because a
+                # literal </tool_call> embedded in an unfinished
+                # parameter would be erroneously treated as structural.
+                break
+            func_end_abs = (name_end + 1) + func_end_rel
+            after = text[func_end_abs + len(self.function_end_token) :]
+            i = 0
+            while i < len(after) and after[i] in " \t\n\r":
+                i += 1
+            if not after[i:].startswith(self.tool_call_end_token):
+                break
+            tc_end_pos = func_end_abs + len(self.function_end_token) + i
+            positions.append(tc_end_pos)
+            pos = tc_end_pos + len(self.tool_call_end_token)
+        return positions
+
+    def _find_true_param_end(
+        self,
+        value_text: str,
+        valid_param_names: set[str] | None = None,
+        require_lookahead: bool = False,
+    ) -> int:
+        """Find the true end of a parameter value in value_text.
+
+        A ``</parameter>`` is structural only when it is followed by
+        another structural delimiter (schema-known ``<parameter=NAME>``,
+        ``</function>``, ``</tool_call>``) or — unless require_lookahead —
+        end-of-string.  Nested ``<parameter=NAME>`` opens are tracked
+        for depth REGARDLESS of whether NAME is in the schema: a
+        literal nested tool_call may use NAMEs that are not in the
+        outer tool's schema, but its literal ``</parameter>`` still
+        pairs with the literal open and must not be mistaken for a
+        structural close.
+
+        Returns the index of the true ``</parameter>`` in value_text, or
+        -1 if incomplete.
+        """
+        depth = 0
+        pos = 0
+        param_prefix_len = len(self.parameter_prefix)
+        param_end_len = len(self.parameter_end_token)
+
+        while pos < len(value_text):
+            # Use UNFILTERED structural opens for depth tracking so that
+            # a literal ``<parameter=UNKNOWN>`` (NAME not in the outer
+            # schema) still increments depth and its matching literal
+            # ``</parameter>`` is balanced — otherwise that close would
+            # appear unmatched and pass the structural lookahead.
+            next_open = self._next_structural_param_start(value_text, pos, None)
+            next_close = value_text.find(self.parameter_end_token, pos)
+            if next_close == -1:
+                return -1
+
+            if next_open != -1 and next_open < next_close:
+                depth += 1
+                pos = next_open + param_prefix_len
+            elif depth == 0:
+                after = value_text[next_close + param_end_len :]
+                stripped = after.lstrip()
+                structural_next_param = False
+                if stripped.startswith(self.parameter_prefix):
+                    if valid_param_names is not None:
+                        name_start = len(self.parameter_prefix)
+                        name_end = stripped.find(">", name_start)
+                        if name_end != -1:
+                            structural_next_param = (
+                                stripped[name_start:name_end] in valid_param_names
+                            )
+                    else:
+                        structural_next_param = True
+                if (
+                    (stripped == "" and not require_lookahead)
+                    or structural_next_param
+                    or stripped.startswith(self.function_end_token)
+                    or stripped.startswith(self.tool_call_end_token)
+                ):
+                    return next_close
+                pos = next_close + param_end_len
+            else:
+                depth -= 1
+                pos = next_close + param_end_len
+
+        return -1
+
+    @staticmethod
+    def _is_valid_function_name(name: str) -> bool:
+        """Return True when ``name`` looks like a real function identifier
+        and not a stray template token, malformed tag, or freeform text.
+
+        Rejects empty names and names containing ``{``, ``}``, ``<``,
+        ``>``, a quote (``"`` or ``'``), space, tab, CR or LF.  Any other
+        character passes, e.g. dashes (``max-retries``), dots
+        (``user.name``), slashes (``namespace/tool``), Unicode letters.
+        """
+        if not name:
+            return False
+        forbidden = set("{}<>\"' \t\n\r")
+        return not any(c in forbidden for c in name)
 
     def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None:
         # Extract function name
@@ -137,13 +583,59 @@ def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None:
         if end_index == -1:
             return None
         function_name = function_call_str[:end_index]
+        # Reject phantom tool calls produced when the model writes an
+        # unrendered Jinja template or pseudo-XML in its response (e.g.
+        # ``<function={{ tc.name }}>``).  Surfacing such names as real
+        # tool calls causes "tool not found" errors at the client and
+        # makes agents loop.
+        if not self._is_valid_function_name(function_name):
+            return None
         param_config = find_tool_properties(self.tools, function_name)
+        valid_param_names: set[str] | None = (
+            set(param_config.keys()) if param_config else None
+        )
         parameters = function_call_str[end_index + 1 :]
-        param_dict = {}
-        for match_text in self.tool_call_parameter_regex.findall(parameters):
-            idx = match_text.index(">")
-            param_name = match_text[:idx]
-            param_value = str(match_text[idx + 1 :])
+        param_dict: dict = {}
+        pos = 0
+        while True:
+            # Find next structural <parameter=NAME> at the top level.  We
+            # do NOT filter the outer search by schema: callers may
+            # legitimately send a parameter whose name is not declared
+            # in the schema (e.g. renamed fields).  Schema filtering is
+            # applied only when scanning INSIDE a parameter value, to
+            # disambiguate real nested delimiters from literal text.
+            param_start = self._next_structural_param_start(parameters, pos, None)
+            if param_start == -1:
+                break
+            name_start = param_start + len(self.parameter_prefix)
+            name_end = parameters.find(">", name_start)
+            if name_end == -1:
+                break
+            param_name = parameters[name_start:name_end]
+            value_text = parameters[name_end + 1 :]
+
+            param_end = self._find_true_param_end(value_text, valid_param_names)
+            if param_end == -1:
+                # No true </parameter> found (malformed XML or incomplete).
+                # Fallback 1: next structural <parameter= acts as implicit end.
+                next_struct_param = self._next_structural_param_start(
+                    value_text, 0, valid_param_names
+                )
+                if next_struct_param != -1:
+                    param_value = value_text[:next_struct_param]
+                    pos = (name_end + 1) + next_struct_param
+                else:
+                    # Fallback 2: use structural </function> boundary or end
+                    func_end = self._find_true_function_end(value_text)
+                    if func_end != -1:
+                        param_value = value_text[:func_end]
+                    else:
+                        param_value = value_text
+                    pos = len(parameters)
+            else:
+                param_value = value_text[:param_end]
+                pos = (name_end + 1) + param_end + len(self.parameter_end_token)
+
             # Remove prefix and trailing \n
             if param_value.startswith("\n"):
                 param_value = param_value[1:]
@@ -161,23 +653,79 @@ def _parse_xml_function_call(self, function_call_str: str) -> ToolCall | None:
         )
 
     def _get_function_calls(self, model_output: str) -> list[str]:
-        # Find all tool calls
-        matched_ranges = self.tool_call_regex.findall(model_output)
-        raw_tool_calls = [
-            match[0] if match[0] else match[1] for match in matched_ranges
-        ]
+        # Split on structural delimiters: a real </tool_call> is followed
+        # (after optional whitespace) by another <tool_call> or end-of-text,
+        # so a literal </tool_call> inside a parameter value is skipped.  If
+        # no such close exists, the rest of the output is one tool call.
+        raw_tool_calls: list[str] = []
+        search_pos = 0
+        while True:
+            tc_start = model_output.find(self.tool_call_start_token, search_pos)
+            if tc_start == -1:
+                break
+            after_open = model_output[tc_start + len(self.tool_call_start_token) :]
+            tc_end = -1
+            inner_search = 0
+            while True:
+                idx = after_open.find(self.tool_call_end_token, inner_search)
+                if idx == -1:
+                    tc_end = -1
+                    break
+                after_close = after_open[idx + len(self.tool_call_end_token) :]
+                stripped = after_close.lstrip()
+                if stripped == "" or stripped.startswith(self.tool_call_start_token):
+                    tc_end = idx
+                    break
+                inner_search = idx + len(self.tool_call_end_token)
+            if tc_end == -1:
+                raw_tool_calls.append(after_open)
+                break
+            raw_tool_calls.append(after_open[:tc_end])
+            search_pos = (
+                tc_start
+                + len(self.tool_call_start_token)
+                + tc_end
+                + len(self.tool_call_end_token)
+            )
 
         # Back-off strategy if no tool_call tags found
         if len(raw_tool_calls) == 0:
             raw_tool_calls = [model_output]
 
-        raw_function_calls = []
+        # Use a parameter-aware walk to find the structural </function>:
+        # when the value of a parameter embeds a complete literal
+        # ``<tool_call>...</function>\n</tool_call>`` block, the nested
+        # ``</function>`` is followed by ``</tool_call>`` and would pass
+        # the simple "followed by </tool_call>" lookahead.  Walking the
+        # body parameter-by-parameter with ``_find_true_param_end``
+        # correctly steps over the literal.
+        function_calls: list[str] = []
         for tool_call in raw_tool_calls:
-            raw_function_calls.extend(self.tool_call_function_regex.findall(tool_call))
-
-        function_calls = [
-            match[0] if match[0] else match[1] for match in raw_function_calls
-        ]
+            func_start = tool_call.find(self.tool_call_prefix)
+            if func_start == -1:
+                continue
+            after_func_open = tool_call[func_start + len(self.tool_call_prefix) :]
+            name_end = after_func_open.find(">")
+            valid_param_names: set[str] | None = None
+            body_start = 0
+            if name_end != -1:
+                func_name = after_func_open[:name_end]
+                cfg = find_tool_properties(self.tools, func_name)
+                if cfg:
+                    valid_param_names = set(cfg.keys())
+                body_start = name_end + 1
+            scan_end = self._scan_to_structural_function_end(
+                after_func_open[body_start:], valid_param_names
+            )
+            if scan_end != -1:
+                function_calls.append(after_func_open[: body_start + scan_end])
+                continue
+            # Fallback to legacy heuristic.
+            func_end = self._find_true_function_end(after_func_open)
+            if func_end == -1:
+                function_calls.append(after_func_open)
+            else:
+                function_calls.append(after_func_open[:func_end])
         return function_calls
 
     def extract_tool_calls(
@@ -213,11 +761,39 @@ def extract_tool_calls(
                         }
                     )
 
-            # Extract content before tool calls
-            content_index = model_output.find(self.tool_call_start_token)
-            idx = model_output.find(self.tool_call_prefix)
-            content_index = content_index if content_index >= 0 else idx
-            content = model_output[:content_index]  # .rstrip()
+            # Extract content before tool calls.  Anchor at the FIRST
+            # ``<tool_call>`` that contains a real ``<function=NAME>``
+            # opener — a bare ``<tool_call>...</tool_call>`` written by
+            # the model in its narrative text (no function inside) is
+            # NOT a real tool call and the surrounding text MUST stay
+            # in ``content``.
+            content_index = -1
+            search_pos = 0
+            tc_start_token = self.tool_call_start_token
+            tc_end_token = self.tool_call_end_token
+            while True:
+                tc_pos = model_output.find(tc_start_token, search_pos)
+                if tc_pos == -1:
+                    break
+                tc_close = model_output.find(tc_end_token, tc_pos + len(tc_start_token))
+                # Look for a ``<function=`` inside this tool_call block
+                # (or up to end-of-string if the block isn't closed).
+                limit = tc_close if tc_close != -1 else len(model_output)
+                func_pos = model_output.find(
+                    self.tool_call_prefix, tc_pos + len(tc_start_token), limit
+                )
+                if func_pos != -1:
+                    content_index = tc_pos
+                    break
+                search_pos = tc_close + len(tc_end_token) if tc_close != -1 else limit
+            if content_index == -1:
+                # No structural ``<tool_call>`` block contains a
+                # ``<function=``: fall back to the standalone
+                # ``<function=`` position (legacy behaviour).
+                content_index = model_output.find(self.tool_call_prefix)
+            content = (
+                model_output[:content_index] if content_index >= 0 else model_output
+            )
             valid_tool_calls = [tc for tc in tool_calls if tc is not None]
             return ExtractedToolCallInformation(
                 tools_called=(len(valid_tool_calls) > 0),
@@ -277,77 +853,116 @@ def extract_tool_calls_streaming(
 
         # Check if we need to advance to next tool
         if self.json_closed and not self.in_function:
-            # Check if this tool call has ended
-            tool_ends = current_text.count(self.tool_call_end_token)
+            # Use structural </tool_call> count: a literal </tool_call>
+            # embedded in a parameter value must not trigger spurious
+            # advance.
+            tool_ends = len(self._structural_tool_call_end_positions(current_text))
             if tool_ends > self.current_tool_index:
-                # This tool has ended, advance to next
-                self.current_tool_index += 1
-                self.header_sent = False
-                self.param_count = 0
-                self.json_started = False
-                self.json_closed = False
-                self.accumulated_params = {}
-
-                # Check if there are more tool calls
-                tool_starts = current_text.count(self.tool_call_start_token)
-                if self.current_tool_index >= tool_starts:
-                    # No more tool calls
-                    self.is_tool_call_started = False
-                # Continue processing next tool
-                return None
-
+                # Advance to next tool; is_tool_call_started is reset so
+                # content between or after tool calls is emitted correctly.
+                # We deliberately fall through (no early ``return None``):
+                # the rest of this delta may carry trailing free text after
+                # the closed </tool_call> or even an entire next tool call
+                # (MTP / speculative decoding). The downstream code handles
+                # both — emitting trailing content via the not-started
+                # branch, or starting the next tool via tool_starts_count.
+                self._advance_to_next_tool(current_text)
+
+        content_message = None
         # Handle normal content before tool calls
         if not self.is_tool_call_started:
-            # Check if tool call is starting
-            if (
+            tool_starts_count = current_text.count(self.tool_call_start_token)
+            start_signal = (
                 self.tool_call_start_token_id in delta_token_ids
-                or self.tool_call_start_token in delta_text
-            ):
+                or tool_starts_count > self.current_tool_index
+            )
+            # ``tool_starts_count`` is naive and over-counts when an
+            # earlier tool's parameter value contains a literal
+            # ``<tool_call>``.  Confirm a REAL next tool by locating an
+            # opener past ``_sent_content_idx`` (which sits after the last
+            # processed tool's structural ``</tool_call>``).
+            last_start = -1
+            if start_signal:
+                last_start = current_text.find(
+                    self.tool_call_start_token, self._sent_content_idx
+                )
+            if start_signal and last_start != -1:
                 self.is_tool_call_started = True
                 # Return any content before the tool call
-                if self.tool_call_start_token in delta_text:
-                    content_before = delta_text[
-                        : delta_text.index(self.tool_call_start_token)
-                    ]
+                if last_start > self._sent_content_idx:
+                    content_before = current_text[self._sent_content_idx : last_start]
+                    self._sent_content_idx = last_start
                     if content_before:
-                        return DeltaMessage(content=content_before)
-                return None
+                        content_message = DeltaMessage(content=content_before)
             else:
-                # Check if we're between tool calls - skip whitespace
+                # No real new tool starting in this delta — emit any
+                # trailing/inter-call content.
+                overlap = partial_tag_overlap(current_text, self.tool_call_start_token)
+                sendable_idx = len(current_text) - overlap
+
+                # Skip whitespace-only deltas right after a closed tool.
                 if (
                     current_text.rstrip().endswith(self.tool_call_end_token)
                     and delta_text.strip() == ""
                 ):
-                    # We just ended a tool call, skip whitespace
+                    self._sent_content_idx = len(current_text)
                     return None
-                # Normal content, no tool call
-                return DeltaMessage(content=delta_text)
-
-        # Check if we're between tool calls (waiting for next one)
-        # Count tool calls we've seen vs processed
-        tool_starts_count = current_text.count(self.tool_call_start_token)
-        if self.current_tool_index >= tool_starts_count:
-            # We're past all tool calls, shouldn't be here
-            return None
 
-        # We're in a tool call, find the current tool call portion
-        # Need to find the correct tool call based on current_tool_index
+                if sendable_idx > self._sent_content_idx:
+                    content = current_text[self._sent_content_idx : sendable_idx]
+                    self._sent_content_idx = sendable_idx
+                    if content:
+                        return DeltaMessage(content=content)
+                return None
+
+        # Check if we're between tool calls (waiting for next one).
+        # Only look for a <tool_call> opener at or after
+        # ``_sent_content_idx`` (past the completed calls) so that
+        # <tool_call> tokens embedded in a parameter value of a
+        # completed call are not treated as spurious new tool calls.
+        if self.tool_call_start_token not in current_text[self._sent_content_idx :]:
+            return content_message
+
+        # We're in a tool call, find the current tool call portion.
+        # Build tool_start_positions by jumping OVER completed tool
+        # calls (past each </tool_call>), so that <tool_call> tokens
+        # embedded in parameter values of completed calls are never
+        # included.
+        # Use STRUCTURAL </tool_call> positions when jumping past
+        # completed tool calls — naive ``current_text.find(</tool_call>)``
+        # matches a literal ``</tool_call>`` embedded in a parameter
+        # value and would land inside an earlier tool's content.
+        structural_ends = self._structural_tool_call_end_positions(current_text)
         tool_start_positions: list[int] = []
-        idx = 0
-        while True:
-            idx = current_text.find(self.tool_call_start_token, idx)
+        search_pos = 0
+        for i in range(self.current_tool_index + 1):
+            idx = current_text.find(self.tool_call_start_token, search_pos)
             if idx == -1:
                 break
             tool_start_positions.append(idx)
-            idx += len(self.tool_call_start_token)
+            if i < self.current_tool_index:
+                # Completed tool call: jump past its STRUCTURAL </tool_call>.
+                end_idx = -1
+                for end_pos in structural_ends:
+                    if end_pos > idx:
+                        end_idx = end_pos
+                        break
+                if end_idx == -1:
+                    break
+                search_pos = end_idx + len(self.tool_call_end_token)
 
         if self.current_tool_index >= len(tool_start_positions):
-            # No more tool calls to process yet
-            return None
+            return content_message
 
         tool_start_idx = tool_start_positions[self.current_tool_index]
-        # Find where this tool call ends (or current position if not ended yet)
-        tool_end_idx = current_text.find(self.tool_call_end_token, tool_start_idx)
+        # Find this tool call's STRUCTURAL end (or use rest of text if
+        # the tool isn't closed yet).  A naive find would truncate at a
+        # literal </tool_call> inside a parameter value.
+        tool_end_idx = -1
+        for end_pos in structural_ends:
+            if end_pos > tool_start_idx:
+                tool_end_idx = end_pos
+                break
         if tool_end_idx == -1:
             tool_text = current_text[tool_start_idx:]
         else:
@@ -355,6 +970,7 @@ def extract_tool_calls_streaming(
                 tool_start_idx : tool_end_idx + len(self.tool_call_end_token)
             ]
 
+        tool_call_fragments = None
         # Looking for function header
         if not self.header_sent:
             if self.tool_call_prefix in tool_text:
@@ -387,21 +1003,18 @@ def extract_tool_calls_streaming(
                     # accesses streamed_args_for_tool[index].
                     self.streamed_args_for_tool.append("")
 
-                    # Send header with function info
-                    return DeltaMessage(
-                        tool_calls=[
-                            DeltaToolCall(
-                                index=self.current_tool_index,
-                                id=self.current_tool_id,
-                                function=DeltaFunctionCall(
-                                    name=self.current_function_name, arguments=""
-                                ),
-                                type="function",
-                            )
-                        ]
+                    tool_call_fragments = DeltaToolCall(
+                        index=self.current_tool_index,
+                        id=self.current_tool_id,
+                        function=DeltaFunctionCall(
+                            name=self.current_function_name, arguments=""
+                        ),
+                        type="function",
                     )
-            return None
+            if not self.header_sent:
+                return content_message
 
+        arguments_to_emit = ""
         # We've sent header, now handle function body
         if self.in_function:
             # Always send opening brace first, regardless of whether
@@ -412,24 +1025,91 @@ def extract_tool_calls_streaming(
             if not self.json_started:
                 self.json_started = True
                 self.streamed_args_for_tool[self.current_tool_index] += "{"
-                return DeltaMessage(
-                    tool_calls=[
-                        DeltaToolCall(
-                            index=self.current_tool_index,
-                            function=DeltaFunctionCall(arguments="{"),
-                        )
-                    ]
-                )
-
-            # Find all parameter start positions in current tool_text
-            param_starts = []
+                arguments_to_emit += "{"
+
+            # Build param_starts using structural-aware lookup. Plain
+            # tool_text.find(parameter_prefix) would return positions
+            # inside parameter VALUES (e.g. Python code that embeds the
+            # XML format), creating spurious extra params.  Use the
+            # schema to filter nested <parameter=NAME> and advance
+            # sequentially past each complete parameter's value.
+            streaming_param_config = find_tool_properties(
+                self.tools, self.current_function_name or ""
+            )
+            valid_param_names: set[str] | None = (
+                set(streaming_param_config.keys()) if streaming_param_config else None
+            )
+            param_starts: list[int] = []
             search_idx = 0
             while True:
-                search_idx = tool_text.find(self.parameter_prefix, search_idx)
-                if search_idx == -1:
+                # Don't filter top-level <parameter=NAME> by schema:
+                # callers may send params whose names aren't declared
+                # (e.g. renamed fields).  Schema filtering is applied
+                # below when walking INSIDE a parameter value to
+                # disambiguate nested literal XML.
+                param_start_pos = self._next_structural_param_start(
+                    tool_text, search_idx, None
+                )
+                if param_start_pos == -1:
                     break
-                param_starts.append(search_idx)
-                search_idx += len(self.parameter_prefix)
+                param_starts.append(param_start_pos)
+                # Advance past this parameter's content.
+                name_end_pos = tool_text.find(
+                    ">", param_start_pos + len(self.parameter_prefix)
+                )
+                if name_end_pos == -1:
+                    break
+                after_name = tool_text[name_end_pos + 1 :]
+                after_name_stripped = (
+                    after_name[1:] if after_name.startswith("\n") else after_name
+                )
+                end_in_after = self._find_true_param_end(
+                    after_name_stripped,
+                    valid_param_names,
+                    require_lookahead=True,
+                )
+                if end_in_after == -1:
+                    # No structural ``</parameter>`` close yet.  A
+                    # legitimate "missing </parameter>" malformation —
+                    # the model jumps from ``<parameter=A>`` straight to
+                    # ``<parameter=B>`` — is recoverable: treat the
+                    # next structural ``<parameter=NAME>`` as implicit
+                    # end of the current param.  But only if NAME has
+                    # NOT already been parsed as a sibling param of this
+                    # tool call (and is not the param currently being
+                    # scanned).  A repeated NAME is almost always a
+                    # literal embedded in the unfinished value, not a
+                    # real next parameter.
+                    cand_name = tool_text[
+                        param_start_pos + len(self.parameter_prefix) : name_end_pos
+                    ]
+                    already_seen = set(self.accumulated_params.keys()) | (
+                        {cand_name} if cand_name else set()
+                    )
+                    unseen_valid: set[str] | None = (
+                        (valid_param_names - already_seen)
+                        if valid_param_names is not None
+                        else None
+                    )
+                    implicit_end = self._next_structural_param_start(
+                        after_name_stripped, 0, unseen_valid
+                    )
+                    if implicit_end != -1:
+                        search_idx = (
+                            (name_end_pos + 1)
+                            + (1 if after_name.startswith("\n") else 0)
+                            + implicit_end
+                        )
+                    else:
+                        # Wait for more data.
+                        break
+                else:
+                    search_idx = (
+                        (name_end_pos + 1)
+                        + (1 if after_name.startswith("\n") else 0)
+                        + end_in_after
+                        + len(self.parameter_end_token)
+                    )
 
             # Process ALL complete params in a loop (spec decode fix).
             # With speculative decoding a single delta can deliver
@@ -455,30 +1135,67 @@ def extract_tool_calls_streaming(
                 if value_text.startswith("\n"):
                     value_text = value_text[1:]
 
-                param_end_idx = value_text.find(self.parameter_end_token)
+                param_end_idx = self._find_true_param_end(
+                    value_text, valid_param_names, require_lookahead=True
+                )
                 if param_end_idx == -1:
-                    next_param_idx = value_text.find(self.parameter_prefix)
-                    func_end_idx = value_text.find(self.function_end_token)
-
-                    if next_param_idx != -1 and (
-                        func_end_idx == -1 or next_param_idx < func_end_idx
-                    ):
-                        param_end_idx = next_param_idx
-                    elif func_end_idx != -1:
-                        param_end_idx = func_end_idx
-                    else:
-                        # Fallback for malformed XML where </function>
-                        # is missing. Use </tool_call> as a delimiter
-                        # if present in the value so we don't include
-                        # the closing tag as part of the param value.
-                        tool_end_in_value = value_text.find(self.tool_call_end_token)
-                        if tool_end_in_value != -1:
-                            param_end_idx = tool_end_in_value
+                    # Confirm via the parameter-aware walker that the
+                    # function body is truly complete.  The legacy
+                    # ``_find_true_function_end`` matches a ``</function>``
+                    # at end-of-buffer (lstripped lookahead == ""), which
+                    # is wrong in streaming when the literal close of a
+                    # nested tool_call inside a parameter value sits at
+                    # the buffer's end.  Walking the body via
+                    # ``_scan_to_structural_function_end`` correctly
+                    # steps over literal tags inside parameter values
+                    # and returns -1 if any param is still open.
+                    tc_open_in_tool = tool_text.find(self.tool_call_prefix)
+                    body_func_end_in_value = -1
+                    if tc_open_in_tool != -1:
+                        name_end_in_tool = tool_text.find(
+                            ">", tc_open_in_tool + len(self.tool_call_prefix)
+                        )
+                        if name_end_in_tool != -1:
+                            body_after_name = tool_text[name_end_in_tool + 1 :]
+                            body_func_end_rel = self._scan_to_structural_function_end(
+                                body_after_name, valid_param_names
+                            )
+                            if body_func_end_rel != -1:
+                                body_func_end_abs = (
+                                    name_end_in_tool + 1 + body_func_end_rel
+                                )
+                                body_func_end_in_value = body_func_end_abs - value_start
+
+                    if body_func_end_in_value > 0:
+                        # Function body is structurally complete; the
+                        # current param has missing </parameter>.  Use
+                        # the next legitimate <parameter=NAME> (NAME
+                        # unseen) before the structural </function> as
+                        # the implicit end.
+                        already_seen = set(self.accumulated_params.keys()) | (
+                            {current_param_name} if current_param_name else set()
+                        )
+                        unseen_valid: set[str] | None = (
+                            (valid_param_names - already_seen)
+                            if valid_param_names is not None
+                            else None
+                        )
+                        next_param_idx = self._next_structural_param_start(
+                            value_text, 0, unseen_valid
+                        )
+                        if (
+                            next_param_idx != -1
+                            and next_param_idx < body_func_end_in_value
+                        ):
+                            param_end_idx = next_param_idx
                         else:
-                            # Parameter incomplete — break so we still
-                            # emit any fragments accumulated by earlier
-                            # loop iterations.
-                            break
+                            param_end_idx = body_func_end_in_value
+                    else:
+                        # Body not yet complete — wait for more data.
+                        # Do NOT truncate at a literal </function> or
+                        # </tool_call> that may sit inside a still-open
+                        # parameter value.
+                        break
 
                 if param_end_idx == -1:
                     break
@@ -522,15 +1239,7 @@ def extract_tool_calls_streaming(
                         self.current_tool_index,
                         len(self.streamed_args_for_tool),
                     )
-
-                return DeltaMessage(
-                    tool_calls=[
-                        DeltaToolCall(
-                            index=self.current_tool_index,
-                            function=DeltaFunctionCall(arguments=combined),
-                        )
-                    ]
-                )
+                arguments_to_emit += combined
 
             # Check for function end AFTER processing parameters.
             # This ordering is critical: with speculative decoding a
@@ -538,13 +1247,31 @@ def extract_tool_calls_streaming(
             # </function>. If the close check ran first it would emit
             # "}" and set in_function=False before the parameter loop
             # ever ran, causing the parameter to be silently dropped.
-            if not self.json_closed and self.function_end_token in tool_text:
+            # Use the parameter-aware walker so a literal '</function>'
+            # inside a parameter value (e.g. a content arg embedding a
+            # complete nested tool_call) does not trigger a premature
+            # close.
+            true_func_end = -1
+            tc_open_in_tool_for_close = tool_text.find(self.tool_call_prefix)
+            if tc_open_in_tool_for_close != -1:
+                name_end_in_tool = tool_text.find(
+                    ">",
+                    tc_open_in_tool_for_close + len(self.tool_call_prefix),
+                )
+                if name_end_in_tool != -1:
+                    body_after_name = tool_text[name_end_in_tool + 1 :]
+                    body_func_end_rel = self._scan_to_structural_function_end(
+                        body_after_name, valid_param_names
+                    )
+                    if body_func_end_rel != -1:
+                        true_func_end = name_end_in_tool + 1 + body_func_end_rel
+            if not self.json_closed and true_func_end != -1:
                 self.json_closed = True
 
                 func_start = tool_text.find(self.tool_call_prefix) + len(
                     self.tool_call_prefix
                 )
-                func_content_end = tool_text.find(self.function_end_token, func_start)
+                func_content_end = true_func_end
                 if func_content_end != -1:
                     func_content = tool_text[func_start:func_content_end]
                     try:
@@ -572,23 +1299,88 @@ def extract_tool_calls_streaming(
                         self.current_tool_index,
                         len(self.streamed_args_for_tool),
                     )
-
-                result = DeltaMessage(
-                    tool_calls=[
-                        DeltaToolCall(
-                            index=self.current_tool_index,
-                            function=DeltaFunctionCall(arguments="}"),
-                        )
-                    ]
-                )
-
+                arguments_to_emit += "}"
                 self.in_function = False
                 self.json_closed = True
                 self.accumulated_params = {}
 
-                return result
+        if tool_call_fragments or arguments_to_emit:
+            if not tool_call_fragments:
+                tool_call_fragments = DeltaToolCall(
+                    index=self.current_tool_index,
+                    function=DeltaFunctionCall(arguments=arguments_to_emit),
+                )
+            else:
+                tool_call_fragments.function.arguments += arguments_to_emit
+
+            if content_message:
+                content_message.tool_calls = [tool_call_fragments]
+                result = content_message
+            else:
+                result = DeltaMessage(tool_calls=[tool_call_fragments])
+
+            # Speculative decoding can deliver multiple complete tool
+            # calls in a single delta.  If we just finished one and
+            # another complete <tool_call>...</tool_call> remains in
+            # current_text, advance and re-enter to emit it.  We pass a
+            # non-empty `previous_text` sentinel so reset_streaming_state
+            # is NOT triggered inside the recursion (which would clear
+            # current_tool_index back to 0 and loop forever).
+            if (
+                self.json_closed
+                and not self.in_function
+                and len(self._structural_tool_call_end_positions(current_text))
+                > self.current_tool_index + 1
+            ):
+                # Speculative decoding delivered multiple complete tool
+                # calls in one delta; advance and recurse for the next.
+                self._advance_to_next_tool(current_text)
+
+                # Recurse with a sentinel previous_text so the entry
+                # check `if not previous_text` does NOT reset the state.
+                next_delta = self.extract_tool_calls_streaming(
+                    previous_text or " ",
+                    current_text,
+                    delta_text,
+                    previous_token_ids,
+                    current_token_ids,
+                    delta_token_ids,
+                    request,
+                )
+                if next_delta is not None and next_delta.tool_calls:
+                    if result.tool_calls is None:
+                        result.tool_calls = []
+                    result.tool_calls.extend(next_delta.tool_calls)
+                    # Concatenate the recursion's content (e.g. text
+                    # BETWEEN tool 1 and tool 2) with the outer's content
+                    # (e.g. text BEFORE tool 1). Without this, the "between"
+                    # fragment is silently dropped whenever the outer
+                    # already produced its own content.
+                    if next_delta.content:
+                        result.content = (result.content or "") + next_delta.content
+
+            # Emit trailing free text that follows the LAST structural
+            # </tool_call> in this delta (MTP / spec-decoding bursts that
+            # bundle N tool calls + trailing content into one chunk).
+            # Without this the trailing text is buffered indefinitely:
+            # the per-tool processing never advances ``_sent_content_idx``
+            # past its tool's ``</tool_call>``, and an EOS-style empty
+            # delta cannot recover content that was never emitted.
+            if self.json_closed and not self.in_function:
+                end_positions = self._structural_tool_call_end_positions(current_text)
+                if end_positions:
+                    last_end = end_positions[-1] + len(self.tool_call_end_token)
+                    if (
+                        last_end < len(current_text)
+                        and last_end > self._sent_content_idx
+                    ):
+                        trailing = current_text[last_end:]
+                        if trailing:
+                            self._sent_content_idx = len(current_text)
+                            result.content = (result.content or "") + trailing
+            return result
 
-        return None
+        return content_message
 
     def get_structural_tag(self, request: ChatCompletionRequest):
         return get_model_structural_tag(
diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py
index 8ee10dcbc9e6..3f2ae4d253bf 100644
--- a/vllm/tool_parsers/qwen3xml_tool_parser.py
+++ b/vllm/tool_parsers/qwen3xml_tool_parser.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import ast
+import contextlib
 import json
 from collections.abc import Sequence
 from typing import Any
@@ -26,11 +27,28 @@
     Tool,
     ToolParser,
 )
+from vllm.tool_parsers.structural_tag_registry import (
+    get_enable_structured_outputs_in_reasoning,
+    get_model_structural_tag,
+)
 from vllm.tool_parsers.utils import find_tool_properties
 
 logger = init_logger(__name__)
 
 
+def _is_valid_function_name(name: str) -> bool:
+    """Return True when ``name`` looks like a real function identifier and
+    not a stray template token, malformed tag, or freeform text.
+
+    Rejects empty names and names containing template-syntax characters
+    (``{``, ``}``, ``<``, ``>``), quotes, or any whitespace (``str.isspace``,
+    so form feed, vertical tab and Unicode spaces are rejected too).
+    """
+    if not name:
+        return False
+    return not any(c in "{}<>\"'" or c.isspace() for c in name)
+
+
 class StreamingXMLToolCallParser:
     """
     Simplified streaming XML tool call parser
@@ -53,9 +71,16 @@ def reset_streaming_state(self):
         """Reset streaming parsing state"""
 
         self.deltas = []
+        # When True (delta-by-delta streaming), _process_complete_xml_elements
+        # holds off on </parameter> when nothing follows in the buffer yet:
+        # an empty lookahead is ambiguous there because more tokens may
+        # still arrive.  When False (full output passed at once), an empty
+        # lookahead is a genuine end.
+        self._streaming_mode: bool = False
         # state for streaming
         self.tool_call_index = 0
         self.current_call_id = None
+        self.id_emitted = False
         self.last_completed_call_id = None
         self.current_function_name = None
         self.current_function_open = False
@@ -79,6 +104,21 @@ def reset_streaming_state(self):
         self.defer_current_parameter = False
         self.deferred_param_raw_value = ""
 
+        # Depth of LITERAL nested ``<tool_call>``/``<function=...>`` opens
+        # encountered inside the current parameter's value.  Each literal
+        # opener bumps the depth; each ``</tool_call>``/``</function>``
+        # encountered while depth > 0 is also literal (decrements the
+        # depth) and must not be treated as a structural close.  Reset
+        # to 0 when leaving a parameter.
+        self._literal_tag_depth = 0
+        # Number of literal tool_call/function open or close events seen
+        # in the current ``parse_single_streaming_chunks`` call.  Used to
+        # suppress the post-processing structural-close fallback when
+        # the chunk contained literal nested-tag events: those events
+        # are already handled (escaped) by the preprocess pass and must
+        # not trigger ``_end_element`` calls.
+        self._literal_events_this_chunk = 0
+
         # recreate parser
         self.parser = ParserCreate()
         self.setup_parser()
@@ -98,72 +138,58 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage:
         # Record delta count before processing
         initial_delta_count = len(self.deltas)
 
+        # Reset literal-event counter for this chunk: it will be
+        # incremented by the preprocess pass whenever it encounters a
+        # literal nested ``<tool_call>``/``<function=...>`` open or
+        # the matching close inside a parameter value.
+        self._literal_events_this_chunk = 0
+
         self.streaming_buffer += xml_chunk
 
         found_elements = self._process_complete_xml_elements()
 
         if found_elements:
             # If complete elements found, check if end events were missed
-            # some tags may not have been triggered
+            # some tags may not have been triggered.  Use structural-aware
+            # checks so that </function>/</tool_call> appearing as literal
+            # text inside a parameter value (e.g. file content) does NOT
+            # trigger a spurious close that emits a duplicate '}' or ''.
+            # When ``_literal_tag_depth > 0`` we are still inside a
+            # literal nested ``<tool_call>``/``<function=...>`` block in
+            # the current parameter's value — the chunk's `</function>`
+            # or `</tool_call>` matches a literal opener, not a real
+            # structural close, so skip the fallback close events.
             try:
-                new_deltas = self.deltas[initial_delta_count:]
-                # If this chunk contains </function>
-                # but didn't generate '}', then complete it
+                # Skip the fallback close events when this chunk
+                # contained any literal nested-tag event: those
+                # ``</function>``/``</tool_call>`` strings are matched
+                # to literal openers in the param value and have
+                # already been escaped — firing ``_end_element`` here
+                # would prematurely close the OUTER parameter and
+                # truncate its value.
+                literals_in_chunk = self._literal_events_this_chunk > 0
                 if (
                     self.current_call_id is not None
-                    and self.function_end_token in xml_chunk
+                    and not literals_in_chunk
+                    and self._literal_tag_depth == 0
+                    and self._chunk_has_structural_function_end(xml_chunk)
+                    and self.current_function_open
                 ):
-                    # - Added '}' (non-empty parameter ending)
-                    # - Added '{}' (empty parameter function)
-                    has_function_close = any(
-                        (
-                            td.tool_calls
-                            and any(
-                                (
-                                    tc.function
-                                    and tc.id == self.current_call_id
-                                    and isinstance(tc.function.arguments, str)
-                                    and (tc.function.arguments in ("}", "{}"))
-                                )
-                                for tc in td.tool_calls
-                            )
-                        )
-                        for td in new_deltas
-                    )
-                    if not has_function_close:
-                        # Close potentially unclosed element
-                        if self.current_param_name:
-                            self._end_element("parameter")
-                        if self.current_function_name:
-                            self._end_element("function")
-                # If this chunk contains </tool_call>
-                # but didn't generate final empty delta, then complete it
+                    if self.current_param_name:
+                        self._end_element("parameter")
+                    if self.current_function_name:
+                        self._end_element("function")
                 if (
                     self.current_call_id is not None
-                    and self.tool_call_end_token in xml_chunk
+                    and not literals_in_chunk
+                    and self._literal_tag_depth == 0
+                    and self._chunk_has_structural_tool_call_end(xml_chunk)
                 ):
-                    has_toolcall_close = any(
-                        (
-                            td.tool_calls
-                            and any(
-                                (
-                                    tc.type == "function"
-                                    and tc.function
-                                    and tc.function.arguments == ""
-                                    and tc.id == self.current_call_id
-                                )
-                                for tc in td.tool_calls
-                            )
-                        )
-                        for td in new_deltas
-                    )
-                    if not has_toolcall_close:
-                        # Close potentially unclosed element
-                        if self.current_param_name:
-                            self._end_element("parameter")
-                        if self.current_function_name:
-                            self._end_element("function")
-                        self._end_element("tool_call")
+                    if self.current_param_name:
+                        self._end_element("parameter")
+                    if self.current_function_open:
+                        self._end_element("function")
+                    self._end_element("tool_call")
             except Exception as e:
                 logger.warning("Error with fallback parsing: %s", e)
             # Merge newly generated deltas into single response
@@ -173,29 +199,37 @@ def parse_single_streaming_chunks(self, xml_chunk: str) -> DeltaMessage:
             return result_delta
         else:
             # No complete elements, check if there's unoutput text content
-            if self.text_content_buffer and self.tool_call_index == 0:
-                # Has text content but no tool_call yet, output text content
+            if self.text_content_buffer:
+                # Output buffered text content
                 text_delta = DeltaMessage(content=self.text_content_buffer)
                 self._emit_delta(text_delta)
                 # Clear buffer to avoid duplicate output
                 self.text_content_buffer = ""
                 return text_delta
 
-            # If this chunk contains end tags but wasn't triggered by parser,
-            # manually complete end events
-            # Only execute when still on the same call as when entered,
-            # to prevent accidentally closing new calls
-            # in multi <tool_call> scenarios
-            if self.current_call_id is not None and (
-                self.function_end_token in xml_chunk
-                or self.tool_call_end_token in xml_chunk
+            # If this chunk contains structural end tags but wasn't
+            # triggered by parser, manually complete end events. Only
+            # execute when still on the same call as when entered, to
+            # prevent accidentally closing new calls in multi-<tool_call>
+            # scenarios.  Also skip when ``_literal_tag_depth > 0``: the
+            # chunk's `</function>`/`</tool_call>` matches a literal
+            # opener inside the current parameter's value.
+            if (
+                self.current_call_id is not None
+                and self._literal_tag_depth == 0
+                and (
+                    self._chunk_has_structural_function_end(xml_chunk)
+                    or self._chunk_has_structural_tool_call_end(xml_chunk)
+                )
             ):
-                # Close potentially unclosed element
                 if self.current_param_name:
                     self._end_element("parameter")
-                if self.function_end_token in xml_chunk and self.current_function_name:
+                if (
+                    self._chunk_has_structural_function_end(xml_chunk)
+                    and self.current_function_name
+                ):
                     self._end_element("function")
-                if self.tool_call_end_token in xml_chunk:
+                if self._chunk_has_structural_tool_call_end(xml_chunk):
                     self._end_element("tool_call")
                 # Return the merged delta result generated by this fallback
                 result_delta = self._merge_new_deltas_to_single_response(
@@ -227,6 +261,141 @@ def _escape_xml_special_chars(self, text: str) -> str:
 
         return text
 
+    def _is_structural_tag_position(self) -> bool:
+        """Return True when ``last_processed_pos`` is at the start of a line.
+
+        True when ``last_processed_pos`` is 0 or the buffer character just
+        before it is a newline.  A structural opening tag such as
+        ``<parameter=...>`` is expected to begin a line, whereas the same
+        text inside a value (e.g. '"<parameter=query>"') follows another
+        character such as a quote.  NOTE(review): presumes the element
+        being examined starts at ``last_processed_pos`` — confirm at caller.
+        """
+        if self.last_processed_pos == 0:
+            return True
+        return self.streaming_buffer[self.last_processed_pos - 1] == "\n"
+
+    def _get_valid_param_names(self) -> set[str] | None:
+        """Return the schema parameter names of the current function.
+
+        Returns the keys of ``find_tool_properties(self.tools,
+        self.current_function_name)`` as a set.  Returns None (never an
+        empty set) when ``self.tools`` or ``self.current_function_name``
+        is falsy, or when the lookup result itself is falsy.  Callers treat
+        None as "no schema" and skip the membership check on NAME.
+        """
+        if not self.tools or not self.current_function_name:
+            return None
+        props = find_tool_properties(self.tools, self.current_function_name)
+        return set(props.keys()) if props else None
+
+    def _is_already_emitted_param(self, name: str) -> bool:
+        """Return True when ``name`` is already in use for this tool call.
+
+        Args:
+            name: Candidate parameter name taken from a ``<parameter=NAME>``.
+
+        True if ``name`` equals ``self.current_param_name`` (the open
+        parameter) or is found in ``self.parameters`` (presumably those
+        already closed).  Callers then treat the tag as literal text.
+        """
+        if name == self.current_param_name:
+            return True
+        return name in self.parameters
+
+    def _is_structural_closing_tag(self, chunk: str) -> bool:
+        """Decide whether the closing tag ``chunk`` is a structural delimiter.
+
+        Args:
+            chunk: Closing-tag text assumed to start at ``last_processed_pos``.
+
+        Looks at the buffer after ``chunk`` (leading whitespace stripped) and
+        returns True when it is empty, is a prefix of one of the three end
+        tokens, starts with one of them, or starts with a ``<parameter=NAME>``
+        whose NAME is not already emitted and, if a schema is available, is a
+        known parameter of the current function.  Otherwise returns False and
+        the caller treats ``chunk`` as literal text.
+        """
+        after_pos = self.last_processed_pos + len(chunk)
+        rest = self.streaming_buffer[after_pos:].lstrip()
+
+        structural_param_follows = False
+        if rest.startswith(self.parameter_start_token):
+            valid_names = self._get_valid_param_names()
+            name_start = len(self.parameter_start_token)
+            name_end = rest.find(">", name_start)
+            if name_end != -1:
+                candidate = rest[name_start:name_end]
+                if valid_names is not None:
+                    structural_param_follows = (
+                        candidate in valid_names
+                        and not self._is_already_emitted_param(candidate)
+                    )
+                else:
+                    # No schema available: accept any NAME except one that
+                    # repeats the open or an already-emitted parameter; a
+                    # repeat is taken to be literal text inside the value.
+                    structural_param_follows = not self._is_already_emitted_param(
+                        candidate
+                    )
+
+        # A ``rest`` that is a truncated end token (e.g. "</" while
+        # "</function>" is still arriving) counts as structural.  In
+        # streaming mode an empty ``rest`` after </parameter> is deferred upstream.
+        # NOTE(review): a truncated ``<parameter=NAME`` opener (no ">" yet)
+        # leaves ``structural_param_follows`` False and presumably matches
+        # no end-token prefix, so the close is reported as literal — confirm.
+        is_partial_structural_prefix = any(
+            tok.startswith(rest)
+            for tok in (
+                self.parameter_end_token,
+                self.function_end_token,
+                self.tool_call_end_token,
+            )
+        )
+
+        return (
+            not rest
+            or is_partial_structural_prefix
+            or structural_param_follows
+            or rest.startswith(self.parameter_end_token)
+            or rest.startswith(self.function_end_token)
+            or rest.startswith(self.tool_call_end_token)
+        )
+
+    def _chunk_has_structural_function_end(self, chunk: str) -> bool:
+        """Return True if ``chunk`` contains a structural ``</function>``.
+
+        Scans every occurrence of ``function_end_token`` in ``chunk``; one is
+        structural when the text after it (leading whitespace stripped) is
+        empty or starts with ``tool_call_end_token``.  Only ``chunk`` is read.
+        """
+        search = 0
+        token = self.function_end_token
+        end_token = self.tool_call_end_token
+        while True:
+            idx = chunk.find(token, search)
+            if idx == -1:
+                return False  # no (further) occurrence qualified
+            rest = chunk[idx + len(token) :].lstrip()
+            if not rest or rest.startswith(end_token):
+                return True
+            search = idx + len(token)  # literal occurrence: keep scanning
+
+    def _chunk_has_structural_tool_call_end(self, chunk: str) -> bool:
+        """Return True if ``chunk`` contains a structural ``</tool_call>`` tag."""
+        search = 0
+        token = self.tool_call_end_token
+        start_token = self.tool_call_start_token
+        while True:  # NOTE(review): free text after the tag => not structural
+            idx = chunk.find(token, search)
+            if idx == -1:
+                return False  # no (further) occurrence qualified
+            rest = chunk[idx + len(token) :].lstrip()
+            if not rest or rest.startswith(start_token):
+                return True  # ends the chunk or precedes a new tool-call opener
+            search = idx + len(token)  # literal occurrence: keep scanning
+
     def _process_complete_xml_elements(self) -> bool:
         """
         Process complete XML elements in buffer
@@ -243,6 +412,23 @@ def _process_complete_xml_elements(self) -> bool:
                 # No complete element found, wait for more data
                 break
 
+            # In streaming mode, hold off on </parameter> when nothing
+            # follows in the buffer yet.  We need the lookahead to
+            # distinguish a real structural close (followed by
+            # </function> or a schema-known <parameter=NAME>) from
+            # literal text content that happens to be ``</parameter>`` on
+            # its own line (e.g. Jinja2 template files). When not in
+            # _pre_inside_parameter mode the SAX-level decision is made
+            # here; skip for now and re-evaluate on the next delta.
+            if (
+                self._streaming_mode
+                and element == self.parameter_end_token
+                and self.current_param_name is not None
+                and not self._pre_inside_parameter
+                and not self.streaming_buffer[end_pos:].lstrip()
+            ):
+                break
+
             # Check if this element should be skipped
             if self._should_skip_element(element):
                 self.last_processed_pos = end_pos
@@ -251,16 +437,12 @@ def _process_complete_xml_elements(self) -> bool:
             # Found complete XML element, process it
             try:
                 preprocessed_element = self._preprocess_xml_chunk(element)
-                # Check if this is the first tool_call start
+                # Check if a new tool_call starts and we have buffered text content
                 if (
-                    (
-                        preprocessed_element.strip().startswith("<tool_call>")
-                        or preprocessed_element.strip().startswith("<function name=")
-                    )
-                    and self.tool_call_index == 0
+                    preprocessed_element.strip().startswith("<tool_call>")
+                    or preprocessed_element.strip().startswith("<function name=")
                 ) and self.text_content_buffer:
-                    # First tool_call starts,
-                    # output previously collected text content first
+                    # Output previously collected text content first
                     text_delta = DeltaMessage(content=self.text_content_buffer)
                     self._emit_delta(text_delta)
                     # Clear buffer for potential subsequent text content
@@ -286,7 +468,7 @@ def _process_complete_xml_elements(self) -> bool:
                         tool_calls=[
                             DeltaToolCall(
                                 index=self.tool_call_index - 1,
-                                id=self.current_call_id,
+                                id=self._get_call_id_for_delta(),
                                 type="function",
                                 function=DeltaFunctionCall(name=None, arguments=""),
                             )
@@ -305,6 +487,16 @@ def _process_complete_xml_elements(self) -> bool:
             # Update processed position
             self.last_processed_pos = end_pos
 
+        # Flush any text accumulated AFTER the last </tool_call> processed
+        # in this batch. Without this, trailing free text that arrives in
+        # the SAME delta as the closing </tool_call> (MTP / speculative
+        # decoding) is buffered but never emitted — and is lost entirely
+        # if EOS comes before any subsequent delta.
+        if found_any and self.text_content_buffer and self.current_call_id is None:
+            text_delta = DeltaMessage(content=self.text_content_buffer)
+            self._emit_delta(text_delta)
+            self.text_content_buffer = ""
+
         return found_any
 
     def _should_skip_element(self, element: str) -> bool:
@@ -441,10 +633,10 @@ def _merge_new_deltas_to_single_response(self, initial_count: int) -> DeltaMessa
             if delta.tool_calls:
                 # For tool_calls, we need to intelligently merge arguments
                 for tool_call in delta.tool_calls:
-                    # Find if there's already a tool_call with the same call_id
+                    # Find if there's already a tool_call with the same index
                     existing_call = None
                     for existing in merged_tool_calls:
-                        if existing.id == tool_call.id:
+                        if existing.index == tool_call.index:
                             existing_call = existing
                             break
 
@@ -534,36 +726,59 @@ def _preprocess_xml_chunk(self, chunk: str) -> str:
                         if self._pre_current_param_name
                         else "string"
                     )
-                    # Only these types need deferred parsing to
-                    # handle Python literals containing single quotes
-                    is_object_type = param_type in ["object"]
+                    # Container types always need deferred parsing so the
+                    # full value is available for json.loads /
+                    # ast.literal_eval — even when the first streaming
+                    # token is just "\n".
+                    is_object_type = param_type == "object"
                     is_complex_type = (
                         param_type in ["array", "arr", "sequence"]
                         or param_type.startswith("dict")
                         or param_type.startswith("list")
                     )
-
-                    # Only delay when contains container symbols
-                    # and has single quotes and is complex type
-                    has_container_hint = (
-                        ("[" in original_chunk)
-                        or ("{" in original_chunk)
-                        or ("(" in original_chunk)
+                    # Boolean also needs deferral: streaming "t" as the
+                    # first char would otherwise be converted to False and
+                    # emit "false", shadowing the real "true" that follows.
+                    is_bool_type = param_type in ["boolean", "bool", "binary"]
+                    # Numeric types need deferral too: a nullable
+                    # parameter rendered as the literal "None" (Qwen3.5
+                    # template) or "null" (Qwen3.6 template) flips from
+                    # the partial-string fallback to JSON ``null`` only
+                    # when the FULL value is in.  Without deferral the
+                    # diff-based char emission would interleave the
+                    # partial string ("Non") with the JSON literal
+                    # ("null") and produce invalid output ("Nonl").
+                    is_numeric_type = (
+                        param_type.startswith("int")
+                        or param_type.startswith("uint")
+                        or param_type.startswith("long")
+                        or param_type.startswith("short")
+                        or param_type.startswith("unsigned")
+                        or param_type.startswith("num")
+                        or param_type.startswith("float")
                     )
 
-                    # Determine if deferred parsing is needed
-                    need_defer = False
-                    if is_complex_type:
-                        # Complex type, always need deferred parsing
-                        need_defer = True
-                    elif (
-                        is_object_type
-                        and has_container_hint
-                        and ("'" in original_chunk)
-                    ):
-                        # Object type with container symbols
-                        # and single quotes, need deferred parsing
-                        need_defer = True
+                    # Nullable string params (``anyOf: [string, null]``)
+                    # must defer too: the literal ``null`` / ``None`` is
+                    # only recognisable when the full value is in.
+                    # Without deferral, the streaming string path emits
+                    # ``"`` + chars + ``"`` and the literal stays
+                    # quoted.
+                    is_nullable_string = param_type in [
+                        "string",
+                        "str",
+                        "text",
+                        "varchar",
+                        "char",
+                        "enum",
+                    ] and self._param_allows_null(self._pre_current_param_name)
+                    need_defer = (
+                        is_complex_type
+                        or is_object_type
+                        or is_bool_type
+                        or is_numeric_type
+                        or is_nullable_string
+                    )
 
                     if not need_defer:
                         # No need for deferred parsing,
@@ -573,6 +788,69 @@ def _preprocess_xml_chunk(self, chunk: str) -> str:
                 self._pre_param_buffer += original_chunk
                 return ""
 
+        # When a parameter value is being streamed (SAX state says we are
+        # inside a <parameter>), structural-looking tokens that arrive as
+        # subsequent elements are literal text — e.g. a file whose content
+        # describes the tool-call format.  Escape them unless they are
+        # genuine structural delimiters.
+        if self.current_param_name is not None:
+            if chunk.startswith(self.tool_call_start_token) or chunk.startswith(
+                self.function_start_token
+            ):
+                # Opening tool_call/function tags are always literal inside
+                # a parameter value.  Track nesting depth so that the
+                # matching ``</function>`` / ``</tool_call>`` is also
+                # treated as literal even when its lookahead would
+                # otherwise satisfy the structural heuristic.
+                self._literal_tag_depth += 1
+                self._literal_events_this_chunk += 1
+                return self._escape_xml_special_chars(chunk)
+            if chunk.startswith(self.parameter_start_token):
+                # A structural <parameter=NAME> always follows a newline in
+                # the buffer.  When a schema is available, also require
+                # NAME to be a known parameter of the current function so
+                # that literal ``<parameter=new_string>`` inside file
+                # content is treated as text.  A NAME already emitted
+                # for this tool (or equal to the param currently being
+                # parsed) is also literal text — a parser fixture or a
+                # file that documents the tool-call format.
+                if not self._is_structural_tag_position():
+                    return self._escape_xml_special_chars(chunk)
+                name_start = len(self.parameter_start_token)
+                name_end = chunk.find(">", name_start)
+                if name_end != -1:
+                    candidate = chunk[name_start:name_end]
+                    if self._is_already_emitted_param(candidate):
+                        return self._escape_xml_special_chars(chunk)
+                    valid_names = self._get_valid_param_names()
+                    if valid_names is not None and candidate not in valid_names:
+                        return self._escape_xml_special_chars(chunk)
+            if (
+                chunk.startswith(self.parameter_end_token)
+                or chunk.startswith(self.function_end_token)
+                or chunk.startswith(self.tool_call_end_token)
+            ):
+                # Inside a literal nested tool_call/function (depth > 0),
+                # any closing tag pairs with the literal opener and is
+                # itself literal — regardless of what the lookahead says.
+                # ``</parameter>`` does not affect depth (parameters do
+                # not nest in the Qwen format).
+                if self._literal_tag_depth > 0:
+                    if chunk.startswith(self.function_end_token) or (
+                        chunk.startswith(self.tool_call_end_token)
+                    ):
+                        self._literal_tag_depth -= 1
+                        self._literal_events_this_chunk += 1
+                    else:
+                        # Literal `</parameter>` inside a nested literal
+                        # block — count it as a literal event so the
+                        # post-processing fallback knows the chunk
+                        # contained literals and skips spurious closes.
+                        self._literal_events_this_chunk += 1
+                    return self._escape_xml_special_chars(chunk)
+                if not self._is_structural_closing_tag(chunk):
+                    return self._escape_xml_special_chars(chunk)
+
         # Parameter start: enable accumulation
         if processed.startswith("<parameter name="):
             m = re.match(r'<parameter name="([^"]+)">', processed)
@@ -593,6 +871,12 @@ def _emit_delta(self, delta: DeltaMessage):
         """Emit Delta response (streaming output)"""
         self.deltas.append(delta)
 
+    def _get_call_id_for_delta(self) -> str | None:  # id once, then None
+        if not self.id_emitted:
+            self.id_emitted = True  # later calls return None until it is reset
+            return self.current_call_id
+        return None
+
     def _auto_close_open_parameter_if_needed(self, incoming_tag: str | None = None):
         """Before starting to process new elements,
         if there are unclosed tags from before,
@@ -648,7 +932,7 @@ def _start_element(self, name: str, attrs: dict[str, str]):
                     tool_calls=[
                         DeltaToolCall(
                             index=self.tool_call_index - 1,
-                            id=self.current_call_id,
+                            id=self._get_call_id_for_delta(),
                             type="function",
                             function=DeltaFunctionCall(
                                 name=function_name, arguments=""
@@ -679,7 +963,7 @@ def _start_element(self, name: str, attrs: dict[str, str]):
                         tool_calls=[
                             DeltaToolCall(
                                 index=self.tool_call_index - 1,
-                                id=self.current_call_id,
+                                id=self._get_call_id_for_delta(),
                                 type="function",
                                 function=DeltaFunctionCall(
                                     name=None, arguments=json_start
@@ -697,7 +981,7 @@ def _start_element(self, name: str, attrs: dict[str, str]):
                         tool_calls=[
                             DeltaToolCall(
                                 index=self.tool_call_index - 1,
-                                id=self.current_call_id,
+                                id=self._get_call_id_for_delta(),
                                 type="function",
                                 function=DeltaFunctionCall(
                                     name=None, arguments=json_continue
@@ -740,7 +1024,7 @@ def _char_data(self, data: str):
                     tool_calls=[
                         DeltaToolCall(
                             index=self.tool_call_index - 1,
-                            id=self.current_call_id,
+                            id=self._get_call_id_for_delta(),
                             type="function",
                             function=DeltaFunctionCall(name=None, arguments='"'),
                         )
@@ -775,7 +1059,7 @@ def _char_data(self, data: str):
                 tool_calls=[
                     DeltaToolCall(
                         index=self.tool_call_index - 1,
-                        id=self.current_call_id,
+                        id=self._get_call_id_for_delta(),
                         type="function",
                         function=DeltaFunctionCall(name=None, arguments=delta_data),
                     )
@@ -799,7 +1083,9 @@ def _end_element(self, name: str):
         if (
             name.startswith("parameter") or name == "parameter"
         ) and self.current_param_name:
-            # End current parameter
+            # End current parameter; reset literal-tag depth tracker
+            # since we are leaving the param's value scope.
+            self._literal_tag_depth = 0
             param_name = self.current_param_name
             param_value = self.current_param_value
 
@@ -812,27 +1098,118 @@ def _end_element(self, name: str):
                     if self.deferred_param_raw_value
                     else param_value
                 )
-                parsed_value = None
-                output_arguments = None
-                try:
-                    # If previously delayed trailing newline,
-                    # add it back before parsing
-                    if self.should_emit_end_newline:
-                        raw_for_parse = raw_text + "\n"
+                parsed_value: Any = None
+                output_arguments: str | None = None
+                if self.should_emit_end_newline:
+                    raw_for_parse = raw_text + "\n"
+                else:
+                    raw_for_parse = raw_text
+                # Nullable-string short-circuit: when the schema is
+                # ``anyOf: [string, null]``, ``"null"`` and Python's
+                # ``"None"`` map to JSON null.  Any other value is
+                # kept verbatim as a string — never parsed as int,
+                # float, JSON, etc., even if it LOOKS like one.
+                _param_type_for_check = self._get_param_type(param_name)
+                if _param_type_for_check in [
+                    "string",
+                    "str",
+                    "text",
+                    "varchar",
+                    "char",
+                    "enum",
+                ] and self._param_allows_null(param_name):
+                    if raw_for_parse.strip().lower() in ("null", "none"):
+                        parsed_value = None
+                        output_arguments = "null"
                     else:
-                        raw_for_parse = raw_text
-                    parsed_value = ast.literal_eval(raw_for_parse)
-                    output_arguments = json.dumps(parsed_value, ensure_ascii=False)
-                except Exception:
-                    # Fallback: output as string as-is
-                    output_arguments = json.dumps(raw_text, ensure_ascii=False)
-                    parsed_value = raw_text
+                        parsed_value = raw_for_parse
+                        output_arguments = json.dumps(raw_for_parse, ensure_ascii=False)
+                    delta = DeltaMessage(
+                        tool_calls=[
+                            DeltaToolCall(
+                                index=self.tool_call_index - 1,
+                                id=self._get_call_id_for_delta(),
+                                type="function",
+                                function=DeltaFunctionCall(
+                                    name=None, arguments=output_arguments
+                                ),
+                            )
+                        ]
+                    )
+                    self._emit_delta(delta)
+                    self.parameters[param_name] = parsed_value
+                    self.current_param_name = None
+                    self.current_param_value = ""
+                    self.current_param_value_converted = ""
+                    self.start_quote_emitted = False
+                    self.should_emit_end_newline = False
+                    self.defer_current_parameter = False
+                    self.deferred_param_raw_value = ""
+                    return
+                raw_lower = raw_for_parse.strip().lower()
+                # Handle JSON literals that ast.literal_eval cannot parse
+                # (true/false/null are JSON, not Python).
+                if raw_lower == "null":
+                    parsed_value = None
+                    output_arguments = "null"
+                elif raw_lower == "true":
+                    parsed_value = True
+                    output_arguments = "true"
+                elif raw_lower == "false":
+                    parsed_value = False
+                    output_arguments = "false"
+                else:
+                    # Try JSON first: handles arrays/objects that use JSON
+                    # native tokens (true, false, null) which
+                    # ast.literal_eval cannot parse.
+                    try:
+                        parsed_value = json.loads(raw_for_parse)
+                        # A model trained with a buggy template
+                        # (json.dumps(str(dict))) may output a JSON-encoded
+                        # Python repr like "\"{'k': 'v'}\"". json.loads
+                        # returns a str in that case — try one more level.
+                        if isinstance(parsed_value, str):
+                            try:
+                                parsed_value = ast.literal_eval(parsed_value)
+                            except (ValueError, SyntaxError, TypeError):
+                                with contextlib.suppress(
+                                    json.JSONDecodeError, ValueError
+                                ):
+                                    parsed_value = json.loads(parsed_value)
+                        output_arguments = json.dumps(parsed_value, ensure_ascii=False)
+                    except (json.JSONDecodeError, ValueError):
+                        try:
+                            parsed_value = ast.literal_eval(raw_for_parse)
+                            # A model trained with a buggy template
+                            # (json.dumps(str(dict))) may output a
+                            # JSON-encoded Python repr like "{'k': 'v'}".
+                            # ast.literal_eval returns a str in that
+                            # case — try one more level.
+                            if isinstance(parsed_value, str):
+                                try:
+                                    parsed_value = ast.literal_eval(parsed_value)
+                                except (
+                                    ValueError,
+                                    SyntaxError,
+                                    TypeError,
+                                ):
+                                    with contextlib.suppress(
+                                        json.JSONDecodeError, ValueError
+                                    ):
+                                        parsed_value = json.loads(parsed_value)
+                            output_arguments = json.dumps(
+                                parsed_value, ensure_ascii=False
+                            )
+                        except (ValueError, SyntaxError, TypeError):
+                            # Fallback: output as string as-is
+                            output_arguments = json.dumps(raw_text, ensure_ascii=False)
+                            parsed_value = raw_text
 
                 delta = DeltaMessage(
                     tool_calls=[
                         DeltaToolCall(
                             index=self.tool_call_index - 1,
-                            id=self.current_call_id,
+                            id=self._get_call_id_for_delta(),
                             type="function",
                             function=DeltaFunctionCall(
                                 name=None, arguments=output_arguments
@@ -868,7 +1245,7 @@ def _end_element(self, name: str):
                         tool_calls=[
                             DeltaToolCall(
                                 index=self.tool_call_index - 1,
-                                id=self.current_call_id,
+                                id=self._get_call_id_for_delta(),
                                 type="function",
                                 function=DeltaFunctionCall(name=None, arguments='""'),
                             )
@@ -881,7 +1258,7 @@ def _end_element(self, name: str):
                         tool_calls=[
                             DeltaToolCall(
                                 index=self.tool_call_index - 1,
-                                id=self.current_call_id,
+                                id=self._get_call_id_for_delta(),
                                 type="function",
                                 function=DeltaFunctionCall(name=None, arguments='"'),
                             )
@@ -904,7 +1281,7 @@ def _end_element(self, name: str):
                     tool_calls=[
                         DeltaToolCall(
                             index=self.tool_call_index - 1,
-                            id=self.current_call_id,
+                            id=self._get_call_id_for_delta(),
                             type="function",
                             function=DeltaFunctionCall(name=None, arguments="}"),
                         )
@@ -917,7 +1294,7 @@ def _end_element(self, name: str):
                     tool_calls=[
                         DeltaToolCall(
                             index=self.tool_call_index - 1,
-                            id=self.current_call_id,
+                            id=self._get_call_id_for_delta(),
                             type="function",
                             function=DeltaFunctionCall(name=None, arguments="{}"),
                         )
@@ -940,7 +1317,7 @@ def _end_element(self, name: str):
                 tool_calls=[
                     DeltaToolCall(
                         index=self.tool_call_index - 1,
-                        id=self.current_call_id,
+                        id=self._get_call_id_for_delta(),
                         type="function",
                         function=DeltaFunctionCall(name=None, arguments=""),
                     )
@@ -1003,11 +1380,52 @@ def _get_param_type(self, param_name: str) -> str:
 
         properties = find_tool_properties(self.tools, self.current_function_name)
         if param_name in properties and isinstance(properties[param_name], dict):
-            return self.repair_param_type(
-                str(properties[param_name].get("type", "string"))
-            )
+            prop = properties[param_name]
+            param_type = prop.get("type")
+            if isinstance(param_type, list):
+                # JSON-Schema list-form type, e.g.
+                # {"type": ["integer", "null"]}. Pick the first non-null
+                # type, mirroring the anyOf handling below.
+                for option_type in param_type:
+                    if str(option_type).lower() != "null":
+                        return self.repair_param_type(str(option_type))
+                return "string"
+            if param_type is None and "anyOf" in prop:
+                # Handle anyOf schemas (e.g. nullable types like
+                # anyOf: [{type: "integer"}, {type: "null"}]).
+                # Pick the first non-null type; fall back to "string".
+                for option in prop["anyOf"]:
+                    if isinstance(option, dict) and "type" in option:
+                        opt_type = str(option["type"])
+                        if opt_type != "null":
+                            return self.repair_param_type(opt_type)
+                return "string"
+
+            return self.repair_param_type(str(param_type or "string"))
         return "string"
 
+    def _param_allows_null(self, param_name: str | None) -> bool:
+        """Return True when the schema for ``param_name`` admits null.
+
+        Null is admitted by ``"type": "null"``, a list-form type such as
+        ``["string", "null"]``, or a null alternative in an ``anyOf`` union.
+        Used to recognise the literal ``"null"`` / ``"None"`` as JSON null.
+        """
+        if not self.tools or not self.current_function_name or not param_name:
+            return False
+        properties = find_tool_properties(self.tools, self.current_function_name)
+        if param_name not in properties or not isinstance(properties[param_name], dict):
+            return False
+        prop = properties[param_name]
+        # Check the property itself, then each anyOf alternative; ``type``
+        # may be a single name or a list of names in either place.
+        for schema in (prop, *(prop.get("anyOf") or [])):
+            declared = schema.get("type", "") if isinstance(schema, dict) else ""
+            names = declared if isinstance(declared, list) else [declared]
+            if any(str(name).lower() == "null" for name in names):
+                return True
+        return False
+
     def repair_param_type(self, param_type: str) -> str:
         """Repair unknown parameter types by treating them as string
         Args:
@@ -1045,13 +1463,29 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any:
         Returns:
             Converted value
         """
-        if param_value.lower() == "null":
-            return None
-
         param_type = param_type.strip().lower()
+        # Nullable schemas (``anyOf: [string, null]`` or similar): the
+        # primary type may be string but the literal ``"null"`` /
+        # ``"None"`` must still convert to JSON null.  Caller passes the
+        # current parameter name via the parser state so we can query
+        # the schema.
+        if self._param_allows_null(self.current_param_name) and param_value.lower() in (
+            "null",
+            "none",
+        ):
+            return None
+        # String type takes precedence: the literal value "null" must remain
+        # the string "null" instead of being converted to Python None.
         if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
             return param_value
-        elif (
+        # Non-string: "null" → Python None → JSON null.  Also accept the
+        # Python literal "None" so that Qwen3.5-trained models — whose
+        # chat template renders null args via ``| string`` (yielding the
+        # literal "None" in the prompt) — round-trip nullable values
+        # correctly.
+        if param_value.lower() in ("null", "none"):
+            return None
+        if (
             param_type.startswith("int")
             or param_type.startswith("uint")
             or param_type.startswith("long")
@@ -1062,11 +1496,10 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any:
                 return int(param_value)
             except (ValueError, TypeError):
                 logger.warning(
-                    "Parsed value '%s' of parameter '%s' is not an integer "
-                    "in tool '%s', degenerating to string.",
+                    "Parsed value '%s' is not an integer, degenerating to string.",
                     param_value,
                 )
-            return param_value
+                return param_value
         elif param_type.startswith("num") or param_type.startswith("float"):
             try:
                 float_param_value: float = float(param_value)
@@ -1077,14 +1510,12 @@ def _convert_param_value(self, param_value: str, param_type: str) -> Any:
                 )
             except (ValueError, TypeError):
                 logger.warning(
-                    "Parsed value '%s' of parameter '%s' is not a float "
-                    "in tool '%s', degenerating to string.",
+                    "Parsed value '%s' is not a float, degenerating to string.",
                     param_value,
                 )
-            return param_value
+                return param_value
         elif param_type in ["boolean", "bool", "binary"]:
-            param_value = param_value.lower()
-            return param_value == "true"
+            return param_value.lower() == "true"
         else:
             return param_value
 
@@ -1098,9 +1529,12 @@ def _convert_for_json_streaming(self, converted_value: Any, param_type: str) ->
         Returns:
             Converted string for streaming output
         """
-        # Check if value is empty, but exclude numeric 0
-        if converted_value is None or converted_value == "":
+        # Empty string: no output.
+        if converted_value == "":
             return ""
+        # None → JSON null literal (e.g. for nullable integer/object params).
+        if converted_value is None:
+            return "null"
 
         if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
             # String type, remove double quotes
@@ -1126,6 +1560,7 @@ def _reset_xml_parser_after_tool_call(self):
         if self.current_call_id:
             self.last_completed_call_id = self.current_call_id
         self.current_call_id = None
+        self.id_emitted = False
         self.current_function_name = None
         self.current_function_open = False
         self.parameters = {}
@@ -1179,6 +1614,13 @@ def extract_tool_calls(
             tool_calls = []
             for tool_call in result.tool_calls:
                 if tool_call.function and tool_call.function.name:
+                    # Reject phantom tool calls produced when the model
+                    # writes an unrendered Jinja template or pseudo-XML
+                    # in its response (e.g. ``<function={{ tc.name }}>``).
+                    # Surfacing such names as real tool calls causes
+                    # "tool not found" errors at the client.
+                    if not _is_valid_function_name(tool_call.function.name):
+                        continue
                     tool_calls.append(
                         ToolCall(
                             id=tool_call.id,
@@ -1235,6 +1677,7 @@ def extract_tool_calls_streaming(
     ) -> DeltaMessage | None:
         if not previous_text:
             self.parser.reset_streaming_state()
+            self.parser._streaming_mode = True
             # Reset tool call tracking arrays for new streaming session
             self.prev_tool_call_arr = []
             self.streamed_args_for_tool = []
@@ -1296,3 +1739,11 @@ def extract_tool_calls_streaming(
             # If no content and no tool calls, return None to indicate no update
             return None
         return delta
+
+    def get_structural_tag(self, request: ChatCompletionRequest):
+        """Delegate to ``get_model_structural_tag`` for model ``qwen_3_5``."""
+        tools, choice = request.tools, request.tool_choice
+        reasoning = get_enable_structured_outputs_in_reasoning()
+        return get_model_structural_tag(
+            model="qwen_3_5", tools=tools, tool_choice=choice, reasoning=reasoning
+        )