diff --git a/tests/test_responses_api.py b/tests/test_responses_api.py
index 769199b8..94c9224e 100644
--- a/tests/test_responses_api.py
+++ b/tests/test_responses_api.py
@@ -574,6 +574,68 @@ def test_streaming_response_sequence_metadata_is_monotonic(self, client):
         assert created_payload["response"]["id"] == completed_payload["response"]["id"]
         assert completed_payload["response"]["output_text"] == "Hello stream"
 
+    def test_streaming_response_bracket_tool_call_does_not_leak_text(
+        self, client, monkeypatch
+    ):
+        """Bracket-style tool calls stream as function calls, never as text."""
+        import vllm_mlx.server as srv
+
+        engine = _mock_engine(_output("unused"))
+        engine.chat = AsyncMock(
+            side_effect=AssertionError("stream path should not call chat")
+        )
+        engine._stream_outputs = [
+            _stream_output('[Calling tool: add({"a": 1, "b": 2})'),
+            _stream_output("]", completion_tokens=2, finish_reason="stop"),
+        ]
+        monkeypatch.setattr(srv, "_engine", engine, raising=False)
+        monkeypatch.setattr(srv, "_enable_auto_tool_choice", True)
+        monkeypatch.setattr(srv, "_tool_call_parser", "qwen3")
+        monkeypatch.setattr(srv, "_tool_parser_instance", None)
+        monkeypatch.setattr(srv, "_reasoning_parser", None)
+
+        with client.stream(
+            "POST",
+            "/v1/responses",
+            json={
+                "model": "test-model",
+                "input": "Add two numbers",
+                "stream": True,
+                "tools": [
+                    {
+                        "type": "function",
+                        "name": "add",
+                        "description": "Add two numbers",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "a": {"type": "integer"},
+                                "b": {"type": "integer"},
+                            },
+                            "required": ["a", "b"],
+                        },
+                    }
+                ],
+            },
+        ) as resp:
+            body = "".join(resp.iter_text())
+
+        assert resp.status_code == 200
+        events = _parse_sse_events(body)
+        streamed_text = "".join(
+            payload["delta"]
+            for event_type, payload in events
+            if event_type == "response.output_text.delta"
+        )
+        function_call_deltas = [
+            payload["delta"]
+            for event_type, payload in events
+            if event_type == "response.function_call_arguments.delta"
+        ]
+        # Joined check: a marker split across text deltas cannot slip through.
+        assert "[Calling tool:" not in streamed_text
+        assert function_call_deltas == ['{"a": 1, "b": 2}']
+
     def test_json_object_response_format_is_rejected(self, client):
         import vllm_mlx.server as srv
 
diff --git a/tests/test_tool_parsers.py b/tests/test_tool_parsers.py
index 1d37e948..6e0211a3 100644
--- a/tests/test_tool_parsers.py
+++ b/tests/test_tool_parsers.py
@@ -1286,6 +1286,29 @@ def test_streaming_function_format_complete(self, parser):
                 break
         assert tool_calls_found
 
+    def test_streaming_bracket_call_closing_marker_split(self, parser):
+        """Bracket call completes when ')' and ']' arrive in separate chunks.
+
+        The final delta carries only "]"; the parser must still emit the call.
+        """
+        pieces = ('[Calling tool: add({"a": 1, "b": 2})', "]")
+        seen = ""
+        result = None
+        for piece in pieces:
+            before, seen = seen, seen + piece
+            result = parser.extract_tool_calls_streaming(
+                previous_text=before,
+                current_text=seen,
+                delta_text=piece,
+            )
+
+        # Only the outcome of the final delta is inspected.
+        assert result is not None
+        assert "tool_calls" in result
+        call = result["tool_calls"][0]["function"]
+        assert call["name"] == "add"
+        assert call["arguments"] == '{"a": 1, "b": 2}'
+
     def test_streaming_partial_marker_buffered(self, parser):
         """Test that partial '<function' is buffered (not leaked as content)."""
         r = parser.extract_tool_calls_streaming(
diff --git a/vllm_mlx/server.py b/vllm_mlx/server.py
index 539dad07..4a523896 100644
--- a/vllm_mlx/server.py
+++ b/vllm_mlx/server.py
@@ -1365,7 +1365,14 @@ def _start_reasoning_item() -> list[str]:
 
         content = SPECIAL_TOKENS_PATTERN.sub("", delta_text)
         if tool_parser and delta_text:
-            if not tool_markup_possible and "<" not in delta_text:
+            # Fast path: skip parsing until a tool-markup marker appears.
+            # Use _streaming_tool_markup_possible to catch all supported
+            # shapes (<tool_call>, <function=, [Calling tool:, [TOOL_CALLS],
+            # bare bracket [func({...})], etc.) — the old `"<" not in` check
+            # missed bracket formats and let Qwen3.6-style tool calls leak.
+            if not tool_markup_possible and not _streaming_tool_markup_possible(
+                tool_accumulated_text + delta_text
+            ):
                 tool_accumulated_text += delta_text
             else:
                 if not tool_markup_possible:
diff --git a/vllm_mlx/tool_parsers/qwen_tool_parser.py b/vllm_mlx/tool_parsers/qwen_tool_parser.py
index e235a3c7..aa911782 100644
--- a/vllm_mlx/tool_parsers/qwen_tool_parser.py
+++ b/vllm_mlx/tool_parsers/qwen_tool_parser.py
@@ -272,9 +272,11 @@ def extract_tool_calls_streaming(
 
             return None
 
-        # If we're in a tool call, accumulate and parse at the end
-        # For simplicity, return None during accumulation
-        if "</tool_call>" in delta_text or ")]" in delta_text:
+        # If we're in a tool call, accumulate and parse at the end.
+        # Check current_text (accumulated), not delta_text — closing markers
+        # like ")]" or "</tool_call>" often span token boundaries and may
+        # never appear within a single delta chunk.
+        if "</tool_call>" in current_text or ")]" in current_text:
             # Tool call complete, parse the whole thing
             result = self.extract_tool_calls(current_text)
             if result.tools_called: