diff --git a/tests/test_anthropic_stream_scrubber.py b/tests/test_anthropic_stream_scrubber.py
new file mode 100644
index 000000000..466673867
--- /dev/null
+++ b/tests/test_anthropic_stream_scrubber.py
@@ -0,0 +1,1504 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for _AnthropicStreamScrubber – stateful tag stripping for Anthropic streaming.
+
+Tests the scrubber introduced in commit 6805baf which strips <think>, <tool_call>,
+<function=...>, and <parameter=...> markup from streamed text deltas on the
+Anthropic /v1/messages endpoint.
+
+These are pure logic tests with no MLX dependency.
+"""
+
+import pytest
+
+from vllm_mlx.server import _AnthropicStreamScrubber
+
+# =============================================================================
+# Basic Construction / Initial State
+# =============================================================================
+
+
+class TestScrubberInitialState:
+    """A freshly constructed scrubber starts in TEXT mode with an empty carry."""
+
+    def test_initial_mode_is_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        assert scrubber.mode == "TEXT"
+
+    def test_initial_carry_is_empty(self):
+        scrubber = _AnthropicStreamScrubber()
+        assert scrubber.carry == ""
+
+    def test_class_constants_exist(self):
+        """Pin the exact marker strings exposed as class-level constants."""
+        assert _AnthropicStreamScrubber.THINK_OPEN == "<think>"
+        assert _AnthropicStreamScrubber.THINK_CLOSE == "</think>"
+        assert _AnthropicStreamScrubber.TOOL_OPEN == "<tool_call>"
+        assert _AnthropicStreamScrubber.TOOL_CLOSE == "</tool_call>"
+        assert _AnthropicStreamScrubber.FUNC_CLOSE == "</function>"
+        assert _AnthropicStreamScrubber.PARAM_CLOSE == "</parameter>"
+        assert _AnthropicStreamScrubber.FUNC_PREFIX == "<function="
+        assert _AnthropicStreamScrubber.PARAM_PREFIX == "<parameter="
+
+    def test_carry_n_large_enough(self):
+        """CARRY_N >= MAX_TAG - 1; presumably so a split tag fits in carry (confirm)."""
+        assert _AnthropicStreamScrubber.CARRY_N >= _AnthropicStreamScrubber.MAX_TAG - 1
+
+
+# =============================================================================
+# Plain Text (no tags) – passthrough
+# =============================================================================
+
+
+class TestScrubberPlainText:
+    """Markup-free text must come out of feed() + flush() exactly as it went in."""
+
+    def test_empty_string(self):
+        scrubber = _AnthropicStreamScrubber()
+        assert scrubber.feed("") == ""
+
+    def test_none_delta(self):
+        """feed(None) should not crash and should emit nothing."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed(None)
+        assert result == ""
+
+    def test_short_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        # Short text with no '<' should emit immediately (zero carry)
+        result = scrubber.feed("Hi")
+        assert result == "Hi"
+        assert scrubber.carry == ""
+
+    def test_long_plain_text(self):
+        """Long markup-free text must round-trip unchanged through feed() + flush()."""
+        scrubber = _AnthropicStreamScrubber()
+        text = (
+            "Hello, this is a long sentence with no markup at all, just ordinary text."
+        )
+        result = scrubber.feed(text)
+        flushed = scrubber.flush()
+        assert result + flushed == text
+
+    def test_multiple_plain_deltas(self):
+        """Multiple consecutive plain-text deltas should reconstruct fully."""
+        scrubber = _AnthropicStreamScrubber()
+        parts = ["Hello ", "world, ", "how ", "are ", "you?"]
+        collected = ""
+        for p in parts:
+            collected += scrubber.feed(p)
+        collected += scrubber.flush()
+        assert collected == "".join(parts)
+
+    def test_flush_in_text_mode_returns_carry(self):
+        scrubber = _AnthropicStreamScrubber()
+        emitted = scrubber.feed("abc")
+        emitted += scrubber.flush()
+        assert emitted == "abc"  # feed + flush together lose and duplicate nothing
+        assert scrubber.carry == ""  # flush must leave the carry empty
+
+
+# =============================================================================
+# <think>...</think> suppression
+# =============================================================================
+
+
+class TestScrubberThinkTags:
+    """<think>...</think> blocks and their content must never reach the output."""
+
+    def test_think_block_in_single_delta(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("Hello <think>internal reasoning</think> world")
+        result += scrubber.flush()
+        assert "<think>" not in result
+        assert "internal reasoning" not in result
+        assert "</think>" not in result
+        assert "Hello " in result
+        assert " world" in result
+
+    def test_think_block_removes_content(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("<think>some reasoning</think>After thought")
+        result += scrubber.flush()
+        assert "some reasoning" not in result
+        assert "After thought" in result
+
+    def test_think_block_split_across_deltas(self):
+        """Opening <think> tag split across two feed() calls is still recognized."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("Before <thi")
+        collected += scrubber.feed("nk>secret reasoning here")
+        collected += scrubber.feed("</think> After")
+        collected += scrubber.flush()
+        assert "secret reasoning" not in collected
+        assert "<think>" not in collected
+        assert "</think>" not in collected
+        assert "Before" in collected
+        assert "After" in collected
+
+    def test_think_block_close_tag_split(self):
+        """Closing </think> tag split across deltas."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("<think>reasoning</th")
+        collected += scrubber.feed("ink>visible text")
+        collected += scrubber.flush()
+        assert "reasoning" not in collected
+        assert "visible text" in collected
+
+    def test_multiple_think_blocks(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = "A<think>r1</think>B<think>r2</think>C"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "r1" not in result
+        assert "r2" not in result
+        assert "A" in result
+        assert "B" in result
+        assert "C" in result
+
+    def test_think_with_newlines(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("Hi<think>\nStep 1\nStep 2\n</think> Done")
+        result += scrubber.flush()
+        assert "Step 1" not in result
+        assert "Step 2" not in result
+        assert "Hi" in result
+        assert "Done" in result
+
+    def test_think_at_end_of_stream_flushed_away(self):
+        """If the stream ends inside an unclosed <think>, its content is not emitted."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("Hello <think>unclosed reasoning")
+        flushed = scrubber.flush()
+        # Unclosed reasoning must not leak; text before <think> must survive
+        assert "unclosed reasoning" not in collected + flushed
+        assert "Hello" in collected + flushed
+
+
+# =============================================================================
+# <tool_call>...</tool_call> suppression
+# =============================================================================
+
+
+class TestScrubberToolCallTags:
+    """<tool_call>...</tool_call> blocks and their payload must never be emitted."""
+
+    def test_tool_call_in_single_delta(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed('Before <tool_call>{"name":"fn"}</tool_call> After')
+        result += scrubber.flush()
+        assert "<tool_call>" not in result
+        assert '{"name":"fn"}' not in result
+        assert "</tool_call>" not in result
+        assert "Before" in result
+        assert "After" in result
+
+    def test_tool_call_split_across_deltas(self):
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("Text <tool_")  # opening tag cut mid-way
+        collected += scrubber.feed('call>{"name":"search","args":{}}</tool_call> rest')
+        collected += scrubber.flush()
+        assert '{"name":"search"' not in collected
+        assert "<tool_call>" not in collected
+        assert "Text" in collected
+        assert "rest" in collected
+
+    def test_tool_call_close_split(self):
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("<tool_call>data</tool_")  # closing tag cut mid-way
+        collected += scrubber.feed("call>visible")
+        collected += scrubber.flush()
+        assert "data" not in collected
+        assert "visible" in collected
+
+    def test_multiple_tool_calls(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = "A<tool_call>call1</tool_call>B<tool_call>call2</tool_call>C"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "call1" not in result
+        assert "call2" not in result
+        assert "A" in result
+        assert "B" in result
+        assert "C" in result
+
+    def test_tool_call_at_end_of_stream(self):
+        """An unclosed <tool_call> at end of stream must not leak its payload."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("Prefix <tool_call>unclosed")
+        flushed = scrubber.flush()
+        assert "unclosed" not in collected + flushed
+        assert "Prefix" in collected + flushed
+
+
+# =============================================================================
+# <function=NAME>...</function> suppression
+# =============================================================================
+
+
+class TestScrubberFunctionTags:
+    """Test suppression of <function=name>...</function> (Llama-style) blocks."""
+
+    def test_function_tag_single_delta(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed('Hello <function=search>{"q":"test"}</function> world')
+        result += scrubber.flush()
+        assert "<function=" not in result
+        assert '{"q":"test"}' not in result
+        assert "</function>" not in result
+        assert "Hello" in result
+        assert "world" in result
+
+    def test_function_tag_split_across_deltas(self):
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("Text <func")  # prefix cut mid-way
+        collected += scrubber.feed("tion=get_weather")  # name arrives without '>'
+        collected += scrubber.feed('>{"city":"NYC"}</function> done')
+        collected += scrubber.flush()
+        assert "get_weather" not in collected
+        assert '{"city":"NYC"}' not in collected
+        assert "Text" in collected
+        assert "done" in collected
+
+    def test_function_tag_with_complex_name(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("<function=my_long_function_name>body</function>after")
+        result += scrubber.flush()
+        assert "my_long_function_name" not in result
+        assert "body" not in result
+        assert "after" in result
+
+    def test_stray_function_close_suppressed(self):
+        """A stray </function> with no opener is dropped; surrounding text is kept."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("text</function>more")
+        result += scrubber.flush()
+        assert "</function>" not in result
+        assert "text" in result
+        assert "more" in result
+
+
+# =============================================================================
+# <parameter=NAME>...</parameter> suppression
+# =============================================================================
+
+
+class TestScrubberParameterTags:
+    """Test suppression of <parameter=name>...</parameter> (Llama-style) blocks."""
+
+    def test_parameter_tag_single_delta(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("Before <parameter=city>NYC</parameter> After")
+        # The parameter value between opener and </parameter> must be hidden
+        result += scrubber.flush()
+        assert "<parameter=" not in result
+        assert "NYC" not in result
+        assert "Before" in result
+        assert "After" in result
+
+    def test_stray_parameter_close_suppressed(self):
+        """A stray </parameter> with no opener is dropped; surrounding text is kept."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("text</parameter>more")
+        result += scrubber.flush()
+        assert "</parameter>" not in result
+        assert "text" in result
+        assert "more" in result
+
+    def test_parameter_inside_function_block(self):
+        """Parameter markup nested in a function block is hidden along with it."""
+        scrubber = _AnthropicStreamScrubber()
+        text = "<function=search><parameter=query>test</parameter></function>done"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        # Parameter name and value are hidden; only the trailing text survives
+        assert "query" not in result
+        assert "test" not in result
+        assert "done" in result
+
+
+# =============================================================================
+# Stray Closing Tags
+# =============================================================================
+
+
+class TestScrubberStrayClosingTags:
+    """Closing tags with no matching opener are removed; adjacent text is kept."""
+
+    def test_stray_think_close(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("hello</think>world")
+        result += scrubber.flush()
+        assert "</think>" not in result
+        assert "hello" in result
+        assert "world" in result
+
+    def test_stray_tool_call_close(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("hello</tool_call>world")
+        result += scrubber.flush()
+        assert "</tool_call>" not in result
+        assert "hello" in result
+        assert "world" in result
+
+    def test_stray_function_close(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("hello</function>world")
+        result += scrubber.flush()
+        assert "</function>" not in result
+        assert "hello" in result
+        assert "world" in result
+
+    def test_stray_parameter_close(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("hello</parameter>world")
+        result += scrubber.flush()
+        assert "</parameter>" not in result
+        assert "hello" in result
+        assert "world" in result
+
+    def test_multiple_stray_closing_tags(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = "a</think>b</tool_call>c</function>d</parameter>e"  # one of each closer
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "</think>" not in result
+        assert "</tool_call>" not in result
+        assert "</function>" not in result
+        assert "</parameter>" not in result
+        assert "a" in result
+        assert "b" in result
+        assert "c" in result
+        assert "d" in result
+        assert "e" in result
+
+
+# =============================================================================
+# Mixed Scenarios
+# =============================================================================
+
+
+class TestScrubberMixedContent:
+    """Different tag kinds combined with visible text in one stream."""
+
+    def test_think_then_tool_call(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = (
+            'Before<think>reasoning</think>Middle<tool_call>{"fn":"x"}</tool_call>After'
+        )
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "reasoning" not in result
+        assert '{"fn":"x"}' not in result
+        assert "Before" in result
+        assert "Middle" in result
+        assert "After" in result
+
+    def test_tool_call_then_think(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = "<tool_call>data</tool_call>text<think>thought</think>end"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "data" not in result
+        assert "thought" not in result
+        assert "text" in result
+        assert "end" in result
+
+    def test_think_with_function_inside_tool_call(self):
+        """A <function=...> block nested inside <tool_call> is hidden with it."""
+        scrubber = _AnthropicStreamScrubber()
+        text = "<tool_call>outer<function=inner>nested</function></tool_call>visible"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "outer" not in result
+        assert "nested" not in result
+        assert "visible" in result
+
+    def test_interleaved_text_and_tags(self):
+        scrubber = _AnthropicStreamScrubber()
+        parts = [
+            "Hello ",
+            "<think>",
+            "Let me think about this...",
+            "</think>",
+            " Here's my answer.",
+        ]
+        collected = ""
+        for p in parts:
+            collected += scrubber.feed(p)
+        collected += scrubber.flush()
+        assert "Let me think about this" not in collected
+        assert "Hello" in collected
+        assert "Here's my answer." in collected
+
+    def test_realistic_streaming_scenario(self):
+        """Simulate a realistic token-by-token streaming scenario."""
+        scrubber = _AnthropicStreamScrubber()
+        # Full stream: "<think>Let me check</think>The weather is sunny."
+        # fed as small token-like deltas, so both tags arrive in three pieces
+        tokens = [
+            "<",
+            "think",
+            ">",
+            "Let",
+            " me",
+            " check",
+            "</",
+            "think",
+            ">",
+            "The",
+            " weather",
+            " is",
+            " sunny",
+            ".",
+        ]
+        collected = ""
+        for tok in tokens:
+            collected += scrubber.feed(tok)
+        collected += scrubber.flush()
+        assert "Let me check" not in collected
+        assert "<think>" not in collected
+        assert "</think>" not in collected
+        assert "The weather is sunny." in collected
+
+    def test_realistic_tool_call_streaming(self):
+        """Simulate tool-call markup arriving token by token."""
+        scrubber = _AnthropicStreamScrubber()
+        tokens = [
+            "I'll ",
+            "search",
+            " for ",
+            "that.",
+            "<tool",
+            "_call",
+            ">",
+            '{"name',
+            '":"',
+            "search",
+            '","',
+            "arguments",
+            '":{"',
+            "q",
+            '":"',
+            "weather",
+            '"}}',
+            "</tool",
+            "_call",
+            ">",
+        ]
+        collected = ""
+        for tok in tokens:
+            collected += scrubber.feed(tok)
+        collected += scrubber.flush()
+        assert "<tool_call>" not in collected
+        assert "</tool_call>" not in collected
+        assert '"name"' not in collected
+        assert "I'll search for that." in collected
+
+
+# =============================================================================
+# Tag Split Across Boundaries (carry buffer tests)
+# =============================================================================
+
+
+class TestScrubberCarryBuffer:
+    """Tags split across deltas must still be recognized via the carry buffer."""
+
+    def test_tag_split_at_every_character(self):
+        """Feed text containing <think>...</think> one char at a time."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        for ch in "before<think>hidden</think>after":
+            collected += scrubber.feed(ch)
+        collected += scrubber.flush()
+        assert "hidden" not in collected
+        assert "before" in collected
+        assert "after" in collected
+
+    def test_close_tag_split_at_every_character(self):
+        """Split </think> one char at a time."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        for ch in "<think>suppressed</think>visible":
+            collected += scrubber.feed(ch)
+        collected += scrubber.flush()
+        assert "suppressed" not in collected
+        assert "visible" in collected
+
+    def test_tool_call_tag_split_at_every_character(self):
+        """Split <tool_call> one char at a time."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        for ch in "pre<tool_call>body</tool_call>post":
+            collected += scrubber.feed(ch)
+        collected += scrubber.flush()
+        assert "body" not in collected
+        assert "pre" in collected
+        assert "post" in collected
+
+    def test_carry_cleared_after_flush(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("some text")
+        scrubber.flush()
+        assert scrubber.carry == ""
+
+    def test_carry_cleared_after_full_consumption(self):
+        """When all content is consumed, flush emits nothing and carry is empty."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<think>x</think>")  # a complete block consumes everything
+        result = scrubber.flush()
+        assert result == ""  # nothing visible was pending
+        assert scrubber.carry == ""
+
+
+# =============================================================================
+# flush() Behavior
+# =============================================================================
+
+
+class TestScrubberFlush:
+    """Test flush() at end of stream, in TEXT mode and in each suppression mode."""
+
+    def test_flush_emits_remaining_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        # "hi" has no '<' so is emitted immediately by feed().
+        result = scrubber.feed("hi")
+        assert result == "hi"
+        # flush() should return empty since carry is empty.
+        flushed = scrubber.flush()
+        assert flushed == ""
+
+    def test_flush_emits_carry_with_angle_bracket(self):
+        """Text ending with '<' is held in carry; flush emits it."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("text<")
+        assert result == "text"  # everything before the possible tag start is emitted
+        assert scrubber.carry == "<"
+        flushed = scrubber.flush()
+        assert flushed == "<"  # '<' alone is not a valid tag, so flush emits it as-is
+
+    def test_flush_in_think_mode_discards(self):
+        """If stream ends while inside <think>, flush returns empty."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<think>unfinished")
+        assert scrubber.mode == "IN_THINK"
+        flushed = scrubber.flush()
+        assert flushed == ""
+        assert scrubber.carry == ""
+
+    def test_flush_in_toolcall_mode_discards(self):
+        """If stream ends while inside <tool_call>, flush returns empty."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<tool_call>unfinished")
+        assert scrubber.mode == "IN_TOOLCALL"
+        flushed = scrubber.flush()
+        assert flushed == ""
+
+    def test_flush_in_function_mode_discards(self):
+        """If stream ends while inside <function=...>, flush returns empty."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<function=test>unfinished")
+        assert scrubber.mode == "IN_FUNCTION"
+        flushed = scrubber.flush()
+        assert flushed == ""
+
+    def test_flush_strips_residual_exact_tags(self):
+        """flush() in TEXT mode strips any leftover exact tags from carry."""
+        scrubber = _AnthropicStreamScrubber()
+        # Manually set carry to simulate leftover tag fragment
+        scrubber.carry = "text<think>leftover"
+        scrubber.mode = "TEXT"
+        flushed = scrubber.flush()
+        assert "<think>" not in flushed
+        assert "text" in flushed
+
+    def test_flush_strips_residual_function_tags(self):
+        """flush() strips residual <function=name> from carry."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.carry = "before<function=test>after"
+        scrubber.mode = "TEXT"
+        flushed = scrubber.flush()
+        assert "<function=" not in flushed
+        assert "before" in flushed
+
+    def test_flush_strips_residual_parameter_tags(self):
+        """flush() strips residual <parameter=name> from carry."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.carry = "before<parameter=x>after"
+        scrubber.mode = "TEXT"
+        flushed = scrubber.flush()
+        assert "<parameter=" not in flushed
+        assert "before" in flushed
+
+    def test_double_flush_returns_empty(self):
+        """Calling flush() twice should return empty the second time."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("hello")
+        scrubber.flush()  # first flush drains anything still pending
+        second = scrubber.flush()
+        assert second == ""
+
+
+# =============================================================================
+# State Machine Transitions
+# =============================================================================
+
+
+class TestScrubberStateMachine:
+    """Check the value of scrubber.mode after opening and closing each tag kind."""
+
+    def test_text_to_think(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("text<think>")
+        assert scrubber.mode == "IN_THINK"
+
+    def test_think_back_to_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<think>content</think>")
+        # Once </think> has been consumed the mode must be TEXT again
+        assert scrubber.mode == "TEXT"
+
+    def test_text_to_toolcall(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("text<tool_call>")
+        assert scrubber.mode == "IN_TOOLCALL"
+
+    def test_toolcall_back_to_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<tool_call>data</tool_call>")
+        assert scrubber.mode == "TEXT"
+
+    def test_text_to_function(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("text<function=test>")
+        assert scrubber.mode == "IN_FUNCTION"
+
+    def test_function_back_to_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<function=test>body</function>")
+        assert scrubber.mode == "TEXT"
+
+    def test_text_to_parameter_enters_parameter_mode(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("text<parameter=x>")
+        assert scrubber.mode == "IN_PARAMETER"
+
+    def test_parameter_back_to_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<parameter=x>body</parameter>")
+        assert scrubber.mode == "TEXT"
+
+    def test_stray_close_stays_in_text(self):
+        """A closing tag seen in TEXT mode must leave the mode at TEXT."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("text</think>more")
+        assert scrubber.mode == "TEXT"
+
+        scrubber2 = _AnthropicStreamScrubber()
+        scrubber2.feed("text</tool_call>more")
+        assert scrubber2.mode == "TEXT"
+
+
+# =============================================================================
+# Edge Cases
+# =============================================================================
+
+
+class TestScrubberEdgeCases:
+    """Edge cases: empty blocks, near-miss tags, large payloads, Unicode, newlines."""
+
+    def test_empty_think_block(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("before<think></think>after")
+        result += scrubber.flush()
+        assert "before" in result
+        assert "after" in result
+        assert "<think>" not in result
+
+    def test_empty_tool_call_block(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("before<tool_call></tool_call>after")
+        result += scrubber.flush()
+        assert "before" in result
+        assert "after" in result
+
+    def test_angle_brackets_in_plain_text(self):
+        """Plain < and > that aren't tags should eventually be emitted."""
+        scrubber = _AnthropicStreamScrubber()
+        # These don't form valid tags so should pass through
+        result = scrubber.feed("x < y and a > b are normal math expressions here")
+        result += scrubber.flush()
+        assert "x < y" in result or ("x" in result and "< y" in result)
+        assert "a > b" in result or ("a" in result and "> b" in result)
+
+    def test_partial_tag_that_is_not_a_tag(self):
+        """Something like '<thinkable>' shouldn't be treated as <think>."""
+        scrubber = _AnthropicStreamScrubber()
+        # "<thinkable>" starts with "<think" but is not the exact "<think>" marker:
+        # its 7th character is 'a', not '>'.  An exact-match scan for "<think>"
+        # therefore must not fire here, and the text that follows must not be
+        # swallowed as if it were inside a think block.
+        # NOTE(review): only the text after the pseudo-tag is asserted below;
+        # whether "<thinkable>" itself is echoed verbatim is not pinned here.
+        result = scrubber.feed("a<thinkable>test")
+        result += scrubber.flush()
+        # Text following the unrecognized tag must still be emitted
+        assert "test" in result
+
+    def test_only_tags_no_text(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("<think>hidden</think>")
+        result += scrubber.flush()
+        assert result == ""
+
+    def test_consecutive_think_blocks_no_gap(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("<think>aaa</think><think>bbb</think>visible")
+        result += scrubber.flush()
+        assert "aaa" not in result  # suppressed
+        assert "bbb" not in result  # suppressed
+        assert "visible" in result
+
+    def test_very_long_suppressed_content(self):
+        """A 10,000-character think payload is fully suppressed."""
+        scrubber = _AnthropicStreamScrubber()
+        long_content = "x" * 10000
+        text = f"before<think>{long_content}</think>after"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert long_content not in result
+        assert "before" in result
+        assert "after" in result
+
+    def test_unicode_text_preserved(self):
+        """Unicode text outside tags should be preserved."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("Héllo wörld 你好 🌍<think>secret</think> done")
+        result += scrubber.flush()
+        assert "secret" not in result
+        assert "Héllo" in result
+        assert "done" in result
+
+    def test_newlines_between_tags(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = "line1\n<think>hidden\n</think>\nline2"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "hidden" not in result
+        assert "line1" in result
+        assert "line2" in result
+
+    def test_back_to_back_different_tags(self):
+        scrubber = _AnthropicStreamScrubber()
+        text = "<think>t</think><tool_call>tc</tool_call><function=f>fc</function>end"
+        result = scrubber.feed(text)
+        result += scrubber.flush()
+        assert "end" in result
+        # None of the tag markup itself may appear in the output
+        for s in [
+            "<think>",
+            "</think>",
+            "<tool_call>",
+            "</tool_call>",
+            "<function=",
+            "</function>",
+        ]:
+            assert s not in result
+
+
+# =============================================================================
+# _find_earliest_marker Internal Method
+# =============================================================================
+
+
+class TestFindEarliestMarker:
+    """Test _find_earliest_marker directly: None or a (pos, marker, consume) tuple."""
+
+    def test_no_markers(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("hello world", 0)
+        assert result is None
+
+    def test_finds_think_open(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("text<think>more", 0)
+        assert result is not None
+        pos, marker, consume = result
+        assert pos == 4  # index of '<', right after "text"
+        assert marker == "<think>"
+        assert consume == len("<think>")
+
+    def test_finds_earliest_of_multiple(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("<think>text<tool_call>", 0)
+        assert result is not None
+        pos, marker, _ = result
+        assert pos == 0  # <think> at 0 wins over the later <tool_call>
+        assert marker == "<think>"
+
+    def test_finds_function_prefix(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("text<function=test>more", 0)
+        assert result is not None
+        pos, marker, consume = result
+        assert pos == 4
+        assert marker == "<function="  # marker is the prefix, not the full tag
+        assert consume == len("<function=test>")  # consume runs through the '>'
+
+    def test_function_prefix_missing_close_angle(self):
+        """If '>' is missing for a prefix tag, consume should be -1."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("text<function=test", 0)
+        assert result is not None
+        pos, marker, consume = result
+        assert pos == 4
+        assert marker == "<function="
+        assert consume == -1  # truncated
+
+    def test_start_offset(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("<think>more<think>", 7)
+        assert result is not None
+        pos, _, _ = result
+        assert pos == 11  # second <think>; the first lies before offset 7
+
+    def test_parameter_prefix(self):
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber._find_earliest_marker("<parameter=name>val", 0)
+        assert result is not None
+        pos, marker, consume = result
+        assert pos == 0
+        assert marker == "<parameter="
+        assert consume == len("<parameter=name>")  # same rule as <function=...>
+
+
+# =============================================================================
+# Integration with Scrubber in Streaming Context
+# =============================================================================
+
+
+class TestScrubberStreamingIntegration:
+    """Simulate real streaming patterns to ensure correctness end-to-end."""
+
+    def _stream_through(self, scrubber, deltas):
+        """Feed a list of deltas through the scrubber, return collected output."""
+        collected = ""
+        for d in deltas:
+            collected += scrubber.feed(d)
+        collected += scrubber.flush()
+        return collected
+
+    def test_clean_text_passthrough(self):
+        """Normal text with no tags should come through unchanged."""
+        scrubber = _AnthropicStreamScrubber()
+        text = "The weather today is sunny and warm."
+        words = text.split(" ")
+        deltas = [w + " " for w in words[:-1]] + [words[-1]]
+        result = self._stream_through(scrubber, deltas)
+        assert result == text
+
+    def test_think_then_answer_streaming(self):
+        """Model thinks, then answers."""
+        scrubber = _AnthropicStreamScrubber()
+        deltas = [
+            "<think>",
+            "Let me reason about this...\n",
+            "The user wants weather info.\n",
+            "</think>",
+            "The weather ",
+            "is sunny ",
+            "today.",
+        ]
+        result = self._stream_through(scrubber, deltas)
+        assert "reason" not in result
+        assert "The weather is sunny today." in result
+
+    def test_tool_call_json_streaming(self):
+        """Model emits tool call JSON in small chunks."""
+        scrubber = _AnthropicStreamScrubber()
+        deltas = [
+            "Let me look that up.",
+            "<tool_call>",
+            '{"',
+            'name": "',
+            "get_weather",
+            '", "arguments',
+            '": {"city": "',
+            "San Francisco",
+            '"}}',
+            "</tool_call>",
+        ]
+        result = self._stream_through(scrubber, deltas)
+        assert "Let me look that up." in result
+        assert "get_weather" not in result
+        assert "San Francisco" not in result
+
+    def test_think_then_tool_call_streaming(self):
+        """Model reasons then makes a tool call."""
+        scrubber = _AnthropicStreamScrubber()
+        deltas = [
+            "<think>",
+            "I need to ",
+            "search for this.",
+            "</think>",
+            "I'll help with that.",
+            "<tool_call>",
+            '{"name":"search"}',
+            "</tool_call>",
+        ]
+        result = self._stream_through(scrubber, deltas)
+        assert "I need to" not in result
+        assert "I'll help with that." in result
+        assert "search" not in result or result == "I'll help with that."
+
+    def test_scrubber_reuse_not_recommended(self):
+        """After flush, feeding more data should still work (even if atypical)."""
+        scrubber = _AnthropicStreamScrubber()
+        r1 = scrubber.feed("first")
+        r1 += scrubber.flush()
+        # Reuse
+        r2 = scrubber.feed("second")
+        r2 += scrubber.flush()
+        assert "first" in r1
+        assert "second" in r2
+
+
+# =============================================================================
+# Zero-Latency Carry: plain text should not be held back
+# =============================================================================
+
+
+class TestScrubberZeroLatencyCarry:
+    """Verify that the conditional carry buffer doesn't stall plain text."""
+
+    def test_plain_text_emits_immediately(self):
+        """No '<' in text → carry should be empty, all text emitted."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("Hello world")
+        assert result == "Hello world"
+        assert scrubber.carry == ""
+
+    def test_plain_deltas_no_carry(self):
+        """Multiple plain deltas should each emit fully."""
+        scrubber = _AnthropicStreamScrubber()
+        for word in ["The ", "quick ", "brown ", "fox."]:
+            result = scrubber.feed(word)
+            assert result == word  # checked per delta, before any flush
+            assert scrubber.carry == ""
+
+    def test_angle_bracket_at_end_triggers_carry(self):
+        """A trailing '<' should be held in carry (could be a tag start)."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("text<")
+        assert "<" not in result
+        assert scrubber.carry == "<"
+
+    def test_angle_bracket_resolved_next_delta(self):
+        """A literal '<' that is not a tag survives once the stream is flushed."""
+        scrubber = _AnthropicStreamScrubber()
+        r1 = scrubber.feed("value < ")
+        r2 = scrubber.feed("other")
+        r2 += scrubber.flush()
+        full = r1 + r2  # only the combined output is checked, not when '<' appears
+        assert "value < other" in full
+
+    def test_angle_bracket_resolved_as_tag(self):
+        """Carry '<' is resolved when next delta completes a tag."""
+        scrubber = _AnthropicStreamScrubber()
+        r1 = scrubber.feed("before<")
+        r2 = scrubber.feed("think>hidden</think>after")
+        r2 += scrubber.flush()
+        full = r1 + r2
+        assert "before" in full
+        assert "after" in full
+        assert "hidden" not in full
+
+    def test_first_token_emits_immediately(self):
+        """The very first token should not be stalled by carry buffer."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("Hi")
+        assert result == "Hi"
+
+    def test_long_plain_text_no_carry(self):
+        """Long text with no '<' should all be emitted, carry empty."""
+        scrubber = _AnthropicStreamScrubber()
+        text = "A" * 500
+        result = scrubber.feed(text)
+        assert result == text
+        assert scrubber.carry == ""
+
+
+# =============================================================================
+# Carry Buffer Cap (unbounded growth prevention)
+# =============================================================================
+
+
+class TestScrubberCarryCap:
+    """Test that the carry buffer is capped to prevent unbounded growth."""
+
+    def test_max_carry_constant_exists(self):
+        """MAX_CARRY must be defined and strictly larger than CARRY_N."""
+        assert hasattr(_AnthropicStreamScrubber, "MAX_CARRY")
+        assert _AnthropicStreamScrubber.MAX_CARRY > _AnthropicStreamScrubber.CARRY_N
+
+    def test_carry_cap_emits_as_literal(self):
+        """If a prefix tag never closes (>), carry cap emits content as literal text."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("text <function=name_that_never_closes")
+        # Feed enough characters to exceed MAX_CARRY (assumes MAX_CARRY < ~300)
+        collected += scrubber.feed("x" * 300)
+        collected += scrubber.flush()
+        # The content should have been emitted as literal text
+        assert "<function=" in collected
+        assert "text" in collected
+
+    def test_carry_does_not_grow_unbounded(self):
+        """Carry buffer should not grow without limit."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<function=start")
+        for _ in range(100):
+            scrubber.feed("more_content_" * 5)
+        # Carry should be bounded; NOTE(review): the +100 slack is ad hoc — confirm
+        assert len(scrubber.carry) <= scrubber.MAX_CARRY + 100
+
+    def test_carry_cap_resets_cleanly(self):
+        """After cap kicks in, scrubber should resume normal processing."""
+        scrubber = _AnthropicStreamScrubber()
+        collected = ""
+        collected += scrubber.feed("<function=never_closes" + "x" * 300)
+        # Carry cap should have emitted everything as literal text
+        collected += scrubber.feed("normal text after cap")
+        collected += scrubber.flush()
+        assert "normal text after cap" in collected
+
+
+# =============================================================================
+# flush() with Incomplete Prefix Tags
+# =============================================================================
+
+
+class TestScrubberFlushIncompletePrefixTags:
+    """Test that flush() strips incomplete prefix tags without closing '>'."""
+
+    def test_flush_strips_incomplete_function_prefix(self):
+        """flush() strips incomplete <function= prefix without closing >."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.carry = "text<function=myFunc"  # seed carry directly, bypassing feed()
+        scrubber.mode = "TEXT"
+        flushed = scrubber.flush()
+        assert "<function=" not in flushed
+        assert "text" in flushed
+
+    def test_flush_strips_incomplete_parameter_prefix(self):
+        """flush() strips incomplete <parameter= prefix without closing >."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.carry = "text<parameter=arg"  # seed carry directly, bypassing feed()
+        scrubber.mode = "TEXT"
+        flushed = scrubber.flush()
+        assert "<parameter=" not in flushed
+        assert "text" in flushed
+
+    def test_flush_strips_both_complete_and_incomplete(self):
+        """flush() handles a mix of complete and incomplete prefix tags."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.carry = "a<function=foo>b<parameter=bar"  # one closed, one truncated
+        scrubber.mode = "TEXT"
+        flushed = scrubber.flush()
+        assert "<function=" not in flushed
+        assert "<parameter=" not in flushed
+        assert "a" in flushed
+
+    def test_flush_in_parameter_mode_discards(self):
+        """If stream ends inside <parameter=...>, flush returns empty."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<parameter=test>unfinished")
+        assert scrubber.mode == "IN_PARAMETER"  # precondition: block still open
+        flushed = scrubber.flush()
+        assert flushed == ""
+
+
+# =============================================================================
+# IN_PARAMETER State (separate from IN_FUNCTION)
+# =============================================================================
+
+
+class TestScrubberParameterState:
+    """Test the IN_PARAMETER state closes on </parameter> (not </function>)."""
+
+    def test_parameter_closes_on_parameter_tag(self):
+        """<parameter=x>...</parameter> correctly suppresses and closes."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed("before<parameter=city>NYC</parameter>after")
+        result += scrubber.flush()
+        assert "NYC" not in result
+        assert "before" in result
+        assert "after" in result
+        assert scrubber.mode == "TEXT"  # state returns to TEXT after </parameter>
+
+    def test_parameter_does_not_close_on_function_tag(self):
+        """IN_PARAMETER mode should NOT close on </function>."""
+        scrubber = _AnthropicStreamScrubber()
+        scrubber.feed("<parameter=x>content</function>still_suppressed")
+        # Should still be in IN_PARAMETER since </function> doesn't close it
+        assert scrubber.mode == "IN_PARAMETER"
+
+    def test_standalone_parameter_with_text_after(self):
+        """Standalone <parameter=name>value</parameter> followed by text."""
+        scrubber = _AnthropicStreamScrubber()
+        result = scrubber.feed(
+            "<parameter=query>search term</parameter> Here is my response."
+        )
+        result += scrubber.flush()
+        assert "search term" not in result  # parameter value is suppressed
+        assert "<parameter=" not in result  # and neither tag leaks
+        assert "</parameter>" not in result
+        assert "Here is my response." in result
+
+
+# =============================================================================
+# Router Inherits Scrubber Behavior
+# =============================================================================
+
+
+class TestRouterInheritsScrubber:
+    """Test that the Router (subclass) inherits all Scrubber capabilities."""
+
+    def test_router_is_subclass(self):
+        from vllm_mlx.server import _AnthropicStreamRouter
+
+        assert issubclass(_AnthropicStreamRouter, _AnthropicStreamScrubber)
+
+    def test_router_has_carry_cap(self):
+        from vllm_mlx.server import _AnthropicStreamRouter
+
+        router = _AnthropicStreamRouter()
+        assert hasattr(router, "MAX_CARRY")
+        assert router.MAX_CARRY == _AnthropicStreamScrubber.MAX_CARRY  # same cap value
+
+    def test_router_parameter_mode(self):
+        """Router should handle IN_PARAMETER the same as scrubber."""
+        from vllm_mlx.server import _AnthropicStreamRouter
+
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("text<parameter=x>suppressed</parameter>after")
+        pieces += router.flush()
+        text = "".join(t for k, t in pieces if k == "text")  # pieces: (kind, text)
+        assert "suppressed" not in text
+        assert "text" in text
+        assert "after" in text
+
+
+# =============================================================================
+# _AnthropicStreamRouter Tests
+# =============================================================================
+
+from vllm_mlx.server import _AnthropicStreamRouter, _is_thinking_enabled
+
+
+class TestRouterInitialState:
+    """Test router creation and initial state."""
+
+    def test_initial_mode_is_text(self):
+        router = _AnthropicStreamRouter()
+        assert router.mode == "TEXT"
+
+    def test_initial_carry_is_empty(self):
+        router = _AnthropicStreamRouter()
+        assert router.carry == ""
+
+
+class TestRouterPlainText:
+    """Router should pass through plain text as ('text', ...) pieces."""
+
+    def test_plain_text_emits_text_piece(self):
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("Hello world")
+        assert len(pieces) == 1
+        assert pieces[0] == ("text", "Hello world")
+
+    def test_empty_string(self):
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("")
+        assert pieces == []
+
+    def test_multiple_plain_deltas(self):
+        router = _AnthropicStreamRouter()
+        all_text = ""
+        for word in ["The ", "quick ", "brown ", "fox."]:
+            for kind, text in router.feed(word):
+                assert kind == "text"
+                all_text += text
+        for kind, text in router.flush():
+            if kind == "text":
+                all_text += text
+        assert all_text == "The quick brown fox."
+
+
+class TestRouterThinkingBlocks:
+    """Router should emit thinking_start/thinking/thinking_stop for <think> blocks."""
+
+    def test_think_block_single_delta(self):
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("Hello <think>reasoning</think> world")
+        pieces += router.flush()
+
+        kinds = [k for k, _ in pieces]  # each piece is a (kind, payload) pair
+        assert "thinking_start" in kinds
+        assert "thinking" in kinds
+        assert "thinking_stop" in kinds
+
+        # Collect text and thinking separately
+        text = "".join(t for k, t in pieces if k == "text")
+        thinking = "".join(t for k, t in pieces if k == "thinking")
+        assert "Hello" in text
+        assert "world" in text
+        assert "reasoning" in thinking
+
+    def test_think_block_split_across_deltas(self):
+        router = _AnthropicStreamRouter()
+        all_pieces = []
+        for delta in ["Before <thi", "nk>secret</think> After"]:  # tag split mid-name
+            all_pieces.extend(router.feed(delta))
+        all_pieces.extend(router.flush())
+
+        text = "".join(t for k, t in all_pieces if k == "text")
+        thinking = "".join(t for k, t in all_pieces if k == "thinking")
+        assert "Before" in text
+        assert "After" in text
+        assert "secret" in thinking
+
+    def test_think_then_text_streaming(self):
+        """Simulate realistic think-then-answer streaming."""
+        router = _AnthropicStreamRouter()
+        all_pieces = []
+        for delta in ["<think>", "Let me reason.", "</think>", "The answer."]:
+            all_pieces.extend(router.feed(delta))
+        all_pieces.extend(router.flush())
+
+        kinds = [k for k, _ in all_pieces]
+        text = "".join(t for k, t in all_pieces if k == "text")
+        thinking = "".join(t for k, t in all_pieces if k == "thinking")
+
+        assert "thinking_start" in kinds
+        assert "thinking_stop" in kinds
+        assert "Let me reason." in thinking
+        assert "The answer." in text
+
+    def test_multiple_think_blocks(self):
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("A<think>r1</think>B<think>r2</think>C")
+        pieces += router.flush()
+
+        text = "".join(t for k, t in pieces if k == "text")
+        thinking = "".join(t for k, t in pieces if k == "thinking")
+        starts = sum(1 for k, _ in pieces if k == "thinking_start")
+        stops = sum(1 for k, _ in pieces if k == "thinking_stop")
+
+        assert "A" in text
+        assert "B" in text
+        assert "C" in text
+        assert "r1" in thinking
+        assert "r2" in thinking
+        assert starts == 2  # one start/stop pair per <think> block
+        assert stops == 2
+
+    def test_unclosed_think_at_end(self):
+        """Unclosed <think> at end should flush remaining as thinking."""
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("<think>unfinished")
+        pieces += router.flush()
+
+        kinds = [k for k, _ in pieces]
+        thinking = "".join(t for k, t in pieces if k == "thinking")
+        assert "thinking_start" in kinds
+        assert "thinking_stop" in kinds  # flush closes it
+        assert "unfinished" in thinking
+
+
+class TestRouterToolCallSuppression:
+    """Router should suppress tool_call/function/parameter content (no pieces)."""
+
+    def test_tool_call_suppressed(self):
+        router = _AnthropicStreamRouter()
+        pieces = router.feed('Before <tool_call>{"fn":"x"}</tool_call> After')
+        pieces += router.flush()
+
+        text = "".join(t for k, t in pieces if k == "text")
+        all_content = "".join(t for _, t in pieces)  # every piece kind, not just text
+        assert "Before" in text
+        assert "After" in text
+        assert '{"fn":"x"}' not in all_content
+
+    def test_function_tag_suppressed(self):
+        router = _AnthropicStreamRouter()
+        pieces = router.feed("<function=search>body</function>after")
+        pieces += router.flush()
+
+        text = "".join(t for k, t in pieces if k == "text")
+        assert "after" in text
+        assert "body" not in "".join(t for _, t in pieces)  # absent from all kinds
+
+
+class TestRouterMixedContent:
+    """Test router with think + tool_call combined."""
+
+    def test_think_then_tool_call(self):
+        router = _AnthropicStreamRouter()
+        text_input = "<think>reasoning</think>visible<tool_call>data</tool_call>end"
+        pieces = router.feed(text_input)
+        pieces += router.flush()
+
+        text = "".join(t for k, t in pieces if k == "text")
+        thinking = "".join(t for k, t in pieces if k == "thinking")
+
+        assert "reasoning" in thinking
+        assert "visible" in text
+        assert "end" in text
+        assert "data" not in text  # tool-call body reaches neither channel
+        assert "data" not in thinking
+
+    def test_realistic_streaming(self):
+        """Token-by-token streaming with thinking; tags arrive split into pieces."""
+        router = _AnthropicStreamRouter()
+        tokens = [
+            "<",
+            "think",
+            ">",
+            "Let",
+            " me",
+            " check",
+            "</",
+            "think",
+            ">",
+            "The",
+            " answer",
+            " is",
+            " 42",
+            ".",
+        ]
+        all_pieces = []
+        for tok in tokens:
+            all_pieces.extend(router.feed(tok))
+        all_pieces.extend(router.flush())
+
+        text = "".join(t for k, t in all_pieces if k == "text")
+        thinking = "".join(t for k, t in all_pieces if k == "thinking")
+
+        assert "Let me check" in thinking
+        assert "The answer is 42." in text
+
+
+class TestRouterFlush:
+    """Test router flush() behavior."""
+
+    def test_flush_text_mode(self):
+        router = _AnthropicStreamRouter()
+        router.feed("text<")  # '<' held in carry
+        pieces = router.flush()
+        # Should emit the '<' as text
+        text = "".join(t for k, t in pieces if k == "text")
+        assert "<" in text
+
+    def test_flush_in_think_mode(self):
+        router = _AnthropicStreamRouter()
+        router.feed("<think>leftover")
+        pieces = router.flush()
+        kinds = [k for k, _ in pieces]
+        assert "thinking" in kinds
+        assert "thinking_stop" in kinds
+
+    def test_flush_in_toolcall_mode(self):
+        router = _AnthropicStreamRouter()
+        router.feed("<tool_call>stuff")
+        pieces = router.flush()
+        # Should discard (tool_call content suppressed)
+        assert pieces == []
+
+
+# =============================================================================
+# _is_thinking_enabled Helper
+# =============================================================================
+
+
+class TestIsThinkingEnabled:
+    """Test _is_thinking_enabled for None, omitted, dict and model-typed configs."""
+
+    def test_none_thinking(self):
+        from vllm_mlx.api.anthropic_models import AnthropicRequest, AnthropicMessage
+
+        req = AnthropicRequest(
+            model="test",
+            messages=[AnthropicMessage(role="user", content="hi")],
+            max_tokens=100,
+            thinking=None,  # explicit None
+        )
+        assert _is_thinking_enabled(req) is False
+
+    def test_no_thinking_field(self):
+        from vllm_mlx.api.anthropic_models import AnthropicRequest, AnthropicMessage
+
+        req = AnthropicRequest(
+            model="test",
+            messages=[AnthropicMessage(role="user", content="hi")],
+            max_tokens=100,
+        )
+        assert _is_thinking_enabled(req) is False  # field omitted entirely
+
+    def test_thinking_enabled_dict(self):
+        from vllm_mlx.api.anthropic_models import AnthropicRequest, AnthropicMessage
+
+        req = AnthropicRequest(
+            model="test",
+            messages=[AnthropicMessage(role="user", content="hi")],
+            max_tokens=100,
+            thinking={"type": "enabled", "budget_tokens": 5000},  # raw dict form
+        )
+        assert _is_thinking_enabled(req) is True
+
+    def test_thinking_disabled_dict(self):
+        from vllm_mlx.api.anthropic_models import AnthropicRequest, AnthropicMessage
+
+        req = AnthropicRequest(
+            model="test",
+            messages=[AnthropicMessage(role="user", content="hi")],
+            max_tokens=100,
+            thinking={"type": "disabled"},  # raw dict form
+        )
+        assert _is_thinking_enabled(req) is False
+
+    def test_thinking_enabled_model(self):
+        from vllm_mlx.api.anthropic_models import (
+            AnthropicRequest,
+            AnthropicMessage,
+            AnthropicThinkingConfig,
+        )
+
+        req = AnthropicRequest(
+            model="test",
+            messages=[AnthropicMessage(role="user", content="hi")],
+            max_tokens=100,
+            thinking=AnthropicThinkingConfig(type="enabled", budget_tokens=8000),
+        )
+        assert _is_thinking_enabled(req) is True  # typed config instead of a dict
+
+    def test_thinking_disabled_model(self):
+        from vllm_mlx.api.anthropic_models import (
+            AnthropicRequest,
+            AnthropicMessage,
+            AnthropicThinkingConfig,
+        )
+
+        req = AnthropicRequest(
+            model="test",
+            messages=[AnthropicMessage(role="user", content="hi")],
+            max_tokens=100,
+            thinking=AnthropicThinkingConfig(type="disabled"),
+        )
+        assert _is_thinking_enabled(req) is False  # typed config instead of a dict
diff --git a/vllm_mlx/api/anthropic_models.py b/vllm_mlx/api/anthropic_models.py
index a5bc6f776..0831916bf 100644
--- a/vllm_mlx/api/anthropic_models.py
+++ b/vllm_mlx/api/anthropic_models.py
@@ -50,6 +50,13 @@ class AnthropicToolDef(BaseModel):
     input_schema: dict | None = None
 
 
+class AnthropicThinkingConfig(BaseModel):
+    """Configuration for extended thinking (Anthropic streaming)."""
+
+    type: str = "enabled"  # "enabled" or "disabled"; plain str, not validated here
+    budget_tokens: int | None = None  # optional; defaults to None
+
+
 class AnthropicRequest(BaseModel):
     """Request for Anthropic Messages API."""
 
@@ -65,6 +72,7 @@ class AnthropicRequest(BaseModel):
     tool_choice: dict | None = None
     metadata: dict | None = None
     top_k: int | None = None
+    thinking: AnthropicThinkingConfig | dict | None = None
 
 
 # =============================================================================
diff --git a/vllm_mlx/server.py b/vllm_mlx/server.py
index f0328d4e6..e176834ed 100644
--- a/vllm_mlx/server.py
+++ b/vllm_mlx/server.py
@@ -42,6 +42,7 @@
 import json
 import logging
 import os
+import re
 import secrets
 import tempfile
 import threading
@@ -1666,6 +1667,333 @@ async def count_anthropic_tokens(request: Request):
     return {"input_tokens": total_tokens}
 
 
+class _AnthropicStreamScrubber:
+    """Stateful scrubber that strips model tool-call and reasoning markup
+    from streamed text deltas on the Anthropic ``/v1/messages`` endpoint.
+
+    Suppressed patterns:
+
+    * ``<think>...</think>`` – internal reasoning blocks
+    * ``<tool_call>...</tool_call>`` – Qwen/Hermes-style tool calls
+    * ``<function=NAME>...</function>`` – Llama-style tool calls
+    * ``<parameter=NAME>...</parameter>`` – Llama-style parameters
+    * Stray closing tags (``</think>``, ``</tool_call>``, ``</function>``,
+      ``</parameter>``) appearing outside their expected context
+
+    Handles tags that may be split across multiple token boundaries by
+    maintaining a small carry buffer.  Inside a thinking or suppressed
+    region up to the last ``CARRY_N`` characters are held back; in TEXT
+    mode only a trailing ``<...`` fragment is held (plain text is emitted
+    at once).  A cap (``MAX_CARRY``) bounds the carry when a prefix tag
+    like ``<function=`` appears but the closing ``>`` never arrives.
+
+    The scrubber operates as a simple state machine:
+
+    * **TEXT** – emit characters; scan for opening/stray-closing tags.
+    * **IN_THINK** – yield ``"thinking"`` pieces until ``</think>``.
+    * **IN_TOOLCALL** – suppress until ``</tool_call>``.
+    * **IN_FUNCTION** – suppress until ``</function>``.
+    * **IN_PARAMETER** – suppress until ``</parameter>``.
+    """
+
+    # --- Fixed (exact-match) tags ----------------------------------------
+    THINK_OPEN = "<think>"
+    THINK_CLOSE = "</think>"
+    TOOL_OPEN = "<tool_call>"
+    TOOL_CLOSE = "</tool_call>"
+    FUNC_CLOSE = "</function>"
+    PARAM_CLOSE = "</parameter>"
+
+    # Exact tags to scan for in TEXT mode.  Order doesn't matter – we
+    # always pick the earliest match.
+    _EXACT_TAGS = [
+        THINK_OPEN,
+        THINK_CLOSE,
+        TOOL_OPEN,
+        TOOL_CLOSE,
+        FUNC_CLOSE,
+        PARAM_CLOSE,
+    ]
+
+    # --- Prefix (variable-length) opening tags ---------------------------
+    # These look like ``<function=name>`` or ``<parameter=name>`` where
+    # the name varies.  We detect the prefix then scan forward for ``>``.
+    FUNC_PREFIX = "<function="
+    PARAM_PREFIX = "<parameter="
+    _PREFIX_TAGS = [FUNC_PREFIX, PARAM_PREFIX]
+
+    # Carry buffer size – must be at least ``max(len(tag)) - 1`` for all
+    # fixed tags *and* all prefixes so we can detect split boundaries.
+    _ALL_MARKERS = _EXACT_TAGS + _PREFIX_TAGS
+    MAX_TAG = max(len(t) for t in _ALL_MARKERS)
+    CARRY_N = MAX_TAG - 1
+
+    # Maximum carry buffer size.  When a prefix tag like ``<function=``
+    # is found but ``>`` never arrives, carry keeps accumulating each
+    # new delta.  Cap it and emit as literal text to prevent unbounded
+    # growth.
+    MAX_CARRY = CARRY_N + 256
+
+    # Map from opening signal → suppression mode
+    _MODE_MAP = {
+        THINK_OPEN: "IN_THINK",
+        TOOL_OPEN: "IN_TOOLCALL",
+        FUNC_PREFIX: "IN_FUNCTION",
+        PARAM_PREFIX: "IN_PARAMETER",
+    }
+
+    # Map from suppression mode → closing tag
+    _CLOSE_MAP = {
+        "IN_THINK": THINK_CLOSE,
+        "IN_TOOLCALL": TOOL_CLOSE,
+        "IN_FUNCTION": FUNC_CLOSE,
+        "IN_PARAMETER": PARAM_CLOSE,
+    }
+
+    def __init__(self) -> None:
+        self.mode: str = "TEXT"
+        self.carry: str = ""
+
+    # -----------------------------------------------------------------
+    # Internal helpers
+    # -----------------------------------------------------------------
+
+    def _find_earliest_marker(self, s: str, start: int) -> tuple[int, str, int] | None:
+        """Find the earliest opening or stray-closing tag in *s* from *start*.
+
+        Returns ``(position, marker, consume_length)`` or ``None``.
+        *consume_length* is how many characters the whole tag spans:
+        ``len(marker)`` for exact tags, up to and including ``>`` for
+        prefix tags, or ``-1`` when a prefix tag's ``>`` is not in *s*.
+        """
+        best: tuple[int, str, int] | None = None
+
+        # Check exact tags.
+        for tag in self._EXACT_TAGS:
+            pos = s.find(tag, start)
+            if pos != -1 and (best is None or pos < best[0]):
+                best = (pos, tag, len(tag))
+
+        # Check prefix tags (e.g. ``<function=name>``).
+        for prefix in self._PREFIX_TAGS:
+            pos = s.find(prefix, start)
+            if pos != -1 and (best is None or pos < best[0]):
+                # Need to find the closing '>' to know full tag length.
+                gt = s.find(">", pos + len(prefix))
+                if gt != -1:
+                    consume = gt + 1 - pos  # e.g. len("<function=foo>")
+                    best = (pos, prefix, consume)
+                else:
+                    # '>' not yet in buffer – treat as a partial tag.
+                    # consume = -1 signals "truncated".
+                    best = (pos, prefix, -1)
+
+        return best
+
+    # -----------------------------------------------------------------
+    # Core processing – returns typed pieces
+    # -----------------------------------------------------------------
+
+    def _feed_pieces(self, delta: str) -> list[tuple[str, str]]:
+        """Core processing: returns ``(kind, text)`` pieces.
+
+        *kind* is one of ``"text"``, ``"thinking_start"``,
+        ``"thinking"``, ``"thinking_stop"``.  Tool / function /
+        parameter content is silently dropped (no pieces emitted).
+
+        Used directly by :class:`_AnthropicStreamRouter`; the
+        scrubber's public :meth:`feed` extracts only ``"text"`` pieces.
+        """
+        s = self.carry + (delta or "")
+        pieces: list[tuple[str, str]] = []
+        slen = len(s)
+        i = 0
+
+        while i < slen:
+            if self.mode == "TEXT":
+                hit = self._find_earliest_marker(s, i)
+
+                if hit is None:
+                    # No marker anywhere.  Only retain a carry suffix if
+                    # there is a '<' near the tail that could be the start
+                    # of a split tag.  Otherwise emit everything immediately
+                    # so plain text streams with zero latency.
+                    tail = s[max(i, slen - self.CARRY_N) :]
+                    lt_pos = tail.rfind("<")
+                    if lt_pos != -1:
+                        # Keep from the '<' onward as carry.
+                        carry_start = max(i, slen - self.CARRY_N) + lt_pos
+                        if carry_start > i:
+                            pieces.append(("text", s[i:carry_start]))
+                        self.carry = s[carry_start:]
+                    else:
+                        # No '<' in tail – emit everything.
+                        if slen > i:
+                            pieces.append(("text", s[i:]))
+                        self.carry = ""
+                    return pieces
+
+                pos, marker, consume = hit
+
+                if consume < 0:
+                    # Prefix tag found but closing '>' missing – truncated.
+                    if pos > i:
+                        pieces.append(("text", s[i:pos]))
+                    # Cap carry growth: if the partial prefix tag region
+                    # exceeds MAX_CARRY, treat it as plain text.
+                    if slen - pos > self.MAX_CARRY:
+                        pieces.append(("text", s[pos:]))
+                        self.carry = ""
+                    else:
+                        self.carry = s[pos:]
+                    return pieces
+
+                tag_end = pos + consume
+                if tag_end > slen:
+                    # Defensive guard; the marker search only returns complete tags.
+                    if pos > i:
+                        pieces.append(("text", s[i:pos]))
+                    self.carry = s[pos:]
+                    return pieces
+
+                # Emit text before the tag.
+                if pos > i:
+                    pieces.append(("text", s[i:pos]))
+
+                # Consume the tag.
+                i = tag_end
+
+                # Determine new mode (if any).
+                new_mode = self._MODE_MAP.get(marker)
+                if new_mode:
+                    self.mode = new_mode
+                    if new_mode == "IN_THINK":
+                        pieces.append(("thinking_start", ""))
+                # else: stray closing tag – consumed and suppressed, stay TEXT.
+
+            elif self.mode == "IN_THINK":
+                # Find closing </think>.
+                close_pos = s.find(self.THINK_CLOSE, i)
+                if close_pos == -1:
+                    # Emit thinking content up to carry boundary.
+                    safe_end = max(i, slen - self.CARRY_N)
+                    if safe_end > i:
+                        pieces.append(("thinking", s[i:safe_end]))
+                    self.carry = s[safe_end:]
+                    return pieces
+                # Emit thinking content before closing tag.
+                if close_pos > i:
+                    pieces.append(("thinking", s[i:close_pos]))
+                pieces.append(("thinking_stop", ""))
+                i = close_pos + len(self.THINK_CLOSE)
+                self.mode = "TEXT"
+
+            else:
+                # IN_TOOLCALL, IN_FUNCTION, or IN_PARAMETER – suppress.
+                close_tag = self._CLOSE_MAP[self.mode]
+                close_pos = s.find(close_tag, i)
+                if close_pos == -1:
+                    # Closing tag not yet in buffer.
+                    self.carry = s[max(i, slen - self.CARRY_N) :]
+                    return pieces
+                i = close_pos + len(close_tag)
+                self.mode = "TEXT"
+
+        # Entire buffer consumed.
+        self.carry = ""
+        return pieces
+
+    def _flush_pieces(self) -> list[tuple[str, str]]:
+        """Drain the carry at end of stream, reset state, return the pieces."""
+        pieces: list[tuple[str, str]] = []
+        if self.mode == "IN_THINK":
+            # Emit any remaining thinking content.
+            if self.carry:
+                pieces.append(("thinking", self.carry))
+            pieces.append(("thinking_stop", ""))
+        elif self.mode == "TEXT" and self.carry:
+            result = self.carry
+            # Strip residual exact tags.
+            for tag in self._EXACT_TAGS:
+                result = result.replace(tag, "")
+            # Strip complete prefix tags (e.g. ``<function=foo>``).
+            result = re.sub(r"<function=[^>]*>", "", result)
+            result = re.sub(r"<parameter=[^>]*>", "", result)
+            # Strip incomplete prefix tags without closing ``>``.
+            result = re.sub(r"<function=[^>]*$", "", result)
+            result = re.sub(r"<parameter=[^>]*$", "", result)
+            if result:
+                pieces.append(("text", result))
+        # IN_TOOLCALL / IN_FUNCTION / IN_PARAMETER – discard.
+        self.carry = ""
+        self.mode = "TEXT"
+        return pieces
+
+    # -----------------------------------------------------------------
+    # Public API
+    # -----------------------------------------------------------------
+
+    def feed(self, delta: str) -> str:
+        """Process a new text delta and return only the safe-to-emit portion.
+
+        Only ``"text"`` pieces are kept; thinking content is dropped.
+        """
+        pieces = self._feed_pieces(delta)
+        return "".join(text for kind, text in pieces if kind == "text")
+
+    def flush(self) -> str:
+        """Flush remaining carry buffer at end of stream.
+
+        Emits leftover text only in TEXT mode (stripping any stray tags);
+        discards carry if inside a suppressed region.
+        """
+        pieces = self._flush_pieces()
+        return "".join(text for kind, text in pieces if kind == "text")
+
+
+class _AnthropicStreamRouter(_AnthropicStreamScrubber):
+    """Variant of the scrubber that surfaces ``<think>`` content.
+
+    The tag detection, carry handling and state machine all come from
+    :class:`_AnthropicStreamScrubber`.  The only difference is the
+    public API: ``feed()`` and ``flush()`` hand back the typed
+    ``(kind, text)`` pieces unfiltered instead of joining the
+    ``"text"`` ones into a string, so a caller can route reasoning to
+    ``thinking_delta`` events and everything else to ``text_delta``.
+
+    Piece kinds produced:
+
+    * ``("text", "...")`` – user-visible text
+    * ``("thinking_start", "")`` – a ``<think>`` tag was consumed
+    * ``("thinking", "...")`` – content inside the think region
+    * ``("thinking_stop", "")`` – the think region ended
+    * nothing at all for tool-call / function / parameter markup
+    """
+
+    def __init__(self, start_in_thinking: bool = False) -> None:
+        super().__init__()
+        # Optionally begin as if ``<think>`` had already been consumed.
+        self.mode = "IN_THINK" if start_in_thinking else self.mode
+
+    def feed(self, delta: str) -> list[tuple[str, str]]:
+        """Run *delta* through the state machine; return typed pieces."""
+        return self._feed_pieces(delta)
+
+    def flush(self) -> list[tuple[str, str]]:
+        """Drain whatever is still buffered once the stream has ended."""
+        return self._flush_pieces()
+
+
+def _is_thinking_enabled(anthropic_request: AnthropicRequest) -> bool:
+    """Report whether the request asks for extended thinking (type "enabled")."""
+    config = getattr(anthropic_request, "thinking", None)
+    if isinstance(config, dict):
+        mode = config.get("type")
+    else:
+        mode = getattr(config, "type", None)
+    return mode == "enabled"
+
+
 async def _stream_anthropic_messages(
     engine: BaseEngine,
     openai_request: ChatCompletionRequest,
@@ -1677,6 +2005,10 @@ async def _stream_anthropic_messages(
     Converts OpenAI streaming chunks to Anthropic event format:
     message_start -> content_block_start -> content_block_delta* ->
     content_block_stop -> message_delta -> message_stop
+
+    A streaming scrubber always filters out <think>...</think> and
+    <tool_call>...</tool_call> markup that the model may emit, so
+    clients only see clean text and structured tool_use blocks.
     """
     msg_id = f"msg_{uuid.uuid4().hex[:24]}"
     start_time = time.perf_counter()
@@ -1715,13 +2047,64 @@ async def _stream_anthropic_messages(
     }
     yield f"event: message_start\ndata: {json.dumps(message_start)}\n\n"
 
-    # Emit content_block_start for text
-    content_block_start = {
-        "type": "content_block_start",
-        "index": 0,
-        "content_block": {"type": "text", "text": ""},
-    }
-    yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
+    # Determine whether the client requested extended thinking.
+    thinking_enabled = _is_thinking_enabled(anthropic_request)
+
+    # Content block index tracking.  When thinking is enabled the
+    # thinking block is emitted first (index 0) and the text block
+    # follows (index 1).  Otherwise only the text block exists (index 0).
+    # These values are updated dynamically as blocks are opened.
+    next_block_index = 0
+    thinking_block_index: int | None = None
+    thinking_block_open = False
+    text_block_index: int | None = None
+    text_block_open = False
+
+    if thinking_enabled:
+        # Use the stream router which yields typed (kind, text) pieces
+        # that separate thinking content from user-facing text.
+        router: _AnthropicStreamRouter | None = _AnthropicStreamRouter(
+            start_in_thinking=False
+        )
+        scrubber: _AnthropicStreamScrubber | None = None
+
+        # Open both content blocks upfront so clients know the layout:
+        #   index 0 = thinking block
+        #   index 1 = text block
+        thinking_block_index = next_block_index
+        next_block_index += 1
+        thinking_block_open = True
+        ev = {
+            "type": "content_block_start",
+            "index": thinking_block_index,
+            "content_block": {"type": "thinking", "thinking": ""},
+        }
+        yield f"event: content_block_start\ndata: {json.dumps(ev)}\n\n"
+
+        text_block_index = next_block_index
+        next_block_index += 1
+        text_block_open = True
+        ev = {
+            "type": "content_block_start",
+            "index": text_block_index,
+            "content_block": {"type": "text", "text": ""},
+        }
+        yield f"event: content_block_start\ndata: {json.dumps(ev)}\n\n"
+    else:
+        # Use the scrubber which simply strips all <think> content.
+        router = None
+        scrubber = _AnthropicStreamScrubber()
+
+        # Only text block (index 0).
+        text_block_index = next_block_index
+        next_block_index += 1
+        text_block_open = True
+        content_block_start = {
+            "type": "content_block_start",
+            "index": text_block_index,
+            "content_block": {"type": "text", "text": ""},
+        }
+        yield f"event: content_block_start\ndata: {json.dumps(content_block_start)}\n\n"
 
     # Stream content deltas
     accumulated_text = ""
@@ -1739,24 +2122,122 @@ async def _stream_anthropic_messages(
             content = SPECIAL_TOKENS_PATTERN.sub("", delta_text)
 
             if content:
+                # Always accumulate the raw (unfiltered) content for tool-call
+                # parsing at the end of the stream.
                 accumulated_text += content
-                delta_event = {
+
+                if router is not None:
+                    # ---- Thinking-enabled path (stream router) ----
+                    # Both blocks are opened upfront (thinking=0, text=1).
+                    for kind, text in router.feed(content):
+                        if kind == "thinking_start":
+                            # Block already opened upfront – nothing to do.
+                            pass
+
+                        elif kind == "thinking" and text:
+                            ev = {
+                                "type": "content_block_delta",
+                                "index": thinking_block_index,
+                                "delta": {"type": "thinking_delta", "thinking": text},
+                            }
+                            yield f"event: content_block_delta\ndata: {json.dumps(ev)}\n\n"
+
+                        elif kind == "thinking_stop":
+                            if thinking_block_open:
+                                ev = {
+                                    "type": "content_block_stop",
+                                    "index": thinking_block_index,
+                                }
+                                yield f"event: content_block_stop\ndata: {json.dumps(ev)}\n\n"
+                                thinking_block_open = False
+
+                        elif kind == "text" and text:
+                            ev = {
+                                "type": "content_block_delta",
+                                "index": text_block_index,
+                                "delta": {"type": "text_delta", "text": text},
+                            }
+                            yield f"event: content_block_delta\ndata: {json.dumps(ev)}\n\n"
+
+                elif scrubber is not None:
+                    # ---- Scrubber path (thinking suppressed) ----
+                    content = scrubber.feed(content)
+                    if content:
+                        delta_event = {
+                            "type": "content_block_delta",
+                            "index": text_block_index,
+                            "delta": {"type": "text_delta", "text": content},
+                        }
+                        yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
+
+    # Flush remaining carry buffer
+    if router is not None:
+        for kind, text in router.flush():
+            if kind == "thinking" and text:
+                if thinking_block_open:
+                    ev = {
+                        "type": "content_block_delta",
+                        "index": thinking_block_index,
+                        "delta": {"type": "thinking_delta", "thinking": text},
+                    }
+                    yield f"event: content_block_delta\ndata: {json.dumps(ev)}\n\n"
+            elif kind == "thinking_stop":
+                if thinking_block_open:
+                    ev = {"type": "content_block_stop", "index": thinking_block_index}
+                    yield f"event: content_block_stop\ndata: {json.dumps(ev)}\n\n"
+                    thinking_block_open = False
+            elif kind == "text" and text:
+                if text_block_index is None:
+                    text_block_index = next_block_index
+                    next_block_index += 1
+                    text_block_open = True
+                    ev = {
+                        "type": "content_block_start",
+                        "index": text_block_index,
+                        "content_block": {"type": "text", "text": ""},
+                    }
+                    yield f"event: content_block_start\ndata: {json.dumps(ev)}\n\n"
+                ev = {
                     "type": "content_block_delta",
-                    "index": 0,
-                    "delta": {"type": "text_delta", "text": content},
+                    "index": text_block_index,
+                    "delta": {"type": "text_delta", "text": text},
                 }
-                yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
+                yield f"event: content_block_delta\ndata: {json.dumps(ev)}\n\n"
+    elif scrubber is not None:
+        flushed = scrubber.flush()
+        if flushed:
+            delta_event = {
+                "type": "content_block_delta",
+                "index": text_block_index,
+                "delta": {"type": "text_delta", "text": flushed},
+            }
+            yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
+
+    # Ensure text block was opened (even if model produced no text)
+    if text_block_index is None:
+        text_block_index = next_block_index
+        next_block_index += 1
+        text_block_open = True
+        ev = {
+            "type": "content_block_start",
+            "index": text_block_index,
+            "content_block": {"type": "text", "text": ""},
+        }
+        yield f"event: content_block_start\ndata: {json.dumps(ev)}\n\n"
 
     # Check for tool calls in accumulated text
     _, tool_calls = _parse_tool_calls_with_parser(accumulated_text, openai_request)
 
-    # Emit content_block_stop for text block
-    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': 0})}\n\n"
+    # Close any remaining open blocks
+    if thinking_block_open:
+        yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': thinking_block_index})}\n\n"
+    if text_block_open:
+        yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': text_block_index})}\n\n"
 
     # If there are tool calls, emit tool_use blocks
     if tool_calls:
         for i, tc in enumerate(tool_calls):
-            tool_index = i + 1
+            tool_index = next_block_index + i
             try:
                 tool_input = json.loads(tc.function.arguments)
             except (json.JSONDecodeError, AttributeError):