Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 262 additions & 0 deletions tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

Tests the image source handling and tool_result content parsing in
AnthropicServingMessages._convert_anthropic_to_openai_request().

Also covers extended-thinking edge cases such as ``redacted_thinking``
blocks echoed back by Anthropic clients.
"""

from vllm.entrypoints.anthropic.protocol import (
Expand Down Expand Up @@ -373,3 +376,262 @@ def test_system_string_unchanged(self):
result = _convert(request)
system_msg = result.messages[0]
assert system_msg["content"] == "You are a helpful assistant."


# ======================================================================
# Thinking block conversion (Anthropic → OpenAI)
# ======================================================================


class TestThinkingBlockConversion:
    """Verify that thinking blocks in assistant messages are correctly
    moved to the ``reasoning`` field and stripped from ``content`` during
    the Anthropic→OpenAI conversion.

    This is the Anthropic-endpoint path: the client echoes back the full
    assistant message (including thinking blocks emitted by vLLM) in
    subsequent requests.
    """

    @staticmethod
    def _assistant_messages(result):
        """Return all assistant-role messages from a converted request."""
        return [m for m in result.messages if m.get("role") == "assistant"]

    @classmethod
    def _sole_assistant(cls, result):
        """Return the single assistant message in *result*.

        Fails the test immediately if the conversion produced zero or
        more than one assistant message.
        """
        msgs = cls._assistant_messages(result)
        assert len(msgs) == 1
        return msgs[0]

    def test_thinking_plus_text_in_assistant_message(self):
        """thinking + text → reasoning field + plain-string content."""
        request = _make_request(
            [
                {"role": "user", "content": "Write me some code."},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "I should write a simple example.",
                            "signature": "sig_abc123",
                        },
                        {"type": "text", "text": "Sure! Here is the code."},
                    ],
                },
                {"role": "user", "content": "Can you fix the bug?"},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        # Thinking content must be in reasoning, NOT in content.
        assert asst.get("reasoning") == "I should write a simple example."
        assert asst.get("content") == "Sure! Here is the code."

    def test_thinking_only_in_assistant_message(self):
        """Assistant message with only a thinking block (no visible text).

        This can happen when the model emits reasoning but no final answer
        yet (e.g. a mid-turn reasoning step). Content should be None.
        """
        request = _make_request(
            [
                {"role": "user", "content": "Hello"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Just thinking...",
                            "signature": "sig_xyz",
                        }
                    ],
                },
                {"role": "user", "content": "Go on."},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("reasoning") == "Just thinking..."
        # No visible text → content should be absent or None.
        assert asst.get("content") is None

    def test_thinking_plus_tool_use_in_assistant_message(self):
        """thinking + tool_use: reasoning field set, tool_calls populated."""
        request = _make_request(
            [
                {"role": "user", "content": "What is 2+2?"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "I need to call the calculator.",
                            "signature": "sig_tool",
                        },
                        {
                            "type": "tool_use",
                            "id": "call_001",
                            "name": "calculator",
                            "input": {"expression": "2+2"},
                        },
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "call_001",
                            "content": "4",
                        }
                    ],
                },
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("reasoning") == "I need to call the calculator."
        tool_calls = list(asst.get("tool_calls", []))
        assert len(tool_calls) == 1
        assert tool_calls[0]["function"]["name"] == "calculator"
        # No text content alongside reasoning + tool_use.
        assert asst.get("content") is None

    def test_multiple_thinking_blocks_concatenated(self):
        """Multiple thinking blocks should be joined in order."""
        request = _make_request(
            [
                {"role": "user", "content": "Think hard."},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "First thought. ",
                            "signature": "s1",
                        },
                        {
                            "type": "thinking",
                            "thinking": "Second thought.",
                            "signature": "s2",
                        },
                        {"type": "text", "text": "Done."},
                    ],
                },
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("reasoning") == "First thought. Second thought."
        assert asst.get("content") == "Done."

    def test_no_thinking_blocks_unchanged(self):
        """Messages without thinking blocks must not be modified."""
        request = _make_request(
            [
                {"role": "user", "content": "Hi"},
                {"role": "assistant", "content": "Hello!"},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("content") == "Hello!"
        assert "reasoning" not in asst

    def test_multi_turn_with_thinking_blocks(self):
        """Full multi-turn conversation: previous assistant messages that
        include thinking blocks must all be converted without a 400 error.

        This is the primary regression scenario from the bug report:
        upgrading vLLM from v0.15.1 → v0.17.0 introduced thinking-block
        support in responses, but echoing those responses back in subsequent
        requests caused a Pydantic validation failure.
        """
        request = _make_request(
            [
                {"role": "user", "content": "Turn 1 question"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Reasoning for turn 1.",
                            "signature": "s_t1",
                        },
                        {"type": "text", "text": "Answer for turn 1."},
                    ],
                },
                {"role": "user", "content": "Turn 2 question"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Reasoning for turn 2.",
                            "signature": "s_t2",
                        },
                        {"type": "text", "text": "Answer for turn 2."},
                    ],
                },
                {"role": "user", "content": "Turn 3 question"},
            ]
        )
        # Must not raise a ValidationError / 400.
        result = _convert(request)

        asst_msgs = self._assistant_messages(result)
        assert len(asst_msgs) == 2

        assert asst_msgs[0].get("reasoning") == "Reasoning for turn 1."
        assert asst_msgs[0].get("content") == "Answer for turn 1."
        assert asst_msgs[1].get("reasoning") == "Reasoning for turn 2."
        assert asst_msgs[1].get("content") == "Answer for turn 2."

    def test_redacted_thinking_block_is_accepted(self):
        """Anthropic clients may echo back redacted thinking blocks.

        vLLM should accept these blocks (to avoid 400 validation errors)
        and ignore them when constructing the OpenAI-format prompt.
        """
        request = _make_request(
            [
                {"role": "user", "content": "Hello"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Thinking...",
                            "signature": "sig_think",
                        },
                        {
                            "type": "redacted_thinking",
                            "data": "BASE64_OR_OTHER_OPAQUE_DATA",
                        },
                        {"type": "text", "text": "Hi!"},
                    ],
                },
                {"role": "user", "content": "Continue"},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        # Redacted thinking is ignored, normal thinking still becomes reasoning.
        assert asst.get("reasoning") == "Thinking..."
        assert asst.get("content") == "Hi!"
11 changes: 10 additions & 1 deletion vllm/entrypoints/anthropic/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,14 @@ class AnthropicUsage(BaseModel):
class AnthropicContentBlock(BaseModel):
"""Content block in message"""

type: Literal["text", "image", "tool_use", "tool_result", "thinking"]
type: Literal[
"text",
"image",
"tool_use",
"tool_result",
"thinking",
"redacted_thinking",
]
text: str | None = None
# For image content
source: dict[str, Any] | None = None
Expand All @@ -48,6 +55,8 @@ class AnthropicContentBlock(BaseModel):
# For thinking content
thinking: str | None = None
signature: str | None = None
# For redacted thinking content (safety-filtered by the API)
data: str | None = None


class AnthropicMessage(BaseModel):
Expand Down
6 changes: 6 additions & 0 deletions vllm/entrypoints/anthropic/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ def _convert_block(
content_parts.append({"type": "image_url", "image_url": {"url": image_url}})
elif block.type == "thinking" and block.thinking is not None:
reasoning_parts.append(block.thinking)
elif block.type == "redacted_thinking":
# Redacted thinking blocks contain safety-filtered reasoning.
# We skip them as the content is opaque (base64 'data' field),
# but accepting the block prevents a validation error when the
# client echoes back the full assistant message.
pass
elif block.type == "tool_use":
cls._convert_tool_use_block(block, tool_calls)
elif block.type == "tool_result":
Expand Down
Loading