From 056b7f723f996f12727a50457ee828162b0298e7 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Fri, 13 Mar 2026 17:04:52 +0000 Subject: [PATCH 1/2] [Bugfix] accept redacted thinking blocks in Anthropic messages Allow Anthropic clients to replay assistant turns that include redacted thinking blocks without failing request validation, and keep opaque redacted reasoning out of the converted OpenAI prompt. Generated-by: OpenCode (GPT-5.4) Signed-off-by: Benjamin Bartels --- .../test_anthropic_messages_conversion.py | 261 ++++++++++++++++++ vllm/entrypoints/anthropic/protocol.py | 11 +- vllm/entrypoints/anthropic/serving.py | 6 + 3 files changed, 277 insertions(+), 1 deletion(-) diff --git a/tests/entrypoints/openai/test_anthropic_messages_conversion.py b/tests/entrypoints/openai/test_anthropic_messages_conversion.py index e3b006c16a97..c7b2becf960c 100644 --- a/tests/entrypoints/openai/test_anthropic_messages_conversion.py +++ b/tests/entrypoints/openai/test_anthropic_messages_conversion.py @@ -4,6 +4,9 @@ Tests the image source handling and tool_result content parsing in AnthropicServingMessages._convert_anthropic_to_openai_request(). + +Also covers extended-thinking edge cases such as ``redacted_thinking`` +blocks echoed back by Anthropic clients. """ from vllm.entrypoints.anthropic.protocol import ( @@ -373,3 +376,261 @@ def test_system_string_unchanged(self): result = _convert(request) system_msg = result.messages[0] assert system_msg["content"] == "You are a helpful assistant." + + +# ====================================================================== +# Thinking block conversion (Anthropic → OpenAI) +# ====================================================================== + + +class TestThinkingBlockConversion: + """Verify that thinking blocks in assistant messages are correctly + moved to the ``reasoning`` field and stripped from ``content`` during + the Anthropic→OpenAI conversion.
+ + This is the Anthropic-endpoint path: the client echoes back the full + assistant message (including thinking blocks emitted by vllm) in + subsequent requests. + """ + + def test_thinking_plus_text_in_assistant_message(self): + """thinking + text → reasoning field + plain-string content.""" + request = _make_request( + [ + {"role": "user", "content": "Write me some code."}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "I should write a simple example.", + "signature": "sig_abc123", + }, + {"type": "text", "text": "Sure! Here is the code."}, + ], + }, + {"role": "user", "content": "Can you fix the bug?"}, + ] + ) + result = _convert(request) + + # Find the assistant message in the converted output. + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 1 + asst = asst_msgs[0] + + # Thinking content must be in reasoning, NOT in content. + assert asst.get("reasoning") == "I should write a simple example." + assert asst.get("content") == "Sure! Here is the code." + + def test_thinking_only_in_assistant_message(self): + """Assistant message with only a thinking block (no visible text). + + This can happen when the model emits reasoning but no final answer + yet (e.g. a mid-turn reasoning step). Content should be None. + """ + request = _make_request( + [ + {"role": "user", "content": "Hello"}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "Just thinking...", + "signature": "sig_xyz", + } + ], + }, + {"role": "user", "content": "Go on."}, + ] + ) + result = _convert(request) + + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 1 + asst = asst_msgs[0] + + assert asst.get("reasoning") == "Just thinking..." + # No visible text → content should be absent or None. 
+ assert asst.get("content") is None + + def test_thinking_plus_tool_use_in_assistant_message(self): + """thinking + tool_use: reasoning field set, tool_calls populated.""" + request = _make_request( + [ + {"role": "user", "content": "What is 2+2?"}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "I need to call the calculator.", + "signature": "sig_tool", + }, + { + "type": "tool_use", + "id": "call_001", + "name": "calculator", + "input": {"expression": "2+2"}, + }, + ], + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_001", + "content": "4", + } + ], + }, + ] + ) + result = _convert(request) + + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 1 + asst = asst_msgs[0] + + assert asst.get("reasoning") == "I need to call the calculator." + assert len(asst.get("tool_calls", [])) == 1 + assert asst["tool_calls"][0]["function"]["name"] == "calculator" + # No text content alongside reasoning + tool_use. + assert asst.get("content") is None + + def test_multiple_thinking_blocks_concatenated(self): + """Multiple thinking blocks should be joined in order.""" + request = _make_request( + [ + {"role": "user", "content": "Think hard."}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "First thought. ", + "signature": "s1", + }, + { + "type": "thinking", + "thinking": "Second thought.", + "signature": "s2", + }, + {"type": "text", "text": "Done."}, + ], + }, + ] + ) + result = _convert(request) + + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 1 + asst = asst_msgs[0] + + assert asst.get("reasoning") == "First thought. Second thought." + assert asst.get("content") == "Done." 
+ + def test_no_thinking_blocks_unchanged(self): + """Messages without thinking blocks must not be modified.""" + request = _make_request( + [ + {"role": "user", "content": "Hi"}, + {"role": "assistant", "content": "Hello!"}, + ] + ) + result = _convert(request) + + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 1 + asst = asst_msgs[0] + + assert asst.get("content") == "Hello!" + assert "reasoning" not in asst + + def test_multi_turn_with_thinking_blocks(self): + """Full multi-turn conversation: previous assistant messages that + include thinking blocks must all be converted without a 400 error. + + This is the primary regression scenario from the bug report: + upgrading vllm from v0.15.1 → v0.17.0 introduced thinking-block + support in responses, but echoing those responses back in subsequent + requests caused a Pydantic validation failure. + """ + request = _make_request( + [ + {"role": "user", "content": "Turn 1 question"}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "Reasoning for turn 1.", + "signature": "s_t1", + }, + {"type": "text", "text": "Answer for turn 1."}, + ], + }, + {"role": "user", "content": "Turn 2 question"}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "Reasoning for turn 2.", + "signature": "s_t2", + }, + {"type": "text", "text": "Answer for turn 2."}, + ], + }, + {"role": "user", "content": "Turn 3 question"}, + ] + ) + # Must not raise a ValidationError / 400. + result = _convert(request) + + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 2 + + assert asst_msgs[0].get("reasoning") == "Reasoning for turn 1." + assert asst_msgs[0].get("content") == "Answer for turn 1." + assert asst_msgs[1].get("reasoning") == "Reasoning for turn 2." + assert asst_msgs[1].get("content") == "Answer for turn 2." 
+ + def test_redacted_thinking_block_is_accepted(self): + """Anthropic clients may echo back redacted thinking blocks. + + vLLM should accept these blocks (to avoid 400 validation errors) + and ignore them when constructing the OpenAI-format prompt. + """ + request = _make_request( + [ + {"role": "user", "content": "Hello"}, + { + "role": "assistant", + "content": [ + { + "type": "thinking", + "thinking": "Thinking...", + "signature": "sig_think", + }, + { + "type": "redacted_thinking", + "data": "BASE64_OR_OTHER_OPAQUE_DATA", + }, + {"type": "text", "text": "Hi!"}, + ], + }, + {"role": "user", "content": "Continue"}, + ] + ) + result = _convert(request) + + asst_msgs = [m for m in result.messages if m.get("role") == "assistant"] + assert len(asst_msgs) == 1 + asst = asst_msgs[0] + + # Redacted thinking is ignored, normal thinking still becomes reasoning. + assert asst.get("reasoning") == "Thinking..." + assert asst.get("content") == "Hi!" diff --git a/vllm/entrypoints/anthropic/protocol.py b/vllm/entrypoints/anthropic/protocol.py index c541db5139d3..ab3ca66e2cd0 100644 --- a/vllm/entrypoints/anthropic/protocol.py +++ b/vllm/entrypoints/anthropic/protocol.py @@ -34,7 +34,14 @@ class AnthropicUsage(BaseModel): class AnthropicContentBlock(BaseModel): """Content block in message""" - type: Literal["text", "image", "tool_use", "tool_result", "thinking"] + type: Literal[ + "text", + "image", + "tool_use", + "tool_result", + "thinking", + "redacted_thinking", + ] text: str | None = None # For image content source: dict[str, Any] | None = None @@ -48,6 +55,8 @@ class AnthropicContentBlock(BaseModel): # For thinking content thinking: str | None = None signature: str | None = None + # For redacted thinking content (safety-filtered by the API) + data: str | None = None class AnthropicMessage(BaseModel): diff --git a/vllm/entrypoints/anthropic/serving.py b/vllm/entrypoints/anthropic/serving.py index f301ed499f86..8fbe2c405e7e 100644 --- a/vllm/entrypoints/anthropic/serving.py 
+++ b/vllm/entrypoints/anthropic/serving.py @@ -224,6 +224,12 @@ def _convert_block( content_parts.append({"type": "image_url", "image_url": {"url": image_url}}) elif block.type == "thinking" and block.thinking is not None: reasoning_parts.append(block.thinking) + elif block.type == "redacted_thinking": + # Redacted thinking blocks contain safety-filtered reasoning. + # We skip them as the content is opaque (base64 'data' field), + # but accepting the block prevents a validation error when the + # client echoes back the full assistant message. + pass elif block.type == "tool_use": cls._convert_tool_use_block(block, tool_calls) elif block.type == "tool_result": From b45f105dd587db6332f5a177c5d849888c99e7e3 Mon Sep 17 00:00:00 2001 From: bbartels Date: Sun, 15 Mar 2026 16:57:32 +0000 Subject: [PATCH 2/2] fixes test Signed-off-by: bbartels --- .../anthropic/test_anthropic_messages_conversion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py b/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py index c7b2becf960c..eb9798980f06 100644 --- a/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py +++ b/tests/entrypoints/anthropic/test_anthropic_messages_conversion.py @@ -495,8 +495,9 @@ def test_thinking_plus_tool_use_in_assistant_message(self): asst = asst_msgs[0] assert asst.get("reasoning") == "I need to call the calculator." - assert len(asst.get("tool_calls", [])) == 1 - assert asst["tool_calls"][0]["function"]["name"] == "calculator" + tool_calls = list(asst.get("tool_calls", [])) + assert len(tool_calls) == 1 + assert tool_calls[0]["function"]["name"] == "calculator" # No text content alongside reasoning + tool_use. assert asst.get("content") is None