Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 262 additions & 0 deletions tests/entrypoints/anthropic/test_anthropic_messages_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

Tests the image source handling and tool_result content parsing in
AnthropicServingMessages._convert_anthropic_to_openai_request().

Also covers extended-thinking edge cases such as ``redacted_thinking``
blocks echoed back by Anthropic clients.
"""

from vllm.entrypoints.anthropic.protocol import (
Expand Down Expand Up @@ -373,3 +376,262 @@ def test_system_string_unchanged(self):
result = _convert(request)
system_msg = result.messages[0]
assert system_msg["content"] == "You are a helpful assistant."


# ======================================================================
# Thinking block conversion (Anthropic → OpenAI)
# ======================================================================


class TestThinkingBlockConversion:
    """Verify that thinking blocks in assistant messages are correctly
    moved to the ``reasoning`` field and stripped from ``content`` during
    the Anthropic→OpenAI conversion.

    This is the Anthropic-endpoint path: the client echoes back the full
    assistant message (including thinking blocks emitted by vLLM) in
    subsequent requests.
    """

    @staticmethod
    def _assistant_messages(result):
        """Return all assistant-role messages from a converted request."""
        return [m for m in result.messages if m.get("role") == "assistant"]

    @classmethod
    def _sole_assistant(cls, result):
        """Return the single assistant message in *result*.

        Fails the test immediately if the conversion produced zero or
        more than one assistant message.
        """
        msgs = cls._assistant_messages(result)
        assert len(msgs) == 1
        return msgs[0]

    def test_thinking_plus_text_in_assistant_message(self):
        """thinking + text → reasoning field + plain-string content."""
        request = _make_request(
            [
                {"role": "user", "content": "Write me some code."},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "I should write a simple example.",
                            "signature": "sig_abc123",
                        },
                        {"type": "text", "text": "Sure! Here is the code."},
                    ],
                },
                {"role": "user", "content": "Can you fix the bug?"},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        # Thinking content must be in reasoning, NOT in content.
        assert asst.get("reasoning") == "I should write a simple example."
        assert asst.get("content") == "Sure! Here is the code."

    def test_thinking_only_in_assistant_message(self):
        """Assistant message with only a thinking block (no visible text).

        This can happen when the model emits reasoning but no final answer
        yet (e.g. a mid-turn reasoning step). Content should be None.
        """
        request = _make_request(
            [
                {"role": "user", "content": "Hello"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Just thinking...",
                            "signature": "sig_xyz",
                        }
                    ],
                },
                {"role": "user", "content": "Go on."},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("reasoning") == "Just thinking..."
        # No visible text → content should be absent or None.
        assert asst.get("content") is None

    def test_thinking_plus_tool_use_in_assistant_message(self):
        """thinking + tool_use: reasoning field set, tool_calls populated."""
        request = _make_request(
            [
                {"role": "user", "content": "What is 2+2?"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "I need to call the calculator.",
                            "signature": "sig_tool",
                        },
                        {
                            "type": "tool_use",
                            "id": "call_001",
                            "name": "calculator",
                            "input": {"expression": "2+2"},
                        },
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": "call_001",
                            "content": "4",
                        }
                    ],
                },
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("reasoning") == "I need to call the calculator."
        tool_calls = list(asst.get("tool_calls", []))
        assert len(tool_calls) == 1
        assert tool_calls[0]["function"]["name"] == "calculator"
        # No text content alongside reasoning + tool_use.
        assert asst.get("content") is None

    def test_multiple_thinking_blocks_concatenated(self):
        """Multiple thinking blocks should be joined in order."""
        request = _make_request(
            [
                {"role": "user", "content": "Think hard."},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "First thought. ",
                            "signature": "s1",
                        },
                        {
                            "type": "thinking",
                            "thinking": "Second thought.",
                            "signature": "s2",
                        },
                        {"type": "text", "text": "Done."},
                    ],
                },
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("reasoning") == "First thought. Second thought."
        assert asst.get("content") == "Done."

    def test_no_thinking_blocks_unchanged(self):
        """Messages without thinking blocks must not be modified."""
        request = _make_request(
            [
                {"role": "user", "content": "Hi"},
                {"role": "assistant", "content": "Hello!"},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        assert asst.get("content") == "Hello!"
        assert "reasoning" not in asst

    def test_multi_turn_with_thinking_blocks(self):
        """Full multi-turn conversation: previous assistant messages that
        include thinking blocks must all be converted without a 400 error.

        This is the primary regression scenario from the bug report:
        upgrading vLLM from v0.15.1 → v0.17.0 introduced thinking-block
        support in responses, but echoing those responses back in subsequent
        requests caused a Pydantic validation failure.
        """
        request = _make_request(
            [
                {"role": "user", "content": "Turn 1 question"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Reasoning for turn 1.",
                            "signature": "s_t1",
                        },
                        {"type": "text", "text": "Answer for turn 1."},
                    ],
                },
                {"role": "user", "content": "Turn 2 question"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Reasoning for turn 2.",
                            "signature": "s_t2",
                        },
                        {"type": "text", "text": "Answer for turn 2."},
                    ],
                },
                {"role": "user", "content": "Turn 3 question"},
            ]
        )
        # Must not raise a ValidationError / 400.
        result = _convert(request)

        asst_msgs = self._assistant_messages(result)
        assert len(asst_msgs) == 2

        assert asst_msgs[0].get("reasoning") == "Reasoning for turn 1."
        assert asst_msgs[0].get("content") == "Answer for turn 1."
        assert asst_msgs[1].get("reasoning") == "Reasoning for turn 2."
        assert asst_msgs[1].get("content") == "Answer for turn 2."

    def test_redacted_thinking_block_is_accepted(self):
        """Anthropic clients may echo back redacted thinking blocks.

        vLLM should accept these blocks (to avoid 400 validation errors)
        and ignore them when constructing the OpenAI-format prompt.
        """
        request = _make_request(
            [
                {"role": "user", "content": "Hello"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "thinking",
                            "thinking": "Thinking...",
                            "signature": "sig_think",
                        },
                        {
                            "type": "redacted_thinking",
                            "data": "BASE64_OR_OTHER_OPAQUE_DATA",
                        },
                        {"type": "text", "text": "Hi!"},
                    ],
                },
                {"role": "user", "content": "Continue"},
            ]
        )
        result = _convert(request)

        asst = self._sole_assistant(result)

        # Redacted thinking is ignored, normal thinking still becomes reasoning.
        assert asst.get("reasoning") == "Thinking..."
        assert asst.get("content") == "Hi!"
11 changes: 10 additions & 1 deletion vllm/entrypoints/anthropic/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,14 @@ class AnthropicUsage(BaseModel):
class AnthropicContentBlock(BaseModel):
"""Content block in message"""

type: Literal["text", "image", "tool_use", "tool_result", "thinking"]
type: Literal[
"text",
"image",
"tool_use",
"tool_result",
"thinking",
"redacted_thinking",
]
text: str | None = None
# For image content
source: dict[str, Any] | None = None
Expand All @@ -48,6 +55,8 @@ class AnthropicContentBlock(BaseModel):
# For thinking content
thinking: str | None = None
signature: str | None = None
# For redacted thinking content (safety-filtered by the API)
data: str | None = None


class AnthropicMessage(BaseModel):
Expand Down
6 changes: 6 additions & 0 deletions vllm/entrypoints/anthropic/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ def _convert_block(
content_parts.append({"type": "image_url", "image_url": {"url": image_url}})
elif block.type == "thinking" and block.thinking is not None:
reasoning_parts.append(block.thinking)
elif block.type == "redacted_thinking":
# Redacted thinking blocks contain safety-filtered reasoning.
# We skip them as the content is opaque (base64 'data' field),
# but accepting the block prevents a validation error when the
# client echoes back the full assistant message.
pass
elif block.type == "tool_use":
cls._convert_tool_use_block(block, tool_calls)
elif block.type == "tool_result":
Expand Down
Loading