Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions litellm/llms/bedrock/chat/converse_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ def get_supported_openai_params(self, model: str) -> List[str]:
"response_format",
"requestMetadata",
"service_tier",
"parallel_tool_calls",
]

if (
Expand Down Expand Up @@ -913,6 +914,13 @@ def map_openai_params(
)
if _tool_choice_value is not None:
optional_params["tool_choice"] = _tool_choice_value
if param == "parallel_tool_calls":
disable_parallel = not value
optional_params["_parallel_tool_use_config"] = {
"tool_choice": {
"disable_parallel_tool_use": disable_parallel
}
}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
Expand Down
47 changes: 38 additions & 9 deletions tests/test_litellm/litellm_core_utils/test_realtime_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,19 +430,36 @@ async def apply_guardrail(self, inputs, request_data, input_type, logging_obj=No
streaming = RealTimeStreaming(client_ws, backend_ws, logging_obj)
await streaming.backend_to_client_send_messages()

# ASSERT 1: no response.create was sent to backend (injection blocked).
# ASSERT 1: the guardrail blocked the normal auto-response and instead
# injected a conversation.item.create + response.create to voice the
# violation message. There should be exactly ONE response.create (the
# guardrail-triggered one), preceded by a response.cancel and a
# conversation.item.create carrying the violation text.
sent_to_backend = [
json.loads(c.args[0])
for c in backend_ws.send.call_args_list
if c.args
]
response_creates = [
response_cancels = [
e for e in sent_to_backend if e.get("type") == "response.cancel"
]
assert len(response_cancels) == 1, (
f"Guardrail should send response.cancel, got: {response_cancels}"
)
guardrail_items = [
e for e in sent_to_backend
if e.get("type") == "response.create"
if e.get("type") == "conversation.item.create"
]
assert len(response_creates) == 0, (
f"Guardrail should prevent response.create for injected content, "
f"but got: {response_creates}"
assert len(guardrail_items) == 1, (
f"Guardrail should inject a conversation.item.create with violation message, "
f"got: {guardrail_items}"
)
response_creates = [
e for e in sent_to_backend if e.get("type") == "response.create"
]
assert len(response_creates) == 1, (
f"Guardrail should send exactly one response.create to voice the violation, "
f"got: {response_creates}"
)

# ASSERT 2: error event was sent directly to the client WebSocket
Expand Down Expand Up @@ -595,14 +612,26 @@ async def apply_guardrail(self, inputs, request_data, input_type, logging_obj=No
assert len(error_events) == 1, f"Expected one error event, got: {sent_texts}"
assert error_events[0]["error"]["type"] == "guardrail_violation"

# ASSERT: blocked item was NOT forwarded to the backend
# ASSERT: the original blocked item was NOT forwarded to the backend.
# The guardrail handler injects its own conversation.item.create with
# the violation message — only that one should be present, not the
# original user message.
sent_to_backend = [c.args[0] for c in backend_ws.send.call_args_list if c.args]
forwarded_items = [
json.loads(m) for m in sent_to_backend
if isinstance(m, str) and json.loads(m).get("type") == "conversation.item.create"
]
assert len(forwarded_items) == 0, (
f"Blocked item should not be forwarded to backend, got: {forwarded_items}"
# Filter out guardrail-injected items (contain "Say exactly the following message")
original_items = [
item for item in forwarded_items
if not any(
"Say exactly the following message" in c.get("text", "")
for c in item.get("item", {}).get("content", [])
if isinstance(c, dict)
)
]
assert len(original_items) == 0, (
f"Blocked item should not be forwarded to backend, got: {original_items}"
)

litellm.callbacks = [] # cleanup
Expand Down
Loading