Skip to content
37 changes: 29 additions & 8 deletions litellm/proxy/_experimental/mcp_server/discoverable_endpoints.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
from typing import Optional
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
from urllib.parse import parse_qs, parse_qsl, urlencode, urlparse, urlunparse

from fastapi import APIRouter, Form, HTTPException, Request
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse

from litellm.llms.custom_httpx.http_handler import (
Expand Down Expand Up @@ -369,12 +369,6 @@ async def authorize(
@router.post("/token")
async def token_endpoint(
request: Request,
grant_type: str = Form(...),
code: str = Form(None),
redirect_uri: str = Form(None),
client_id: str = Form(...),
client_secret: Optional[str] = Form(None),
code_verifier: str = Form(None),
mcp_server_name: Optional[str] = None,
):
"""
Expand All @@ -390,6 +384,33 @@ async def token_endpoint(
global_mcp_server_manager,
)

content_type = request.headers.get("content-type", "")
if "multipart/form-data" in content_type:
try:
form = await request.form()
except RuntimeError as exc:
if "python-multipart" in str(exc).lower():
raise HTTPException(
status_code=500,
detail='Form data requires "python-multipart" to be installed.',
) from exc
raise
form_data = {key: form.get(key) for key in form.keys()}
else:
raw_body = await request.body()
parsed = parse_qs(raw_body.decode() if raw_body else "")
form_data = {key: values[0] if values else None for key, values in parsed.items()}

grant_type = form_data.get("grant_type")
client_id = form_data.get("client_id")
if not grant_type or not client_id:
raise HTTPException(status_code=400, detail="Missing required form fields")

code = form_data.get("code")
redirect_uri = form_data.get("redirect_uri")
client_secret = form_data.get("client_secret")
code_verifier = form_data.get("code_verifier")

lookup_name = mcp_server_name or client_id
client_ip = IPAddressUtils.get_mcp_client_ip(request)
mcp_server = global_mcp_server_manager.get_mcp_server_by_name(
Expand Down
54 changes: 53 additions & 1 deletion litellm/responses/litellm_completion_transformation/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Handler for transforming responses api requests to litellm.completion requests
"""

from typing import Any, Coroutine, Dict, Optional, Union
from typing import Any, Coroutine, Dict, List, Optional, Union

import litellm
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
Expand All @@ -20,6 +20,50 @@
from litellm.types.utils import ModelResponse


def _strip_empty_text_content_parts(litellm_completion_request: dict) -> dict:
"""
Strip empty text content parts from messages before calling litellm.completion().

Some strict OpenAI-compatible models (e.g. Kimi-K2.5, gpt-oss-120b on Azure AI)
reject messages whose content array contains {"type": "text", "text": ""}. This
happens when the Responses API → chat/completions transformation produces a text
part next to a tool-output part but the text is an empty string.

Only returns a modified copy of the dict when changes are actually needed.
"""
messages: Optional[List[Any]] = litellm_completion_request.get("messages")
if not messages:
return litellm_completion_request

modified = False
new_messages: List[Any] = []
for msg in messages:
content = msg.get("content") if isinstance(msg, dict) else None
if isinstance(content, list):
filtered = [
part
for part in content
if not (
isinstance(part, dict)
and part.get("type") == "text"
and part.get("text") == ""
)
]
if len(filtered) != len(content):
modified = True
msg = dict(msg)
# Avoid sending an empty content list; fall back to empty string.
msg["content"] = filtered if filtered else ""
new_messages.append(msg)

if not modified:
return litellm_completion_request

result = dict(litellm_completion_request)
result["messages"] = new_messages
return result


class LiteLLMCompletionTransformationHandler:
def response_api_handler(
self,
Expand Down Expand Up @@ -56,6 +100,10 @@ def response_api_handler(
**kwargs,
)

litellm_completion_request = _strip_empty_text_content_parts(
litellm_completion_request
)

completion_args = {}
completion_args.update(kwargs)
completion_args.update(litellm_completion_request)
Expand Down Expand Up @@ -105,6 +153,10 @@ async def async_response_api_handler(
litellm_completion_request=litellm_completion_request,
)

litellm_completion_request = _strip_empty_text_content_parts(
litellm_completion_request
)

acompletion_args = {}
acompletion_args.update(kwargs)
acompletion_args.update(litellm_completion_request)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1268,9 +1268,9 @@ def _transform_responses_api_content_to_chat_completion_content(
)
)
else:
# Skip text blocks with None text to avoid downstream errors
# Skip text blocks with None or empty text to avoid downstream errors
text_value = item.get("text")
if text_value is None:
if text_value is None or text_value == "":
continue
content_list.append(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path

from litellm.responses.litellm_completion_transformation.handler import (
_strip_empty_text_content_parts,
)
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
TOOL_CALLS_CACHE,
Expand Down Expand Up @@ -1899,3 +1902,159 @@ def test_single_tool_call_still_works_after_merge_fix(self):
else getattr(assistant_messages[0], "tool_calls", None)
)
assert tool_calls is not None and len(tool_calls) == 1


class TestStripEmptyTextContentParts:
    """
    Unit tests for _strip_empty_text_content_parts helper in handler.py.

    The helper filters {"type": "text", "text": ""} parts from message content lists
    before the request reaches strict OpenAI-compatible endpoints that reject them
    (e.g. Kimi-K2.5, gpt-oss-120b on Azure AI).
    """

    def test_strips_empty_text_part_adjacent_to_tool_calls(self):
        """
        The primary real-world case: an assistant message with a tool_calls array and
        an empty text content part produced by the Responses API -> chat/completions
        transformation.
        """
        tool_call_part = {
            "type": "tool_calls",
            "tool_calls": [
                {
                    "id": "call_abc",
                    "type": "function",
                    "function": {"name": "my_tool", "arguments": "{}"},
                }
            ],
        }
        request = {
            "model": "azure_ai/kimi-k2.5",
            "messages": [
                {"role": "user", "content": "Use a tool"},
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": ""}, tool_call_part],
                },
            ],
        }

        result = _strip_empty_text_content_parts(request)

        assistant_content = result["messages"][1]["content"]
        assert isinstance(assistant_content, list)
        # The empty text part must be gone
        text_parts = [
            part
            for part in assistant_content
            if isinstance(part, dict) and part.get("type") == "text"
        ]
        assert text_parts == [], f"Expected no text parts, got: {text_parts}"
        # The non-text part must still be present
        assert len(assistant_content) == 1
        assert assistant_content[0]["type"] == "tool_calls"

    def test_preserves_nonempty_text_parts(self):
        """Non-empty text parts must not be removed."""
        assistant_message = {
            "role": "assistant",
            "content": [
                {"type": "text", "text": ""},
                {"type": "text", "text": "Here is the result:"},
                {"type": "text", "text": ""},
            ],
        }
        request = {"model": "azure_ai/kimi-k2.5", "messages": [assistant_message]}

        result = _strip_empty_text_content_parts(request)

        surviving = result["messages"][0]["content"]
        assert isinstance(surviving, list)
        assert surviving == [{"type": "text", "text": "Here is the result:"}]

    def test_falls_back_to_empty_string_when_all_parts_are_empty(self):
        """
        If filtering removes every part, content must fall back to "" rather than
        leaving an empty list (which also breaks strict endpoints).
        """
        request = {
            "model": "azure_ai/kimi-k2.5",
            "messages": [
                {
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": ""},
                        {"type": "text", "text": ""},
                    ],
                }
            ],
        }

        result = _strip_empty_text_content_parts(request)

        assert result["messages"][0]["content"] == ""

    def test_no_copy_when_no_changes_needed(self):
        """
        When there are no empty text parts, the original dict must be returned
        unchanged (not a copy) to avoid unnecessary allocations.
        """
        request = {
            "model": "gpt-4o",
            "messages": [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
            ],
        }

        assert _strip_empty_text_content_parts(request) is request

    def test_non_text_parts_preserved(self):
        """image_url and other non-text parts must survive the filter."""
        image_part = {
            "type": "image_url",
            "image_url": {"url": "https://example.com/img.png"},
        }
        request = {
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": ""},
                        image_part,
                        {"type": "text", "text": "What is this?"},
                    ],
                }
            ],
        }

        result = _strip_empty_text_content_parts(request)

        remaining = result["messages"][0]["content"]
        assert len(remaining) == 2
        assert remaining[0]["type"] == "image_url"
        assert remaining[1] == {"type": "text", "text": "What is this?"}

    def test_string_content_untouched(self):
        """Messages whose content is already a string must not be modified."""
        request = {
            "model": "gpt-4o",
            "messages": [
                {"role": "user", "content": "Hello"},
            ],
        }

        result = _strip_empty_text_content_parts(request)

        assert result is request
        assert result["messages"][0]["content"] == "Hello"
Loading