Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 54 additions & 16 deletions litellm/llms/moonshot/chat/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
Translates from OpenAI's `/v1/chat/completions` to Moonshot AI's `/v1/chat/completions`
"""

from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, overload
from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, cast, overload

import litellm
from litellm.litellm_core_utils.prompt_templates.common_utils import (
handle_messages_with_content_list_to_str_conversion,
)
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.utils import supports_reasoning

from ...openai.chat.gpt_transformation import OpenAIGPTConfig

Expand All @@ -17,17 +19,15 @@ class MoonshotChatConfig(OpenAIGPTConfig):
@overload
def _transform_messages(
self, messages: List[AllMessageValues], model: str, is_async: Literal[True]
) -> Coroutine[Any, Any, List[AllMessageValues]]:
...
) -> Coroutine[Any, Any, List[AllMessageValues]]: ...

@overload
def _transform_messages(
self,
messages: List[AllMessageValues],
model: str,
is_async: Literal[False] = False,
) -> List[AllMessageValues]:
...
) -> List[AllMessageValues]: ...

def _transform_messages(
self, messages: List[AllMessageValues], model: str, is_async: bool = False
Expand All @@ -53,22 +53,14 @@ def _transform_messages(
messages = handle_messages_with_content_list_to_str_conversion(messages)

if is_async:
return super()._transform_messages(
messages=messages, model=model, is_async=True
)
return super()._transform_messages(messages=messages, model=model, is_async=True)
else:
return super()._transform_messages(
messages=messages, model=model, is_async=False
)
return super()._transform_messages(messages=messages, model=model, is_async=False)

def _get_openai_compatible_provider_info(
self, api_base: Optional[str], api_key: Optional[str]
) -> Tuple[Optional[str], Optional[str]]:
api_base = (
api_base
or get_secret_str("MOONSHOT_API_BASE")
or "https://api.moonshot.ai/v1"
) # type: ignore
api_base = api_base or get_secret_str("MOONSHOT_API_BASE") or "https://api.moonshot.ai/v1" # type: ignore
dynamic_api_key = api_key or get_secret_str("MOONSHOT_API_KEY")
return api_base, dynamic_api_key

Expand Down Expand Up @@ -149,6 +141,48 @@ def map_openai_params(
optional_params["temperature"] = 0.3
return optional_params

def fill_reasoning_content(self, messages: List[AllMessageValues]) -> List[AllMessageValues]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Public method name for an internal helper

fill_reasoning_content has no leading underscore, making it appear as part of the public API surface of MoonshotChatConfig. It is only called from transform_request within the same class and is exclusively tested via direct invocation in unit tests. Consider renaming it to _fill_reasoning_content to signal that it is an internal implementation detail, which also makes future refactoring safer.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

"""
Moonshot reasoning models require `reasoning_content` on every assistant
message that contains tool_calls (multi-turn tool-calling flows).
For each such message that is missing the field:
1. Promote provider_specific_fields["reasoning_content"] if present and non-empty
(this is where LiteLLM stores it from a previous response)
2. Otherwise inject a single space — the minimum value the API accepts
Messages that already carry the field, or are not assistant/tool-call messages,
are appended as-is (no copy made).
"""
result: List[AllMessageValues] = []
for msg in messages:
if (
msg.get("role") == "assistant"
and msg.get("tool_calls")
and "reasoning_content" not in msg
):
Comment on lines +158 to +162
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reasoning_content: None bypasses placeholder injection

The condition "reasoning_content" not in msg only checks for key presence, not value. If an assistant message has "reasoning_content": None (e.g., from a deserialised response where the field was null, or manually constructed history), the if branch is skipped and None is forwarded to the Moonshot API, which is likely to result in the same 400 Bad Request the PR is trying to fix.

Replacing the key-presence check with a falsy check handles the None and "" cases alongside the absent-key case:

Suggested change
if (
msg.get("role") == "assistant"
and msg.get("tool_calls")
and "reasoning_content" not in msg
):
if (
msg.get("role") == "assistant"
and msg.get("tool_calls")
and not msg.get("reasoning_content")
):

patched = dict(cast(dict, msg))
provider_fields = patched.get("provider_specific_fields") or {}
stored = provider_fields.get("reasoning_content")
if stored:
patched["reasoning_content"] = stored
Comment on lines +163 to +167
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Promoted reasoning_content left duplicated in provider_specific_fields

When reasoning_content is found in provider_specific_fields and promoted to the top level (line 167), the original entry inside provider_specific_fields is not removed. This means the patched message dict ends up containing the value in two places simultaneously.

If LiteLLM does not strip provider_specific_fields before serializing the request body, the Moonshot API will receive an unexpected extra field. More practically, if any downstream code reads provider_specific_fields to check whether reasoning_content has already been handled, it will still see the value there and may act on it again.

Consider cleaning up the promoted key after copying it to the top level:

if stored:
    patched["reasoning_content"] = stored
    # Remove from provider_specific_fields to avoid duplication
    pf = dict(provider_fields)
    pf.pop("reasoning_content", None)
    patched["provider_specific_fields"] = pf

# Remove the promoted key from provider_specific_fields to
# avoid sending the value twice in the serialised request body
cleaned_provider_fields = dict(provider_fields)
cleaned_provider_fields.pop("reasoning_content", None)
patched["provider_specific_fields"] = cleaned_provider_fields
Comment on lines +170 to +172
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Empty provider_specific_fields dict left in message after promotion

When reasoning_content was the only key in provider_specific_fields, after pop the resulting cleaned_provider_fields will be {}. The code then writes patched["provider_specific_fields"] = {}, leaving an empty dict on the message. Any downstream consumer (e.g., response logging, another middleware) that does if msg.get("provider_specific_fields"): would treat this as a no-op, but it's unexpected to find an explicitly empty dict where the original may have been absent. Consider removing the key entirely when the result is empty:

Suggested change
cleaned_provider_fields = dict(provider_fields)
cleaned_provider_fields.pop("reasoning_content", None)
patched["provider_specific_fields"] = cleaned_provider_fields
cleaned_provider_fields = dict(provider_fields)
cleaned_provider_fields.pop("reasoning_content", None)
if cleaned_provider_fields:
patched["provider_specific_fields"] = cleaned_provider_fields
else:
patched.pop("provider_specific_fields", None)

else:
litellm.verbose_logger.warning(
"Moonshot reasoning model: assistant tool-call message is missing "
"`reasoning_content`. Injecting a placeholder to satisfy API validation. "
"For best results, preserve `reasoning_content` from the original "
"assistant response when building multi-turn conversation history."
)
patched["reasoning_content"] = " "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic string for reasoning content placeholder

The value " " (single space) injected as the minimum placeholder is a magic string embedded directly in the code. If the Moonshot API ever tightens validation to require a non-whitespace value, this would need to be tracked down across usages. Define a module-level constant to make the intent explicit and simplify future changes:

Suggested change
patched["reasoning_content"] = " "
patched["reasoning_content"] = _REASONING_PLACEHOLDER

With the constant defined at the top of the file:

# Minimum value accepted by the Moonshot API when reasoning_content is unavailable
_REASONING_PLACEHOLDER = " "

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

result.append(cast(AllMessageValues, patched))
else:
result.append(msg)
return result

def transform_request(
self,
model: str,
Expand All @@ -169,6 +203,10 @@ def transform_request(
optional_params=optional_params,
)

# Moonshot reasoning models: fill in reasoning_content before the API call
if supports_reasoning(model=model, custom_llm_provider="moonshot"):
messages = self.fill_reasoning_content(messages)
Comment on lines +206 to +208
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kimi-thinking-preview is gated by supports_reasoning but excludes tools

fill_reasoning_content is invoked whenever supports_reasoning(model, "moonshot") is True. kimi-thinking-preview is now being given "supports_reasoning": true in the JSON, but that same model explicitly has tools and tool_choice removed from get_supported_openai_params (line 100). Because the function only patches assistant messages that have a non-empty tool_calls list, it will always be a no-op for kimi-thinking-preview in normal usage.

However, if a caller passes in a conversation history that contains tool-call messages originally generated by a different model (e.g. kimi-k2-thinking), fill_reasoning_content will silently inject reasoning_content into those messages before forwarding to the API. The API will still likely reject the request for having unsupported tool calls — but the injected reasoning_content may obscure the root cause.

Consider documenting or guarding against this edge-case, for example:

# Only patch tool-call messages if the model actually supports tool calls
if (
    supports_reasoning(model=model, custom_llm_provider="moonshot")
    and "tools" in self.get_supported_openai_params(model)
):
    messages = self.fill_reasoning_content(messages)


# Call parent transform_request which handles _transform_messages
return super().transform_request(
model=model,
Expand Down
3 changes: 3 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -22083,6 +22083,7 @@
"output_cost_per_token": 3e-06,
"source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true
Expand Down Expand Up @@ -22166,6 +22167,7 @@
"output_cost_per_token": 2.5e-06,
"source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_web_search": true
},
Expand All @@ -22180,6 +22182,7 @@
"output_cost_per_token": 8e-06,
"source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_web_search": true
},
Expand Down
3 changes: 3 additions & 0 deletions model_prices_and_context_window.json
Original file line number Diff line number Diff line change
Expand Up @@ -22083,6 +22083,7 @@
"output_cost_per_token": 3e-06,
"source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_video_input": true,
"supports_vision": true
Expand Down Expand Up @@ -22166,6 +22167,7 @@
"output_cost_per_token": 2.5e-06,
"source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_web_search": true
},
Expand All @@ -22180,6 +22182,7 @@
"output_cost_per_token": 8e-06,
"source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2",
"supports_function_calling": true,
"supports_reasoning": true,
"supports_tool_choice": true,
"supports_web_search": true
},
Expand Down
148 changes: 147 additions & 1 deletion tests/test_litellm/llms/moonshot/test_moonshot_chat_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
import sys
from unittest.mock import patch

sys.path.insert(
0, os.path.abspath("../../../../..")
Expand Down Expand Up @@ -404,4 +405,149 @@ def test_transform_messages_flattens_text_only_content(self):

# Content should be flattened to a plain string
assert isinstance(result["messages"][0]["content"], str)
assert result["messages"][0]["content"] == "Hello, how are you?"
assert result["messages"][0]["content"] == "Hello, how are you?"

# ------------------------------------------------------------------ #
# Tests for fill_reasoning_content #
# ------------------------------------------------------------------ #

def test_reasoning_content_space_injected_when_absent(self):
"""Assistant tool-call message with no reasoning_content gets a space injected."""
config = MoonshotChatConfig()

messages = [
{"role": "user", "content": "What's the weather?"},
{
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": "{}"}}
],
},
{"role": "tool", "tool_call_id": "call_1", "content": "Sunny, 22°C"},
]

result = config.fill_reasoning_content(messages)

assert result[1].get("reasoning_content") == " "
# Non-assistant messages are untouched
assert "reasoning_content" not in result[0]
assert "reasoning_content" not in result[2]

def test_empty_tool_calls_list_not_injected(self):
"""Assistant message with tool_calls: [] should not get reasoning_content injected."""
config = MoonshotChatConfig()

original_msg = {
"role": "assistant",
"content": "Here is the answer.",
"tool_calls": [],
}
messages = [original_msg]

result = config.fill_reasoning_content(messages)

assert "reasoning_content" not in result[0]
assert result[0] is original_msg

def test_existing_reasoning_content_not_overwritten(self):
"""Message that already has reasoning_content is passed through unchanged."""
config = MoonshotChatConfig()

original_msg = {
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}}
],
"reasoning_content": "<actual thinking>",
}
messages = [original_msg]

result = config.fill_reasoning_content(messages)

assert result[0].get("reasoning_content") == "<actual thinking>"
# Same object — no copy was made
assert result[0] is original_msg

def test_provider_specific_fields_reasoning_content_promoted(self):
"""reasoning_content stored in provider_specific_fields is promoted to top level."""
config = MoonshotChatConfig()

messages = [
{
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}}
],
"provider_specific_fields": {"reasoning_content": "stored thinking"},
}
]

result = config.fill_reasoning_content(messages)

assert result[0].get("reasoning_content") == "stored thinking"
# The promoted key must be removed from provider_specific_fields to
# avoid sending the value twice in the serialised request body
assert "reasoning_content" not in (result[0].get("provider_specific_fields") or {})

def test_reasoning_model_fill_called_from_transform_request(self):
"""transform_request injects reasoning_content end-to-end for reasoning models."""
config = MoonshotChatConfig()

messages = [
{"role": "user", "content": "Call a tool"},
{
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}}
],
},
]

with patch(
"litellm.llms.moonshot.chat.transformation.supports_reasoning",
return_value=True,
):
result = config.transform_request(
model="kimi-k2-thinking",
messages=messages,
optional_params={},
litellm_params={},
headers={},
)

assert result["messages"][1].get("reasoning_content") == " "

def test_non_reasoning_model_messages_untouched(self):
"""For non-reasoning models, transform_request leaves messages unchanged."""
config = MoonshotChatConfig()

messages = [
{"role": "user", "content": "Hello"},
{
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}}
],
},
]

with patch(
"litellm.llms.moonshot.chat.transformation.supports_reasoning",
return_value=False,
):
result = config.transform_request(
model="moonshot-v1-8k",
messages=messages,
optional_params={},
litellm_params={},
headers={},
)

# reasoning_content must not have been injected
for msg in result["messages"]:
assert "reasoning_content" not in msg
Comment on lines +524 to +553
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No integration test for reasoning model path through transform_request

The four new tests call fill_reasoning_content directly or patch supports_reasoning to False (the non-reasoning path). There is no test that exercises the full transform_request pipeline for an actual reasoning model without mocking supports_reasoning.

This means the integration wiring — specifically, that transform_request actually invokes fill_reasoning_content when supports_reasoning returns True for a Moonshot reasoning model — is untested. A regression here (e.g., checking the wrong provider string) would not be caught by the current suite.

Consider adding a test similar to test_non_reasoning_model_messages_untouched but with the mock returning True (or using an actual reasoning model name like kimi-k2-thinking):

def test_reasoning_model_fill_called_from_transform_request(self):
    """transform_request injects reasoning_content for reasoning models."""
    config = MoonshotChatConfig()
    messages = [
        {"role": "user", "content": "Call a tool"},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {"id": "c1", "type": "function", "function": {"name": "fn", "arguments": "{}"}}
            ],
        },
    ]
    with patch(
        "litellm.llms.moonshot.chat.transformation.supports_reasoning",
        return_value=True,
    ):
        result = config.transform_request(
            model="kimi-k2-thinking",
            messages=messages,
            optional_params={},
            litellm_params={},
            headers={},
        )
    assert result["messages"][1].get("reasoning_content") == " "

Loading