-
-
Notifications
You must be signed in to change notification settings - Fork 6.6k
fix: auto-fill reasoning_content for moonshot kimi reasoning models #23580
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,13 +2,15 @@ | |||||||||||||||||||||
| Translates from OpenAI's `/v1/chat/completions` to Moonshot AI's `/v1/chat/completions` | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, overload | ||||||||||||||||||||||
| from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, cast, overload | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| import litellm | ||||||||||||||||||||||
| from litellm.litellm_core_utils.prompt_templates.common_utils import ( | ||||||||||||||||||||||
| handle_messages_with_content_list_to_str_conversion, | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| from litellm.secret_managers.main import get_secret_str | ||||||||||||||||||||||
| from litellm.types.llms.openai import AllMessageValues | ||||||||||||||||||||||
| from litellm.utils import supports_reasoning | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| from ...openai.chat.gpt_transformation import OpenAIGPTConfig | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
|
@@ -17,17 +19,15 @@ class MoonshotChatConfig(OpenAIGPTConfig): | |||||||||||||||||||||
| @overload | ||||||||||||||||||||||
| def _transform_messages( | ||||||||||||||||||||||
| self, messages: List[AllMessageValues], model: str, is_async: Literal[True] | ||||||||||||||||||||||
| ) -> Coroutine[Any, Any, List[AllMessageValues]]: | ||||||||||||||||||||||
| ... | ||||||||||||||||||||||
| ) -> Coroutine[Any, Any, List[AllMessageValues]]: ... | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| @overload | ||||||||||||||||||||||
| def _transform_messages( | ||||||||||||||||||||||
| self, | ||||||||||||||||||||||
| messages: List[AllMessageValues], | ||||||||||||||||||||||
| model: str, | ||||||||||||||||||||||
| is_async: Literal[False] = False, | ||||||||||||||||||||||
| ) -> List[AllMessageValues]: | ||||||||||||||||||||||
| ... | ||||||||||||||||||||||
| ) -> List[AllMessageValues]: ... | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| def _transform_messages( | ||||||||||||||||||||||
| self, messages: List[AllMessageValues], model: str, is_async: bool = False | ||||||||||||||||||||||
|
|
@@ -53,22 +53,14 @@ def _transform_messages( | |||||||||||||||||||||
| messages = handle_messages_with_content_list_to_str_conversion(messages) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if is_async: | ||||||||||||||||||||||
| return super()._transform_messages( | ||||||||||||||||||||||
| messages=messages, model=model, is_async=True | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| return super()._transform_messages(messages=messages, model=model, is_async=True) | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| return super()._transform_messages( | ||||||||||||||||||||||
| messages=messages, model=model, is_async=False | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| return super()._transform_messages(messages=messages, model=model, is_async=False) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| def _get_openai_compatible_provider_info( | ||||||||||||||||||||||
| self, api_base: Optional[str], api_key: Optional[str] | ||||||||||||||||||||||
| ) -> Tuple[Optional[str], Optional[str]]: | ||||||||||||||||||||||
| api_base = ( | ||||||||||||||||||||||
| api_base | ||||||||||||||||||||||
| or get_secret_str("MOONSHOT_API_BASE") | ||||||||||||||||||||||
| or "https://api.moonshot.ai/v1" | ||||||||||||||||||||||
| ) # type: ignore | ||||||||||||||||||||||
| api_base = api_base or get_secret_str("MOONSHOT_API_BASE") or "https://api.moonshot.ai/v1" # type: ignore | ||||||||||||||||||||||
| dynamic_api_key = api_key or get_secret_str("MOONSHOT_API_KEY") | ||||||||||||||||||||||
| return api_base, dynamic_api_key | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
|
@@ -149,6 +141,48 @@ def map_openai_params( | |||||||||||||||||||||
| optional_params["temperature"] = 0.3 | ||||||||||||||||||||||
| return optional_params | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| def fill_reasoning_content(self, messages: List[AllMessageValues]) -> List[AllMessageValues]: | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| Moonshot reasoning models require `reasoning_content` on every assistant | ||||||||||||||||||||||
| message that contains tool_calls (multi-turn tool-calling flows). | ||||||||||||||||||||||
| For each such message that is missing the field: | ||||||||||||||||||||||
| 1. Promote provider_specific_fields["reasoning_content"] if present and non-empty | ||||||||||||||||||||||
| (this is where LiteLLM stores it from a previous response) | ||||||||||||||||||||||
| 2. Otherwise inject a single space — the minimum value the API accepts | ||||||||||||||||||||||
| Messages that already carry the field, or are not assistant/tool-call messages, | ||||||||||||||||||||||
| are appended as-is (no copy made). | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| result: List[AllMessageValues] = [] | ||||||||||||||||||||||
| for msg in messages: | ||||||||||||||||||||||
| if ( | ||||||||||||||||||||||
| msg.get("role") == "assistant" | ||||||||||||||||||||||
| and msg.get("tool_calls") | ||||||||||||||||||||||
| and "reasoning_content" not in msg | ||||||||||||||||||||||
| ): | ||||||||||||||||||||||
|
Comment on lines
+158
to
+162
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The condition `"reasoning_content" not in msg` only checks for key presence, so an assistant message that carries `reasoning_content: ""` (or `None`) is skipped and the empty value is sent to the API unchanged. Replacing the key-presence check with a falsy check handles the empty-value case as well.
Suggested change
|
||||||||||||||||||||||
| patched = dict(cast(dict, msg)) | ||||||||||||||||||||||
| provider_fields = patched.get("provider_specific_fields") or {} | ||||||||||||||||||||||
| stored = provider_fields.get("reasoning_content") | ||||||||||||||||||||||
| if stored: | ||||||||||||||||||||||
| patched["reasoning_content"] = stored | ||||||||||||||||||||||
|
Comment on lines
+163
to
+167
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When `reasoning_content` is promoted from `provider_specific_fields` to the top level, the original copy is left behind in `provider_specific_fields`. If LiteLLM does not strip `provider_specific_fields` before serialising the request, the same value is sent twice in the request body. Consider cleaning up the promoted key after copying it to the top level: if stored:
    patched["reasoning_content"] = stored
    # Remove from provider_specific_fields to avoid duplication
    pf = dict(provider_fields)
    pf.pop("reasoning_content", None)
    patched["provider_specific_fields"] = pf |
||||||||||||||||||||||
| # Remove the promoted key from provider_specific_fields to | ||||||||||||||||||||||
| # avoid sending the value twice in the serialised request body | ||||||||||||||||||||||
| cleaned_provider_fields = dict(provider_fields) | ||||||||||||||||||||||
| cleaned_provider_fields.pop("reasoning_content", None) | ||||||||||||||||||||||
| patched["provider_specific_fields"] = cleaned_provider_fields | ||||||||||||||||||||||
|
Comment on lines
+170
to
+172
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Empty When
Suggested change
|
||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| litellm.verbose_logger.warning( | ||||||||||||||||||||||
| "Moonshot reasoning model: assistant tool-call message is missing " | ||||||||||||||||||||||
| "`reasoning_content`. Injecting a placeholder to satisfy API validation. " | ||||||||||||||||||||||
| "For best results, preserve `reasoning_content` from the original " | ||||||||||||||||||||||
| "assistant response when building multi-turn conversation history." | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| patched["reasoning_content"] = " " | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Magic string for reasoning content placeholder The value
Suggested change
With the constant defined at the top of the file: # Minimum value accepted by the Moonshot API when reasoning_content is unavailable
_REASONING_PLACEHOLDER = " "Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! |
||||||||||||||||||||||
| result.append(cast(AllMessageValues, patched)) | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| result.append(msg) | ||||||||||||||||||||||
| return result | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| def transform_request( | ||||||||||||||||||||||
| self, | ||||||||||||||||||||||
| model: str, | ||||||||||||||||||||||
|
|
@@ -169,6 +203,10 @@ def transform_request( | |||||||||||||||||||||
| optional_params=optional_params, | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Moonshot reasoning models: fill in reasoning_content before the API call | ||||||||||||||||||||||
| if supports_reasoning(model=model, custom_llm_provider="moonshot"): | ||||||||||||||||||||||
| messages = self.fill_reasoning_content(messages) | ||||||||||||||||||||||
|
Comment on lines
+206
to
+208
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
However, if a caller passes in a conversation history that contains tool-call messages originally generated by a different model (e.g. Consider documenting or guarding against this edge-case, for example: # Only patch tool-call messages if the model actually supports tool calls
if (
supports_reasoning(model=model, custom_llm_provider="moonshot")
and "tools" in self.get_supported_openai_params(model)
):
messages = self.fill_reasoning_content(messages) |
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| # Call parent transform_request which handles _transform_messages | ||||||||||||||||||||||
| return super().transform_request( | ||||||||||||||||||||||
| model=model, | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
|
|
||
| import os | ||
| import sys | ||
| from unittest.mock import patch | ||
|
|
||
| sys.path.insert( | ||
| 0, os.path.abspath("../../../../..") | ||
|
|
@@ -404,4 +405,149 @@ def test_transform_messages_flattens_text_only_content(self): | |
|
|
||
| # Content should be flattened to a plain string | ||
| assert isinstance(result["messages"][0]["content"], str) | ||
| assert result["messages"][0]["content"] == "Hello, how are you?" | ||
| assert result["messages"][0]["content"] == "Hello, how are you?" | ||
|
|
||
| # ------------------------------------------------------------------ # | ||
| # Tests for fill_reasoning_content # | ||
| # ------------------------------------------------------------------ # | ||
|
|
||
| def test_reasoning_content_space_injected_when_absent(self): | ||
| """Assistant tool-call message with no reasoning_content gets a space injected.""" | ||
| config = MoonshotChatConfig() | ||
|
|
||
| messages = [ | ||
| {"role": "user", "content": "What's the weather?"}, | ||
| { | ||
| "role": "assistant", | ||
| "content": None, | ||
| "tool_calls": [ | ||
| {"id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": "{}"}} | ||
| ], | ||
| }, | ||
| {"role": "tool", "tool_call_id": "call_1", "content": "Sunny, 22°C"}, | ||
| ] | ||
|
|
||
| result = config.fill_reasoning_content(messages) | ||
|
|
||
| assert result[1].get("reasoning_content") == " " | ||
| # Non-assistant messages are untouched | ||
| assert "reasoning_content" not in result[0] | ||
| assert "reasoning_content" not in result[2] | ||
|
|
||
| def test_empty_tool_calls_list_not_injected(self): | ||
| """Assistant message with tool_calls: [] should not get reasoning_content injected.""" | ||
| config = MoonshotChatConfig() | ||
|
|
||
| original_msg = { | ||
| "role": "assistant", | ||
| "content": "Here is the answer.", | ||
| "tool_calls": [], | ||
| } | ||
| messages = [original_msg] | ||
|
|
||
| result = config.fill_reasoning_content(messages) | ||
|
|
||
| assert "reasoning_content" not in result[0] | ||
| assert result[0] is original_msg | ||
|
|
||
| def test_existing_reasoning_content_not_overwritten(self): | ||
| """Message that already has reasoning_content is passed through unchanged.""" | ||
| config = MoonshotChatConfig() | ||
|
|
||
| original_msg = { | ||
| "role": "assistant", | ||
| "content": None, | ||
| "tool_calls": [ | ||
| {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} | ||
| ], | ||
| "reasoning_content": "<actual thinking>", | ||
| } | ||
| messages = [original_msg] | ||
|
|
||
| result = config.fill_reasoning_content(messages) | ||
|
|
||
| assert result[0].get("reasoning_content") == "<actual thinking>" | ||
| # Same object — no copy was made | ||
| assert result[0] is original_msg | ||
|
|
||
| def test_provider_specific_fields_reasoning_content_promoted(self): | ||
| """reasoning_content stored in provider_specific_fields is promoted to top level.""" | ||
| config = MoonshotChatConfig() | ||
|
|
||
| messages = [ | ||
| { | ||
| "role": "assistant", | ||
| "content": None, | ||
| "tool_calls": [ | ||
| {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} | ||
| ], | ||
| "provider_specific_fields": {"reasoning_content": "stored thinking"}, | ||
| } | ||
| ] | ||
|
|
||
| result = config.fill_reasoning_content(messages) | ||
|
|
||
| assert result[0].get("reasoning_content") == "stored thinking" | ||
| # The promoted key must be removed from provider_specific_fields to | ||
| # avoid sending the value twice in the serialised request body | ||
| assert "reasoning_content" not in (result[0].get("provider_specific_fields") or {}) | ||
|
|
||
| def test_reasoning_model_fill_called_from_transform_request(self): | ||
| """transform_request injects reasoning_content end-to-end for reasoning models.""" | ||
| config = MoonshotChatConfig() | ||
|
|
||
| messages = [ | ||
| {"role": "user", "content": "Call a tool"}, | ||
| { | ||
| "role": "assistant", | ||
| "content": None, | ||
| "tool_calls": [ | ||
| {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} | ||
| ], | ||
| }, | ||
| ] | ||
|
|
||
| with patch( | ||
| "litellm.llms.moonshot.chat.transformation.supports_reasoning", | ||
| return_value=True, | ||
| ): | ||
| result = config.transform_request( | ||
| model="kimi-k2-thinking", | ||
| messages=messages, | ||
| optional_params={}, | ||
| litellm_params={}, | ||
| headers={}, | ||
| ) | ||
|
|
||
| assert result["messages"][1].get("reasoning_content") == " " | ||
|
|
||
| def test_non_reasoning_model_messages_untouched(self): | ||
| """For non-reasoning models, transform_request leaves messages unchanged.""" | ||
| config = MoonshotChatConfig() | ||
|
|
||
| messages = [ | ||
| {"role": "user", "content": "Hello"}, | ||
| { | ||
| "role": "assistant", | ||
| "content": None, | ||
| "tool_calls": [ | ||
| {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} | ||
| ], | ||
| }, | ||
| ] | ||
|
|
||
| with patch( | ||
| "litellm.llms.moonshot.chat.transformation.supports_reasoning", | ||
| return_value=False, | ||
| ): | ||
| result = config.transform_request( | ||
| model="moonshot-v1-8k", | ||
| messages=messages, | ||
| optional_params={}, | ||
| litellm_params={}, | ||
| headers={}, | ||
| ) | ||
|
|
||
| # reasoning_content must not have been injected | ||
| for msg in result["messages"]: | ||
| assert "reasoning_content" not in msg | ||
|
Comment on lines
+524
to
+553
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No integration test for the reasoning-model path through `transform_request`. The four new tests call `fill_reasoning_content` directly rather than going through `transform_request`. This means the integration wiring — specifically, that `transform_request` invokes `fill_reasoning_content` when `supports_reasoning` returns True and skips it otherwise — is not covered. Consider adding a test similar to def test_reasoning_model_fill_called_from_transform_request(self):
"""transform_request injects reasoning_content for reasoning models."""
config = MoonshotChatConfig()
messages = [
{"role": "user", "content": "Call a tool"},
{
"role": "assistant",
"content": None,
"tool_calls": [
{"id": "c1", "type": "function", "function": {"name": "fn", "arguments": "{}"}}
],
},
]
with patch(
"litellm.llms.moonshot.chat.transformation.supports_reasoning",
return_value=True,
):
result = config.transform_request(
model="kimi-k2-thinking",
messages=messages,
optional_params={},
litellm_params={},
headers={},
)
assert result["messages"][1].get("reasoning_content") == " " |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Public method name for an internal helper
fill_reasoning_contenthas no leading underscore, making it appear as part of the public API surface ofMoonshotChatConfig. It is only called fromtransform_requestwithin the same class and is exclusively tested via direct invocation in unit tests. Consider renaming it to_fill_reasoning_contentto signal that it is an internal implementation detail, which also makes future refactoring safer.Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!