diff --git a/litellm/llms/moonshot/chat/transformation.py b/litellm/llms/moonshot/chat/transformation.py index 3ed08f51c8d..40096be05c9 100644 --- a/litellm/llms/moonshot/chat/transformation.py +++ b/litellm/llms/moonshot/chat/transformation.py @@ -2,13 +2,15 @@ Translates from OpenAI's `/v1/chat/completions` to Moonshot AI's `/v1/chat/completions` """ -from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, overload +from typing import Any, Coroutine, List, Literal, Optional, Tuple, Union, cast, overload +import litellm from litellm.litellm_core_utils.prompt_templates.common_utils import ( handle_messages_with_content_list_to_str_conversion, ) from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues +from litellm.utils import supports_reasoning from ...openai.chat.gpt_transformation import OpenAIGPTConfig @@ -17,8 +19,7 @@ class MoonshotChatConfig(OpenAIGPTConfig): @overload def _transform_messages( self, messages: List[AllMessageValues], model: str, is_async: Literal[True] - ) -> Coroutine[Any, Any, List[AllMessageValues]]: - ... + ) -> Coroutine[Any, Any, List[AllMessageValues]]: ... @overload def _transform_messages( @@ -26,8 +27,7 @@ def _transform_messages( messages: List[AllMessageValues], model: str, is_async: Literal[False] = False, - ) -> List[AllMessageValues]: - ... + ) -> List[AllMessageValues]: ... def _transform_messages( self, messages: List[AllMessageValues], model: str, is_async: bool = False @@ -53,22 +53,14 @@ def _transform_messages( messages = handle_messages_with_content_list_to_str_conversion(messages) if is_async: - return super()._transform_messages( - messages=messages, model=model, is_async=True - ) + return super()._transform_messages(messages=messages, model=model, is_async=True) else: - return super()._transform_messages( - messages=messages, model=model, is_async=False - ) + return super()._transform_messages(messages=messages, model=model, is_async=False) def _get_openai_compatible_provider_info( self, api_base: Optional[str], api_key: Optional[str] ) -> Tuple[Optional[str], Optional[str]]: - api_base = ( - api_base - or get_secret_str("MOONSHOT_API_BASE") - or "https://api.moonshot.ai/v1" - ) # type: ignore + api_base = api_base or get_secret_str("MOONSHOT_API_BASE") or "https://api.moonshot.ai/v1" # type: ignore dynamic_api_key = api_key or get_secret_str("MOONSHOT_API_KEY") return api_base, dynamic_api_key @@ -149,6 +141,48 @@ def map_openai_params( optional_params["temperature"] = 0.3 return optional_params + def fill_reasoning_content(self, messages: List[AllMessageValues]) -> List[AllMessageValues]: + """ + Moonshot reasoning models require `reasoning_content` on every assistant + message that contains tool_calls (multi-turn tool-calling flows). + + For each such message that is missing the field: + 1. Promote provider_specific_fields["reasoning_content"] if present and non-empty + (this is where LiteLLM stores it from a previous response) + 2. Otherwise inject a single space — the minimum value the API accepts + Messages that already carry the field, or are not assistant/tool-call messages, + are appended as-is (no copy made). + """ + result: List[AllMessageValues] = [] + for msg in messages: + if ( + msg.get("role") == "assistant" + and msg.get("tool_calls") + and "reasoning_content" not in msg + ): + patched = dict(cast(dict, msg)) + provider_fields = patched.get("provider_specific_fields") or {} + stored = provider_fields.get("reasoning_content") + if stored: + patched["reasoning_content"] = stored + # Remove the promoted key from provider_specific_fields to + # avoid sending the value twice in the serialised request body + cleaned_provider_fields = dict(provider_fields) + cleaned_provider_fields.pop("reasoning_content", None) + patched["provider_specific_fields"] = cleaned_provider_fields + else: + litellm.verbose_logger.warning( + "Moonshot reasoning model: assistant tool-call message is missing " + "`reasoning_content`. Injecting a placeholder to satisfy API validation. " + "For best results, preserve `reasoning_content` from the original " + "assistant response when building multi-turn conversation history." + ) + patched["reasoning_content"] = " " + result.append(cast(AllMessageValues, patched)) + else: + result.append(msg) + return result + def transform_request( self, model: str, @@ -169,6 +203,10 @@ def transform_request( optional_params=optional_params, ) + # Moonshot reasoning models: fill in reasoning_content before the API call + if supports_reasoning(model=model, custom_llm_provider="moonshot"): + messages = self.fill_reasoning_content(messages) + # Call parent transform_request which handles _transform_messages return super().transform_request( model=model, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 9b1d81fee40..6786fc33595 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -22083,6 +22083,7 @@ "output_cost_per_token": 3e-06, "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true @@ -22166,6 +22167,7 @@ "output_cost_per_token": 2.5e-06, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, @@ -22180,6 +22182,7 @@ "output_cost_per_token": 8e-06, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 9b1d81fee40..6786fc33595 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -22083,6 +22083,7 @@ "output_cost_per_token": 3e-06, "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_video_input": true, "supports_vision": true @@ -22166,6 +22167,7 @@ "output_cost_per_token": 2.5e-06, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, @@ -22180,6 +22182,7 @@ "output_cost_per_token": 8e-06, "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", "supports_function_calling": true, + "supports_reasoning": true, "supports_tool_choice": true, "supports_web_search": true }, diff --git a/tests/test_litellm/llms/moonshot/test_moonshot_chat_transformation.py b/tests/test_litellm/llms/moonshot/test_moonshot_chat_transformation.py index 345186e8a69..c557fb395f9 100644 --- a/tests/test_litellm/llms/moonshot/test_moonshot_chat_transformation.py +++ b/tests/test_litellm/llms/moonshot/test_moonshot_chat_transformation.py @@ -7,6 +7,7 @@ import os import sys +from unittest.mock import patch sys.path.insert( 0, os.path.abspath("../../../../..") @@ -404,4 +405,149 @@ def test_transform_messages_flattens_text_only_content(self): # Content should be flattened to a plain string assert isinstance(result["messages"][0]["content"], str) - assert result["messages"][0]["content"] == "Hello, how are you?" \ No newline at end of file + assert result["messages"][0]["content"] == "Hello, how are you?" + + # ------------------------------------------------------------------ # + # Tests for fill_reasoning_content # + # ------------------------------------------------------------------ # + + def test_reasoning_content_space_injected_when_absent(self): + """Assistant tool-call message with no reasoning_content gets a space injected.""" + config = MoonshotChatConfig() + + messages = [ + {"role": "user", "content": "What's the weather?"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + {"id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": "{}"}} + ], + }, + {"role": "tool", "tool_call_id": "call_1", "content": "Sunny, 22°C"}, + ] + + result = config.fill_reasoning_content(messages) + + assert result[1].get("reasoning_content") == " " + # Non-assistant messages are untouched + assert "reasoning_content" not in result[0] + assert "reasoning_content" not in result[2] + + def test_empty_tool_calls_list_not_injected(self): + """Assistant message with tool_calls: [] should not get reasoning_content injected.""" + config = MoonshotChatConfig() + + original_msg = { + "role": "assistant", + "content": "Here is the answer.", + "tool_calls": [], + } + messages = [original_msg] + + result = config.fill_reasoning_content(messages) + + assert "reasoning_content" not in result[0] + assert result[0] is original_msg + + def test_existing_reasoning_content_not_overwritten(self): + """Message that already has reasoning_content is passed through unchanged.""" + config = MoonshotChatConfig() + + original_msg = { + "role": "assistant", + "content": None, + "tool_calls": [ + {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} + ], + "reasoning_content": "", + } + messages = [original_msg] + + result = config.fill_reasoning_content(messages) + + assert result[0].get("reasoning_content") == "" + # Same object — no copy was made + assert result[0] is original_msg + + def test_provider_specific_fields_reasoning_content_promoted(self): + """reasoning_content stored in provider_specific_fields is promoted to top level.""" + config = MoonshotChatConfig() + + messages = [ + { + "role": "assistant", + "content": None, + "tool_calls": [ + {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} + ], + "provider_specific_fields": {"reasoning_content": "stored thinking"}, + } + ] + + result = config.fill_reasoning_content(messages) + + assert result[0].get("reasoning_content") == "stored thinking" + # The promoted key must be removed from provider_specific_fields to + # avoid sending the value twice in the serialised request body + assert "reasoning_content" not in (result[0].get("provider_specific_fields") or {}) + + def test_reasoning_model_fill_called_from_transform_request(self): + """transform_request injects reasoning_content end-to-end for reasoning models.""" + config = MoonshotChatConfig() + + messages = [ + {"role": "user", "content": "Call a tool"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} + ], + }, + ] + + with patch( + "litellm.llms.moonshot.chat.transformation.supports_reasoning", + return_value=True, + ): + result = config.transform_request( + model="kimi-k2-thinking", + messages=messages, + optional_params={}, + litellm_params={}, + headers={}, + ) + + assert result["messages"][1].get("reasoning_content") == " " + + def test_non_reasoning_model_messages_untouched(self): + """For non-reasoning models, transform_request leaves messages unchanged.""" + config = MoonshotChatConfig() + + messages = [ + {"role": "user", "content": "Hello"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + {"id": "call_1", "type": "function", "function": {"name": "fn", "arguments": "{}"}} + ], + }, + ] + + with patch( + "litellm.llms.moonshot.chat.transformation.supports_reasoning", + return_value=False, + ): + result = config.transform_request( + model="moonshot-v1-8k", + messages=messages, + optional_params={}, + litellm_params={}, + headers={}, + ) + + # reasoning_content must not have been injected + for msg in result["messages"]: + assert "reasoning_content" not in msg \ No newline at end of file