diff --git a/docs/my-website/blog/minimax_m2_5/index.md b/docs/my-website/blog/minimax_m2_5/index.md
new file mode 100644
index 00000000000..9259d1caad2
--- /dev/null
+++ b/docs/my-website/blog/minimax_m2_5/index.md
@@ -0,0 +1,393 @@
---
slug: minimax_m2_5
title: "Day 0 Support: MiniMax-M2.5"
date: 2026-02-12T10:00:00
authors:
  - name: Sameer Kankute
    title: SWE @ LiteLLM (LLM Translation)
    url: https://www.linkedin.com/in/sameer-kankute/
    image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
  - name: Krrish Dholakia
    title: "CEO, LiteLLM"
    url: https://www.linkedin.com/in/krish-d/
    image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
  - name: Ishaan Jaff
    title: "CTO, LiteLLM"
    url: https://www.linkedin.com/in/reffajnaahsi/
    image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
description: "Day 0 support for MiniMax-M2.5 on LiteLLM"
tags: [minimax, M2.5, llm]
hide_table_of_contents: false
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

LiteLLM now supports MiniMax-M2.5 on Day 0. Use it through the LiteLLM AI Gateway via both the OpenAI-compatible and Anthropic-compatible APIs.

## Supported Models

LiteLLM supports the following MiniMax models:

| Model | Description | Input Cost | Output Cost | Context Window |
|-------|-------------|------------|-------------|----------------|
| **MiniMax-M2.5** | Advanced reasoning and agentic capabilities | $0.3/1M tokens | $1.2/1M tokens | 1M tokens |
| **MiniMax-M2.5-lightning** | Faster, lower-latency variant (~100 tokens/sec) | $0.3/1M tokens | $2.4/1M tokens | 1M tokens |

## Features Supported

- **Prompt Caching**: Reduce costs with cached prompts ($0.03/1M tokens for cache reads, $0.375/1M tokens for cache writes)
- **Function Calling**: Built-in tool calling support
- **Reasoning**: Advanced reasoning capabilities with thinking support
- **System Messages**: Full system message support
- **Cost Tracking**: Automatic cost calculation for all requests

## Docker Image

```bash
docker pull ghcr.io/berriai/litellm:v1.81.3-stable
```

## Usage - OpenAI Compatible API (/v1/chat/completions)

**1. Setup config.yaml**

```yaml
model_list:
  - model_name: minimax-m2-5
    litellm_params:
      model: minimax/MiniMax-M2.5
      api_key: os.environ/MINIMAX_API_KEY
      api_base: https://api.minimax.io/v1
```

**2. Start the proxy**

```bash
docker run -d \
  -p 4000:4000 \
  -e MINIMAX_API_KEY=$MINIMAX_API_KEY \
  -v $(pwd)/config.yaml:/app/config.yaml \
  ghcr.io/berriai/litellm:v1.81.3-stable \
  --config /app/config.yaml
```

**3. Test it!**

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header "Authorization: Bearer $LITELLM_KEY" \
--data '{
    "model": "minimax-m2-5",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```

### With Reasoning Split

Pass `reasoning_split: true` to receive the model's reasoning separately from its final answer:

```bash
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header "Authorization: Bearer $LITELLM_KEY" \
--data '{
    "model": "minimax-m2-5",
    "messages": [
        {
            "role": "user",
            "content": "Solve: 2+2=?"
        }
    ],
    "reasoning_split": true
}'
```
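If you're calling the proxy without an SDK, here is a minimal sketch of reading a split response in Python. It assumes the proxy from step 2 is running on `http://0.0.0.0:4000` and that the reasoning comes back on the message as `reasoning_details`, as in the SDK examples later in this post; the exact field layout may vary by LiteLLM version:

```python
import os
import requests

# Call the proxy's OpenAI-compatible endpoint directly
resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['LITELLM_KEY']}"},
    json={
        "model": "minimax-m2-5",
        "messages": [{"role": "user", "content": "Solve: 2+2=?"}],
        "reasoning_split": True,
    },
)
resp.raise_for_status()

message = resp.json()["choices"][0]["message"]
# reasoning_details is only present when the model returns split reasoning
for detail in message.get("reasoning_details") or []:
    print("Thinking:", detail.get("text", ""))
print("Response:", message["content"])
```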
## Usage - Anthropic Compatible API (/v1/messages)

**1. Setup config.yaml**

```yaml
model_list:
  - model_name: minimax-m2-5
    litellm_params:
      model: minimax/MiniMax-M2.5
      api_key: os.environ/MINIMAX_API_KEY
      api_base: https://api.minimax.io/anthropic/v1/messages
```

**2. Start the proxy**

```bash
docker run -d \
  -p 4000:4000 \
  -e MINIMAX_API_KEY=$MINIMAX_API_KEY \
  -v $(pwd)/config.yaml:/app/config.yaml \
  ghcr.io/berriai/litellm:v1.81.3-stable \
  --config /app/config.yaml
```

**3. Test it!**

```bash
curl --location 'http://0.0.0.0:4000/v1/messages' \
--header 'Content-Type: application/json' \
--header "Authorization: Bearer $LITELLM_KEY" \
--data '{
    "model": "minimax-m2-5",
    "max_tokens": 1000,
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```

### With Thinking

```bash
curl --location 'http://0.0.0.0:4000/v1/messages' \
--header 'Content-Type: application/json' \
--header "Authorization: Bearer $LITELLM_KEY" \
--data '{
    "model": "minimax-m2-5",
    "max_tokens": 1000,
    "thinking": {
        "type": "enabled",
        "budget_tokens": 1000
    },
    "messages": [
        {
            "role": "user",
            "content": "Solve: 2+2=?"
        }
    ]
}'
```

## Usage - LiteLLM SDK

### OpenAI-compatible API

```python
import litellm

response = litellm.completion(
    model="minimax/MiniMax-M2.5",
    messages=[
        {"role": "user", "content": "Hello, how are you?"}
    ],
    api_key="your-minimax-api-key",
    api_base="https://api.minimax.io/v1"
)

print(response.choices[0].message.content)
```

### Anthropic-compatible API

```python
import asyncio

import litellm

async def main():
    # acreate is async and must be awaited
    response = await litellm.anthropic.messages.acreate(
        model="minimax/MiniMax-M2.5",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        api_key="your-minimax-api-key",
        api_base="https://api.minimax.io/anthropic/v1/messages",
        max_tokens=1000
    )
    # The response follows the Anthropic Messages format: content is a list of blocks
    for block in response["content"]:
        if block.get("type") == "text":
            print(block["text"])

asyncio.run(main())
```

### With Thinking

```python
import asyncio

import litellm

async def main():
    response = await litellm.anthropic.messages.acreate(
        model="minimax/MiniMax-M2.5",
        messages=[{"role": "user", "content": "Solve: 2+2=?"}],
        thinking={"type": "enabled", "budget_tokens": 1000},
        max_tokens=1000,
        api_key="your-minimax-api-key"
    )
    # Thinking arrives as dedicated content blocks in the Anthropic format
    for block in response["content"]:
        if block.get("type") == "thinking":
            print(f"Thinking: {block['thinking']}")

asyncio.run(main())
```

### With Reasoning Split (OpenAI API)

```python
import litellm

response = litellm.completion(
    model="minimax/MiniMax-M2.5",
    messages=[
        {"role": "user", "content": "Solve: 2+2=?"}
    ],
    extra_body={"reasoning_split": True},
    api_key="your-minimax-api-key",
    api_base="https://api.minimax.io/v1"
)

# Access thinking and response separately
if getattr(response.choices[0].message, "reasoning_details", None):
    print(f"Thinking: {response.choices[0].message.reasoning_details}")
print(f"Response: {response.choices[0].message.content}")
```
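### Function Calling

MiniMax-M2.5 also supports tool calling through the standard OpenAI `tools` format. A minimal sketch; the `get_weather` tool below is hypothetical and defined only for illustration:

```python
import litellm

# Hypothetical tool definition in the standard OpenAI function-calling schema
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name"}
                },
                "required": ["city"],
            },
        },
    }
]

response = litellm.completion(
    model="minimax/MiniMax-M2.5",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    api_key="your-minimax-api-key",
    api_base="https://api.minimax.io/v1",
)

# If the model decided to call the tool, the call shows up on the message
for tool_call in response.choices[0].message.tool_calls or []:
    print(tool_call.function.name, tool_call.function.arguments)
```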
## Cost Tracking

LiteLLM automatically tracks costs for MiniMax-M2.5 requests. The pricing is:

- **Input**: $0.3 per 1M tokens
- **Output**: $1.2 per 1M tokens
- **Cache Read**: $0.03 per 1M tokens
- **Cache Write**: $0.375 per 1M tokens

### Accessing Cost Information

```python
import litellm

response = litellm.completion(
    model="minimax/MiniMax-M2.5",
    messages=[{"role": "user", "content": "Hello!"}],
    api_key="your-minimax-api-key"
)

# LiteLLM attaches the calculated cost to the response
print(f"Cost: ${response._hidden_params.get('response_cost', 0)}")
```

## Streaming Support

### OpenAI API

```python
import litellm

response = litellm.completion(
    model="minimax/MiniMax-M2.5",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True,
    api_key="your-minimax-api-key",
    api_base="https://api.minimax.io/v1"
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```

### Streaming with Reasoning Split

```python
import litellm

stream = litellm.completion(
    model="minimax/MiniMax-M2.5",
    messages=[
        {"role": "user", "content": "Tell me a story"},
    ],
    extra_body={"reasoning_split": True},
    stream=True,
    api_key="your-minimax-api-key",
    api_base="https://api.minimax.io/v1"
)

for chunk in stream:
    delta = chunk.choices[0].delta
    # Reasoning deltas arrive in `reasoning_details` when reasoning_split is enabled
    if getattr(delta, "reasoning_details", None):
        for detail in delta.reasoning_details:
            if "text" in detail:
                print(detail["text"], end="", flush=True)
    # Regular content streams as incremental deltas
    if delta.content:
        print(delta.content, end="", flush=True)
```
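### Async Streaming

For async applications, the same flow works with `litellm.acompletion`, the async counterpart of `litellm.completion`. A minimal sketch under the same assumptions as above:

```python
import asyncio

import litellm

async def main():
    # acompletion with stream=True yields an async iterator of chunks
    stream = await litellm.acompletion(
        model="minimax/MiniMax-M2.5",
        messages=[{"role": "user", "content": "Tell me a story"}],
        stream=True,
        api_key="your-minimax-api-key",
        api_base="https://api.minimax.io/v1",
    )

    async for chunk in stream:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)

asyncio.run(main())
```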
+ } + ] + } + ] +) + +for block in message.content: + if block.type == "thinking": + print(f"Thinking:\n{block.thinking}\n") + elif block.type == "text": + print(f"Text:\n{block.text}\n") +``` + +### OpenAI SDK via LiteLLM Proxy + +```python +import os +os.environ["OPENAI_BASE_URL"] = "http://localhost:4000" +os.environ["OPENAI_API_KEY"] = "sk-1234" # Your LiteLLM proxy key + +from openai import OpenAI + +client = OpenAI() + +response = client.chat.completions.create( + model="minimax-m2-5", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hi, how are you?"}, + ], + extra_body={"reasoning_split": True}, +) + +# Access thinking and response +if hasattr(response.choices[0].message, 'reasoning_details'): + print(f"Thinking:\n{response.choices[0].message.reasoning_details[0]['text']}\n") +print(f"Text:\n{response.choices[0].message.content}\n") +``` diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index f6edcf7efd0..7d0225af599 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -21432,6 +21432,36 @@ "max_input_tokens": 1000000, "max_output_tokens": 8192 }, + "minimax/MiniMax-M2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.5-lightning": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.4e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, "minimax/MiniMax-M2": { "input_cost_per_token": 3e-07, "output_cost_per_token": 1.2e-06, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index f6edcf7efd0..7d0225af599 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -21432,6 +21432,36 @@ "max_input_tokens": 1000000, "max_output_tokens": 8192 }, + "minimax/MiniMax-M2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.5-lightning": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.4e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, 
"minimax/MiniMax-M2": { "input_cost_per_token": 3e-07, "output_cost_per_token": 1.2e-06,