diff --git a/docs/my-website/blog/gemini_3_1_flash_lite/index.md b/docs/my-website/blog/gemini_3_1_flash_lite/index.md new file mode 100644 index 00000000000..9ef4bacb2ad --- /dev/null +++ b/docs/my-website/blog/gemini_3_1_flash_lite/index.md @@ -0,0 +1,175 @@ +--- +slug: gemini_3_1_flash_lite_preview +title: "DAY 0 Support: Gemini 3.1 Flash Lite Preview on LiteLLM" +date: 2026-03-03T08:00:00 +authors: + - name: Sameer Kankute + title: SWE @ LiteLLM (LLM Translation) + url: https://www.linkedin.com/in/sameer-kankute/ + image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg + - name: Krrish Dholakia + title: "CEO, LiteLLM" + url: https://www.linkedin.com/in/krish-d/ + image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg + - name: Ishaan Jaff + title: "CTO, LiteLLM" + url: https://www.linkedin.com/in/reffajnaahsi/ + image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg +description: "Guide to using Gemini 3.1 Flash Lite Preview on LiteLLM Proxy and SDK with day 0 support." +tags: [gemini, day 0 support, llms, supernova] +hide_table_of_contents: false +--- + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Gemini 3.1 Flash Lite Preview Day 0 Support + +LiteLLM now supports `gemini-3.1-flash-lite-preview` with full day 0 support! + +:::note +If you only want cost tracking, no change to your current LiteLLM version is needed. To use the new features introduced with this model, such as thinking levels, you will need v1.80.8-stable.1 or above. 
+::: + +## Deploy this version + + + + +``` showLineNumbers title="docker run litellm" +docker run \ +-e STORE_MODEL_IN_DB=True \ +-p 4000:4000 \ +ghcr.io/berriai/litellm:main-v1.80.8-stable.1 +``` + + + + + +``` showLineNumbers title="pip install litellm" +pip install litellm==v1.80.8-stable.1 +``` + + + + +## What's New + +Supports all four thinking levels: +- **MINIMAL**: Ultra-fast responses with minimal reasoning +- **LOW**: Simple instruction following +- **MEDIUM**: Balanced reasoning for complex tasks +- **HIGH**: Maximum reasoning depth (dynamic) + +--- + +## Quick Start + + + + +**Basic Usage** + +```python +from litellm import completion + +response = completion( + model="gemini/gemini-3.1-flash-lite-preview", + messages=[{"role": "user", "content": "Extract key entities from this text: ..."}], +) + +print(response.choices[0].message.content) +``` + +**With Thinking Levels** + +```python +from litellm import completion + +# Use MEDIUM thinking for complex reasoning tasks +response = completion( + model="gemini/gemini-3.1-flash-lite-preview", + messages=[{"role": "user", "content": "Analyze this dataset and identify patterns"}], + reasoning_effort="medium", # minimal, low, medium, high +) + +print(response.choices[0].message.content) +``` + + + + + +**1. Set up config.yaml** + +```yaml +model_list: + - model_name: gemini-3.1-flash-lite + litellm_params: + model: gemini/gemini-3.1-flash-lite-preview + api_key: os.environ/GEMINI_API_KEY + + # Or use Vertex AI + - model_name: vertex-gemini-3.1-flash-lite + litellm_params: + model: vertex_ai/gemini-3.1-flash-lite-preview + vertex_project: your-project-id + vertex_location: us-central1 +``` + +**2. Start proxy** + +```bash +litellm --config /path/to/config.yaml +``` + +**3. 
Make requests** + +```bash +curl -X POST http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_API_KEY" \ + -d '{ + "model": "gemini-3.1-flash-lite", + "messages": [{"role": "user", "content": "Extract structured data from this text"}], + "reasoning_effort": "low" + }' +``` + + + + +--- + +## Supported Endpoints + +LiteLLM provides **full end-to-end support** for Gemini 3.1 Flash Lite Preview on: + +- ✅ `/v1/chat/completions` - OpenAI-compatible chat completions endpoint +- ✅ `/v1/responses` - OpenAI Responses API endpoint (streaming and non-streaming) +- ✅ [`/v1/messages`](../../docs/anthropic_unified) - Anthropic-compatible messages endpoint +- ✅ `/v1/generateContent` - [Google Gemini API](../../docs/generateContent.md) compatible endpoint + +All endpoints support: +- Streaming and non-streaming responses +- Function calling with thought signatures +- Multi-turn conversations +- All Gemini 3-specific features (thinking levels, thought signatures) +- Full multimodal support (text, image, audio, video) + +--- + +## `reasoning_effort` Mapping for Gemini 3.1 + +LiteLLM automatically maps OpenAI's `reasoning_effort` parameter to Gemini's `thinkingLevel`: + +| reasoning_effort | thinking_level | Use Case | +|------------------|----------------|----------| +| `minimal` | `minimal` | Ultra-fast responses, simple queries | +| `low` | `low` | Basic instruction following | +| `medium` | `medium` | Balanced reasoning for moderate complexity | +| `high` | `high` | Maximum reasoning depth, complex problems | +| `disable` | `minimal` | Disable extended reasoning | +| `none` | `minimal` | No extended reasoning | \ No newline at end of file diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md index 6de2263916c..f97f025c19b 100644 --- a/docs/my-website/docs/providers/gemini.md +++ b/docs/my-website/docs/providers/gemini.md @@ -2041,6 +2041,7 @@ response = litellm.completion( | 
gemini-2.0-flash-lite-preview-02-05 | `completion(model='gemini/gemini-2.0-flash-lite-preview-02-05', messages)` | `os.environ['GEMINI_API_KEY']` | | gemini-2.5-flash-preview-09-2025 | `completion(model='gemini/gemini-2.5-flash-preview-09-2025', messages)` | `os.environ['GEMINI_API_KEY']` | | gemini-2.5-flash-lite-preview-09-2025 | `completion(model='gemini/gemini-2.5-flash-lite-preview-09-2025', messages)` | `os.environ['GEMINI_API_KEY']` | +| gemini-3.1-flash-lite-preview | `completion(model='gemini/gemini-3.1-flash-lite-preview', messages)` | `os.environ['GEMINI_API_KEY']` | | gemini-flash-latest | `completion(model='gemini/gemini-flash-latest', messages)` | `os.environ['GEMINI_API_KEY']` | | gemini-flash-lite-latest | `completion(model='gemini/gemini-flash-lite-latest', messages)` | `os.environ['GEMINI_API_KEY']` | diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md index 63e4dceec00..94619082e88 100644 --- a/docs/my-website/docs/providers/vertex.md +++ b/docs/my-website/docs/providers/vertex.md @@ -1685,6 +1685,7 @@ litellm.vertex_location = "us-central1 # Your Location | gemini-2.5-pro | `completion('gemini-2.5-pro', messages)`, `completion('vertex_ai/gemini-2.5-pro', messages)` | | gemini-2.5-flash-preview-09-2025 | `completion('gemini-2.5-flash-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-preview-09-2025', messages)` | | gemini-2.5-flash-lite-preview-09-2025 | `completion('gemini-2.5-flash-lite-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-lite-preview-09-2025', messages)` | +| gemini-3.1-flash-lite-preview | `completion('gemini-3.1-flash-lite-preview', messages)`, `completion('vertex_ai/gemini-3.1-flash-lite-preview', messages)` | ## Private Service Connect (PSC) Endpoints diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index d4c5b476af6..5de764c5cec 100644 --- 
a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -846,7 +846,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 }, "anthropic.claude-3-opus-20240229-v1:0": { "input_cost_per_token": 1.5e-05, @@ -859,7 +861,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 1.875e-05 }, "anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-06, @@ -873,7 +877,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "anthropic.claude-instant-v1": { "input_cost_per_token": 8e-07, @@ -1512,7 +1518,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -1545,7 +1553,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 }, "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 1.375e-06, @@ -1581,7 +1591,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + 
"cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "apac.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 3.75e-06, @@ -6925,7 +6937,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 4.45e-06, @@ -7344,7 +7358,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.6e-07, + "cache_creation_input_token_cost": 4.5e-06 }, "bedrock/us-gov-east-1/anthropic.claude-3-haiku-20240307-v1:0": { "input_cost_per_token": 3e-07, @@ -7358,7 +7374,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07 }, "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { "input_cost_per_token": 3.3e-06, @@ -7376,7 +7394,9 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.3e-07, + "cache_creation_input_token_cost": 4.125e-06 }, "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 2.65e-06, @@ -7489,7 +7509,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.6e-07, + "cache_creation_input_token_cost": 4.5e-06 }, "bedrock/us-gov-west-1/anthropic.claude-3-haiku-20240307-v1:0": { "input_cost_per_token": 3e-07, @@ -7503,7 +7525,9 @@ "supports_pdf_input": true, 
"supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07 }, "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { "input_cost_per_token": 3.3e-06, @@ -7521,7 +7545,9 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3.3e-07, + "cache_creation_input_token_cost": 4.125e-06 }, "bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { "input_cost_per_token": 2.65e-06, @@ -9753,6 +9779,74 @@ } ] }, + "dashscope/qwen3-vl-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 260096, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 1.6e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.4e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 6e-07, + "output_cost_per_token": 4.8e-06, + "range": [ + 128000.0, + 256000.0 + ] + } + ] + }, + "dashscope/qwen3.5-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 991808, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-07, + "output_cost_per_token": 2.4e-06, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 5e-07, + "output_cost_per_token": 3e-06, + "range": [ 
+ 256000.0, + 1000000.0 + ] + } + ] + }, "dashscope/qwq-plus": { "input_cost_per_token": 8e-07, "litellm_provider": "dashscope", @@ -11089,7 +11183,7 @@ "supports_tool_choice": true }, "deepinfra/google/gemini-2.0-flash-001": { - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, @@ -11950,7 +12044,9 @@ "supports_pdf_input": true, "supports_prompt_caching": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 }, "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 1.375e-06, @@ -11987,7 +12083,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { "input_cost_per_token": 3e-06, @@ -12004,7 +12102,9 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "eu.anthropic.claude-3-7-sonnet-20250219-v1:0": { "input_cost_per_token": 3e-06, @@ -12022,7 +12122,9 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "eu.anthropic.claude-3-haiku-20240307-v1:0": { "input_cost_per_token": 2.5e-07, @@ -12036,7 +12138,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + 
"cache_creation_input_token_cost": 3.125e-07 }, "eu.anthropic.claude-3-opus-20240229-v1:0": { "input_cost_per_token": 1.5e-05, @@ -12049,7 +12153,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 1.875e-05 }, "eu.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-06, @@ -12063,7 +12169,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "eu.anthropic.claude-opus-4-1-20250805-v1:0": { "cache_creation_input_token_cost": 1.875e-05, @@ -13590,7 +13698,7 @@ }, "gemini-2.0-flash": { "cache_read_input_token_cost": 2.5e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-07, "input_cost_per_token": 1e-07, "litellm_provider": "vertex_ai-language-models", @@ -13630,7 +13738,7 @@ }, "gemini-2.0-flash-001": { "cache_read_input_token_cost": 3.75e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 1e-06, "input_cost_per_token": 1.5e-07, "litellm_provider": "vertex_ai-language-models", @@ -13716,7 +13824,7 @@ }, "gemini-2.0-flash-lite": { "cache_read_input_token_cost": 1.875e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-08, "input_cost_per_token": 7.5e-08, "litellm_provider": "vertex_ai-language-models", @@ -13752,7 +13860,7 @@ }, "gemini-2.0-flash-lite-001": { "cache_read_input_token_cost": 1.875e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-08, "input_cost_per_token": 7.5e-08, "litellm_provider": "vertex_ai-language-models", @@ -14226,6 +14334,57 @@ 
"supports_vision": true, "supports_web_search": true }, + "gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true + }, "deep-research-pro-preview-12-2025": { "input_cost_per_image": 0.0011, "input_cost_per_token": 2e-06, @@ -14669,6 +14828,7 @@ "supports_web_search": true }, "gemini-3-pro-preview": { + "deprecation_date": "2026-03-26", "cache_read_input_token_cost": 2e-07, "cache_read_input_token_cost_above_200k_tokens": 4e-07, "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, @@ -15805,7 +15965,7 @@ }, "gemini/gemini-2.0-flash": { "cache_read_input_token_cost": 
2.5e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-07, "input_cost_per_token": 1e-07, "litellm_provider": "gemini", @@ -15846,7 +16006,7 @@ }, "gemini/gemini-2.0-flash-001": { "cache_read_input_token_cost": 2.5e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-07, "input_cost_per_token": 1e-07, "litellm_provider": "gemini", @@ -15934,7 +16094,7 @@ }, "gemini/gemini-2.0-flash-lite": { "cache_read_input_token_cost": 1.875e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-08, "input_cost_per_token": 7.5e-08, "litellm_provider": "gemini", @@ -15970,7 +16130,7 @@ "tpm": 4000000 }, "gemini/gemini-2.0-flash-lite-preview-02-05": { - "deprecation_date": "2025-12-02", + "deprecation_date": "2025-12-09", "cache_read_input_token_cost": 1.875e-08, "input_cost_per_audio_token": 7.5e-08, "input_cost_per_token": 7.5e-08, @@ -16925,6 +17085,7 @@ "tpm": 800000 }, "gemini/gemini-3-pro-preview": { + "deprecation_date": "2026-03-09", "cache_read_input_token_cost": 2e-07, "cache_read_input_token_cost_above_200k_tokens": 4e-07, "input_cost_per_token": 2e-06, @@ -16980,6 +17141,59 @@ "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, "supports_service_tier": true }, + "gemini/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "rpm": 15, + "source": 
"https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 250000 + }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, "input_cost_per_audio_token": 1e-06, @@ -23112,6 +23326,21 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/magistral-medium-1-2-2509": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/mistral-ocr-latest": { "litellm_provider": "mistral", "ocr_cost_per_page": 0.001, @@ -23177,6 +23406,21 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/magistral-small-1-2-2509": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": 
"https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/mistral-embed": { "input_cost_per_token": 1e-07, "litellm_provider": "mistral", @@ -23238,24 +23482,41 @@ "supports_tool_choice": true }, "mistral/mistral-large-latest": { - "input_cost_per_token": 2e-06, + "input_cost_per_token": 5e-07, "litellm_provider": "mistral", - "max_input_tokens": 128000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", - "output_cost_per_token": 6e-06, + "output_cost_per_token": 1.5e-06, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true }, "mistral/mistral-large-3": { "input_cost_per_token": 5e-07, "litellm_provider": "mistral", - "max_input_tokens": 256000, - "max_output_tokens": 8191, - "max_tokens": 8191, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-large-2512": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, "mode": "chat", "output_cost_per_token": 1.5e-06, "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", @@ -23306,14 +23567,30 @@ "input_cost_per_token": 4e-07, "litellm_provider": "mistral", "max_input_tokens": 131072, - 
"max_output_tokens": 8191, - "max_tokens": 8191, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", "output_cost_per_token": 2e-06, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-medium-3-1-2508": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://mistral.ai/news/mistral-medium-3", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true }, "mistral/mistral-small": { "input_cost_per_token": 1e-07, @@ -23329,17 +23606,79 @@ "supports_tool_choice": true }, "mistral/mistral-small-latest": { + "input_cost_per_token": 6e-08, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.8e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-small-3-2-2506": { + "input_cost_per_token": 6e-08, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.8e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-3b-2512": { "input_cost_per_token": 1e-07, "litellm_provider": "mistral", - "max_input_tokens": 32000, - "max_output_tokens": 
8191, - "max_tokens": 8191, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 3e-07, + "output_cost_per_token": 1e-07, + "source": "https://mistral.ai/pricing", "supports_assistant_prefill": true, "supports_function_calling": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-8b-2512": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-14b-2512": { + "input_cost_per_token": 2e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true }, "mistral/mistral-tiny": { "input_cost_per_token": 2.5e-07, @@ -25657,7 +25996,7 @@ "supports_tool_choice": true }, "openrouter/google/gemini-2.0-flash-001": { - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7e-07, "input_cost_per_token": 1e-07, "litellm_provider": "openrouter", @@ -29554,7 +29893,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { 
"cache_creation_input_token_cost": 3.75e-06, @@ -29607,7 +29948,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 }, "us.anthropic.claude-3-opus-20240229-v1:0": { "input_cost_per_token": 1.5e-05, @@ -29620,7 +29963,9 @@ "supports_function_calling": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 1.875e-05 }, "us.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-06, @@ -29634,7 +29979,9 @@ "supports_pdf_input": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 }, "us.anthropic.claude-opus-4-1-20250805-v1:0": { "cache_creation_input_token_cost": 1.875e-05, @@ -30527,7 +30874,7 @@ "supports_tool_choice": true }, "vercel_ai_gateway/google/gemini-2.0-flash": { - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_token": 1.5e-07, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, @@ -30541,7 +30888,7 @@ "supports_response_schema": true }, "vercel_ai_gateway/google/gemini-2.0-flash-lite": { - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_token": 7.5e-08, "litellm_provider": "vercel_ai_gateway", "max_input_tokens": 1048576, @@ -32059,6 +32406,57 @@ "output_cost_per_token": 3e-06, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models" }, + "vertex_ai/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + 
"input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true + }, "vertex_ai/deep-research-pro-preview-12-2025": { "input_cost_per_image": 0.0011, "input_cost_per_token": 2e-06, @@ -37898,7 +38296,7 @@ }, "gemini/gemini-2.0-flash-lite-001": { "cache_read_input_token_cost": 1.875e-08, - "deprecation_date": "2026-03-31", + "deprecation_date": "2026-06-01", "input_cost_per_audio_token": 7.5e-08, "input_cost_per_token": 7.5e-08, "litellm_provider": "gemini", diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 4934f11d456..5de764c5cec 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -14334,6 +14334,57 @@ "supports_vision": true, 
"supports_web_search": true }, + "gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true + }, "deep-research-pro-preview-12-2025": { "input_cost_per_image": 0.0011, "input_cost_per_token": 2e-06, @@ -17090,6 +17141,59 @@ "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, "supports_service_tier": true }, + "gemini/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "gemini", + 
"max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 250000 + }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, "input_cost_per_audio_token": 1e-06, @@ -32302,6 +32406,57 @@ "output_cost_per_token": 3e-06, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models" }, + "vertex_ai/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + 
def test_reasoning_tokens_gemini_3_1_flash_lite():
    """Check prompt and completion cost for gemini-3.1-flash-lite-preview.

    The usage record splits its 1000 completion tokens into 600 text tokens
    and 400 reasoning tokens; the completion cost is expected to be the sum of
    each part priced at its own per-token rate from ``litellm.model_cost``.
    """
    # Reload litellm.model_cost with LITELLM_LOCAL_MODEL_COST_MAP set before
    # any rate is read from it.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model = "gemini-3.1-flash-lite-preview"
    custom_llm_provider = "gemini"

    completion_details = CompletionTokensDetailsWrapper(
        accepted_prediction_tokens=None,
        audio_tokens=None,
        reasoning_tokens=400,
        rejected_prediction_tokens=None,
        text_tokens=600,
    )
    prompt_details = PromptTokensDetailsWrapper(
        audio_tokens=None,
        cached_tokens=None,
        text_tokens=500,
        image_tokens=None,
    )
    usage = Usage(
        completion_tokens=1000,
        prompt_tokens=500,
        total_tokens=1500,
        completion_tokens_details=completion_details,
        prompt_tokens_details=prompt_details,
    )

    # Rates are read from the entry keyed by the bare model name.
    rates = litellm.model_cost[model]
    costs = generic_cost_per_token(
        model=model,
        usage=usage,
        custom_llm_provider=custom_llm_provider,
    )
    prompt_cost, completion_cost = costs

    # Prompt side: every prompt token at the flat input rate.
    expected_prompt_cost = rates["input_cost_per_token"] * usage.prompt_tokens
    assert round(prompt_cost, 10) == round(expected_prompt_cost, 10)

    # Completion side: text and reasoning tokens are priced separately, using
    # the counts as stored on the Usage object.
    stored_details = usage.completion_tokens_details
    text_part = rates["output_cost_per_token"] * stored_details.text_tokens
    reasoning_part = (
        rates["output_cost_per_reasoning_token"] * stored_details.reasoning_tokens
    )
    assert round(completion_cost, 10) == round(text_part + reasoning_part, 10)
- This ensures we didn't break the existing behavior for non-image models. + Test that Gemini 3 text models do NOT receive automatic thinkingConfig + when no reasoning_effort or thinking param is provided. """ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( VertexGeminiConfig, @@ -2462,7 +2462,7 @@ def test_gemini_3_text_models_get_thinking_config(): v = VertexGeminiConfig() - # Test gemini-3-pro-preview (text model, should get thinking) + # Test gemini-3-pro-preview (text model, no explicit thinking params) model = "gemini-3-pro-preview" optional_params = {} non_default_params = {} @@ -2474,9 +2474,8 @@ def test_gemini_3_text_models_get_thinking_config(): drop_params=False, ) - # Should have thinkingConfig automatically added - assert "thinkingConfig" in result - assert result["thinkingConfig"]["thinkingLevel"] == "low" + # Should NOT have thinkingConfig automatically added when user provides no reasoning_effort + assert "thinkingConfig" not in result assert result["temperature"] == 1.0