diff --git a/docs/my-website/blog/gemini_3_1_flash_lite/index.md b/docs/my-website/blog/gemini_3_1_flash_lite/index.md
new file mode 100644
index 00000000000..9ef4bacb2ad
--- /dev/null
+++ b/docs/my-website/blog/gemini_3_1_flash_lite/index.md
@@ -0,0 +1,175 @@
+---
+slug: gemini_3_1_flash_lite_preview
+title: "DAY 0 Support: Gemini 3.1 Flash Lite Preview on LiteLLM"
+date: 2026-03-03T08:00:00
+authors:
+ - name: Sameer Kankute
+ title: SWE @ LiteLLM (LLM Translation)
+ url: https://www.linkedin.com/in/sameer-kankute/
+ image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
+ - name: Krrish Dholakia
+ title: "CEO, LiteLLM"
+ url: https://www.linkedin.com/in/krish-d/
+ image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
+ - name: Ishaan Jaff
+ title: "CTO, LiteLLM"
+ url: https://www.linkedin.com/in/reffajnaahsi/
+ image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
+description: "Guide to using Gemini 3.1 Flash Lite Preview on LiteLLM Proxy and SDK with day 0 support."
+tags: [gemini, day 0 support, llms, supernova]
+hide_table_of_contents: false
+---
+
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Gemini 3.1 Flash Lite Preview Day 0 Support
+
+LiteLLM now supports `gemini-3.1-flash-lite-preview` with full day 0 support!
+
+:::note
+If you only want cost tracking, no change to your current LiteLLM version is needed. But if you want support for the new features introduced alongside it, such as thinking levels, you will need to use v1.80.8-stable.1 or above.
+:::
+
+## Deploy this version
+
+
+
+
+``` showLineNumbers title="docker run litellm"
+docker run \
+-e STORE_MODEL_IN_DB=True \
+-p 4000:4000 \
+ghcr.io/berriai/litellm:main-v1.80.8-stable.1
+```
+
+
+
+
+
+``` showLineNumbers title="pip install litellm"
+pip install litellm==v1.80.8-stable.1
+```
+
+
+
+
+## What's New
+
+Gemini 3.1 Flash Lite Preview supports all four thinking levels:
+- **MINIMAL**: Ultra-fast responses with minimal reasoning
+- **LOW**: Simple instruction following
+- **MEDIUM**: Balanced reasoning for complex tasks
+- **HIGH**: Maximum reasoning depth (dynamic)
+
+---
+
+## Quick Start
+
+
+
+
+**Basic Usage**
+
+```python
+from litellm import completion
+
+response = completion(
+ model="gemini/gemini-3.1-flash-lite-preview",
+ messages=[{"role": "user", "content": "Extract key entities from this text: ..."}],
+)
+
+print(response.choices[0].message.content)
+```
+
+**With Thinking Levels**
+
+```python
+from litellm import completion
+
+# Use MEDIUM thinking for complex reasoning tasks
+response = completion(
+ model="gemini/gemini-3.1-flash-lite-preview",
+ messages=[{"role": "user", "content": "Analyze this dataset and identify patterns"}],
+ reasoning_effort="medium", # minimal, low, medium, high
+)
+
+print(response.choices[0].message.content)
+```
+
+
+
+
+
+**1. Setup config.yaml**
+
+```yaml
+model_list:
+ - model_name: gemini-3.1-flash-lite
+ litellm_params:
+ model: gemini/gemini-3.1-flash-lite-preview
+ api_key: os.environ/GEMINI_API_KEY
+
+ # Or use Vertex AI
+ - model_name: vertex-gemini-3.1-flash-lite
+ litellm_params:
+ model: vertex_ai/gemini-3.1-flash-lite-preview
+ vertex_project: your-project-id
+ vertex_location: us-central1
+```
+
+**2. Start proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+**3. Make requests**
+
+```bash
+curl -X POST http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gemini-3.1-flash-lite",
+ "messages": [{"role": "user", "content": "Extract structured data from this text"}],
+ "reasoning_effort": "low"
+ }'
+```
+
+
+
+
+---
+
+## Supported Endpoints
+
+LiteLLM provides **full end-to-end support** for Gemini 3.1 Flash Lite Preview on:
+
+- ✅ `/v1/chat/completions` - OpenAI-compatible chat completions endpoint
+- ✅ `/v1/responses` - OpenAI Responses API endpoint (streaming and non-streaming)
+- ✅ [`/v1/messages`](../../docs/anthropic_unified) - Anthropic-compatible messages endpoint
+- ✅ `/v1/generateContent` - [Google Gemini API](../../docs/generateContent.md) compatible endpoint
+
+All endpoints support:
+- Streaming and non-streaming responses
+- Function calling with thought signatures
+- Multi-turn conversations
+- All Gemini 3-specific features (thinking levels, thought signatures)
+- Full multimodal support (text, image, audio, video)
+
+---
+
+## `reasoning_effort` Mapping for Gemini 3.1
+
+LiteLLM automatically maps OpenAI's `reasoning_effort` parameter to Gemini's `thinkingLevel`:
+
+| reasoning_effort | thinking_level | Use Case |
+|------------------|----------------|----------|
+| `minimal` | `minimal` | Ultra-fast responses, simple queries |
+| `low` | `low` | Basic instruction following |
+| `medium` | `medium` | Balanced reasoning for moderate complexity |
+| `high` | `high` | Maximum reasoning depth, complex problems |
+| `disable` | `minimal` | Disable extended reasoning |
+| `none` | `minimal` | No extended reasoning |
\ No newline at end of file
diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md
index 6de2263916c..f97f025c19b 100644
--- a/docs/my-website/docs/providers/gemini.md
+++ b/docs/my-website/docs/providers/gemini.md
@@ -2041,6 +2041,7 @@ response = litellm.completion(
| gemini-2.0-flash-lite-preview-02-05 | `completion(model='gemini/gemini-2.0-flash-lite-preview-02-05', messages)` | `os.environ['GEMINI_API_KEY']` |
| gemini-2.5-flash-preview-09-2025 | `completion(model='gemini/gemini-2.5-flash-preview-09-2025', messages)` | `os.environ['GEMINI_API_KEY']` |
| gemini-2.5-flash-lite-preview-09-2025 | `completion(model='gemini/gemini-2.5-flash-lite-preview-09-2025', messages)` | `os.environ['GEMINI_API_KEY']` |
+| gemini-3.1-flash-lite-preview | `completion(model='gemini/gemini-3.1-flash-lite-preview', messages)` | `os.environ['GEMINI_API_KEY']` |
| gemini-flash-latest | `completion(model='gemini/gemini-flash-latest', messages)` | `os.environ['GEMINI_API_KEY']` |
| gemini-flash-lite-latest | `completion(model='gemini/gemini-flash-lite-latest', messages)` | `os.environ['GEMINI_API_KEY']` |
diff --git a/docs/my-website/docs/providers/vertex.md b/docs/my-website/docs/providers/vertex.md
index 63e4dceec00..94619082e88 100644
--- a/docs/my-website/docs/providers/vertex.md
+++ b/docs/my-website/docs/providers/vertex.md
@@ -1685,6 +1685,7 @@ litellm.vertex_location = "us-central1 # Your Location
| gemini-2.5-pro | `completion('gemini-2.5-pro', messages)`, `completion('vertex_ai/gemini-2.5-pro', messages)` |
| gemini-2.5-flash-preview-09-2025 | `completion('gemini-2.5-flash-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-preview-09-2025', messages)` |
| gemini-2.5-flash-lite-preview-09-2025 | `completion('gemini-2.5-flash-lite-preview-09-2025', messages)`, `completion('vertex_ai/gemini-2.5-flash-lite-preview-09-2025', messages)` |
+| gemini-3.1-flash-lite-preview | `completion('gemini-3.1-flash-lite-preview', messages)`, `completion('vertex_ai/gemini-3.1-flash-lite-preview', messages)` |
## Private Service Connect (PSC) Endpoints
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d4c5b476af6..5de764c5cec 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -846,7 +846,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_creation_input_token_cost": 3.125e-07
},
"anthropic.claude-3-opus-20240229-v1:0": {
"input_cost_per_token": 1.5e-05,
@@ -859,7 +861,9 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 1.5e-06,
+ "cache_creation_input_token_cost": 1.875e-05
},
"anthropic.claude-3-sonnet-20240229-v1:0": {
"input_cost_per_token": 3e-06,
@@ -873,7 +877,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"anthropic.claude-instant-v1": {
"input_cost_per_token": 8e-07,
@@ -1512,7 +1518,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"apac.anthropic.claude-3-5-sonnet-20241022-v2:0": {
"cache_creation_input_token_cost": 3.75e-06,
@@ -1545,7 +1553,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_creation_input_token_cost": 3.125e-07
},
"apac.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 1.375e-06,
@@ -1581,7 +1591,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"apac.anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 3.75e-06,
@@ -6925,7 +6937,9 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": {
"input_cost_per_token": 4.45e-06,
@@ -7344,7 +7358,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3.6e-07,
+ "cache_creation_input_token_cost": 4.5e-06
},
"bedrock/us-gov-east-1/anthropic.claude-3-haiku-20240307-v1:0": {
"input_cost_per_token": 3e-07,
@@ -7358,7 +7374,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-08,
+ "cache_creation_input_token_cost": 3.75e-07
},
"bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": {
"input_cost_per_token": 3.3e-06,
@@ -7376,7 +7394,9 @@
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3.3e-07,
+ "cache_creation_input_token_cost": 4.125e-06
},
"bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": {
"input_cost_per_token": 2.65e-06,
@@ -7489,7 +7509,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3.6e-07,
+ "cache_creation_input_token_cost": 4.5e-06
},
"bedrock/us-gov-west-1/anthropic.claude-3-haiku-20240307-v1:0": {
"input_cost_per_token": 3e-07,
@@ -7503,7 +7525,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-08,
+ "cache_creation_input_token_cost": 3.75e-07
},
"bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": {
"input_cost_per_token": 3.3e-06,
@@ -7521,7 +7545,9 @@
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3.3e-07,
+ "cache_creation_input_token_cost": 4.125e-06
},
"bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": {
"input_cost_per_token": 2.65e-06,
@@ -9753,6 +9779,74 @@
}
]
},
+ "dashscope/qwen3-vl-plus": {
+ "litellm_provider": "dashscope",
+ "max_input_tokens": 260096,
+ "max_output_tokens": 32768,
+ "max_tokens": 32768,
+ "mode": "chat",
+ "source": "https://www.alibabacloud.com/help/en/model-studio/models",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tiered_pricing": [
+ {
+ "input_cost_per_token": 2e-07,
+ "output_cost_per_token": 1.6e-06,
+ "range": [
+ 0,
+ 32000.0
+ ]
+ },
+ {
+ "input_cost_per_token": 3e-07,
+ "output_cost_per_token": 2.4e-06,
+ "range": [
+ 32000.0,
+ 128000.0
+ ]
+ },
+ {
+ "input_cost_per_token": 6e-07,
+ "output_cost_per_token": 4.8e-06,
+ "range": [
+ 128000.0,
+ 256000.0
+ ]
+ }
+ ]
+ },
+ "dashscope/qwen3.5-plus": {
+ "litellm_provider": "dashscope",
+ "max_input_tokens": 991808,
+ "max_output_tokens": 65536,
+ "max_tokens": 65536,
+ "mode": "chat",
+ "source": "https://www.alibabacloud.com/help/en/model-studio/models",
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_tool_choice": true,
+ "supports_vision": true,
+ "tiered_pricing": [
+ {
+ "input_cost_per_token": 4e-07,
+ "output_cost_per_token": 2.4e-06,
+ "range": [
+ 0,
+ 256000.0
+ ]
+ },
+ {
+ "input_cost_per_token": 5e-07,
+ "output_cost_per_token": 3e-06,
+ "range": [
+ 256000.0,
+ 1000000.0
+ ]
+ }
+ ]
+ },
"dashscope/qwq-plus": {
"input_cost_per_token": 8e-07,
"litellm_provider": "dashscope",
@@ -11089,7 +11183,7 @@
"supports_tool_choice": true
},
"deepinfra/google/gemini-2.0-flash-001": {
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"max_tokens": 1000000,
"max_input_tokens": 1000000,
"max_output_tokens": 1000000,
@@ -11950,7 +12044,9 @@
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_creation_input_token_cost": 3.125e-07
},
"eu.anthropic.claude-haiku-4-5-20251001-v1:0": {
"cache_creation_input_token_cost": 1.375e-06,
@@ -11987,7 +12083,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
"input_cost_per_token": 3e-06,
@@ -12004,7 +12102,9 @@
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"eu.anthropic.claude-3-7-sonnet-20250219-v1:0": {
"input_cost_per_token": 3e-06,
@@ -12022,7 +12122,9 @@
"supports_reasoning": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"eu.anthropic.claude-3-haiku-20240307-v1:0": {
"input_cost_per_token": 2.5e-07,
@@ -12036,7 +12138,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_creation_input_token_cost": 3.125e-07
},
"eu.anthropic.claude-3-opus-20240229-v1:0": {
"input_cost_per_token": 1.5e-05,
@@ -12049,7 +12153,9 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 1.5e-06,
+ "cache_creation_input_token_cost": 1.875e-05
},
"eu.anthropic.claude-3-sonnet-20240229-v1:0": {
"input_cost_per_token": 3e-06,
@@ -12063,7 +12169,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"eu.anthropic.claude-opus-4-1-20250805-v1:0": {
"cache_creation_input_token_cost": 1.875e-05,
@@ -13590,7 +13698,7 @@
},
"gemini-2.0-flash": {
"cache_read_input_token_cost": 2.5e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7e-07,
"input_cost_per_token": 1e-07,
"litellm_provider": "vertex_ai-language-models",
@@ -13630,7 +13738,7 @@
},
"gemini-2.0-flash-001": {
"cache_read_input_token_cost": 3.75e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 1e-06,
"input_cost_per_token": 1.5e-07,
"litellm_provider": "vertex_ai-language-models",
@@ -13716,7 +13824,7 @@
},
"gemini-2.0-flash-lite": {
"cache_read_input_token_cost": 1.875e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7.5e-08,
"input_cost_per_token": 7.5e-08,
"litellm_provider": "vertex_ai-language-models",
@@ -13752,7 +13860,7 @@
},
"gemini-2.0-flash-lite-001": {
"cache_read_input_token_cost": 1.875e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7.5e-08,
"input_cost_per_token": 7.5e-08,
"litellm_provider": "vertex_ai-language-models",
@@ -14226,6 +14334,57 @@
"supports_vision": true,
"supports_web_search": true
},
+ "gemini-3.1-flash-lite-preview": {
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_read_input_token_cost_per_audio_token": 5e-08,
+ "input_cost_per_audio_token": 5e-07,
+ "input_cost_per_token": 2.5e-07,
+ "litellm_provider": "vertex_ai-language-models",
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_images_per_prompt": 3000,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_pdf_size_mb": 30,
+ "max_tokens": 65536,
+ "max_video_length": 1,
+ "max_videos_per_prompt": 10,
+ "mode": "chat",
+ "output_cost_per_reasoning_token": 1.5e-06,
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://ai.google.dev/gemini-api/docs/models",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/completions",
+ "/v1/batch"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": false,
+ "supports_code_execution": true,
+ "supports_file_search": true,
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_url_context": true,
+ "supports_video_input": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_native_streaming": true
+ },
"deep-research-pro-preview-12-2025": {
"input_cost_per_image": 0.0011,
"input_cost_per_token": 2e-06,
@@ -14669,6 +14828,7 @@
"supports_web_search": true
},
"gemini-3-pro-preview": {
+ "deprecation_date": "2026-03-26",
"cache_read_input_token_cost": 2e-07,
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
"cache_creation_input_token_cost_above_200k_tokens": 2.5e-07,
@@ -15805,7 +15965,7 @@
},
"gemini/gemini-2.0-flash": {
"cache_read_input_token_cost": 2.5e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7e-07,
"input_cost_per_token": 1e-07,
"litellm_provider": "gemini",
@@ -15846,7 +16006,7 @@
},
"gemini/gemini-2.0-flash-001": {
"cache_read_input_token_cost": 2.5e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7e-07,
"input_cost_per_token": 1e-07,
"litellm_provider": "gemini",
@@ -15934,7 +16094,7 @@
},
"gemini/gemini-2.0-flash-lite": {
"cache_read_input_token_cost": 1.875e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7.5e-08,
"input_cost_per_token": 7.5e-08,
"litellm_provider": "gemini",
@@ -15970,7 +16130,7 @@
"tpm": 4000000
},
"gemini/gemini-2.0-flash-lite-preview-02-05": {
- "deprecation_date": "2025-12-02",
+ "deprecation_date": "2025-12-09",
"cache_read_input_token_cost": 1.875e-08,
"input_cost_per_audio_token": 7.5e-08,
"input_cost_per_token": 7.5e-08,
@@ -16925,6 +17085,7 @@
"tpm": 800000
},
"gemini/gemini-3-pro-preview": {
+ "deprecation_date": "2026-03-09",
"cache_read_input_token_cost": 2e-07,
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
"input_cost_per_token": 2e-06,
@@ -16980,6 +17141,59 @@
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
"supports_service_tier": true
},
+ "gemini/gemini-3.1-flash-lite-preview": {
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_read_input_token_cost_per_audio_token": 5e-08,
+ "input_cost_per_audio_token": 5e-07,
+ "input_cost_per_token": 2.5e-07,
+ "litellm_provider": "gemini",
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_images_per_prompt": 3000,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_pdf_size_mb": 30,
+ "max_tokens": 65536,
+ "max_video_length": 1,
+ "max_videos_per_prompt": 10,
+ "mode": "chat",
+ "output_cost_per_reasoning_token": 1.5e-06,
+ "output_cost_per_token": 1.5e-06,
+ "rpm": 15,
+ "source": "https://ai.google.dev/gemini-api/docs/models",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/completions",
+ "/v1/batch"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": false,
+ "supports_code_execution": true,
+ "supports_file_search": true,
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_url_context": true,
+ "supports_video_input": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_native_streaming": true,
+ "tpm": 250000
+ },
"gemini/gemini-3-flash-preview": {
"cache_read_input_token_cost": 5e-08,
"input_cost_per_audio_token": 1e-06,
@@ -23112,6 +23326,21 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "mistral/magistral-medium-1-2-2509": {
+ "input_cost_per_token": 2e-06,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 40000,
+ "max_output_tokens": 40000,
+ "max_tokens": 40000,
+ "mode": "chat",
+ "output_cost_per_token": 5e-06,
+ "source": "https://mistral.ai/news/magistral",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true
+ },
"mistral/mistral-ocr-latest": {
"litellm_provider": "mistral",
"ocr_cost_per_page": 0.001,
@@ -23177,6 +23406,21 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
+ "mistral/magistral-small-1-2-2509": {
+ "input_cost_per_token": 5e-07,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 40000,
+ "max_output_tokens": 40000,
+ "max_tokens": 40000,
+ "mode": "chat",
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://mistral.ai/pricing#api-pricing",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true
+ },
"mistral/mistral-embed": {
"input_cost_per_token": 1e-07,
"litellm_provider": "mistral",
@@ -23238,24 +23482,41 @@
"supports_tool_choice": true
},
"mistral/mistral-large-latest": {
- "input_cost_per_token": 2e-06,
+ "input_cost_per_token": 5e-07,
"litellm_provider": "mistral",
- "max_input_tokens": 128000,
- "max_output_tokens": 128000,
- "max_tokens": 128000,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
"mode": "chat",
- "output_cost_per_token": 6e-06,
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://docs.mistral.ai/models/mistral-large-3-25-12",
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_response_schema": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_vision": true
},
"mistral/mistral-large-3": {
"input_cost_per_token": 5e-07,
"litellm_provider": "mistral",
- "max_input_tokens": 256000,
- "max_output_tokens": 8191,
- "max_tokens": 8191,
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://docs.mistral.ai/models/mistral-large-3-25-12",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "mistral/mistral-large-2512": {
+ "input_cost_per_token": 5e-07,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
"mode": "chat",
"output_cost_per_token": 1.5e-06,
"source": "https://docs.mistral.ai/models/mistral-large-3-25-12",
@@ -23306,14 +23567,30 @@
"input_cost_per_token": 4e-07,
"litellm_provider": "mistral",
"max_input_tokens": 131072,
- "max_output_tokens": 8191,
- "max_tokens": 8191,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
"mode": "chat",
"output_cost_per_token": 2e-06,
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_response_schema": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "mistral/mistral-medium-3-1-2508": {
+ "input_cost_per_token": 4e-07,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 2e-06,
+ "source": "https://mistral.ai/news/mistral-medium-3",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
},
"mistral/mistral-small": {
"input_cost_per_token": 1e-07,
@@ -23329,17 +23606,79 @@
"supports_tool_choice": true
},
"mistral/mistral-small-latest": {
+ "input_cost_per_token": 6e-08,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 1.8e-07,
+ "source": "https://mistral.ai/pricing",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "mistral/mistral-small-3-2-2506": {
+ "input_cost_per_token": 6e-08,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
+ "mode": "chat",
+ "output_cost_per_token": 1.8e-07,
+ "source": "https://mistral.ai/pricing",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "mistral/ministral-3-3b-2512": {
"input_cost_per_token": 1e-07,
"litellm_provider": "mistral",
- "max_input_tokens": 32000,
- "max_output_tokens": 8191,
- "max_tokens": 8191,
+ "max_input_tokens": 131072,
+ "max_output_tokens": 131072,
+ "max_tokens": 131072,
"mode": "chat",
- "output_cost_per_token": 3e-07,
+ "output_cost_per_token": 1e-07,
+ "source": "https://mistral.ai/pricing",
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_response_schema": true,
- "supports_tool_choice": true
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "mistral/ministral-3-8b-2512": {
+ "input_cost_per_token": 1.5e-07,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 1.5e-07,
+ "source": "https://mistral.ai/pricing",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
+ },
+ "mistral/ministral-3-14b-2512": {
+ "input_cost_per_token": 2e-07,
+ "litellm_provider": "mistral",
+ "max_input_tokens": 262144,
+ "max_output_tokens": 262144,
+ "max_tokens": 262144,
+ "mode": "chat",
+ "output_cost_per_token": 2e-07,
+ "source": "https://mistral.ai/pricing",
+ "supports_assistant_prefill": true,
+ "supports_function_calling": true,
+ "supports_response_schema": true,
+ "supports_tool_choice": true,
+ "supports_vision": true
},
"mistral/mistral-tiny": {
"input_cost_per_token": 2.5e-07,
@@ -25657,7 +25996,7 @@
"supports_tool_choice": true
},
"openrouter/google/gemini-2.0-flash-001": {
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7e-07,
"input_cost_per_token": 1e-07,
"litellm_provider": "openrouter",
@@ -29554,7 +29893,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
"cache_creation_input_token_cost": 3.75e-06,
@@ -29607,7 +29948,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_creation_input_token_cost": 3.125e-07
},
"us.anthropic.claude-3-opus-20240229-v1:0": {
"input_cost_per_token": 1.5e-05,
@@ -29620,7 +29963,9 @@
"supports_function_calling": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 1.5e-06,
+ "cache_creation_input_token_cost": 1.875e-05
},
"us.anthropic.claude-3-sonnet-20240229-v1:0": {
"input_cost_per_token": 3e-06,
@@ -29634,7 +29979,9 @@
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
- "supports_vision": true
+ "supports_vision": true,
+ "cache_read_input_token_cost": 3e-07,
+ "cache_creation_input_token_cost": 3.75e-06
},
"us.anthropic.claude-opus-4-1-20250805-v1:0": {
"cache_creation_input_token_cost": 1.875e-05,
@@ -30527,7 +30874,7 @@
"supports_tool_choice": true
},
"vercel_ai_gateway/google/gemini-2.0-flash": {
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_token": 1.5e-07,
"litellm_provider": "vercel_ai_gateway",
"max_input_tokens": 1048576,
@@ -30541,7 +30888,7 @@
"supports_response_schema": true
},
"vercel_ai_gateway/google/gemini-2.0-flash-lite": {
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_token": 7.5e-08,
"litellm_provider": "vercel_ai_gateway",
"max_input_tokens": 1048576,
@@ -32059,6 +32406,57 @@
"output_cost_per_token": 3e-06,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models"
},
+ "vertex_ai/gemini-3.1-flash-lite-preview": {
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_read_input_token_cost_per_audio_token": 5e-08,
+ "input_cost_per_audio_token": 5e-07,
+ "input_cost_per_token": 2.5e-07,
+ "litellm_provider": "vertex_ai-language-models",
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_images_per_prompt": 3000,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_pdf_size_mb": 30,
+ "max_tokens": 65536,
+ "max_video_length": 1,
+ "max_videos_per_prompt": 10,
+ "mode": "chat",
+ "output_cost_per_reasoning_token": 1.5e-06,
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/completions",
+ "/v1/batch"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": false,
+ "supports_code_execution": true,
+ "supports_file_search": true,
+ "supports_function_calling": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_url_context": true,
+ "supports_video_input": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "supports_native_streaming": true
+ },
"vertex_ai/deep-research-pro-preview-12-2025": {
"input_cost_per_image": 0.0011,
"input_cost_per_token": 2e-06,
@@ -37898,7 +38296,7 @@
},
"gemini/gemini-2.0-flash-lite-001": {
"cache_read_input_token_cost": 1.875e-08,
- "deprecation_date": "2026-03-31",
+ "deprecation_date": "2026-06-01",
"input_cost_per_audio_token": 7.5e-08,
"input_cost_per_token": 7.5e-08,
"litellm_provider": "gemini",
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 4934f11d456..5de764c5cec 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -14334,6 +14334,57 @@
"supports_vision": true,
"supports_web_search": true
},
+ "gemini-3.1-flash-lite-preview": {
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_read_input_token_cost_per_audio_token": 5e-08,
+ "input_cost_per_audio_token": 5e-07,
+ "input_cost_per_token": 2.5e-07,
+ "litellm_provider": "vertex_ai-language-models",
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_images_per_prompt": 3000,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_pdf_size_mb": 30,
+ "max_tokens": 65536,
+ "max_video_length": 1,
+ "max_videos_per_prompt": 10,
+ "mode": "chat",
+ "output_cost_per_reasoning_token": 1.5e-06,
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://ai.google.dev/gemini-api/docs/models",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/completions",
+ "/v1/batch"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": false,
+ "supports_code_execution": true,
+ "supports_file_search": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_url_context": true,
+ "supports_video_input": true,
+ "supports_vision": true,
+ "supports_web_search": true
+ },
"deep-research-pro-preview-12-2025": {
"input_cost_per_image": 0.0011,
"input_cost_per_token": 2e-06,
@@ -17090,6 +17141,59 @@
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
"supports_service_tier": true
},
+ "gemini/gemini-3.1-flash-lite-preview": {
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_read_input_token_cost_per_audio_token": 5e-08,
+ "input_cost_per_audio_token": 5e-07,
+ "input_cost_per_token": 2.5e-07,
+ "litellm_provider": "gemini",
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_images_per_prompt": 3000,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_pdf_size_mb": 30,
+ "max_tokens": 65536,
+ "max_video_length": 1,
+ "max_videos_per_prompt": 10,
+ "mode": "chat",
+ "output_cost_per_reasoning_token": 1.5e-06,
+ "output_cost_per_token": 1.5e-06,
+ "rpm": 15,
+ "source": "https://ai.google.dev/gemini-api/docs/models",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/completions",
+ "/v1/batch"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": false,
+ "supports_code_execution": true,
+ "supports_file_search": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_url_context": true,
+ "supports_video_input": true,
+ "supports_vision": true,
+ "supports_web_search": true,
+ "tpm": 250000
+ },
"gemini/gemini-3-flash-preview": {
"cache_read_input_token_cost": 5e-08,
"input_cost_per_audio_token": 1e-06,
@@ -32302,6 +32406,57 @@
"output_cost_per_token": 3e-06,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models"
},
+ "vertex_ai/gemini-3.1-flash-lite-preview": {
+ "cache_read_input_token_cost": 2.5e-08,
+ "cache_read_input_token_cost_per_audio_token": 5e-08,
+ "input_cost_per_audio_token": 5e-07,
+ "input_cost_per_token": 2.5e-07,
+ "litellm_provider": "vertex_ai-language-models",
+ "max_audio_length_hours": 8.4,
+ "max_audio_per_prompt": 1,
+ "max_images_per_prompt": 3000,
+ "max_input_tokens": 1048576,
+ "max_output_tokens": 65536,
+ "max_pdf_size_mb": 30,
+ "max_tokens": 65536,
+ "max_video_length": 1,
+ "max_videos_per_prompt": 10,
+ "mode": "chat",
+ "output_cost_per_reasoning_token": 1.5e-06,
+ "output_cost_per_token": 1.5e-06,
+ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models",
+ "supported_endpoints": [
+ "/v1/chat/completions",
+ "/v1/completions",
+ "/v1/batch"
+ ],
+ "supported_modalities": [
+ "text",
+ "image",
+ "audio",
+ "video"
+ ],
+ "supported_output_modalities": [
+ "text"
+ ],
+ "supports_audio_input": true,
+ "supports_audio_output": false,
+ "supports_code_execution": true,
+ "supports_file_search": true,
+ "supports_function_calling": true,
+ "supports_native_streaming": true,
+ "supports_parallel_function_calling": true,
+ "supports_pdf_input": true,
+ "supports_prompt_caching": true,
+ "supports_reasoning": true,
+ "supports_response_schema": true,
+ "supports_system_messages": true,
+ "supports_tool_choice": true,
+ "supports_url_context": true,
+ "supports_video_input": true,
+ "supports_vision": true,
+ "supports_web_search": true
+ },
"vertex_ai/deep-research-pro-preview-12-2025": {
"input_cost_per_image": 0.0011,
"input_cost_per_token": 2e-06,
diff --git a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
index 7e8848be301..2e033b6f068 100644
--- a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
+++ b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
@@ -33,8 +33,8 @@
) # Adds the parent directory to the system path
from litellm.litellm_core_utils.llm_cost_calc.utils import (
- _calculate_input_cost,
PromptTokensDetailsResult,
+ _calculate_input_cost,
calculate_cache_writing_cost,
generic_cost_per_token,
)
@@ -127,6 +127,52 @@ def test_reasoning_tokens_gemini():
)
+def test_reasoning_tokens_gemini_3_1_flash_lite():
+ """Test cost calculation for gemini-3.1-flash-lite-preview with reasoning tokens"""
+ model = "gemini-3.1-flash-lite-preview"
+ custom_llm_provider = "gemini"
+ os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+ litellm.model_cost = litellm.get_model_cost_map(url="")
+
+ usage = Usage(
+ completion_tokens=1000,
+ prompt_tokens=500,
+ total_tokens=1500,
+ completion_tokens_details=CompletionTokensDetailsWrapper(
+ accepted_prediction_tokens=None,
+ audio_tokens=None,
+ reasoning_tokens=400,
+ rejected_prediction_tokens=None,
+ text_tokens=600,
+ ),
+ prompt_tokens_details=PromptTokensDetailsWrapper(
+ audio_tokens=None, cached_tokens=None, text_tokens=500, image_tokens=None
+ ),
+ )
+ model_cost_map = litellm.model_cost[model]
+ prompt_cost, completion_cost = generic_cost_per_token(
+ model=model,
+ usage=usage,
+ custom_llm_provider=custom_llm_provider,
+ )
+
+ assert round(prompt_cost, 10) == round(
+ model_cost_map["input_cost_per_token"] * usage.prompt_tokens,
+ 10,
+ )
+ assert round(completion_cost, 10) == round(
+ (
+ model_cost_map["output_cost_per_token"]
+ * usage.completion_tokens_details.text_tokens
+ )
+ + (
+ model_cost_map["output_cost_per_reasoning_token"]
+ * usage.completion_tokens_details.reasoning_tokens
+ ),
+ 10,
+ )
+
+
def test_image_tokens_with_custom_pricing():
"""Test that image_tokens in completion are properly costed with output_cost_per_image_token."""
from unittest.mock import patch
diff --git a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
index 196bb00f40d..596897f7157 100644
--- a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
+++ b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
@@ -2453,8 +2453,8 @@ def test_gemini_3_image_models_no_thinking_config():
def test_gemini_3_text_models_get_thinking_config():
"""
- Test that Gemini 3 text models DO receive automatic thinkingConfig.
- This ensures we didn't break the existing behavior for non-image models.
+ Test that Gemini 3 text models do NOT receive automatic thinkingConfig
+ when no reasoning_effort or thinking param is provided.
"""
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
VertexGeminiConfig,
@@ -2462,7 +2462,7 @@ def test_gemini_3_text_models_get_thinking_config():
v = VertexGeminiConfig()
- # Test gemini-3-pro-preview (text model, should get thinking)
+ # Test gemini-3-pro-preview (text model, no explicit thinking params)
model = "gemini-3-pro-preview"
optional_params = {}
non_default_params = {}
@@ -2474,9 +2474,8 @@ def test_gemini_3_text_models_get_thinking_config():
drop_params=False,
)
- # Should have thinkingConfig automatically added
- assert "thinkingConfig" in result
- assert result["thinkingConfig"]["thinkingLevel"] == "low"
+ # Should NOT have thinkingConfig automatically added when user provides no reasoning_effort
+ assert "thinkingConfig" not in result
assert result["temperature"] == 1.0