Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
-- DropIndex
DROP INDEX "LiteLLM_PromptTable_prompt_id_key";
DROP INDEX IF EXISTS "LiteLLM_PromptTable_prompt_id_key";

-- AlterTable
ALTER TABLE "LiteLLM_PromptTable" ADD COLUMN "version" INTEGER NOT NULL DEFAULT 1;
ALTER TABLE "LiteLLM_PromptTable"
ADD COLUMN "version" INTEGER NOT NULL DEFAULT 1;

-- CreateIndex
CREATE INDEX "LiteLLM_PromptTable_prompt_id_idx" ON "LiteLLM_PromptTable"("prompt_id");
CREATE INDEX "LiteLLM_PromptTable_prompt_id_idx" ON "LiteLLM_PromptTable" ("prompt_id");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_PromptTable_prompt_id_version_key" ON "LiteLLM_PromptTable"("prompt_id", "version");

CREATE UNIQUE INDEX "LiteLLM_PromptTable_prompt_id_version_key" ON "LiteLLM_PromptTable" ("prompt_id", "version");
17 changes: 13 additions & 4 deletions litellm/llms/anthropic/chat/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,10 +290,19 @@ def _map_tool_choice(
elif tool_choice == "none":
_tool_choice = AnthropicMessagesToolChoice(type="none")
elif isinstance(tool_choice, dict):
_tool_name = tool_choice.get("function", {}).get("name")
_tool_choice = AnthropicMessagesToolChoice(type="tool")
if _tool_name is not None:
_tool_choice["name"] = _tool_name
if "type" in tool_choice and "function" not in tool_choice:
tool_type = tool_choice.get("type")
if tool_type == "auto":
_tool_choice = AnthropicMessagesToolChoice(type="auto")
elif tool_type == "required" or tool_type == "any":
_tool_choice = AnthropicMessagesToolChoice(type="any")
elif tool_type == "none":
_tool_choice = AnthropicMessagesToolChoice(type="none")
else:
_tool_name = tool_choice.get("function", {}).get("name")
if _tool_name is not None:
_tool_choice = AnthropicMessagesToolChoice(type="tool")
_tool_choice["name"] = _tool_name

if parallel_tool_use is not None:
# Anthropic uses 'disable_parallel_tool_use' flag to determine if parallel tool use is allowed
Expand Down
8 changes: 7 additions & 1 deletion litellm/llms/azure/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def is_model_gpt_5_model(cls, model: str) -> bool:
Accepts both explicit gpt-5 model names and the ``gpt5_series/`` prefix
used for manual routing.
"""
return "gpt-5" in model or "gpt5_series" in model
# gpt-5-chat* is a chat model and shouldn't go through GPT-5 reasoning restrictions.
return ("gpt-5" in model and "gpt-5-chat" not in model) or "gpt5_series" in model

def get_supported_openai_params(self, model: str) -> List[str]:
"""Get supported parameters for Azure OpenAI GPT-5 models.
Expand All @@ -37,6 +38,11 @@ def get_supported_openai_params(self, model: str) -> List[str]:
"""
params = OpenAIGPT5Config.get_supported_openai_params(self, model=model)

# Azure supports tool_choice for GPT-5 deployments, but the base GPT-5 config
# can drop it when the deployment name isn't in the OpenAI model registry.
if "tool_choice" not in params:
params.append("tool_choice")

# Only gpt-5.2 has been verified to support logprobs on Azure
if self.is_model_gpt_5_2_model(model):
azure_supported_params = ["logprobs", "top_logprobs"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,26 @@ def map_openai_params(
model: str,
drop_params: bool,
) -> dict:
return AnthropicConfig.map_openai_params(
# Force tool-based structured outputs for Bedrock Invoke
# (similar to VertexAI fix in #19201)
# Bedrock Invoke doesn't support output_format parameter
original_model = model
if "response_format" in non_default_params:
# Use a model name that forces tool-based approach
model = "claude-3-sonnet-20240229"

optional_params = AnthropicConfig.map_openai_params(
self,
non_default_params,
optional_params,
model,
drop_params,
)

# Restore original model name
model = original_model

return optional_params


def transform_request(
Expand Down Expand Up @@ -90,6 +103,8 @@ def transform_request(

_anthropic_request.pop("model", None)
_anthropic_request.pop("stream", None)
# Bedrock Invoke doesn't support output_format parameter
_anthropic_request.pop("output_format", None)
if "anthropic_version" not in _anthropic_request:
_anthropic_request["anthropic_version"] = self.anthropic_version

Expand Down Expand Up @@ -117,6 +132,26 @@ def transform_request(
if "opus-4" in model.lower() or "opus_4" in model.lower():
beta_set.add("tool-search-tool-2025-10-19")

# Filter out beta headers that Bedrock Invoke doesn't support
# AWS Bedrock only supports a specific whitelist of beta flags
# Reference: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages-request-response.html
BEDROCK_SUPPORTED_BETAS = {
"computer-use-2024-10-22", # Legacy computer use
"computer-use-2025-01-24", # Current computer use (Claude 3.7 Sonnet)
"token-efficient-tools-2025-02-19", # Tool use (Claude 3.7+ and Claude 4+)
"interleaved-thinking-2025-05-14", # Interleaved thinking (Claude 4+)
"output-128k-2025-02-19", # 128K output tokens (Claude 3.7 Sonnet)
"dev-full-thinking-2025-05-14", # Developer mode for raw thinking (Claude 4+)
"context-1m-2025-08-07", # 1 million tokens (Claude Sonnet 4)
"context-management-2025-06-27", # Context management (Claude Sonnet/Haiku 4.5)
"effort-2025-11-24", # Effort parameter (Claude Opus 4.5)
"tool-search-tool-2025-10-19", # Tool search (Claude Opus 4.5)
"tool-examples-2025-10-29", # Tool use examples (Claude Opus 4.5)
}

# Only keep beta headers that Bedrock supports
beta_set = {beta for beta in beta_set if beta in BEDROCK_SUPPORTED_BETAS}

if beta_set:
_anthropic_request["anthropic_beta"] = list(beta_set)

Expand Down
4 changes: 3 additions & 1 deletion litellm/llms/openai/chat/gpt_5_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ class OpenAIGPT5Config(OpenAIGPTConfig):

@classmethod
def is_model_gpt_5_model(cls, model: str) -> bool:
return "gpt-5" in model
# gpt-5-chat* behaves like a regular chat model (supports temperature, etc.)
# Don't route it through GPT-5 reasoning-specific parameter restrictions.
return "gpt-5" in model and "gpt-5-chat" not in model

@classmethod
def is_model_gpt_5_codex_model(cls, model: str) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1657,7 +1657,17 @@ def _calculate_usage( # noqa: PLR0915
## This is necessary because promptTokensDetails includes both cached and non-cached tokens
## See: https://github.com/BerriAI/litellm/issues/18750
if cached_text_tokens is not None and prompt_text_tokens is not None:
# Explicit caching: subtract cached tokens per modality from cacheTokensDetails
prompt_text_tokens = prompt_text_tokens - cached_text_tokens
elif (
cached_tokens is not None
and prompt_text_tokens is not None
and cached_text_tokens is None
):
# Implicit caching: only cachedContentTokenCount is provided (no cacheTokensDetails)
# Subtract from text tokens since implicit caching is primarily for text content
# See: https://github.com/BerriAI/litellm/issues/16341
prompt_text_tokens = prompt_text_tokens - cached_tokens
if cached_audio_tokens is not None and prompt_audio_tokens is not None:
prompt_audio_tokens = prompt_audio_tokens - cached_audio_tokens
if cached_image_tokens is not None and prompt_image_tokens is not None:
Expand Down
7 changes: 5 additions & 2 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7280,8 +7280,11 @@ def _get_encoding():
def __getattr__(name: str) -> Any:
"""Lazy import handler for main module"""
if name == "encoding":
# Lazy load encoding to avoid heavy tiktoken import at module load time
_encoding = tiktoken.get_encoding("cl100k_base")
# Use _get_default_encoding which properly sets TIKTOKEN_CACHE_DIR
# before loading tiktoken, ensuring the local cache is used
# instead of downloading from the internet
from litellm._lazy_imports import _get_default_encoding
_encoding = _get_default_encoding()
# Cache it in the module's __dict__ for subsequent accesses
import sys

Expand Down
4 changes: 2 additions & 2 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -3130,7 +3130,7 @@
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": false,
"supports_tool_choice": true,
"supports_vision": true
},
"azure/gpt-5-chat-latest": {
Expand Down Expand Up @@ -3162,7 +3162,7 @@
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_tool_choice": false,
"supports_tool_choice": true,
"supports_vision": true
},
"azure/gpt-5-codex": {
Expand Down
12 changes: 8 additions & 4 deletions litellm/proxy/common_request_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,11 +650,15 @@ async def base_process_llm_request(
)

tasks = []
# Start the moderation check (during_call_hook) as early as possible
# This gives it a head start to mask/validate input while the proxy handles routing
tasks.append(
proxy_logging_obj.during_call_hook(
data=self.data,
user_api_key_dict=user_api_key_dict,
call_type=route_type, # type: ignore
asyncio.create_task(
proxy_logging_obj.during_call_hook(
data=self.data,
user_api_key_dict=user_api_key_dict,
call_type=route_type, # type: ignore
)
)
)

Expand Down
Loading
Loading