diff --git a/Dockerfile b/Dockerfile index 2c54e2dec28..2987a44b394 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,8 +46,8 @@ FROM $LITELLM_RUNTIME_IMAGE AS runtime # Ensure runtime stage runs as root USER root -# Install runtime dependencies -RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip +# Install runtime dependencies (libsndfile needed for audio processing on ARM64) +RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile WORKDIR /app # Copy the current directory contents into the container at /app diff --git a/litellm/__init__.py b/litellm/__init__.py index f5db57f76fd..9e88d3282ff 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1469,6 +1469,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None: from .llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config as AzureOpenAIGPT5Config from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig + from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig diff --git a/litellm/_lazy_imports_registry.py b/litellm/_lazy_imports_registry.py index a92c6f95b0e..0e52e9a59eb 100644 --- a/litellm/_lazy_imports_registry.py +++ b/litellm/_lazy_imports_registry.py @@ -20,25 +20,53 @@ # Utils names that support lazy loading via _lazy_import_utils UTILS_NAMES = ( - "exception_type", "get_optional_params", "get_response_string", "token_counter", - "create_pretrained_tokenizer", "create_tokenizer", "supports_function_calling", - 
"supports_web_search", "supports_url_context", "supports_response_schema", - "supports_parallel_function_calling", "supports_vision", "supports_audio_input", - "supports_audio_output", "supports_system_messages", "supports_reasoning", - "get_litellm_params", "acreate", "get_max_tokens", "get_model_info", - "register_prompt_template", "validate_environment", "check_valid_key", - "register_model", "encode", "decode", "_calculate_retry_after", "_should_retry", - "get_supported_openai_params", "get_api_base", "get_first_chars_messages", - "ModelResponse", "ModelResponseStream", "EmbeddingResponse", "ImageResponse", - "TranscriptionResponse", "TextCompletionResponse", "get_provider_fields", - "ModelResponseListIterator", "get_valid_models", "timeout", - "get_llm_provider", "remove_index_from_tool_calls", + "exception_type", + "get_optional_params", + "get_response_string", + "token_counter", + "create_pretrained_tokenizer", + "create_tokenizer", + "supports_function_calling", + "supports_web_search", + "supports_url_context", + "supports_response_schema", + "supports_parallel_function_calling", + "supports_vision", + "supports_audio_input", + "supports_audio_output", + "supports_system_messages", + "supports_reasoning", + "get_litellm_params", + "acreate", + "get_max_tokens", + "get_model_info", + "register_prompt_template", + "validate_environment", + "check_valid_key", + "register_model", + "encode", + "decode", + "_calculate_retry_after", + "_should_retry", + "get_supported_openai_params", + "get_api_base", + "get_first_chars_messages", + "ModelResponse", + "ModelResponseStream", + "EmbeddingResponse", + "ImageResponse", + "TranscriptionResponse", + "TextCompletionResponse", + "get_provider_fields", + "ModelResponseListIterator", + "get_valid_models", + "timeout", + "get_llm_provider", + "remove_index_from_tool_calls", ) # Token counter names that support lazy loading via _lazy_import_token_counter -TOKEN_COUNTER_NAMES = ( - "get_modified_max_tokens", -) 
+TOKEN_COUNTER_NAMES = ("get_modified_max_tokens",) # LLM client cache names that support lazy loading via _lazy_import_llm_client_cache LLM_CLIENT_CACHE_NAMES = ( @@ -47,9 +75,7 @@ ) # Bedrock type names that support lazy loading via _lazy_import_bedrock_types -BEDROCK_TYPES_NAMES = ( - "COHERE_EMBEDDING_INPUT_TYPES", -) +BEDROCK_TYPES_NAMES = ("COHERE_EMBEDDING_INPUT_TYPES",) # Common types from litellm.types.utils that support lazy loading via # _lazy_import_types_utils @@ -236,6 +262,7 @@ "AzureOpenAIGPT5Config", "AzureOpenAITextConfig", "HostedVLLMChatConfig", + "HostedVLLMEmbeddingConfig", # Alias for backwards compatibility "VolcEngineConfig", # Alias for VolcEngineChatConfig "LlamafileChatConfig", @@ -388,7 +415,10 @@ "supports_web_search": (".utils", "supports_web_search"), "supports_url_context": (".utils", "supports_url_context"), "supports_response_schema": (".utils", "supports_response_schema"), - "supports_parallel_function_calling": (".utils", "supports_parallel_function_calling"), + "supports_parallel_function_calling": ( + ".utils", + "supports_parallel_function_calling", + ), "supports_vision": (".utils", "supports_vision"), "supports_audio_input": (".utils", "supports_audio_input"), "supports_audio_output": (".utils", "supports_audio_output"), @@ -419,8 +449,14 @@ "ModelResponseListIterator": (".utils", "ModelResponseListIterator"), "get_valid_models": (".utils", "get_valid_models"), "timeout": (".timeout", "timeout"), - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "remove_index_from_tool_calls": ( + "litellm.litellm_core_utils.core_helpers", + "remove_index_from_tool_calls", + ), } _COST_CALCULATOR_IMPORT_MAP = { @@ -442,11 +478,17 @@ } _TOKEN_COUNTER_IMPORT_MAP = { - 
"get_modified_max_tokens": ("litellm.litellm_core_utils.token_counter", "get_modified_max_tokens"), + "get_modified_max_tokens": ( + "litellm.litellm_core_utils.token_counter", + "get_modified_max_tokens", + ), } _BEDROCK_TYPES_IMPORT_MAP = { - "COHERE_EMBEDDING_INPUT_TYPES": ("litellm.types.llms.bedrock", "COHERE_EMBEDDING_INPUT_TYPES"), + "COHERE_EMBEDDING_INPUT_TYPES": ( + "litellm.types.llms.bedrock", + "COHERE_EMBEDDING_INPUT_TYPES", + ), } _CACHING_IMPORT_MAP = { @@ -458,294 +500,868 @@ _LITELLM_LOGGING_IMPORT_MAP = { "Logging": ("litellm.litellm_core_utils.litellm_logging", "Logging"), - "modify_integration": ("litellm.litellm_core_utils.litellm_logging", "modify_integration"), + "modify_integration": ( + "litellm.litellm_core_utils.litellm_logging", + "modify_integration", + ), } _DOTPROMPT_IMPORT_MAP = { - "global_prompt_manager": ("litellm.integrations.dotprompt", "global_prompt_manager"), - "global_prompt_directory": ("litellm.integrations.dotprompt", "global_prompt_directory"), - "set_global_prompt_directory": ("litellm.integrations.dotprompt", "set_global_prompt_directory"), + "global_prompt_manager": ( + "litellm.integrations.dotprompt", + "global_prompt_manager", + ), + "global_prompt_directory": ( + "litellm.integrations.dotprompt", + "global_prompt_directory", + ), + "set_global_prompt_directory": ( + "litellm.integrations.dotprompt", + "set_global_prompt_directory", + ), } _TYPES_IMPORT_MAP = { "GuardrailItem": ("litellm.types.guardrails", "GuardrailItem"), - "DefaultTeamSSOParams": ("litellm.types.proxy.management_endpoints.ui_sso", "DefaultTeamSSOParams"), - "LiteLLM_UpperboundKeyGenerateParams": ("litellm.types.proxy.management_endpoints.ui_sso", "LiteLLM_UpperboundKeyGenerateParams"), - "KeyManagementSystem": ("litellm.types.secret_managers.main", "KeyManagementSystem"), - "PriorityReservationSettings": ("litellm.types.utils", "PriorityReservationSettings"), + "DefaultTeamSSOParams": ( + "litellm.types.proxy.management_endpoints.ui_sso", + 
"DefaultTeamSSOParams", + ), + "LiteLLM_UpperboundKeyGenerateParams": ( + "litellm.types.proxy.management_endpoints.ui_sso", + "LiteLLM_UpperboundKeyGenerateParams", + ), + "KeyManagementSystem": ( + "litellm.types.secret_managers.main", + "KeyManagementSystem", + ), + "PriorityReservationSettings": ( + "litellm.types.utils", + "PriorityReservationSettings", + ), "CustomLogger": ("litellm.integrations.custom_logger", "CustomLogger"), - "LoggingCallbackManager": ("litellm.litellm_core_utils.logging_callback_manager", "LoggingCallbackManager"), - "DatadogLLMObsInitParams": ("litellm.types.integrations.datadog_llm_obs", "DatadogLLMObsInitParams"), + "LoggingCallbackManager": ( + "litellm.litellm_core_utils.logging_callback_manager", + "LoggingCallbackManager", + ), + "DatadogLLMObsInitParams": ( + "litellm.types.integrations.datadog_llm_obs", + "DatadogLLMObsInitParams", + ), } _LLM_PROVIDER_LOGIC_IMPORT_MAP = { - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "remove_index_from_tool_calls": ( + "litellm.litellm_core_utils.core_helpers", + "remove_index_from_tool_calls", + ), } _LLM_CONFIGS_IMPORT_MAP = { - "AmazonConverseConfig": (".llms.bedrock.chat.converse_transformation", "AmazonConverseConfig"), + "AmazonConverseConfig": ( + ".llms.bedrock.chat.converse_transformation", + "AmazonConverseConfig", + ), "OpenAILikeChatConfig": (".llms.openai_like.chat.handler", "OpenAILikeChatConfig"), - "GaladrielChatConfig": (".llms.galadriel.chat.transformation", "GaladrielChatConfig"), + "GaladrielChatConfig": ( + ".llms.galadriel.chat.transformation", + "GaladrielChatConfig", + ), "GithubChatConfig": (".llms.github.chat.transformation", "GithubChatConfig"), - "AzureAnthropicConfig": 
(".llms.azure_ai.anthropic.transformation", "AzureAnthropicConfig"), + "AzureAnthropicConfig": ( + ".llms.azure_ai.anthropic.transformation", + "AzureAnthropicConfig", + ), "BytezChatConfig": (".llms.bytez.chat.transformation", "BytezChatConfig"), - "CompactifAIChatConfig": (".llms.compactifai.chat.transformation", "CompactifAIChatConfig"), + "CompactifAIChatConfig": ( + ".llms.compactifai.chat.transformation", + "CompactifAIChatConfig", + ), "EmpowerChatConfig": (".llms.empower.chat.transformation", "EmpowerChatConfig"), "MinimaxChatConfig": (".llms.minimax.chat.transformation", "MinimaxChatConfig"), - "AiohttpOpenAIChatConfig": (".llms.aiohttp_openai.chat.transformation", "AiohttpOpenAIChatConfig"), - "HuggingFaceChatConfig": (".llms.huggingface.chat.transformation", "HuggingFaceChatConfig"), - "HuggingFaceEmbeddingConfig": (".llms.huggingface.embedding.transformation", "HuggingFaceEmbeddingConfig"), + "AiohttpOpenAIChatConfig": ( + ".llms.aiohttp_openai.chat.transformation", + "AiohttpOpenAIChatConfig", + ), + "HuggingFaceChatConfig": ( + ".llms.huggingface.chat.transformation", + "HuggingFaceChatConfig", + ), + "HuggingFaceEmbeddingConfig": ( + ".llms.huggingface.embedding.transformation", + "HuggingFaceEmbeddingConfig", + ), "OobaboogaConfig": (".llms.oobabooga.chat.transformation", "OobaboogaConfig"), "MaritalkConfig": (".llms.maritalk", "MaritalkConfig"), "OpenrouterConfig": (".llms.openrouter.chat.transformation", "OpenrouterConfig"), "DataRobotConfig": (".llms.datarobot.chat.transformation", "DataRobotConfig"), "AnthropicConfig": (".llms.anthropic.chat.transformation", "AnthropicConfig"), - "AnthropicTextConfig": (".llms.anthropic.completion.transformation", "AnthropicTextConfig"), + "AnthropicTextConfig": ( + ".llms.anthropic.completion.transformation", + "AnthropicTextConfig", + ), "GroqSTTConfig": (".llms.groq.stt.transformation", "GroqSTTConfig"), "TritonConfig": (".llms.triton.completion.transformation", "TritonConfig"), - "TritonGenerateConfig": 
(".llms.triton.completion.transformation", "TritonGenerateConfig"), - "TritonInferConfig": (".llms.triton.completion.transformation", "TritonInferConfig"), - "TritonEmbeddingConfig": (".llms.triton.embedding.transformation", "TritonEmbeddingConfig"), - "HuggingFaceRerankConfig": (".llms.huggingface.rerank.transformation", "HuggingFaceRerankConfig"), + "TritonGenerateConfig": ( + ".llms.triton.completion.transformation", + "TritonGenerateConfig", + ), + "TritonInferConfig": ( + ".llms.triton.completion.transformation", + "TritonInferConfig", + ), + "TritonEmbeddingConfig": ( + ".llms.triton.embedding.transformation", + "TritonEmbeddingConfig", + ), + "HuggingFaceRerankConfig": ( + ".llms.huggingface.rerank.transformation", + "HuggingFaceRerankConfig", + ), "DatabricksConfig": (".llms.databricks.chat.transformation", "DatabricksConfig"), - "DatabricksEmbeddingConfig": (".llms.databricks.embed.transformation", "DatabricksEmbeddingConfig"), + "DatabricksEmbeddingConfig": ( + ".llms.databricks.embed.transformation", + "DatabricksEmbeddingConfig", + ), "PredibaseConfig": (".llms.predibase.chat.transformation", "PredibaseConfig"), "ReplicateConfig": (".llms.replicate.chat.transformation", "ReplicateConfig"), "SnowflakeConfig": (".llms.snowflake.chat.transformation", "SnowflakeConfig"), "CohereRerankConfig": (".llms.cohere.rerank.transformation", "CohereRerankConfig"), - "CohereRerankV2Config": (".llms.cohere.rerank_v2.transformation", "CohereRerankV2Config"), - "AzureAIRerankConfig": (".llms.azure_ai.rerank.transformation", "AzureAIRerankConfig"), - "InfinityRerankConfig": (".llms.infinity.rerank.transformation", "InfinityRerankConfig"), + "CohereRerankV2Config": ( + ".llms.cohere.rerank_v2.transformation", + "CohereRerankV2Config", + ), + "AzureAIRerankConfig": ( + ".llms.azure_ai.rerank.transformation", + "AzureAIRerankConfig", + ), + "InfinityRerankConfig": ( + ".llms.infinity.rerank.transformation", + "InfinityRerankConfig", + ), "JinaAIRerankConfig": 
(".llms.jina_ai.rerank.transformation", "JinaAIRerankConfig"), - "DeepinfraRerankConfig": (".llms.deepinfra.rerank.transformation", "DeepinfraRerankConfig"), - "HostedVLLMRerankConfig": (".llms.hosted_vllm.rerank.transformation", "HostedVLLMRerankConfig"), - "NvidiaNimRerankConfig": (".llms.nvidia_nim.rerank.transformation", "NvidiaNimRerankConfig"), - "NvidiaNimRankingConfig": (".llms.nvidia_nim.rerank.ranking_transformation", "NvidiaNimRankingConfig"), - "VertexAIRerankConfig": (".llms.vertex_ai.rerank.transformation", "VertexAIRerankConfig"), - "FireworksAIRerankConfig": (".llms.fireworks_ai.rerank.transformation", "FireworksAIRerankConfig"), + "DeepinfraRerankConfig": ( + ".llms.deepinfra.rerank.transformation", + "DeepinfraRerankConfig", + ), + "HostedVLLMRerankConfig": ( + ".llms.hosted_vllm.rerank.transformation", + "HostedVLLMRerankConfig", + ), + "NvidiaNimRerankConfig": ( + ".llms.nvidia_nim.rerank.transformation", + "NvidiaNimRerankConfig", + ), + "NvidiaNimRankingConfig": ( + ".llms.nvidia_nim.rerank.ranking_transformation", + "NvidiaNimRankingConfig", + ), + "VertexAIRerankConfig": ( + ".llms.vertex_ai.rerank.transformation", + "VertexAIRerankConfig", + ), + "FireworksAIRerankConfig": ( + ".llms.fireworks_ai.rerank.transformation", + "FireworksAIRerankConfig", + ), "VoyageRerankConfig": (".llms.voyage.rerank.transformation", "VoyageRerankConfig"), "ClarifaiConfig": (".llms.clarifai.chat.transformation", "ClarifaiConfig"), "AI21ChatConfig": (".llms.ai21.chat.transformation", "AI21ChatConfig"), "LlamaAPIConfig": (".llms.meta_llama.chat.transformation", "LlamaAPIConfig"), - "TogetherAITextCompletionConfig": (".llms.together_ai.completion.transformation", "TogetherAITextCompletionConfig"), - "CloudflareChatConfig": (".llms.cloudflare.chat.transformation", "CloudflareChatConfig"), + "TogetherAITextCompletionConfig": ( + ".llms.together_ai.completion.transformation", + "TogetherAITextCompletionConfig", + ), + "CloudflareChatConfig": ( + 
".llms.cloudflare.chat.transformation", + "CloudflareChatConfig", + ), "NovitaConfig": (".llms.novita.chat.transformation", "NovitaConfig"), "PetalsConfig": (".llms.petals.completion.transformation", "PetalsConfig"), "OllamaChatConfig": (".llms.ollama.chat.transformation", "OllamaChatConfig"), "OllamaConfig": (".llms.ollama.completion.transformation", "OllamaConfig"), "SagemakerConfig": (".llms.sagemaker.completion.transformation", "SagemakerConfig"), - "SagemakerChatConfig": (".llms.sagemaker.chat.transformation", "SagemakerChatConfig"), + "SagemakerChatConfig": ( + ".llms.sagemaker.chat.transformation", + "SagemakerChatConfig", + ), "CohereChatConfig": (".llms.cohere.chat.transformation", "CohereChatConfig"), - "AnthropicMessagesConfig": (".llms.anthropic.experimental_pass_through.messages.transformation", "AnthropicMessagesConfig"), - "AmazonAnthropicClaudeMessagesConfig": (".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeMessagesConfig"), + "AnthropicMessagesConfig": ( + ".llms.anthropic.experimental_pass_through.messages.transformation", + "AnthropicMessagesConfig", + ), + "AmazonAnthropicClaudeMessagesConfig": ( + ".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", + "AmazonAnthropicClaudeMessagesConfig", + ), "TogetherAIConfig": (".llms.together_ai.chat", "TogetherAIConfig"), "NLPCloudConfig": (".llms.nlp_cloud.chat.handler", "NLPCloudConfig"), - "VertexGeminiConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"), - "GoogleAIStudioGeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"), - "VertexAIAnthropicConfig": (".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", "VertexAIAnthropicConfig"), - "VertexAILlama3Config": (".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", "VertexAILlama3Config"), - "VertexAIAi21Config": 
(".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", "VertexAIAi21Config"), - "AmazonCohereChatConfig": (".llms.bedrock.chat.invoke_handler", "AmazonCohereChatConfig"), - "AmazonBedrockGlobalConfig": (".llms.bedrock.common_utils", "AmazonBedrockGlobalConfig"), - "AmazonAI21Config": (".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", "AmazonAI21Config"), - "AmazonInvokeNovaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", "AmazonInvokeNovaConfig"), - "AmazonQwen2Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", "AmazonQwen2Config"), - "AmazonQwen3Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", "AmazonQwen3Config"), + "VertexGeminiConfig": ( + ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", + "VertexGeminiConfig", + ), + "GoogleAIStudioGeminiConfig": ( + ".llms.gemini.chat.transformation", + "GoogleAIStudioGeminiConfig", + ), + "VertexAIAnthropicConfig": ( + ".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", + "VertexAIAnthropicConfig", + ), + "VertexAILlama3Config": ( + ".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", + "VertexAILlama3Config", + ), + "VertexAIAi21Config": ( + ".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", + "VertexAIAi21Config", + ), + "AmazonCohereChatConfig": ( + ".llms.bedrock.chat.invoke_handler", + "AmazonCohereChatConfig", + ), + "AmazonBedrockGlobalConfig": ( + ".llms.bedrock.common_utils", + "AmazonBedrockGlobalConfig", + ), + "AmazonAI21Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", + "AmazonAI21Config", + ), + "AmazonInvokeNovaConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", + "AmazonInvokeNovaConfig", + ), + "AmazonQwen2Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", + "AmazonQwen2Config", + ), + "AmazonQwen3Config": ( + 
".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", + "AmazonQwen3Config", + ), # Aliases for backwards compatibility - "VertexAIConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"), # Alias - "GeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"), # Alias - "AmazonAnthropicConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", "AmazonAnthropicConfig"), - "AmazonAnthropicClaudeConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeConfig"), - "AmazonCohereConfig": (".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", "AmazonCohereConfig"), - "AmazonLlamaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", "AmazonLlamaConfig"), - "AmazonDeepSeekR1Config": (".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", "AmazonDeepSeekR1Config"), - "AmazonMistralConfig": (".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", "AmazonMistralConfig"), - "AmazonMoonshotConfig": (".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", "AmazonMoonshotConfig"), - "AmazonTitanConfig": (".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", "AmazonTitanConfig"), - "AmazonTwelveLabsPegasusConfig": (".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", "AmazonTwelveLabsPegasusConfig"), - "AmazonInvokeConfig": (".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", "AmazonInvokeConfig"), - "AmazonBedrockOpenAIConfig": (".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", "AmazonBedrockOpenAIConfig"), - "AmazonStabilityConfig": (".llms.bedrock.image_generation.amazon_stability1_transformation", "AmazonStabilityConfig"), - "AmazonStability3Config": (".llms.bedrock.image_generation.amazon_stability3_transformation", 
"AmazonStability3Config"), - "AmazonNovaCanvasConfig": (".llms.bedrock.image_generation.amazon_nova_canvas_transformation", "AmazonNovaCanvasConfig"), - "AmazonTitanG1Config": (".llms.bedrock.embed.amazon_titan_g1_transformation", "AmazonTitanG1Config"), - "AmazonTitanMultimodalEmbeddingG1Config": (".llms.bedrock.embed.amazon_titan_multimodal_transformation", "AmazonTitanMultimodalEmbeddingG1Config"), + "VertexAIConfig": ( + ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", + "VertexGeminiConfig", + ), # Alias + "GeminiConfig": ( + ".llms.gemini.chat.transformation", + "GoogleAIStudioGeminiConfig", + ), # Alias + "AmazonAnthropicConfig": ( + ".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", + "AmazonAnthropicConfig", + ), + "AmazonAnthropicClaudeConfig": ( + ".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", + "AmazonAnthropicClaudeConfig", + ), + "AmazonCohereConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", + "AmazonCohereConfig", + ), + "AmazonLlamaConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", + "AmazonLlamaConfig", + ), + "AmazonDeepSeekR1Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", + "AmazonDeepSeekR1Config", + ), + "AmazonMistralConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", + "AmazonMistralConfig", + ), + "AmazonMoonshotConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", + "AmazonMoonshotConfig", + ), + "AmazonTitanConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", + "AmazonTitanConfig", + ), + "AmazonTwelveLabsPegasusConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", + "AmazonTwelveLabsPegasusConfig", + ), + "AmazonInvokeConfig": ( + ".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", + 
"AmazonInvokeConfig", + ), + "AmazonBedrockOpenAIConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", + "AmazonBedrockOpenAIConfig", + ), + "AmazonStabilityConfig": ( + ".llms.bedrock.image_generation.amazon_stability1_transformation", + "AmazonStabilityConfig", + ), + "AmazonStability3Config": ( + ".llms.bedrock.image_generation.amazon_stability3_transformation", + "AmazonStability3Config", + ), + "AmazonNovaCanvasConfig": ( + ".llms.bedrock.image_generation.amazon_nova_canvas_transformation", + "AmazonNovaCanvasConfig", + ), + "AmazonTitanG1Config": ( + ".llms.bedrock.embed.amazon_titan_g1_transformation", + "AmazonTitanG1Config", + ), + "AmazonTitanMultimodalEmbeddingG1Config": ( + ".llms.bedrock.embed.amazon_titan_multimodal_transformation", + "AmazonTitanMultimodalEmbeddingG1Config", + ), "CohereV2ChatConfig": (".llms.cohere.chat.v2_transformation", "CohereV2ChatConfig"), - "BedrockCohereEmbeddingConfig": (".llms.bedrock.embed.cohere_transformation", "BedrockCohereEmbeddingConfig"), - "TwelveLabsMarengoEmbeddingConfig": (".llms.bedrock.embed.twelvelabs_marengo_transformation", "TwelveLabsMarengoEmbeddingConfig"), - "AmazonNovaEmbeddingConfig": (".llms.bedrock.embed.amazon_nova_transformation", "AmazonNovaEmbeddingConfig"), + "BedrockCohereEmbeddingConfig": ( + ".llms.bedrock.embed.cohere_transformation", + "BedrockCohereEmbeddingConfig", + ), + "TwelveLabsMarengoEmbeddingConfig": ( + ".llms.bedrock.embed.twelvelabs_marengo_transformation", + "TwelveLabsMarengoEmbeddingConfig", + ), + "AmazonNovaEmbeddingConfig": ( + ".llms.bedrock.embed.amazon_nova_transformation", + "AmazonNovaEmbeddingConfig", + ), "OpenAIConfig": (".llms.openai.openai", "OpenAIConfig"), "MistralEmbeddingConfig": (".llms.openai.openai", "MistralEmbeddingConfig"), - "OpenAIImageVariationConfig": (".llms.openai.image_variations.transformation", "OpenAIImageVariationConfig"), + "OpenAIImageVariationConfig": ( + ".llms.openai.image_variations.transformation", + 
"OpenAIImageVariationConfig", + ), "DeepInfraConfig": (".llms.deepinfra.chat.transformation", "DeepInfraConfig"), - "DeepgramAudioTranscriptionConfig": (".llms.deepgram.audio_transcription.transformation", "DeepgramAudioTranscriptionConfig"), - "TopazImageVariationConfig": (".llms.topaz.image_variations.transformation", "TopazImageVariationConfig"), - "OpenAITextCompletionConfig": ("litellm.llms.openai.completion.transformation", "OpenAITextCompletionConfig"), + "DeepgramAudioTranscriptionConfig": ( + ".llms.deepgram.audio_transcription.transformation", + "DeepgramAudioTranscriptionConfig", + ), + "TopazImageVariationConfig": ( + ".llms.topaz.image_variations.transformation", + "TopazImageVariationConfig", + ), + "OpenAITextCompletionConfig": ( + "litellm.llms.openai.completion.transformation", + "OpenAITextCompletionConfig", + ), "GroqChatConfig": (".llms.groq.chat.transformation", "GroqChatConfig"), - "GenAIHubOrchestrationConfig": (".llms.sap.chat.transformation", "GenAIHubOrchestrationConfig"), - "VoyageEmbeddingConfig": (".llms.voyage.embedding.transformation", "VoyageEmbeddingConfig"), - "VoyageContextualEmbeddingConfig": (".llms.voyage.embedding.transformation_contextual", "VoyageContextualEmbeddingConfig"), - "InfinityEmbeddingConfig": (".llms.infinity.embedding.transformation", "InfinityEmbeddingConfig"), - "AzureAIStudioConfig": (".llms.azure_ai.chat.transformation", "AzureAIStudioConfig"), + "GenAIHubOrchestrationConfig": ( + ".llms.sap.chat.transformation", + "GenAIHubOrchestrationConfig", + ), + "VoyageEmbeddingConfig": ( + ".llms.voyage.embedding.transformation", + "VoyageEmbeddingConfig", + ), + "VoyageContextualEmbeddingConfig": ( + ".llms.voyage.embedding.transformation_contextual", + "VoyageContextualEmbeddingConfig", + ), + "InfinityEmbeddingConfig": ( + ".llms.infinity.embedding.transformation", + "InfinityEmbeddingConfig", + ), + "AzureAIStudioConfig": ( + ".llms.azure_ai.chat.transformation", + "AzureAIStudioConfig", + ), "MistralConfig": 
(".llms.mistral.chat.transformation", "MistralConfig"), - "OpenAIResponsesAPIConfig": (".llms.openai.responses.transformation", "OpenAIResponsesAPIConfig"), - "AzureOpenAIResponsesAPIConfig": (".llms.azure.responses.transformation", "AzureOpenAIResponsesAPIConfig"), - "AzureOpenAIOSeriesResponsesAPIConfig": (".llms.azure.responses.o_series_transformation", "AzureOpenAIOSeriesResponsesAPIConfig"), - "XAIResponsesAPIConfig": (".llms.xai.responses.transformation", "XAIResponsesAPIConfig"), - "LiteLLMProxyResponsesAPIConfig": (".llms.litellm_proxy.responses.transformation", "LiteLLMProxyResponsesAPIConfig"), - "VolcEngineResponsesAPIConfig": (".llms.volcengine.responses.transformation", "VolcEngineResponsesAPIConfig"), - "ManusResponsesAPIConfig": (".llms.manus.responses.transformation", "ManusResponsesAPIConfig"), - "GoogleAIStudioInteractionsConfig": (".llms.gemini.interactions.transformation", "GoogleAIStudioInteractionsConfig"), - "OpenAIOSeriesConfig": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"), - "AnthropicSkillsConfig": (".llms.anthropic.skills.transformation", "AnthropicSkillsConfig"), - "BaseSkillsAPIConfig": (".llms.base_llm.skills.transformation", "BaseSkillsAPIConfig"), + "OpenAIResponsesAPIConfig": ( + ".llms.openai.responses.transformation", + "OpenAIResponsesAPIConfig", + ), + "AzureOpenAIResponsesAPIConfig": ( + ".llms.azure.responses.transformation", + "AzureOpenAIResponsesAPIConfig", + ), + "AzureOpenAIOSeriesResponsesAPIConfig": ( + ".llms.azure.responses.o_series_transformation", + "AzureOpenAIOSeriesResponsesAPIConfig", + ), + "XAIResponsesAPIConfig": ( + ".llms.xai.responses.transformation", + "XAIResponsesAPIConfig", + ), + "LiteLLMProxyResponsesAPIConfig": ( + ".llms.litellm_proxy.responses.transformation", + "LiteLLMProxyResponsesAPIConfig", + ), + "VolcEngineResponsesAPIConfig": ( + ".llms.volcengine.responses.transformation", + "VolcEngineResponsesAPIConfig", + ), + "ManusResponsesAPIConfig": ( + 
".llms.manus.responses.transformation", + "ManusResponsesAPIConfig", + ), + "GoogleAIStudioInteractionsConfig": ( + ".llms.gemini.interactions.transformation", + "GoogleAIStudioInteractionsConfig", + ), + "OpenAIOSeriesConfig": ( + ".llms.openai.chat.o_series_transformation", + "OpenAIOSeriesConfig", + ), + "AnthropicSkillsConfig": ( + ".llms.anthropic.skills.transformation", + "AnthropicSkillsConfig", + ), + "BaseSkillsAPIConfig": ( + ".llms.base_llm.skills.transformation", + "BaseSkillsAPIConfig", + ), "GradientAIConfig": (".llms.gradient_ai.chat.transformation", "GradientAIConfig"), # Alias for backwards compatibility - "OpenAIO1Config": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"), # Alias + "OpenAIO1Config": ( + ".llms.openai.chat.o_series_transformation", + "OpenAIOSeriesConfig", + ), # Alias "OpenAIGPTConfig": (".llms.openai.chat.gpt_transformation", "OpenAIGPTConfig"), "OpenAIGPT5Config": (".llms.openai.chat.gpt_5_transformation", "OpenAIGPT5Config"), - "OpenAIWhisperAudioTranscriptionConfig": (".llms.openai.transcriptions.whisper_transformation", "OpenAIWhisperAudioTranscriptionConfig"), - "OpenAIGPTAudioTranscriptionConfig": (".llms.openai.transcriptions.gpt_transformation", "OpenAIGPTAudioTranscriptionConfig"), - "OpenAIGPTAudioConfig": (".llms.openai.chat.gpt_audio_transformation", "OpenAIGPTAudioConfig"), + "OpenAIWhisperAudioTranscriptionConfig": ( + ".llms.openai.transcriptions.whisper_transformation", + "OpenAIWhisperAudioTranscriptionConfig", + ), + "OpenAIGPTAudioTranscriptionConfig": ( + ".llms.openai.transcriptions.gpt_transformation", + "OpenAIGPTAudioTranscriptionConfig", + ), + "OpenAIGPTAudioConfig": ( + ".llms.openai.chat.gpt_audio_transformation", + "OpenAIGPTAudioConfig", + ), "NvidiaNimConfig": (".llms.nvidia_nim.chat.transformation", "NvidiaNimConfig"), "NvidiaNimEmbeddingConfig": (".llms.nvidia_nim.embed", "NvidiaNimEmbeddingConfig"), - "FeatherlessAIConfig": (".llms.featherless_ai.chat.transformation", 
"FeatherlessAIConfig"), + "FeatherlessAIConfig": ( + ".llms.featherless_ai.chat.transformation", + "FeatherlessAIConfig", + ), "CerebrasConfig": (".llms.cerebras.chat", "CerebrasConfig"), "BasetenConfig": (".llms.baseten.chat", "BasetenConfig"), "SambanovaConfig": (".llms.sambanova.chat", "SambanovaConfig"), - "SambaNovaEmbeddingConfig": (".llms.sambanova.embedding.transformation", "SambaNovaEmbeddingConfig"), - "FireworksAIConfig": (".llms.fireworks_ai.chat.transformation", "FireworksAIConfig"), - "FireworksAITextCompletionConfig": (".llms.fireworks_ai.completion.transformation", "FireworksAITextCompletionConfig"), - "FireworksAIAudioTranscriptionConfig": (".llms.fireworks_ai.audio_transcription.transformation", "FireworksAIAudioTranscriptionConfig"), - "FireworksAIEmbeddingConfig": (".llms.fireworks_ai.embed.fireworks_ai_transformation", "FireworksAIEmbeddingConfig"), - "FriendliaiChatConfig": (".llms.friendliai.chat.transformation", "FriendliaiChatConfig"), - "JinaAIEmbeddingConfig": (".llms.jina_ai.embedding.transformation", "JinaAIEmbeddingConfig"), + "SambaNovaEmbeddingConfig": ( + ".llms.sambanova.embedding.transformation", + "SambaNovaEmbeddingConfig", + ), + "FireworksAIConfig": ( + ".llms.fireworks_ai.chat.transformation", + "FireworksAIConfig", + ), + "FireworksAITextCompletionConfig": ( + ".llms.fireworks_ai.completion.transformation", + "FireworksAITextCompletionConfig", + ), + "FireworksAIAudioTranscriptionConfig": ( + ".llms.fireworks_ai.audio_transcription.transformation", + "FireworksAIAudioTranscriptionConfig", + ), + "FireworksAIEmbeddingConfig": ( + ".llms.fireworks_ai.embed.fireworks_ai_transformation", + "FireworksAIEmbeddingConfig", + ), + "FriendliaiChatConfig": ( + ".llms.friendliai.chat.transformation", + "FriendliaiChatConfig", + ), + "JinaAIEmbeddingConfig": ( + ".llms.jina_ai.embedding.transformation", + "JinaAIEmbeddingConfig", + ), "XAIChatConfig": (".llms.xai.chat.transformation", "XAIChatConfig"), "ZAIChatConfig": 
(".llms.zai.chat.transformation", "ZAIChatConfig"), "AIMLChatConfig": (".llms.aiml.chat.transformation", "AIMLChatConfig"), - "VolcEngineChatConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"), - "CodestralTextCompletionConfig": (".llms.codestral.completion.transformation", "CodestralTextCompletionConfig"), - "AzureOpenAIAssistantsAPIConfig": (".llms.azure.azure", "AzureOpenAIAssistantsAPIConfig"), + "VolcEngineChatConfig": ( + ".llms.volcengine.chat.transformation", + "VolcEngineChatConfig", + ), + "CodestralTextCompletionConfig": ( + ".llms.codestral.completion.transformation", + "CodestralTextCompletionConfig", + ), + "AzureOpenAIAssistantsAPIConfig": ( + ".llms.azure.azure", + "AzureOpenAIAssistantsAPIConfig", + ), "HerokuChatConfig": (".llms.heroku.chat.transformation", "HerokuChatConfig"), "CometAPIConfig": (".llms.cometapi.chat.transformation", "CometAPIConfig"), "AzureOpenAIConfig": (".llms.azure.chat.gpt_transformation", "AzureOpenAIConfig"), - "AzureOpenAIGPT5Config": (".llms.azure.chat.gpt_5_transformation", "AzureOpenAIGPT5Config"), - "AzureOpenAITextConfig": (".llms.azure.completion.transformation", "AzureOpenAITextConfig"), - "HostedVLLMChatConfig": (".llms.hosted_vllm.chat.transformation", "HostedVLLMChatConfig"), + "AzureOpenAIGPT5Config": ( + ".llms.azure.chat.gpt_5_transformation", + "AzureOpenAIGPT5Config", + ), + "AzureOpenAITextConfig": ( + ".llms.azure.completion.transformation", + "AzureOpenAITextConfig", + ), + "HostedVLLMChatConfig": ( + ".llms.hosted_vllm.chat.transformation", + "HostedVLLMChatConfig", + ), + "HostedVLLMEmbeddingConfig": ( + ".llms.hosted_vllm.embedding.transformation", + "HostedVLLMEmbeddingConfig", + ), # Alias for backwards compatibility - "VolcEngineConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"), # Alias - "LlamafileChatConfig": (".llms.llamafile.chat.transformation", "LlamafileChatConfig"), - "LiteLLMProxyChatConfig": (".llms.litellm_proxy.chat.transformation", 
"LiteLLMProxyChatConfig"), + "VolcEngineConfig": ( + ".llms.volcengine.chat.transformation", + "VolcEngineChatConfig", + ), # Alias + "LlamafileChatConfig": ( + ".llms.llamafile.chat.transformation", + "LlamafileChatConfig", + ), + "LiteLLMProxyChatConfig": ( + ".llms.litellm_proxy.chat.transformation", + "LiteLLMProxyChatConfig", + ), "VLLMConfig": (".llms.vllm.completion.transformation", "VLLMConfig"), "DeepSeekChatConfig": (".llms.deepseek.chat.transformation", "DeepSeekChatConfig"), "LMStudioChatConfig": (".llms.lm_studio.chat.transformation", "LMStudioChatConfig"), - "LmStudioEmbeddingConfig": (".llms.lm_studio.embed.transformation", "LmStudioEmbeddingConfig"), + "LmStudioEmbeddingConfig": ( + ".llms.lm_studio.embed.transformation", + "LmStudioEmbeddingConfig", + ), "NscaleConfig": (".llms.nscale.chat.transformation", "NscaleConfig"), - "PerplexityChatConfig": (".llms.perplexity.chat.transformation", "PerplexityChatConfig"), - "AzureOpenAIO1Config": (".llms.azure.chat.o_series_transformation", "AzureOpenAIO1Config"), - "IBMWatsonXAIConfig": (".llms.watsonx.completion.transformation", "IBMWatsonXAIConfig"), - "IBMWatsonXChatConfig": (".llms.watsonx.chat.transformation", "IBMWatsonXChatConfig"), - "IBMWatsonXEmbeddingConfig": (".llms.watsonx.embed.transformation", "IBMWatsonXEmbeddingConfig"), - "GenAIHubEmbeddingConfig": (".llms.sap.embed.transformation", "GenAIHubEmbeddingConfig"), - "IBMWatsonXAudioTranscriptionConfig": (".llms.watsonx.audio_transcription.transformation", "IBMWatsonXAudioTranscriptionConfig"), - "GithubCopilotConfig": (".llms.github_copilot.chat.transformation", "GithubCopilotConfig"), - "GithubCopilotResponsesAPIConfig": (".llms.github_copilot.responses.transformation", "GithubCopilotResponsesAPIConfig"), - "GithubCopilotEmbeddingConfig": (".llms.github_copilot.embedding.transformation", "GithubCopilotEmbeddingConfig"), + "PerplexityChatConfig": ( + ".llms.perplexity.chat.transformation", + "PerplexityChatConfig", + ), + 
"AzureOpenAIO1Config": ( + ".llms.azure.chat.o_series_transformation", + "AzureOpenAIO1Config", + ), + "IBMWatsonXAIConfig": ( + ".llms.watsonx.completion.transformation", + "IBMWatsonXAIConfig", + ), + "IBMWatsonXChatConfig": ( + ".llms.watsonx.chat.transformation", + "IBMWatsonXChatConfig", + ), + "IBMWatsonXEmbeddingConfig": ( + ".llms.watsonx.embed.transformation", + "IBMWatsonXEmbeddingConfig", + ), + "GenAIHubEmbeddingConfig": ( + ".llms.sap.embed.transformation", + "GenAIHubEmbeddingConfig", + ), + "IBMWatsonXAudioTranscriptionConfig": ( + ".llms.watsonx.audio_transcription.transformation", + "IBMWatsonXAudioTranscriptionConfig", + ), + "GithubCopilotConfig": ( + ".llms.github_copilot.chat.transformation", + "GithubCopilotConfig", + ), + "GithubCopilotResponsesAPIConfig": ( + ".llms.github_copilot.responses.transformation", + "GithubCopilotResponsesAPIConfig", + ), + "GithubCopilotEmbeddingConfig": ( + ".llms.github_copilot.embedding.transformation", + "GithubCopilotEmbeddingConfig", + ), "ChatGPTConfig": (".llms.chatgpt.chat.transformation", "ChatGPTConfig"), - "ChatGPTResponsesAPIConfig": (".llms.chatgpt.responses.transformation", "ChatGPTResponsesAPIConfig"), + "ChatGPTResponsesAPIConfig": ( + ".llms.chatgpt.responses.transformation", + "ChatGPTResponsesAPIConfig", + ), "NebiusConfig": (".llms.nebius.chat.transformation", "NebiusConfig"), "WandbConfig": (".llms.wandb.chat.transformation", "WandbConfig"), "GigaChatConfig": (".llms.gigachat.chat.transformation", "GigaChatConfig"), - "GigaChatEmbeddingConfig": (".llms.gigachat.embedding.transformation", "GigaChatEmbeddingConfig"), - "DashScopeChatConfig": (".llms.dashscope.chat.transformation", "DashScopeChatConfig"), + "GigaChatEmbeddingConfig": ( + ".llms.gigachat.embedding.transformation", + "GigaChatEmbeddingConfig", + ), + "DashScopeChatConfig": ( + ".llms.dashscope.chat.transformation", + "DashScopeChatConfig", + ), "MoonshotChatConfig": (".llms.moonshot.chat.transformation", "MoonshotChatConfig"), - 
"DockerModelRunnerChatConfig": (".llms.docker_model_runner.chat.transformation", "DockerModelRunnerChatConfig"), + "DockerModelRunnerChatConfig": ( + ".llms.docker_model_runner.chat.transformation", + "DockerModelRunnerChatConfig", + ), "V0ChatConfig": (".llms.v0.chat.transformation", "V0ChatConfig"), "OCIChatConfig": (".llms.oci.chat.transformation", "OCIChatConfig"), "MorphChatConfig": (".llms.morph.chat.transformation", "MorphChatConfig"), "RAGFlowConfig": (".llms.ragflow.chat.transformation", "RAGFlowConfig"), "LambdaAIChatConfig": (".llms.lambda_ai.chat.transformation", "LambdaAIChatConfig"), - "HyperbolicChatConfig": (".llms.hyperbolic.chat.transformation", "HyperbolicChatConfig"), - "VercelAIGatewayConfig": (".llms.vercel_ai_gateway.chat.transformation", "VercelAIGatewayConfig"), + "HyperbolicChatConfig": ( + ".llms.hyperbolic.chat.transformation", + "HyperbolicChatConfig", + ), + "VercelAIGatewayConfig": ( + ".llms.vercel_ai_gateway.chat.transformation", + "VercelAIGatewayConfig", + ), "OVHCloudChatConfig": (".llms.ovhcloud.chat.transformation", "OVHCloudChatConfig"), - "OVHCloudEmbeddingConfig": (".llms.ovhcloud.embedding.transformation", "OVHCloudEmbeddingConfig"), - "CometAPIEmbeddingConfig": (".llms.cometapi.embed.transformation", "CometAPIEmbeddingConfig"), + "OVHCloudEmbeddingConfig": ( + ".llms.ovhcloud.embedding.transformation", + "OVHCloudEmbeddingConfig", + ), + "CometAPIEmbeddingConfig": ( + ".llms.cometapi.embed.transformation", + "CometAPIEmbeddingConfig", + ), "LemonadeChatConfig": (".llms.lemonade.chat.transformation", "LemonadeChatConfig"), - "SnowflakeEmbeddingConfig": (".llms.snowflake.embedding.transformation", "SnowflakeEmbeddingConfig"), - "AmazonNovaChatConfig": (".llms.amazon_nova.chat.transformation", "AmazonNovaChatConfig"), + "SnowflakeEmbeddingConfig": ( + ".llms.snowflake.embedding.transformation", + "SnowflakeEmbeddingConfig", + ), + "AmazonNovaChatConfig": ( + ".llms.amazon_nova.chat.transformation", + "AmazonNovaChatConfig", + 
), } # Import map for utils module lazy imports _UTILS_MODULE_IMPORT_MAP = { "encoding": ("litellm.main", "encoding"), - "BaseVectorStore": ("litellm.integrations.vector_store_integrations.base_vector_store", "BaseVectorStore"), - "CredentialAccessor": ("litellm.litellm_core_utils.credential_accessor", "CredentialAccessor"), - "exception_type": ("litellm.litellm_core_utils.exception_mapping_utils", "exception_type"), - "get_error_message": ("litellm.litellm_core_utils.exception_mapping_utils", "get_error_message"), - "_get_response_headers": ("litellm.litellm_core_utils.exception_mapping_utils", "_get_response_headers"), - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "_is_non_openai_azure_model": ("litellm.litellm_core_utils.get_llm_provider_logic", "_is_non_openai_azure_model"), - "get_supported_openai_params": ("litellm.litellm_core_utils.get_supported_openai_params", "get_supported_openai_params"), - "LiteLLMResponseObjectHandler": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "LiteLLMResponseObjectHandler"), - "_handle_invalid_parallel_tool_calls": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "_handle_invalid_parallel_tool_calls"), - "convert_to_model_response_object": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_model_response_object"), - "convert_to_streaming_response": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response"), - "convert_to_streaming_response_async": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response_async"), - "get_api_base": ("litellm.litellm_core_utils.llm_response_utils.get_api_base", "get_api_base"), - "ResponseMetadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "ResponseMetadata"), - "_parse_content_for_reasoning": 
("litellm.litellm_core_utils.prompt_templates.common_utils", "_parse_content_for_reasoning"), - "LiteLLMLoggingObject": ("litellm.litellm_core_utils.redact_messages", "LiteLLMLoggingObject"), - "redact_message_input_output_from_logging": ("litellm.litellm_core_utils.redact_messages", "redact_message_input_output_from_logging"), - "CustomStreamWrapper": ("litellm.litellm_core_utils.streaming_handler", "CustomStreamWrapper"), - "BaseGoogleGenAIGenerateContentConfig": ("litellm.llms.base_llm.google_genai.transformation", "BaseGoogleGenAIGenerateContentConfig"), + "BaseVectorStore": ( + "litellm.integrations.vector_store_integrations.base_vector_store", + "BaseVectorStore", + ), + "CredentialAccessor": ( + "litellm.litellm_core_utils.credential_accessor", + "CredentialAccessor", + ), + "exception_type": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "exception_type", + ), + "get_error_message": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "get_error_message", + ), + "_get_response_headers": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "_get_response_headers", + ), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "_is_non_openai_azure_model": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "_is_non_openai_azure_model", + ), + "get_supported_openai_params": ( + "litellm.litellm_core_utils.get_supported_openai_params", + "get_supported_openai_params", + ), + "LiteLLMResponseObjectHandler": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "LiteLLMResponseObjectHandler", + ), + "_handle_invalid_parallel_tool_calls": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "_handle_invalid_parallel_tool_calls", + ), + "convert_to_model_response_object": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_model_response_object", + ), + "convert_to_streaming_response": ( + 
"litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_streaming_response", + ), + "convert_to_streaming_response_async": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_streaming_response_async", + ), + "get_api_base": ( + "litellm.litellm_core_utils.llm_response_utils.get_api_base", + "get_api_base", + ), + "ResponseMetadata": ( + "litellm.litellm_core_utils.llm_response_utils.response_metadata", + "ResponseMetadata", + ), + "_parse_content_for_reasoning": ( + "litellm.litellm_core_utils.prompt_templates.common_utils", + "_parse_content_for_reasoning", + ), + "LiteLLMLoggingObject": ( + "litellm.litellm_core_utils.redact_messages", + "LiteLLMLoggingObject", + ), + "redact_message_input_output_from_logging": ( + "litellm.litellm_core_utils.redact_messages", + "redact_message_input_output_from_logging", + ), + "CustomStreamWrapper": ( + "litellm.litellm_core_utils.streaming_handler", + "CustomStreamWrapper", + ), + "BaseGoogleGenAIGenerateContentConfig": ( + "litellm.llms.base_llm.google_genai.transformation", + "BaseGoogleGenAIGenerateContentConfig", + ), "BaseOCRConfig": ("litellm.llms.base_llm.ocr.transformation", "BaseOCRConfig"), - "BaseSearchConfig": ("litellm.llms.base_llm.search.transformation", "BaseSearchConfig"), - "BaseTextToSpeechConfig": ("litellm.llms.base_llm.text_to_speech.transformation", "BaseTextToSpeechConfig"), + "BaseSearchConfig": ( + "litellm.llms.base_llm.search.transformation", + "BaseSearchConfig", + ), + "BaseTextToSpeechConfig": ( + "litellm.llms.base_llm.text_to_speech.transformation", + "BaseTextToSpeechConfig", + ), "BedrockModelInfo": ("litellm.llms.bedrock.common_utils", "BedrockModelInfo"), "CohereModelInfo": ("litellm.llms.cohere.common_utils", "CohereModelInfo"), "MistralOCRConfig": ("litellm.llms.mistral.ocr.transformation", "MistralOCRConfig"), "Rules": ("litellm.litellm_core_utils.rules", "Rules"), "AsyncHTTPHandler": 
("litellm.llms.custom_httpx.http_handler", "AsyncHTTPHandler"), "HTTPHandler": ("litellm.llms.custom_httpx.http_handler", "HTTPHandler"), - "get_num_retries_from_retry_policy": ("litellm.router_utils.get_retry_from_policy", "get_num_retries_from_retry_policy"), - "reset_retry_policy": ("litellm.router_utils.get_retry_from_policy", "reset_retry_policy"), + "get_num_retries_from_retry_policy": ( + "litellm.router_utils.get_retry_from_policy", + "get_num_retries_from_retry_policy", + ), + "reset_retry_policy": ( + "litellm.router_utils.get_retry_from_policy", + "reset_retry_policy", + ), "get_secret": ("litellm.secret_managers.main", "get_secret"), - "get_coroutine_checker": ("litellm.litellm_core_utils.cached_imports", "get_coroutine_checker"), - "get_litellm_logging_class": ("litellm.litellm_core_utils.cached_imports", "get_litellm_logging_class"), - "get_set_callbacks": ("litellm.litellm_core_utils.cached_imports", "get_set_callbacks"), - "get_litellm_metadata_from_kwargs": ("litellm.litellm_core_utils.core_helpers", "get_litellm_metadata_from_kwargs"), - "map_finish_reason": ("litellm.litellm_core_utils.core_helpers", "map_finish_reason"), - "process_response_headers": ("litellm.litellm_core_utils.core_helpers", "process_response_headers"), - "delete_nested_value": ("litellm.litellm_core_utils.dot_notation_indexing", "delete_nested_value"), - "is_nested_path": ("litellm.litellm_core_utils.dot_notation_indexing", "is_nested_path"), - "_get_base_model_from_litellm_call_metadata": ("litellm.litellm_core_utils.get_litellm_params", "_get_base_model_from_litellm_call_metadata"), - "get_litellm_params": ("litellm.litellm_core_utils.get_litellm_params", "get_litellm_params"), - "_ensure_extra_body_is_safe": ("litellm.litellm_core_utils.llm_request_utils", "_ensure_extra_body_is_safe"), - "get_formatted_prompt": ("litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", "get_formatted_prompt"), - "get_response_headers": 
("litellm.litellm_core_utils.llm_response_utils.get_headers", "get_response_headers"), - "update_response_metadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "update_response_metadata"), + "get_coroutine_checker": ( + "litellm.litellm_core_utils.cached_imports", + "get_coroutine_checker", + ), + "get_litellm_logging_class": ( + "litellm.litellm_core_utils.cached_imports", + "get_litellm_logging_class", + ), + "get_set_callbacks": ( + "litellm.litellm_core_utils.cached_imports", + "get_set_callbacks", + ), + "get_litellm_metadata_from_kwargs": ( + "litellm.litellm_core_utils.core_helpers", + "get_litellm_metadata_from_kwargs", + ), + "map_finish_reason": ( + "litellm.litellm_core_utils.core_helpers", + "map_finish_reason", + ), + "process_response_headers": ( + "litellm.litellm_core_utils.core_helpers", + "process_response_headers", + ), + "delete_nested_value": ( + "litellm.litellm_core_utils.dot_notation_indexing", + "delete_nested_value", + ), + "is_nested_path": ( + "litellm.litellm_core_utils.dot_notation_indexing", + "is_nested_path", + ), + "_get_base_model_from_litellm_call_metadata": ( + "litellm.litellm_core_utils.get_litellm_params", + "_get_base_model_from_litellm_call_metadata", + ), + "get_litellm_params": ( + "litellm.litellm_core_utils.get_litellm_params", + "get_litellm_params", + ), + "_ensure_extra_body_is_safe": ( + "litellm.litellm_core_utils.llm_request_utils", + "_ensure_extra_body_is_safe", + ), + "get_formatted_prompt": ( + "litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", + "get_formatted_prompt", + ), + "get_response_headers": ( + "litellm.litellm_core_utils.llm_response_utils.get_headers", + "get_response_headers", + ), + "update_response_metadata": ( + "litellm.litellm_core_utils.llm_response_utils.response_metadata", + "update_response_metadata", + ), "executor": ("litellm.litellm_core_utils.thread_pool_executor", "executor"), - "BaseAnthropicMessagesConfig": 
("litellm.llms.base_llm.anthropic_messages.transformation", "BaseAnthropicMessagesConfig"), - "BaseAudioTranscriptionConfig": ("litellm.llms.base_llm.audio_transcription.transformation", "BaseAudioTranscriptionConfig"), - "BaseBatchesConfig": ("litellm.llms.base_llm.batches.transformation", "BaseBatchesConfig"), - "BaseContainerConfig": ("litellm.llms.base_llm.containers.transformation", "BaseContainerConfig"), - "BaseEmbeddingConfig": ("litellm.llms.base_llm.embedding.transformation", "BaseEmbeddingConfig"), - "BaseImageEditConfig": ("litellm.llms.base_llm.image_edit.transformation", "BaseImageEditConfig"), - "BaseImageGenerationConfig": ("litellm.llms.base_llm.image_generation.transformation", "BaseImageGenerationConfig"), - "BaseImageVariationConfig": ("litellm.llms.base_llm.image_variations.transformation", "BaseImageVariationConfig"), - "BasePassthroughConfig": ("litellm.llms.base_llm.passthrough.transformation", "BasePassthroughConfig"), - "BaseRealtimeConfig": ("litellm.llms.base_llm.realtime.transformation", "BaseRealtimeConfig"), - "BaseRerankConfig": ("litellm.llms.base_llm.rerank.transformation", "BaseRerankConfig"), - "BaseVectorStoreConfig": ("litellm.llms.base_llm.vector_store.transformation", "BaseVectorStoreConfig"), - "BaseVectorStoreFilesConfig": ("litellm.llms.base_llm.vector_store_files.transformation", "BaseVectorStoreFilesConfig"), - "BaseVideoConfig": ("litellm.llms.base_llm.videos.transformation", "BaseVideoConfig"), - "ANTHROPIC_API_ONLY_HEADERS": ("litellm.types.llms.anthropic", "ANTHROPIC_API_ONLY_HEADERS"), - "AnthropicThinkingParam": ("litellm.types.llms.anthropic", "AnthropicThinkingParam"), + "BaseAnthropicMessagesConfig": ( + "litellm.llms.base_llm.anthropic_messages.transformation", + "BaseAnthropicMessagesConfig", + ), + "BaseAudioTranscriptionConfig": ( + "litellm.llms.base_llm.audio_transcription.transformation", + "BaseAudioTranscriptionConfig", + ), + "BaseBatchesConfig": ( + "litellm.llms.base_llm.batches.transformation", + 
"BaseBatchesConfig", + ), + "BaseContainerConfig": ( + "litellm.llms.base_llm.containers.transformation", + "BaseContainerConfig", + ), + "BaseEmbeddingConfig": ( + "litellm.llms.base_llm.embedding.transformation", + "BaseEmbeddingConfig", + ), + "BaseImageEditConfig": ( + "litellm.llms.base_llm.image_edit.transformation", + "BaseImageEditConfig", + ), + "BaseImageGenerationConfig": ( + "litellm.llms.base_llm.image_generation.transformation", + "BaseImageGenerationConfig", + ), + "BaseImageVariationConfig": ( + "litellm.llms.base_llm.image_variations.transformation", + "BaseImageVariationConfig", + ), + "BasePassthroughConfig": ( + "litellm.llms.base_llm.passthrough.transformation", + "BasePassthroughConfig", + ), + "BaseRealtimeConfig": ( + "litellm.llms.base_llm.realtime.transformation", + "BaseRealtimeConfig", + ), + "BaseRerankConfig": ( + "litellm.llms.base_llm.rerank.transformation", + "BaseRerankConfig", + ), + "BaseVectorStoreConfig": ( + "litellm.llms.base_llm.vector_store.transformation", + "BaseVectorStoreConfig", + ), + "BaseVectorStoreFilesConfig": ( + "litellm.llms.base_llm.vector_store_files.transformation", + "BaseVectorStoreFilesConfig", + ), + "BaseVideoConfig": ( + "litellm.llms.base_llm.videos.transformation", + "BaseVideoConfig", + ), + "ANTHROPIC_API_ONLY_HEADERS": ( + "litellm.types.llms.anthropic", + "ANTHROPIC_API_ONLY_HEADERS", + ), + "AnthropicThinkingParam": ( + "litellm.types.llms.anthropic", + "AnthropicThinkingParam", + ), "RerankResponse": ("litellm.types.rerank", "RerankResponse"), - "ChatCompletionDeltaToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionDeltaToolCallChunk"), - "ChatCompletionToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallChunk"), - "ChatCompletionToolCallFunctionChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallFunctionChunk"), + "ChatCompletionDeltaToolCallChunk": ( + "litellm.types.llms.openai", + "ChatCompletionDeltaToolCallChunk", + ), + "ChatCompletionToolCallChunk": ( 
+ "litellm.types.llms.openai", + "ChatCompletionToolCallChunk", + ), + "ChatCompletionToolCallFunctionChunk": ( + "litellm.types.llms.openai", + "ChatCompletionToolCallFunctionChunk", + ), "LiteLLM_Params": ("litellm.types.router", "LiteLLM_Params"), } diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 03488ad0183..98ee5e4fa86 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -1632,6 +1632,7 @@ def _sanitize_anthropic_tool_use_id(tool_use_id: str) -> str: def convert_to_anthropic_tool_result( message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage], + force_base64: bool = False, ) -> AnthropicMessagesToolResultParam: """ OpenAI message with a tool result looks like: @@ -1694,7 +1695,7 @@ def convert_to_anthropic_tool_result( else None ) _anthropic_image_param = create_anthropic_image_param( - content["image_url"], format=format + content["image_url"], format=format, is_bedrock_invoke=force_base64 ) _anthropic_image_param = add_cache_control_to_content( anthropic_content_element=_anthropic_image_param, @@ -2056,6 +2057,12 @@ def anthropic_messages_pt( # noqa: PLR0915 else: messages.append(DEFAULT_USER_CONTINUE_MESSAGE_TYPED) + # Bedrock invoke models have format: invoke/... 
+ # Vertex AI Anthropic also doesn't support URL sources for images + is_bedrock_invoke = model.lower().startswith("invoke/") + is_vertex_ai = llm_provider.startswith("vertex_ai") if llm_provider else False + force_base64 = is_bedrock_invoke or is_vertex_ai + msg_i = 0 while msg_i < len(messages): user_content: List[AnthropicMessagesUserMessageValues] = [] @@ -2165,7 +2172,9 @@ def anthropic_messages_pt( # noqa: PLR0915 ): # OpenAI's tool message content will always be a string user_content.append( - convert_to_anthropic_tool_result(user_message_types_block) + convert_to_anthropic_tool_result( + user_message_types_block, force_base64=force_base64 + ) ) msg_i += 1 diff --git a/litellm/llms/gemini/files/transformation.py b/litellm/llms/gemini/files/transformation.py index d9ebf69a97a..334dc013226 100644 --- a/litellm/llms/gemini/files/transformation.py +++ b/litellm/llms/gemini/files/transformation.py @@ -180,7 +180,25 @@ def transform_retrieve_file_request( optional_params: dict, litellm_params: dict, ) -> tuple[str, dict]: - raise NotImplementedError("GoogleAIStudioFilesHandler does not support file retrieval") + """ + Get the URL to retrieve a file from Google AI Studio. + + We expect file_id to be the URI (e.g. https://generativelanguage.googleapis.com/v1beta/files/...) + as returned by the upload response. + """ + api_key = litellm_params.get("api_key") + if not api_key: + raise ValueError("api_key is required") + + if file_id.startswith("http"): + url = "{}?key={}".format(file_id, api_key) + else: + # Fallback for just file name (files/...) 
+ api_base = self.get_api_base(litellm_params.get("api_base")) or "https://generativelanguage.googleapis.com" + api_base = api_base.rstrip("/") + url = "{}/v1beta/{}?key={}".format(api_base, file_id, api_key) + + return url, {"Content-Type": "application/json"} def transform_retrieve_file_response( self, @@ -188,7 +206,40 @@ def transform_retrieve_file_response( logging_obj: LiteLLMLoggingObj, litellm_params: dict, ) -> OpenAIFileObject: - raise NotImplementedError("GoogleAIStudioFilesHandler does not support file retrieval") + """ + Transform Gemini's file retrieval response into OpenAI-style FileObject + """ + try: + response_json = raw_response.json() + + # Map Gemini state to OpenAI status + gemini_state = response_json.get("state", "STATE_UNSPECIFIED") + status = "uploaded" # Default + if gemini_state == "ACTIVE": + status = "processed" + elif gemini_state == "FAILED": + status = "error" + + return OpenAIFileObject( + id=response_json.get("uri", ""), + bytes=int(response_json.get("sizeBytes", 0)), + created_at=int( + time.mktime( + time.strptime( + response_json["createTime"].replace("Z", "+00:00"), + "%Y-%m-%dT%H:%M:%S.%f%z", + ) + ) + ), + filename=response_json.get("displayName", ""), + object="file", + purpose="user_data", + status=status, + status_details=str(response_json.get("error", "")) if gemini_state == "FAILED" else None, + ) + except Exception as e: + verbose_logger.exception(f"Error parsing file retrieve response: {str(e)}") + raise ValueError(f"Error parsing file retrieve response: {str(e)}") def transform_delete_file_request( self, diff --git a/litellm/llms/hosted_vllm/embedding/transformation.py b/litellm/llms/hosted_vllm/embedding/transformation.py new file mode 100644 index 00000000000..9c3e8c6c7cc --- /dev/null +++ b/litellm/llms/hosted_vllm/embedding/transformation.py @@ -0,0 +1,180 @@ +""" +Hosted VLLM Embedding API Configuration. + +This module provides the configuration for hosted VLLM's Embedding API. 
+VLLM is OpenAI-compatible and supports embeddings via the /v1/embeddings endpoint. + +Docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html +""" + +from typing import TYPE_CHECKING, Any, List, Optional, Union + +import httpx + +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig +from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues +from litellm.types.utils import EmbeddingResponse +from litellm.utils import convert_to_model_response_object + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + + +class HostedVLLMEmbeddingError(BaseLLMException): + """Exception class for Hosted VLLM Embedding errors.""" + + pass + + +class HostedVLLMEmbeddingConfig(BaseEmbeddingConfig): + """ + Configuration for Hosted VLLM's Embedding API. + + Reference: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html + """ + + def validate_environment( + self, + headers: dict, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + """ + Validate environment and set up headers for Hosted VLLM API. 
+ """ + if api_key is None: + api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key" + + default_headers = { + "Content-Type": "application/json", + } + + # Only add Authorization header if api_key is not "fake-api-key" + if api_key and api_key != "fake-api-key": + default_headers["Authorization"] = f"Bearer {api_key}" + + # Merge with existing headers (user's headers take priority) + return {**default_headers, **headers} + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + """ + Get the complete URL for Hosted VLLM Embedding API endpoint. + """ + if api_base is None: + api_base = get_secret_str("HOSTED_VLLM_API_BASE") + if api_base is None: + raise ValueError("api_base is required for hosted_vllm embeddings") + + # Remove trailing slashes + api_base = api_base.rstrip("/") + + # Ensure the URL ends with /embeddings + if not api_base.endswith("/embeddings"): + api_base = f"{api_base}/embeddings" + + return api_base + + def transform_embedding_request( + self, + model: str, + input: AllEmbeddingInputValues, + optional_params: dict, + headers: dict, + ) -> dict: + """ + Transform embedding request to Hosted VLLM format (OpenAI-compatible). + """ + # Ensure input is a list + if isinstance(input, str): + input = [input] + + # Strip 'hosted_vllm/' prefix if present + if model.startswith("hosted_vllm/"): + model = model.replace("hosted_vllm/", "", 1) + + return { + "model": model, + "input": input, + **optional_params, + } + + def transform_embedding_response( + self, + model: str, + raw_response: httpx.Response, + model_response: EmbeddingResponse, + logging_obj: LiteLLMLoggingObj, + api_key: Optional[str], + request_data: dict, + optional_params: dict, + litellm_params: dict, + ) -> EmbeddingResponse: + """ + Transform embedding response from Hosted VLLM format (OpenAI-compatible). 
+ """ + logging_obj.post_call(original_response=raw_response.text) + + # VLLM returns standard OpenAI-compatible embedding response + response_json = raw_response.json() + + return convert_to_model_response_object( + response_object=response_json, + model_response_object=model_response, + response_type="embedding", + ) + + def get_supported_openai_params(self, model: str) -> list: + """ + Get list of supported OpenAI parameters for Hosted VLLM embeddings. + """ + return [ + "timeout", + "dimensions", + "encoding_format", + "user", + ] + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + """ + Map OpenAI parameters to Hosted VLLM format. + """ + for param, value in non_default_params.items(): + if param in self.get_supported_openai_params(model): + optional_params[param] = value + return optional_params + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + """ + Get the error class for Hosted VLLM errors. 
+ """ + return HostedVLLMEmbeddingError( + message=error_message, + status_code=status_code, + headers=headers, + ) diff --git a/litellm/main.py b/litellm/main.py index 99bf224c5b7..ac368317e6c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2353,6 +2353,33 @@ def completion( # type: ignore # noqa: PLR0915 or "https://api.minimax.io/v1" ) + response = base_llm_http_handler.completion( + model=model, + messages=messages, + api_base=api_base, + custom_llm_provider=custom_llm_provider, + model_response=model_response, + encoding=_get_encoding(), + logging_obj=logging, + optional_params=optional_params, + timeout=timeout, + litellm_params=litellm_params, + shared_session=shared_session, + acompletion=acompletion, + stream=stream, + api_key=api_key, + headers=headers, + client=client, + provider_config=provider_config, + ) + logging.post_call( + input=messages, api_key=api_key, original_response=response + ) + elif custom_llm_provider == "hosted_vllm": + api_base = ( + api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE") + ) + response = base_llm_http_handler.completion( model=model, messages=messages, @@ -3591,9 +3618,9 @@ def completion( # type: ignore # noqa: PLR0915 "aws_region_name" not in optional_params or optional_params["aws_region_name"] is None ): - optional_params[ - "aws_region_name" - ] = aws_bedrock_client.meta.region_name + optional_params["aws_region_name"] = ( + aws_bedrock_client.meta.region_name + ) bedrock_route = BedrockModelInfo.get_bedrock_route(model) if bedrock_route == "converse": @@ -4753,9 +4780,32 @@ def embedding( # noqa: PLR0915 client=client, aembedding=aembedding, ) + elif custom_llm_provider == "hosted_vllm": + api_base = ( + api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE") + ) + + # set API KEY + if api_key is None: + api_key = litellm.api_key or get_secret_str("HOSTED_VLLM_API_KEY") + + response = base_llm_http_handler.embedding( + model=model, + input=input, + 
custom_llm_provider=custom_llm_provider, + api_base=api_base, + api_key=api_key, + logging_obj=logging, + timeout=timeout, + model_response=EmbeddingResponse(), + optional_params=optional_params, + client=client, + aembedding=aembedding, + litellm_params=litellm_params_dict, + headers=headers or {}, + ) elif ( custom_llm_provider == "openai_like" - or custom_llm_provider == "hosted_vllm" or custom_llm_provider == "llamafile" or custom_llm_provider == "lm_studio" ): @@ -5928,9 +5978,9 @@ def adapter_completion( new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs) # type: ignore - translated_response: Optional[ - Union[BaseModel, AdapterCompletionStreamWrapper] - ] = None + translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = ( + None + ) if isinstance(response, ModelResponse): translated_response = translation_obj.translate_completion_output_params( response=response @@ -6635,9 +6685,9 @@ def speech( # noqa: PLR0915 ElevenLabsTextToSpeechConfig.ELEVENLABS_QUERY_PARAMS_KEY ] = query_params - litellm_params_dict[ - ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY - ] = voice_id + litellm_params_dict[ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY] = ( + voice_id + ) if api_base is not None: litellm_params_dict["api_base"] = api_base @@ -7143,9 +7193,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(content_chunks) > 0: - response["choices"][0]["message"][ - "content" - ] = processor.get_combined_content(content_chunks) + response["choices"][0]["message"]["content"] = ( + processor.get_combined_content(content_chunks) + ) thinking_blocks = [ chunk @@ -7156,9 +7206,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(thinking_blocks) > 0: - response["choices"][0]["message"][ - "thinking_blocks" - ] = processor.get_combined_thinking_content(thinking_blocks) + response["choices"][0]["message"]["thinking_blocks"] = ( + 
processor.get_combined_thinking_content(thinking_blocks) + ) reasoning_chunks = [ chunk @@ -7169,9 +7219,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(reasoning_chunks) > 0: - response["choices"][0]["message"][ - "reasoning_content" - ] = processor.get_combined_reasoning_content(reasoning_chunks) + response["choices"][0]["message"]["reasoning_content"] = ( + processor.get_combined_reasoning_content(reasoning_chunks) + ) annotation_chunks = [ chunk @@ -7197,6 +7247,23 @@ def stream_chunk_builder( # noqa: PLR0915 _choice = cast(Choices, response.choices[0]) _choice.message.audio = processor.get_combined_audio_content(audio_chunks) + # Handle image chunks from models like gemini-2.5-flash-image + # See: https://github.com/BerriAI/litellm/issues/19478 + image_chunks = [ + chunk + for chunk in chunks + if len(chunk["choices"]) > 0 + and "images" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["images"] is not None + ] + + if len(image_chunks) > 0: + # Images come complete in a single chunk, collect all images from all chunks + all_images = [] + for chunk in image_chunks: + all_images.extend(chunk["choices"][0]["delta"]["images"]) + response["choices"][0]["message"]["images"] = all_images + # Combine provider_specific_fields from streaming chunks (e.g., web_search_results, citations) # See: https://github.com/BerriAI/litellm/issues/17737 provider_specific_chunks = [ diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 51f3e6482a4..61b857dc473 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -237,6 +237,70 @@ async def combined_generator() -> AsyncGenerator[str, None]: ) +def _override_openai_response_model( + *, + response_obj: Any, + requested_model: str, + log_context: str, +) -> None: + """ + Force the OpenAI-compatible `model` field in the response to match what the client requested. 
+ + LiteLLM internally prefixes some provider/deployment model identifiers (e.g. `hosted_vllm/...`). + That internal identifier should not be returned to clients in the OpenAI `model` field. + + Note: This is intentionally verbose. A model mismatch is a useful signal that an internal + model identifier is being stamped/preserved somewhere in the request/response pipeline. + We log mismatches as warnings (and then restamp to the client-requested value) so these + paths stay observable for maintainers/operators without breaking client compatibility. + + Errors are reserved for cases where the proxy cannot read/override the response model field. + """ + if not requested_model: + return + + if isinstance(response_obj, dict): + downstream_model = response_obj.get("model") + if downstream_model != requested_model: + verbose_proxy_logger.warning( + "%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.", + log_context, + requested_model, + downstream_model, + ) + response_obj["model"] = requested_model + return + + if not hasattr(response_obj, "model"): + verbose_proxy_logger.error( + "%s: cannot override response model; missing `model` attribute. response_type=%s", + log_context, + type(response_obj), + ) + return + + downstream_model = getattr(response_obj, "model", None) + if downstream_model != requested_model: + verbose_proxy_logger.warning( + "%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.", + log_context, + requested_model, + downstream_model, + ) + + try: + setattr(response_obj, "model", requested_model) + except Exception as e: + verbose_proxy_logger.error( + "%s: failed to override response.model=%r on response_type=%s. 
error=%s", + log_context, + requested_model, + type(response_obj), + str(e), + exc_info=True, + ) + + def _get_cost_breakdown_from_logging_obj( litellm_logging_obj: Optional[LiteLLMLoggingObj], ) -> Tuple[Optional[float], Optional[float], Optional[float], Optional[float]]: @@ -625,6 +689,9 @@ async def base_process_llm_request( """ Common request processing logic for both chat completions and responses API endpoints """ + requested_model_from_client: Optional[str] = ( + self.data.get("model") if isinstance(self.data.get("model"), str) else None + ) if verbose_proxy_logger.isEnabledFor(logging.DEBUG): verbose_proxy_logger.debug( "Request received by LiteLLM:\n{}".format( @@ -694,13 +761,15 @@ async def base_process_llm_request( model_info = litellm_metadata.get("model_info", {}) or {} model_id = model_info.get("id", "") or "" - cache_key = hidden_params.get("cache_key", None) or "" - api_base = hidden_params.get("api_base", None) or "" - response_cost = hidden_params.get("response_cost", None) or "" - fastest_response_batch_completion = hidden_params.get( - "fastest_response_batch_completion", None + cache_key, api_base, response_cost = ( + hidden_params.get("cache_key", None) or "", + hidden_params.get("api_base", None) or "", + hidden_params.get("response_cost", None) or "", + ) + fastest_response_batch_completion, additional_headers = ( + hidden_params.get("fastest_response_batch_completion", None), + hidden_params.get("additional_headers", {}) or {}, ) - additional_headers: dict = hidden_params.get("additional_headers", {}) or {} # Post Call Processing if llm_router is not None: @@ -730,6 +799,13 @@ async def base_process_llm_request( litellm_logging_obj=logging_obj, **additional_headers, ) + + # Preserve the original client-requested model (pre-alias mapping) for downstream + # streaming generators. 
Pre-call processing can rewrite `self.data["model"]` for + # aliasing/routing, but the OpenAI-compatible response `model` field should reflect + # what the client sent. + if requested_model_from_client: + self.data["_litellm_client_requested_model"] = requested_model_from_client if route_type == "allm_passthrough_route": # Check if response is an async generator if self._is_streaming_response(response): @@ -789,6 +865,15 @@ async def base_process_llm_request( data=self.data, user_api_key_dict=user_api_key_dict, response=response ) + # Always return the client-requested model name (not provider-prefixed internal identifiers) + # for OpenAI-compatible responses. + if requested_model_from_client: + _override_openai_response_model( + response_obj=response, + requested_model=requested_model_from_client, + log_context=f"litellm_call_id={logging_obj.litellm_call_id}", + ) + hidden_params = ( getattr(response, "_hidden_params", {}) or {} ) # get any updated response headers diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py index d27e0036235..eddd64c36c7 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm/proxy/health_endpoints/_health_endpoints.py @@ -118,6 +118,7 @@ def _resolve_os_environ_variables(params: dict) -> dict: "email", "braintrust", "datadog", + "datadog_llm_observability", "generic_api", "arize", "sqs" @@ -190,6 +191,7 @@ async def health_services_endpoint( # noqa: PLR0915 "custom_callback_api", "langsmith", "datadog", + "datadog_llm_observability", "generic_api", "arize", "sqs" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index df6fd173c06..078ce0edf27 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -11,7 +11,8 @@ import time import traceback import warnings -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta +import enum from typing import ( TYPE_CHECKING, 
Any, @@ -28,7 +29,41 @@ get_origin, get_type_hints, ) - +from pydantic import BaseModel, Json + +from litellm.proxy._types import ( + ProxyException, + UserAPIKeyAuth, + LiteLLM_UserTable, + CommonProxyErrors, + LitellmUserRoles, + ConfigList, + ConfigYAML, + ConfigFieldUpdate, + ConfigGeneralSettings, + ConfigFieldInfo, + PassThroughGenericEndpoint, + FieldDetail, + ConfigFieldDelete, + CallbackDelete, + InvitationClaim, + InvitationModel, + InvitationNew, + InvitationUpdate, + InvitationDelete, + CallInfo, + Litellm_EntityType, + TeamDefaultSettings, + RoleBasedPermissions, + SupportedDBObjectType, + ProxyErrorTypes, + EnterpriseLicenseData, + LiteLLM_JWTAuth, + TokenCountRequest, + TransformRequestBody, + LiteLLM_TeamTable, + SpecialModelNames, +) from litellm._uuid import uuid from litellm.constants import ( AIOHTTP_CONNECTOR_LIMIT, @@ -45,6 +80,9 @@ LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS, LITELLM_SETTINGS_SAFE_DB_OVERRIDES, ) +from litellm.litellm_core_utils.litellm_logging import ( + _init_custom_logger_compatible_class, +) from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.proxy.common_utils.callback_utils import ( normalize_callback_names, @@ -2154,6 +2192,12 @@ def parse_search_tools(self, config: dict) -> Optional[List[SearchToolTypedDict] List of validated SearchToolTypedDict or None if not configured """ search_tools_raw = config.get("search_tools", None) + if not search_tools_raw: + # Check in general_settings + general_settings = config.get("general_settings", {}) + if general_settings: + search_tools_raw = general_settings.get("search_tools", None) + if not search_tools_raw: return None @@ -2898,9 +2942,6 @@ def _load_alerting_settings(self, general_settings: dict): """ Initialize alerting settings """ - from litellm.litellm_core_utils.litellm_logging import ( - _init_custom_logger_compatible_class, - ) _alerting_callbacks = general_settings.get("alerting", None) 
verbose_proxy_logger.debug(f"_alerting_callbacks: {general_settings}") @@ -3198,6 +3239,8 @@ async def _update_llm_router( verbose_proxy_logger.debug(f"updated llm_router: {llm_router}") else: verbose_proxy_logger.debug(f"len new_models: {len(models_list)}") + if search_tools is not None and llm_router is not None: + llm_router.search_tools = search_tools ## DELETE MODEL LOGIC await self._delete_deployment(db_models=models_list) @@ -4579,6 +4622,68 @@ async def async_assistants_data_generator( yield f"data: {error_returned}\n\n" +def _get_client_requested_model_for_streaming(request_data: dict) -> str: + """ + Prefer the original client-requested model (pre-alias mapping) when available. + + Pre-call processing can rewrite `request_data["model"]` for aliasing/routing purposes. + The OpenAI-compatible public `model` field should reflect what the client sent. + """ + requested_model = request_data.get("_litellm_client_requested_model") + if isinstance(requested_model, str): + return requested_model + + requested_model = request_data.get("model") + return requested_model if isinstance(requested_model, str) else "" + + +def _restamp_streaming_chunk_model( + *, + chunk: Any, + requested_model_from_client: str, + request_data: dict, + model_mismatch_logged: bool, +) -> Tuple[Any, bool]: + # Always return the client-requested model name (not provider-prefixed internal identifiers) + # on streaming chunks. + # + # Note: This warning is intentionally verbose. A mismatch is a useful signal that an + # internal provider/deployment identifier is leaking into the public API, and helps + # maintainers/operators catch regressions while preserving OpenAI-compatible output. 
+ if not requested_model_from_client or not isinstance(chunk, (BaseModel, dict)): + return chunk, model_mismatch_logged + + downstream_model = ( + chunk.get("model") if isinstance(chunk, dict) else getattr(chunk, "model", None) + ) + if not model_mismatch_logged and downstream_model != requested_model_from_client: + verbose_proxy_logger.warning( + "litellm_call_id=%s: streaming chunk model mismatch - requested=%r downstream=%r. Overriding model to requested.", + request_data.get("litellm_call_id"), + requested_model_from_client, + downstream_model, + ) + model_mismatch_logged = True + + if isinstance(chunk, dict): + chunk["model"] = requested_model_from_client + return chunk, model_mismatch_logged + + try: + setattr(chunk, "model", requested_model_from_client) + except Exception as e: + verbose_proxy_logger.error( + "litellm_call_id=%s: failed to override chunk.model=%r on chunk_type=%s. error=%s", + request_data.get("litellm_call_id"), + requested_model_from_client, + type(chunk), + str(e), + exc_info=True, + ) + + return chunk, model_mismatch_logged + + async def async_data_generator( response, user_api_key_dict: UserAPIKeyAuth, request_data: dict ): @@ -4587,6 +4692,10 @@ async def async_data_generator( # Use a list to accumulate response segments to avoid O(n^2) string concatenation str_so_far_parts: list[str] = [] error_message: Optional[str] = None + requested_model_from_client = _get_client_requested_model_for_streaming( + request_data=request_data + ) + model_mismatch_logged = False async for chunk in proxy_logging_obj.async_post_call_streaming_iterator_hook( user_api_key_dict=user_api_key_dict, response=response, @@ -4608,6 +4717,13 @@ async def async_data_generator( response_str = litellm.get_response_string(response_obj=chunk) str_so_far_parts.append(response_str) + chunk, model_mismatch_logged = _restamp_streaming_chunk_model( + chunk=chunk, + requested_model_from_client=requested_model_from_client, + request_data=request_data, + 
model_mismatch_logged=model_mismatch_logged, + ) + if isinstance(chunk, BaseModel): chunk = chunk.model_dump_json(exclude_none=True, exclude_unset=True) elif isinstance(chunk, str) and chunk.startswith("data: "): diff --git a/litellm/utils.py b/litellm/utils.py index d7fb4855a48..61a446564dc 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -771,13 +771,15 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## LAZY LOAD COROUTINE CHECKER ## - get_coroutine_checker_fn = getattr(sys.modules[__name__], "get_coroutine_checker") + get_coroutine_checker_fn = getattr( + sys.modules[__name__], "get_coroutine_checker" + ) coroutine_checker = get_coroutine_checker_fn() ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, "CustomLogger"]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, "CustomLogger"]]] = ( + kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1660,9 +1662,9 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -1709,9 +1711,9 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -3640,10 +3642,10 @@ def pre_process_non_default_params( if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = 
provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -3772,16 +3774,16 @@ def pre_process_optional_params( True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -4937,9 +4939,9 @@ def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) return delta if isinstance(delta, str) else "" # Handle standard ModelResponse and ModelResponseStream - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) # Use list accumulation to avoid O(n^2) string concatenation across choices response_parts: List[str] = [] @@ -7714,25 +7716,29 @@ def validate_chat_completion_tool_choice( f"Invalid tool choice, tool_choice={tool_choice}. Got={type(tool_choice)}. Expecting str, or dict. 
Please ensure tool_choice follows the OpenAI tool_choice spec" ) -def validate_openai_optional_params( - stop: Optional[Union[str, List[str]]] = None, - **kwargs -) -> Optional[Union[str, List[str]]]: - """ - Validates and fixes OpenAI optional parameters. - - Args: - stop: Stop sequences (string or list of strings) - **kwargs: Additional optional parameters - - Returns: - Validated stop parameter (truncated to 4 elements if needed) - """ - if stop is not None and isinstance(stop, list) and not litellm.disable_stop_sequence_limit: + +def validate_openai_optional_params( + stop: Optional[Union[str, List[str]]] = None, **kwargs +) -> Optional[Union[str, List[str]]]: + """ + Validates and fixes OpenAI optional parameters. + + Args: + stop: Stop sequences (string or list of strings) + **kwargs: Additional optional parameters + + Returns: + Validated stop parameter (truncated to 4 elements if needed) + """ + if ( + stop is not None + and isinstance(stop, list) + and not litellm.disable_stop_sequence_limit + ): # Truncate to 4 elements if more are provided as openai only supports up to 4 stop sequences - if len(stop) > 4: - stop = stop[:4] - + if len(stop) > 4: + stop = stop[:4] + return stop @@ -8061,6 +8067,8 @@ def get_provider_embedding_config( return VercelAIGatewayEmbeddingConfig() elif litellm.LlmProviders.GIGACHAT == provider: return litellm.GigaChatEmbeddingConfig() + elif litellm.LlmProviders.HOSTED_VLLM == provider: + return litellm.HostedVLLMEmbeddingConfig() elif litellm.LlmProviders.SAGEMAKER == provider: from litellm.llms.sagemaker.embedding.transformation import ( SagemakerEmbeddingConfig, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 6a605f460d4..8dd02537923 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -23790,6 +23790,20 @@ "output_cost_per_token": 6.5e-07, "supports_tool_choice": true }, + "openrouter/moonshotai/kimi-k2.5": { + 
"cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://openrouter.ai/moonshotai/kimi-k2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, "openrouter/nousresearch/nous-hermes-llama2-13b": { "input_cost_per_token": 2e-07, "litellm_provider": "openrouter", diff --git a/tests/litellm/test_stream_chunk_builder_images.py b/tests/litellm/test_stream_chunk_builder_images.py new file mode 100644 index 00000000000..c51a14ede67 --- /dev/null +++ b/tests/litellm/test_stream_chunk_builder_images.py @@ -0,0 +1,242 @@ +""" +Test that stream_chunk_builder correctly preserves images from streaming chunks. + +This tests the fix for https://github.com/BerriAI/litellm/issues/19478 +where images from models like gemini-2.5-flash-image were lost when +rebuilding the response from streaming chunks. +""" +import pytest +import litellm +from litellm import stream_chunk_builder + + +def test_stream_chunk_builder_preserves_images(): + """ + Test that stream_chunk_builder correctly preserves images from streaming chunks. 
+ """ + # Simulate streaming chunks from an image generation model + init_chunks = [ + { + "id": "chatcmpl-image-test", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-image-test", + "choices": [ + { + "index": 0, + "delta": { + "images": [ + { + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==", + "detail": "auto" + }, + "index": 0, + "type": "image_url" + } + ], + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-image-test", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + + response = stream_chunk_builder(chunks=chunks) + + # Verify that images are preserved in the rebuilt response + assert response.choices[0].message.images is not None, "Images should be preserved in stream_chunk_builder" + assert len(response.choices[0].message.images) == 1, "Should have exactly 1 image" + assert response.choices[0].message.images[0]["type"] == "image_url" + assert "base64" in response.choices[0].message.images[0]["image_url"]["url"] + + +def test_stream_chunk_builder_preserves_multiple_images(): + """ + Test that stream_chunk_builder correctly preserves multiple images from different chunks. 
+ """ + init_chunks = [ + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "Here are your images:", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": { + "images": [ + { + "image_url": {"url": "data:image/png;base64,image1data", "detail": "auto"}, + "index": 0, + "type": "image_url" + } + ], + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": { + "images": [ + { + "image_url": {"url": "data:image/png;base64,image2data", "detail": "auto"}, + "index": 1, + "type": "image_url" + } + ], + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + + response = stream_chunk_builder(chunks=chunks) + + # Verify content is preserved + assert response.choices[0].message.content == "Here are your images:" + + # Verify all images are preserved + assert response.choices[0].message.images is not None, "Images should be preserved" + assert len(response.choices[0].message.images) == 2, "Should have exactly 2 images" + assert "image1data" in response.choices[0].message.images[0]["image_url"]["url"] + assert "image2data" in response.choices[0].message.images[1]["image_url"]["url"] + + +def 
test_stream_chunk_builder_no_images(): + """ + Test that stream_chunk_builder works correctly when there are no images (regression test). + """ + init_chunks = [ + { + "id": "chatcmpl-no-image-test", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "Hello, ", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gpt-4", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-no-image-test", + "choices": [ + { + "index": 0, + "delta": { + "content": "world!", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gpt-4", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-no-image-test", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "created": 1737654321, + "model": "gpt-4", + "object": "chat.completion.chunk", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + + response = stream_chunk_builder(chunks=chunks) + + # Verify content is preserved + assert response.choices[0].message.content == "Hello, world!" 
+ + # Verify images attribute doesn't exist or is None (no images in this stream) + images = getattr(response.choices[0].message, 'images', None) + assert images is None, "Should not have images when none were in the stream" diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py index 9b3b6aeaea1..ddc01db0ec7 100644 --- a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py +++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py @@ -1,10 +1,7 @@ import json import os import sys -from unittest.mock import AsyncMock, MagicMock, patch - -import httpx -import pytest +from unittest.mock import MagicMock, patch sys.path.insert( 0, os.path.abspath("../../../../..") @@ -47,15 +44,34 @@ def test_hosted_vllm_chat_transformation_file_url(): def test_hosted_vllm_chat_transformation_with_audio_url(): from litellm import completion - from litellm.llms.custom_httpx.http_handler import HTTPHandler - - client = MagicMock() - with patch.object( - client.chat.completions.with_raw_response, "create", return_value=MagicMock() - ) as mock_post: + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "llama-3.1-70b-instruct", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Test response"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + mock_response.text = json.dumps(mock_response.json.return_value) + mock_client.post.return_value = mock_response + + with patch( + "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client", + return_value=mock_client, + ): try: - response = 
completion( + completion( model="hosted_vllm/llama-3.1-70b-instruct", messages=[ { @@ -68,14 +84,15 @@ def test_hosted_vllm_chat_transformation_with_audio_url(): ], }, ], - client=client, + api_base="https://test-vllm.example.com/v1", ) - except Exception as e: - print(f"Error: {e}") + except Exception: + pass - mock_post.assert_called_once() - print(f"mock_post.call_args.kwargs: {mock_post.call_args.kwargs}") - assert mock_post.call_args.kwargs["messages"] == [ + mock_client.post.assert_called_once() + call_kwargs = mock_client.post.call_args[1] + request_data = json.loads(call_kwargs["data"]) + assert request_data["messages"] == [ { "role": "user", "content": [ diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py new file mode 100644 index 00000000000..8f98b3ca8f1 --- /dev/null +++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py @@ -0,0 +1,152 @@ +""" +Test SSL verification for hosted_vllm provider. + +This test ensures that the ssl_verify parameter is properly passed through +to the HTTP client when using the hosted_vllm provider. + +Issue: ssl_verify parameter was being ignored because hosted_vllm fell through +to the OpenAI catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client. 
+""" + +import os +import sys +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +import litellm + + +class TestHostedVLLMSSLVerify: + """Test suite for SSL verification in hosted_vllm provider.""" + + @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client") + def test_hosted_vllm_ssl_verify_false_sync(self, mock_get_httpx_client): + """Test that ssl_verify=False is passed to the HTTP client for sync calls.""" + # Setup mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test response", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + } + mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}' + mock_client.post.return_value = mock_response + mock_get_httpx_client.return_value = mock_client + + try: + litellm.completion( + model="hosted_vllm/test-model", + messages=[{"role": "user", "content": "Hello"}], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify _get_httpx_client was called with ssl_verify=False + mock_get_httpx_client.assert_called() + call_args = 
mock_get_httpx_client.call_args + + # Check that params contains ssl_verify=False + if call_args[0]: + # Positional argument + params = call_args[0][0] + else: + # Keyword argument + params = call_args[1].get("params", {}) + + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client") + @pytest.mark.asyncio + async def test_hosted_vllm_ssl_verify_false_async( + self, mock_get_async_httpx_client + ): + """Test that ssl_verify=False is passed to the HTTP client for async calls.""" + # Setup mock async client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test response", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + } + mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}' + + async def mock_post(*args, **kwargs): + return mock_response + + mock_client.post = mock_post + mock_get_async_httpx_client.return_value = mock_client + + try: + await litellm.acompletion( + model="hosted_vllm/test-model", + messages=[{"role": "user", "content": "Hello"}], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # 
Verify get_async_httpx_client was called with ssl_verify=False + mock_get_async_httpx_client.assert_called() + call_kwargs = mock_get_async_httpx_client.call_args[1] + + # Check that params contains ssl_verify=False + params = call_kwargs.get("params", {}) + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"]) diff --git a/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py new file mode 100644 index 00000000000..bb911814c23 --- /dev/null +++ b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py @@ -0,0 +1,140 @@ +""" +Test SSL verification for hosted_vllm provider embeddings. + +This test ensures that the ssl_verify parameter is properly passed through +to the HTTP client when using the hosted_vllm provider for embeddings. + +Issue: ssl_verify parameter was being ignored because hosted_vllm fell through +to the openai_like catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client. 
+""" + +import os +import sys +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +import litellm + + +class TestHostedVLLMEmbeddingSSLVerify: + """Test suite for SSL verification in hosted_vllm provider embeddings.""" + + @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client") + def test_hosted_vllm_embedding_ssl_verify_false_sync(self, mock_get_httpx_client): + """Test that ssl_verify=False is passed to the HTTP client for sync embedding calls.""" + # Setup mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.1, 0.2, 0.3, 0.4, 0.5], + } + ], + "model": "text-embedding-model", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5, + }, + } + mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}' + mock_client.post.return_value = mock_response + mock_get_httpx_client.return_value = mock_client + + try: + litellm.embedding( + model="hosted_vllm/text-embedding-model", + input=["hello world"], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify _get_httpx_client was called with ssl_verify=False + mock_get_httpx_client.assert_called() + call_args = mock_get_httpx_client.call_args + + # Check that params contains ssl_verify=False + if call_args[0]: + # Positional argument + params = call_args[0][0] + else: + # Keyword argument + params = 
call_args[1].get("params", {}) + + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client") + @pytest.mark.asyncio + async def test_hosted_vllm_embedding_ssl_verify_false_async( + self, mock_get_async_httpx_client + ): + """Test that ssl_verify=False is passed to the HTTP client for async embedding calls.""" + # Setup mock async client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.1, 0.2, 0.3, 0.4, 0.5], + } + ], + "model": "text-embedding-model", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5, + }, + } + mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}' + + async def mock_post(*args, **kwargs): + return mock_response + + mock_client.post = mock_post + mock_get_async_httpx_client.return_value = mock_client + + try: + await litellm.aembedding( + model="hosted_vllm/text-embedding-model", + input=["hello world"], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify get_async_httpx_client was called with ssl_verify=False + mock_get_async_httpx_client.assert_called() + call_kwargs = mock_get_async_httpx_client.call_args[1] + + # Check that params contains ssl_verify=False + params = call_kwargs.get("params", {}) + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + +if __name__ == "__main__": + 
def _image_tool_message(call_id: str, url: str) -> dict:
    """OpenAI-style tool message whose content is a single image_url part."""
    return {
        "role": "tool",
        "tool_call_id": call_id,
        "content": [{"type": "image_url", "image_url": {"url": url}}],
    }


def _tool_call_conversation(call_id: str, url: str, user_text: str) -> list:
    """Full user -> assistant(tool_call) -> tool(image result) conversation."""
    return [
        {"role": "user", "content": user_text},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": call_id,
                    "type": "function",
                    "function": {"name": "get_image", "arguments": "{}"},
                }
            ],
        },
        _image_tool_message(call_id, url),
    ]


def _first_tool_result_image(converted_messages):
    """Return the first image block inside any tool_result, or None if absent."""
    for msg in converted_messages:
        if msg.get("role") != "user":
            continue
        for part in msg.get("content", []):
            if isinstance(part, dict) and part.get("type") == "tool_result":
                for item in part.get("content", []):
                    if isinstance(item, dict) and item.get("type") == "image":
                        return item
    return None


class TestToolMessageImageURLHandling:
    """
    Tool messages carrying image_url content must be converted to base64 for
    Vertex AI (which cannot fetch remote URLs), while the regular Anthropic
    API keeps the URL form.

    Issue: https://github.com/BerriAI/litellm/issues/19891
    """

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_convert_to_anthropic_tool_result_with_force_base64(
        self, mock_convert_url: MagicMock
    ):
        """force_base64=True converts the tool-result image URL to base64."""
        mock_convert_url.return_value = "data:image/jpeg;base64,/9j/4AAQSkZJRg=="

        result = convert_to_anthropic_tool_result(
            _image_tool_message("call_123", "https://example.com/tool_result.jpg"),
            force_base64=True,
        )

        mock_convert_url.assert_called_once_with(url="https://example.com/tool_result.jpg")
        assert result["type"] == "tool_result"
        assert result["tool_use_id"] == "call_123"

        content = result["content"]
        assert len(content) == 1
        assert content[0]["type"] == "image"
        assert content[0]["source"]["type"] == "base64"

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_convert_to_anthropic_tool_result_without_force_base64(
        self, mock_convert_url: MagicMock
    ):
        """force_base64=False keeps the URL form and never fetches the image."""
        result = convert_to_anthropic_tool_result(
            _image_tool_message("call_456", "https://example.com/image.jpg"),
            force_base64=False,
        )

        mock_convert_url.assert_not_called()
        assert result["type"] == "tool_result"

        content = result["content"]
        assert len(content) == 1
        assert content[0]["type"] == "image"
        assert content[0]["source"]["type"] == "url"

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_vertex_ai_tool_message_converts_image_to_base64(
        self, mock_convert_url: MagicMock
    ):
        """Full conversation via Vertex AI: tool-result image URL becomes base64."""
        mock_convert_url.return_value = "data:image/jpeg;base64,/9j/4AAQSkZJRg=="

        result = anthropic_messages_pt(
            messages=_tool_call_conversation(
                "call_789",
                "https://example.com/result.jpg",
                "Get me an image and describe it",
            ),
            model="claude-sonnet-4",
            llm_provider="vertex_ai",
        )

        mock_convert_url.assert_called_once_with(url="https://example.com/result.jpg")

        image = _first_tool_result_image(result)
        if image is None:
            pytest.fail("Could not find image in tool result")
        assert image["source"]["type"] == "base64"

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_regular_anthropic_tool_message_uses_url(
        self, mock_convert_url: MagicMock
    ):
        """Full conversation via plain Anthropic: tool-result image stays a URL."""
        result = anthropic_messages_pt(
            messages=_tool_call_conversation(
                "call_abc", "https://example.com/image.jpg", "Get me an image"
            ),
            model="claude-sonnet-4",
            llm_provider="anthropic",
        )

        mock_convert_url.assert_not_called()

        image = _first_tool_result_image(result)
        if image is None:
            pytest.fail("Could not find image in tool result")
        assert image["source"]["type"] == "url"
+ """ + from litellm.proxy.health_endpoints._health_endpoints import ( + health_services_endpoint, + ) + + # Mock datadog_llm_observability to be in success_callback so the generic branch handles it + with patch("litellm.success_callback", ["datadog_llm_observability"]): + result = await health_services_endpoint( + service="datadog_llm_observability" + ) + + # Should not raise HTTPException(400) and should return success + assert result["status"] == "success" + assert "datadog_llm_observability" in result["message"] + + +@pytest.mark.asyncio +async def test_health_services_endpoint_rejects_unknown_service(): + """ + Verify that an unknown service name is rejected with a 400 error. + """ + from litellm.proxy._types import ProxyException + + with pytest.raises(ProxyException): + await health_services_endpoint( + service="totally_unknown_service_xyz" + ) + + @pytest.fixture(scope="function") def proxy_client(monkeypatch): """ diff --git a/tests/test_litellm/proxy/test_response_model_sanitization.py b/tests/test_litellm/proxy/test_response_model_sanitization.py new file mode 100644 index 00000000000..b1bb8d0ed39 --- /dev/null +++ b/tests/test_litellm/proxy/test_response_model_sanitization.py @@ -0,0 +1,217 @@ +import asyncio +import json +import os +import sys +from typing import AsyncGenerator +from unittest.mock import AsyncMock, MagicMock + +import pytest +import yaml +from fastapi.testclient import TestClient + +sys.path.insert(0, os.path.abspath("../../..")) + +import litellm + +pytestmark = pytest.mark.flaky(condition=False) + + +def _initialize_proxy_with_config(config: dict, tmp_path) -> TestClient: + """ + Initialize the proxy server with a temporary config file and return a TestClient. + + IMPORTANT: proxy_server.initialize() mutates module-level globals. We must call + cleanup_router_config_variables() before initializing to prevent cross-test bleed. 
+ """ + from litellm.proxy.proxy_server import app, cleanup_router_config_variables, initialize + + cleanup_router_config_variables() + + config_fp = tmp_path / "proxy_config.yaml" + config_fp.write_text(yaml.safe_dump(config)) + + asyncio.run(initialize(config=str(config_fp), debug=True)) + return TestClient(app) + + +def _make_minimal_chat_completion_response(model: str) -> litellm.ModelResponse: + response = litellm.ModelResponse() + response.model = model + response.choices[0].message.content = "hello" # type: ignore[union-attr] + response.choices[0].finish_reason = "stop" # type: ignore[union-attr] + return response + + +def _make_model_response_stream_chunk(model: str) -> litellm.ModelResponseStream: + """ + Create a minimal OpenAI-compatible chat.completion.chunk object. + """ + chunk_dict = { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": model, + "choices": [ + { + "index": 0, + "delta": {"role": "assistant", "content": "hi"}, + "finish_reason": None, + } + ], + } + return litellm.ModelResponseStream(**chunk_dict) + + +def test_proxy_chat_completion_does_not_return_provider_prefixed_model(tmp_path, monkeypatch): + """ + Regression test: + + - Client asks for `model="vllm-model"` (no provider prefix) + - Internal provider path uses `hosted_vllm/...` + - Proxy should not leak `hosted_vllm/` in the client-facing `model` field. + """ + client_model = "vllm-model" + internal_model = f"hosted_vllm/{client_model}" + + client = _initialize_proxy_with_config( + config={ + "general_settings": {"master_key": "sk-1234"}, + "model_list": [ + { + "model_name": client_model, + "litellm_params": {"model": internal_model}, + } + ], + }, + tmp_path=tmp_path, + ) + + # Patch router call to avoid making any real network request. 
+ from litellm.proxy import proxy_server + + monkeypatch.setattr( + proxy_server.llm_router, # type: ignore[arg-type] + "acompletion", + AsyncMock(return_value=_make_minimal_chat_completion_response(model=internal_model)), + ) + + # Also no-op proxy logging hooks to keep this test focused and deterministic. + monkeypatch.setattr(proxy_server.proxy_logging_obj, "during_call_hook", AsyncMock(return_value=None)) + monkeypatch.setattr(proxy_server.proxy_logging_obj, "update_request_status", AsyncMock(return_value=None)) + monkeypatch.setattr(proxy_server.proxy_logging_obj, "post_call_success_hook", AsyncMock(side_effect=lambda **kwargs: kwargs["response"])) + + resp = client.post( + "/v1/chat/completions", + headers={"Authorization": "Bearer sk-1234"}, + json={"model": client_model, "messages": [{"role": "user", "content": "hi"}]}, + ) + + assert resp.status_code == 200, resp.text + body = resp.json() + assert body["model"] == client_model + assert not body["model"].startswith("hosted_vllm/") + + +@pytest.mark.asyncio +async def test_proxy_streaming_chunks_do_not_return_provider_prefixed_model(monkeypatch): + """ + Regression test for streaming: + + Even if a streaming chunk contains `model="hosted_vllm/<...>"`, the proxy SSE layer + should not leak the provider prefix to the client. + """ + client_model = "vllm-model" + internal_model = f"hosted_vllm/{client_model}" + + from litellm.proxy._types import UserAPIKeyAuth + from litellm.proxy import proxy_server + + # Patch proxy_logging_obj hooks so async_data_generator yields exactly our chunk. 
+ async def _iterator_hook( + user_api_key_dict: UserAPIKeyAuth, + response: AsyncGenerator, + request_data: dict, + ): + yield _make_model_response_stream_chunk(model=internal_model) + + monkeypatch.setattr(proxy_server.proxy_logging_obj, "async_post_call_streaming_iterator_hook", _iterator_hook) + monkeypatch.setattr( + proxy_server.proxy_logging_obj, + "async_post_call_streaming_hook", + AsyncMock(side_effect=lambda **kwargs: kwargs["response"]), + ) + + user_api_key_dict = UserAPIKeyAuth(api_key="sk-1234") + + gen = proxy_server.async_data_generator( + response=MagicMock(), + user_api_key_dict=user_api_key_dict, + request_data={"model": client_model}, + ) + + chunks = [] + async for item in gen: + chunks.append(item) + + # First chunk is expected to be JSON, last chunk is [DONE] + assert len(chunks) >= 2 + first = chunks[0] + assert first.startswith("data: ") + + payload = json.loads(first[len("data: ") :].strip()) + assert payload["model"] == client_model + assert not payload["model"].startswith("hosted_vllm/") + + +@pytest.mark.asyncio +async def test_proxy_streaming_chunks_use_client_requested_model_before_alias_mapping(monkeypatch): + """ + Regression test for alias mapping on streaming: + + - `common_processing_pre_call_logic` can rewrite `request_data["model"]` via model_alias_map / key-specific aliases. + - Non-streaming responses are restamped using the original client-requested model (captured before the rewrite). + - Streaming chunks must do the same to avoid mismatched `model` values between streaming and non-streaming. 
+ """ + client_model_alias = "alias-model" + canonical_model = "vllm-model" + internal_model = f"hosted_vllm/{canonical_model}" + + from litellm.proxy._types import UserAPIKeyAuth + from litellm.proxy import proxy_server + + async def _iterator_hook( + user_api_key_dict: UserAPIKeyAuth, + response: AsyncGenerator, + request_data: dict, + ): + yield _make_model_response_stream_chunk(model=internal_model) + + monkeypatch.setattr(proxy_server.proxy_logging_obj, "async_post_call_streaming_iterator_hook", _iterator_hook) + monkeypatch.setattr( + proxy_server.proxy_logging_obj, + "async_post_call_streaming_hook", + AsyncMock(side_effect=lambda **kwargs: kwargs["response"]), + ) + + user_api_key_dict = UserAPIKeyAuth(api_key="sk-1234") + + gen = proxy_server.async_data_generator( + response=MagicMock(), + user_api_key_dict=user_api_key_dict, + request_data={ + "model": canonical_model, + "_litellm_client_requested_model": client_model_alias, + }, + ) + + chunks = [] + async for item in gen: + chunks.append(item) + + assert len(chunks) >= 2 + first = chunks[0] + assert first.startswith("data: ") + + payload = json.loads(first[len("data: ") :].strip()) + assert payload["model"] == client_model_alias + assert not payload["model"].startswith("hosted_vllm/") diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index f6c24d19df5..6a79fd0823b 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -2543,6 +2543,48 @@ def test_model_info_for_vertex_ai_deepseek_model(): print("vertex deepseek model info", model_info) +def test_model_info_for_openrouter_kimi_k2_5(): + """ + Test that openrouter/moonshotai/kimi-k2.5 model info is correctly configured + in model_prices_and_context_window.json. 
+ + Model properties from OpenRouter API: + - context_length: 262144 + - pricing: prompt=$0.0000006, completion=$0.000003, input_cache_read=$0.0000001 + - modality: text+image->text (supports vision) + - supports: tool_choice, tools (function calling) + """ + import json + from pathlib import Path + + # Load directly from the local JSON file + json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json" + with open(json_path) as f: + model_cost = json.load(f) + + model_info = model_cost.get("openrouter/moonshotai/kimi-k2.5") + assert model_info is not None, "Model not found in model_prices_and_context_window.json" + assert model_info["litellm_provider"] == "openrouter" + assert model_info["mode"] == "chat" + + # Verify context window + assert model_info["max_input_tokens"] == 262144 + assert model_info["max_output_tokens"] == 262144 + assert model_info["max_tokens"] == 262144 + + # Verify pricing + assert model_info["input_cost_per_token"] == 6e-07 + assert model_info["output_cost_per_token"] == 3e-06 + assert model_info["cache_read_input_token_cost"] == 1e-07 + + # Verify capabilities + assert model_info["supports_vision"] is True + assert model_info["supports_function_calling"] is True + assert model_info["supports_tool_choice"] is True + + print("openrouter kimi-k2.5 model info", model_info) + + class TestGetValidModelsWithCLI: """Test get_valid_models function as used in CLI token usage""" diff --git a/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts b/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts index aa8532b80c7..523470e34d0 100644 --- a/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts +++ b/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts @@ -74,6 +74,144 @@ export const convertToDotPrompt = (prompt: PromptType): string => { return result.trim(); }; +type ParsedFrontmatter = { + model?: string; + config: { + temperature?: number; + max_tokens?: 
number; + top_p?: number; + }; + tools: Tool[]; +}; + +const parseNumber = (raw: string): number | undefined => { + const value = Number(raw); + return Number.isFinite(value) ? value : undefined; +}; + +const parseToolsFromFrontmatter = (lines: string[]): Tool[] => { + const tools: Tool[] = []; + let inToolsBlock = false; + + for (const line of lines) { + const trimmed = line.trim(); + + if (!inToolsBlock) { + if (trimmed === "tools:" || trimmed.startsWith("tools:")) { + inToolsBlock = true; + } + continue; + } + + // New top-level key ends the tools block + if (line.length > 0 && !/^\s/.test(line) && trimmed !== "-" && !trimmed.startsWith("-")) { + break; + } + + const match = trimmed.match(/^-+\s*(.+)$/); + if (!match) continue; + + const rawJson = match[1].trim(); + if (!rawJson) continue; + + try { + const toolObj = JSON.parse(rawJson); + tools.push({ + name: toolObj?.function?.name || "Unnamed Tool", + description: toolObj?.function?.description || "", + json: JSON.stringify(toolObj, null, 2), + }); + } catch { + } + } + + return tools; +}; + +const parseDotpromptFrontmatter = (frontmatter: string): ParsedFrontmatter => { + const result: ParsedFrontmatter = { config: {}, tools: [] }; + const lines = frontmatter.split("\n"); + + result.tools = parseToolsFromFrontmatter(lines); + + for (const line of lines) { + const trimmedLine = line.trim(); + if (!trimmedLine) continue; + + // Skip known nested yaml sections and list items. 
+ if ( + trimmedLine.startsWith("input:") || + trimmedLine.startsWith("output:") || + trimmedLine.startsWith("schema:") || + trimmedLine.startsWith("format:") || + trimmedLine.startsWith("tools:") || + trimmedLine.startsWith("-") + ) { + continue; + } + + const colonIndex = trimmedLine.indexOf(":"); + if (colonIndex <= 0) continue; + + const key = trimmedLine.substring(0, colonIndex).trim(); + const value = trimmedLine.substring(colonIndex + 1).trim(); + + if (key === "model") { + result.model = value; + continue; + } + + if (key === "temperature") result.config.temperature = parseNumber(value); + if (key === "max_tokens") result.config.max_tokens = parseNumber(value); + if (key === "top_p") result.config.top_p = parseNumber(value); + } + + return result; +}; + +type ParsedBody = { developerMessage: string; messages: Message[] }; + +const parseDotpromptBody = (body: string): ParsedBody => { + const roleHeader = /^(System|Developer|User|Assistant):(?:\s(.*)|\s*)$/; + const messages: Message[] = []; + let developerMessage = ""; + + let currentRole: string | null = null; + let buffer: string[] = []; + + const commit = () => { + if (!currentRole) return; + + const content = buffer.join("\n").trim(); + if (currentRole === "developer") { + if (content) { + developerMessage = developerMessage ? `${developerMessage}\n\n${content}` : content; + } + } else if (content) { + messages.push({ role: currentRole, content }); + } else { + messages.push({ role: currentRole, content: "" }); + } + }; + + for (const line of body.split("\n")) { + const match = line.match(roleHeader); + if (match) { + commit(); + currentRole = match[1].toLowerCase(); + buffer = [match[2] ?? 
""]; + continue; + } + + if (!currentRole) continue; + buffer.push(line); + } + + commit(); + + return { developerMessage, messages }; +}; + export const parseExistingPrompt = (apiResponse: any): PromptType => { // Extract dotprompt_content from litellm_params const dotpromptContent = apiResponse?.prompt_spec?.litellm_params?.dotprompt_content || ""; @@ -88,63 +226,11 @@ export const parseExistingPrompt = (apiResponse: any): PromptType => { throw new Error("Invalid dotprompt format"); } - // Parse YAML frontmatter (parts[1]) const frontmatter = parts[1]; const content = parts.slice(2).join("---").trim(); - // Extract metadata from frontmatter - const metadata: any = {}; - frontmatter.split("\n").forEach((line: string) => { - const trimmedLine = line.trim(); - if (trimmedLine && !trimmedLine.startsWith("input:") && !trimmedLine.startsWith("output:") && !trimmedLine.startsWith("schema:") && !trimmedLine.startsWith("format:")) { - const colonIndex = trimmedLine.indexOf(":"); - if (colonIndex > 0) { - const key = trimmedLine.substring(0, colonIndex).trim(); - const value = trimmedLine.substring(colonIndex + 1).trim(); - if (key === "temperature" || key === "max_tokens" || key === "top_p") { - metadata[key] = parseFloat(value); - } else if (key === "model") { - metadata[key] = value; - } - } - } - }); - - // Parse content to extract developer message and user messages - let developerMessage = ""; - const messages: Message[] = []; - const lines = content.split("\n"); - let currentRole: "user" | "assistant" | null = null; - let currentContent = ""; - - for (const line of lines) { - if (line.startsWith("Developer:")) { - developerMessage = line.substring("Developer:".length).trim(); - } else if (line.startsWith("User:")) { - if (currentRole && currentContent) { - messages.push({ role: currentRole, content: currentContent.trim() }); - } - currentRole = "user"; - currentContent = line.substring("User:".length).trim(); - } else if (line.startsWith("Assistant:")) { - if 
(currentRole && currentContent) { - messages.push({ role: currentRole, content: currentContent.trim() }); - } - currentRole = "assistant"; - currentContent = line.substring("Assistant:".length).trim(); - } else if (line.trim() && currentRole) { - currentContent += "\n" + line.trim(); - } - } - - // Add the last message - if (currentRole && currentContent) { - messages.push({ role: currentRole, content: currentContent.trim() }); - } - - // Parse tools from frontmatter if present - const tools: Tool[] = []; - // TODO: Add tool parsing if needed + const parsedFrontmatter = parseDotpromptFrontmatter(frontmatter); + const parsedBody = parseDotpromptBody(content); // Strip version suffix from prompt name for display const promptId = apiResponse?.prompt_spec?.prompt_id || "Unnamed Prompt"; @@ -152,15 +238,14 @@ export const parseExistingPrompt = (apiResponse: any): PromptType => { return { name: baseName, - model: metadata.model || "gpt-4o", - config: { - temperature: metadata.temperature, - max_tokens: metadata.max_tokens, - top_p: metadata.top_p, - }, - tools: tools, - developerMessage: developerMessage, - messages: messages.length > 0 ? messages : [{ role: "user", content: "Enter task specifics. Use {{template_variables}} for dynamic inputs" }], + model: parsedFrontmatter.model || "gpt-4o", + config: parsedFrontmatter.config, + tools: parsedFrontmatter.tools, + developerMessage: parsedBody.developerMessage, + messages: + parsedBody.messages.length > 0 + ? parsedBody.messages + : [{ role: "user", content: "Enter task specifics. Use {{template_variables}} for dynamic inputs" }], }; };