diff --git a/litellm/__init__.py b/litellm/__init__.py index e5c09702b9b..9d39e4ca11d 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1467,6 +1467,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None: from .llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config as AzureOpenAIGPT5Config from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig + from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig diff --git a/litellm/_lazy_imports_registry.py b/litellm/_lazy_imports_registry.py index a92c6f95b0e..0e52e9a59eb 100644 --- a/litellm/_lazy_imports_registry.py +++ b/litellm/_lazy_imports_registry.py @@ -20,25 +20,53 @@ # Utils names that support lazy loading via _lazy_import_utils UTILS_NAMES = ( - "exception_type", "get_optional_params", "get_response_string", "token_counter", - "create_pretrained_tokenizer", "create_tokenizer", "supports_function_calling", - "supports_web_search", "supports_url_context", "supports_response_schema", - "supports_parallel_function_calling", "supports_vision", "supports_audio_input", - "supports_audio_output", "supports_system_messages", "supports_reasoning", - "get_litellm_params", "acreate", "get_max_tokens", "get_model_info", - "register_prompt_template", "validate_environment", "check_valid_key", - "register_model", "encode", "decode", "_calculate_retry_after", "_should_retry", - "get_supported_openai_params", "get_api_base", "get_first_chars_messages", - "ModelResponse", "ModelResponseStream", "EmbeddingResponse", "ImageResponse", - "TranscriptionResponse", "TextCompletionResponse", "get_provider_fields", - "ModelResponseListIterator", "get_valid_models", "timeout", - "get_llm_provider", "remove_index_from_tool_calls", + "exception_type", + "get_optional_params", + "get_response_string", + "token_counter", + "create_pretrained_tokenizer", + "create_tokenizer", + "supports_function_calling", + "supports_web_search", + "supports_url_context", + "supports_response_schema", + "supports_parallel_function_calling", + "supports_vision", + "supports_audio_input", + "supports_audio_output", + "supports_system_messages", + "supports_reasoning", + "get_litellm_params", + "acreate", + "get_max_tokens", + "get_model_info", + "register_prompt_template", + "validate_environment", + "check_valid_key", + "register_model", + "encode", + "decode", + "_calculate_retry_after", + "_should_retry", + "get_supported_openai_params", + "get_api_base", + "get_first_chars_messages", + "ModelResponse", + "ModelResponseStream", + "EmbeddingResponse", + "ImageResponse", + "TranscriptionResponse", + "TextCompletionResponse", + "get_provider_fields", + "ModelResponseListIterator", + "get_valid_models", + "timeout", + "get_llm_provider", + "remove_index_from_tool_calls", ) # Token counter names that support lazy loading via _lazy_import_token_counter -TOKEN_COUNTER_NAMES = ( - "get_modified_max_tokens", -) +TOKEN_COUNTER_NAMES = ("get_modified_max_tokens",) # LLM client cache names that support lazy loading via _lazy_import_llm_client_cache LLM_CLIENT_CACHE_NAMES = ( @@ -47,9 +75,7 @@ ) # Bedrock type names that support lazy loading via _lazy_import_bedrock_types -BEDROCK_TYPES_NAMES = ( - "COHERE_EMBEDDING_INPUT_TYPES", -) +BEDROCK_TYPES_NAMES = ("COHERE_EMBEDDING_INPUT_TYPES",) # Common types from litellm.types.utils that support lazy loading via # _lazy_import_types_utils @@ -236,6 +262,7 @@ "AzureOpenAIGPT5Config", "AzureOpenAITextConfig", "HostedVLLMChatConfig", + "HostedVLLMEmbeddingConfig", # Alias for backwards compatibility "VolcEngineConfig", # Alias for VolcEngineChatConfig "LlamafileChatConfig", @@ -388,7 +415,10 @@ "supports_web_search": (".utils", "supports_web_search"), "supports_url_context": (".utils", "supports_url_context"), "supports_response_schema": (".utils", "supports_response_schema"), - "supports_parallel_function_calling": (".utils", "supports_parallel_function_calling"), + "supports_parallel_function_calling": ( + ".utils", + "supports_parallel_function_calling", + ), "supports_vision": (".utils", "supports_vision"), "supports_audio_input": (".utils", "supports_audio_input"), "supports_audio_output": (".utils", "supports_audio_output"), @@ -419,8 +449,14 @@ "ModelResponseListIterator": (".utils", "ModelResponseListIterator"), "get_valid_models": (".utils", "get_valid_models"), "timeout": (".timeout", "timeout"), - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "remove_index_from_tool_calls": ( + "litellm.litellm_core_utils.core_helpers", + "remove_index_from_tool_calls", + ), } _COST_CALCULATOR_IMPORT_MAP = { @@ -442,11 +478,17 @@ } _TOKEN_COUNTER_IMPORT_MAP = { - "get_modified_max_tokens": ("litellm.litellm_core_utils.token_counter", "get_modified_max_tokens"), + "get_modified_max_tokens": ( + "litellm.litellm_core_utils.token_counter", + "get_modified_max_tokens", + ), } _BEDROCK_TYPES_IMPORT_MAP = { - "COHERE_EMBEDDING_INPUT_TYPES": ("litellm.types.llms.bedrock", "COHERE_EMBEDDING_INPUT_TYPES"), + "COHERE_EMBEDDING_INPUT_TYPES": ( + "litellm.types.llms.bedrock", + "COHERE_EMBEDDING_INPUT_TYPES", + ), } _CACHING_IMPORT_MAP = { @@ -458,294 +500,868 @@ _LITELLM_LOGGING_IMPORT_MAP = { "Logging": ("litellm.litellm_core_utils.litellm_logging", "Logging"), - "modify_integration": ("litellm.litellm_core_utils.litellm_logging", "modify_integration"), + "modify_integration": ( + "litellm.litellm_core_utils.litellm_logging", + "modify_integration", + ), } _DOTPROMPT_IMPORT_MAP = { - "global_prompt_manager": ("litellm.integrations.dotprompt", "global_prompt_manager"), - "global_prompt_directory": ("litellm.integrations.dotprompt", "global_prompt_directory"), - "set_global_prompt_directory": ("litellm.integrations.dotprompt", "set_global_prompt_directory"), + "global_prompt_manager": ( + "litellm.integrations.dotprompt", + "global_prompt_manager", + ), + "global_prompt_directory": ( + "litellm.integrations.dotprompt", + "global_prompt_directory", + ), + "set_global_prompt_directory": ( + "litellm.integrations.dotprompt", + "set_global_prompt_directory", + ), } _TYPES_IMPORT_MAP = { "GuardrailItem": ("litellm.types.guardrails", "GuardrailItem"), - "DefaultTeamSSOParams": ("litellm.types.proxy.management_endpoints.ui_sso", "DefaultTeamSSOParams"), - "LiteLLM_UpperboundKeyGenerateParams": ("litellm.types.proxy.management_endpoints.ui_sso", "LiteLLM_UpperboundKeyGenerateParams"), - "KeyManagementSystem": ("litellm.types.secret_managers.main", "KeyManagementSystem"), - "PriorityReservationSettings": ("litellm.types.utils", "PriorityReservationSettings"), + "DefaultTeamSSOParams": ( + "litellm.types.proxy.management_endpoints.ui_sso", + "DefaultTeamSSOParams", + ), + "LiteLLM_UpperboundKeyGenerateParams": ( + "litellm.types.proxy.management_endpoints.ui_sso", + "LiteLLM_UpperboundKeyGenerateParams", + ), + "KeyManagementSystem": ( + "litellm.types.secret_managers.main", + "KeyManagementSystem", + ), + "PriorityReservationSettings": ( + "litellm.types.utils", + "PriorityReservationSettings", + ), "CustomLogger": ("litellm.integrations.custom_logger", "CustomLogger"), - "LoggingCallbackManager": ("litellm.litellm_core_utils.logging_callback_manager", "LoggingCallbackManager"), - "DatadogLLMObsInitParams": ("litellm.types.integrations.datadog_llm_obs", "DatadogLLMObsInitParams"), + "LoggingCallbackManager": ( + "litellm.litellm_core_utils.logging_callback_manager", + "LoggingCallbackManager", + ), + "DatadogLLMObsInitParams": ( + "litellm.types.integrations.datadog_llm_obs", + "DatadogLLMObsInitParams", + ), } _LLM_PROVIDER_LOGIC_IMPORT_MAP = { - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "remove_index_from_tool_calls": ( + "litellm.litellm_core_utils.core_helpers", + "remove_index_from_tool_calls", + ), } _LLM_CONFIGS_IMPORT_MAP = { - "AmazonConverseConfig": (".llms.bedrock.chat.converse_transformation", "AmazonConverseConfig"), + "AmazonConverseConfig": ( + ".llms.bedrock.chat.converse_transformation", + "AmazonConverseConfig", + ), "OpenAILikeChatConfig": (".llms.openai_like.chat.handler", "OpenAILikeChatConfig"), - "GaladrielChatConfig": (".llms.galadriel.chat.transformation", "GaladrielChatConfig"), + "GaladrielChatConfig": ( + ".llms.galadriel.chat.transformation", + "GaladrielChatConfig", + ), "GithubChatConfig": (".llms.github.chat.transformation", "GithubChatConfig"), - "AzureAnthropicConfig": (".llms.azure_ai.anthropic.transformation", "AzureAnthropicConfig"), + "AzureAnthropicConfig": ( + ".llms.azure_ai.anthropic.transformation", + "AzureAnthropicConfig", + ), "BytezChatConfig": (".llms.bytez.chat.transformation", "BytezChatConfig"), - "CompactifAIChatConfig": (".llms.compactifai.chat.transformation", "CompactifAIChatConfig"), + "CompactifAIChatConfig": ( + ".llms.compactifai.chat.transformation", + "CompactifAIChatConfig", + ), "EmpowerChatConfig": (".llms.empower.chat.transformation", "EmpowerChatConfig"), "MinimaxChatConfig": (".llms.minimax.chat.transformation", "MinimaxChatConfig"), - "AiohttpOpenAIChatConfig": (".llms.aiohttp_openai.chat.transformation", "AiohttpOpenAIChatConfig"), - "HuggingFaceChatConfig": (".llms.huggingface.chat.transformation", "HuggingFaceChatConfig"), - "HuggingFaceEmbeddingConfig": (".llms.huggingface.embedding.transformation", "HuggingFaceEmbeddingConfig"), + "AiohttpOpenAIChatConfig": ( + ".llms.aiohttp_openai.chat.transformation", + "AiohttpOpenAIChatConfig", + ), + "HuggingFaceChatConfig": ( + ".llms.huggingface.chat.transformation", + "HuggingFaceChatConfig", + ), + "HuggingFaceEmbeddingConfig": ( + ".llms.huggingface.embedding.transformation", + "HuggingFaceEmbeddingConfig", + ), "OobaboogaConfig": (".llms.oobabooga.chat.transformation", "OobaboogaConfig"), "MaritalkConfig": (".llms.maritalk", "MaritalkConfig"), "OpenrouterConfig": (".llms.openrouter.chat.transformation", "OpenrouterConfig"), "DataRobotConfig": (".llms.datarobot.chat.transformation", "DataRobotConfig"), "AnthropicConfig": (".llms.anthropic.chat.transformation", "AnthropicConfig"), - "AnthropicTextConfig": (".llms.anthropic.completion.transformation", "AnthropicTextConfig"), + "AnthropicTextConfig": ( + ".llms.anthropic.completion.transformation", + "AnthropicTextConfig", + ), "GroqSTTConfig": (".llms.groq.stt.transformation", "GroqSTTConfig"), "TritonConfig": (".llms.triton.completion.transformation", "TritonConfig"), - "TritonGenerateConfig": (".llms.triton.completion.transformation", "TritonGenerateConfig"), - "TritonInferConfig": (".llms.triton.completion.transformation", "TritonInferConfig"), - "TritonEmbeddingConfig": (".llms.triton.embedding.transformation", "TritonEmbeddingConfig"), - "HuggingFaceRerankConfig": (".llms.huggingface.rerank.transformation", "HuggingFaceRerankConfig"), + "TritonGenerateConfig": ( + ".llms.triton.completion.transformation", + "TritonGenerateConfig", + ), + "TritonInferConfig": ( + ".llms.triton.completion.transformation", + "TritonInferConfig", + ), + "TritonEmbeddingConfig": ( + ".llms.triton.embedding.transformation", + "TritonEmbeddingConfig", + ), + "HuggingFaceRerankConfig": ( + ".llms.huggingface.rerank.transformation", + "HuggingFaceRerankConfig", + ), "DatabricksConfig": (".llms.databricks.chat.transformation", "DatabricksConfig"), - "DatabricksEmbeddingConfig": (".llms.databricks.embed.transformation", "DatabricksEmbeddingConfig"), + "DatabricksEmbeddingConfig": ( + ".llms.databricks.embed.transformation", + "DatabricksEmbeddingConfig", + ), "PredibaseConfig": (".llms.predibase.chat.transformation", "PredibaseConfig"), "ReplicateConfig": (".llms.replicate.chat.transformation", "ReplicateConfig"), "SnowflakeConfig": (".llms.snowflake.chat.transformation", "SnowflakeConfig"), "CohereRerankConfig": (".llms.cohere.rerank.transformation", "CohereRerankConfig"), - "CohereRerankV2Config": (".llms.cohere.rerank_v2.transformation", "CohereRerankV2Config"), - "AzureAIRerankConfig": (".llms.azure_ai.rerank.transformation", "AzureAIRerankConfig"), - "InfinityRerankConfig": (".llms.infinity.rerank.transformation", "InfinityRerankConfig"), + "CohereRerankV2Config": ( + ".llms.cohere.rerank_v2.transformation", + "CohereRerankV2Config", + ), + "AzureAIRerankConfig": ( + ".llms.azure_ai.rerank.transformation", + "AzureAIRerankConfig", + ), + "InfinityRerankConfig": ( + ".llms.infinity.rerank.transformation", + "InfinityRerankConfig", + ), "JinaAIRerankConfig": (".llms.jina_ai.rerank.transformation", "JinaAIRerankConfig"), - "DeepinfraRerankConfig": (".llms.deepinfra.rerank.transformation", "DeepinfraRerankConfig"), - "HostedVLLMRerankConfig": (".llms.hosted_vllm.rerank.transformation", "HostedVLLMRerankConfig"), - "NvidiaNimRerankConfig": (".llms.nvidia_nim.rerank.transformation", "NvidiaNimRerankConfig"), - "NvidiaNimRankingConfig": (".llms.nvidia_nim.rerank.ranking_transformation", "NvidiaNimRankingConfig"), - "VertexAIRerankConfig": (".llms.vertex_ai.rerank.transformation", "VertexAIRerankConfig"), - "FireworksAIRerankConfig": (".llms.fireworks_ai.rerank.transformation", "FireworksAIRerankConfig"), + "DeepinfraRerankConfig": ( + ".llms.deepinfra.rerank.transformation", + "DeepinfraRerankConfig", + ), + "HostedVLLMRerankConfig": ( + ".llms.hosted_vllm.rerank.transformation", + "HostedVLLMRerankConfig", + ), + "NvidiaNimRerankConfig": ( + ".llms.nvidia_nim.rerank.transformation", + "NvidiaNimRerankConfig", + ), + "NvidiaNimRankingConfig": ( + ".llms.nvidia_nim.rerank.ranking_transformation", + "NvidiaNimRankingConfig", + ), + "VertexAIRerankConfig": ( + ".llms.vertex_ai.rerank.transformation", + "VertexAIRerankConfig", + ), + "FireworksAIRerankConfig": ( + ".llms.fireworks_ai.rerank.transformation", + "FireworksAIRerankConfig", + ), "VoyageRerankConfig": (".llms.voyage.rerank.transformation", "VoyageRerankConfig"), "ClarifaiConfig": (".llms.clarifai.chat.transformation", "ClarifaiConfig"), "AI21ChatConfig": (".llms.ai21.chat.transformation", "AI21ChatConfig"), "LlamaAPIConfig": (".llms.meta_llama.chat.transformation", "LlamaAPIConfig"), - "TogetherAITextCompletionConfig": (".llms.together_ai.completion.transformation", "TogetherAITextCompletionConfig"), - "CloudflareChatConfig": (".llms.cloudflare.chat.transformation", "CloudflareChatConfig"), + "TogetherAITextCompletionConfig": ( + ".llms.together_ai.completion.transformation", + "TogetherAITextCompletionConfig", + ), + "CloudflareChatConfig": ( + ".llms.cloudflare.chat.transformation", + "CloudflareChatConfig", + ), "NovitaConfig": (".llms.novita.chat.transformation", "NovitaConfig"), "PetalsConfig": (".llms.petals.completion.transformation", "PetalsConfig"), "OllamaChatConfig": (".llms.ollama.chat.transformation", "OllamaChatConfig"), "OllamaConfig": (".llms.ollama.completion.transformation", "OllamaConfig"), "SagemakerConfig": (".llms.sagemaker.completion.transformation", "SagemakerConfig"), - "SagemakerChatConfig": (".llms.sagemaker.chat.transformation", "SagemakerChatConfig"), + "SagemakerChatConfig": ( + ".llms.sagemaker.chat.transformation", + "SagemakerChatConfig", + ), "CohereChatConfig": (".llms.cohere.chat.transformation", "CohereChatConfig"), - "AnthropicMessagesConfig": (".llms.anthropic.experimental_pass_through.messages.transformation", "AnthropicMessagesConfig"), - "AmazonAnthropicClaudeMessagesConfig": (".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeMessagesConfig"), + "AnthropicMessagesConfig": ( + ".llms.anthropic.experimental_pass_through.messages.transformation", + "AnthropicMessagesConfig", + ), + "AmazonAnthropicClaudeMessagesConfig": ( + ".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", + "AmazonAnthropicClaudeMessagesConfig", + ), "TogetherAIConfig": (".llms.together_ai.chat", "TogetherAIConfig"), "NLPCloudConfig": (".llms.nlp_cloud.chat.handler", "NLPCloudConfig"), - "VertexGeminiConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"), - "GoogleAIStudioGeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"), - "VertexAIAnthropicConfig": (".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", "VertexAIAnthropicConfig"), - "VertexAILlama3Config": (".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", "VertexAILlama3Config"), - "VertexAIAi21Config": (".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", "VertexAIAi21Config"), - "AmazonCohereChatConfig": (".llms.bedrock.chat.invoke_handler", "AmazonCohereChatConfig"), - "AmazonBedrockGlobalConfig": (".llms.bedrock.common_utils", "AmazonBedrockGlobalConfig"), - "AmazonAI21Config": (".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", "AmazonAI21Config"), - "AmazonInvokeNovaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", "AmazonInvokeNovaConfig"), - "AmazonQwen2Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", "AmazonQwen2Config"), - "AmazonQwen3Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", "AmazonQwen3Config"), + "VertexGeminiConfig": ( + ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", + "VertexGeminiConfig", + ), + "GoogleAIStudioGeminiConfig": ( + ".llms.gemini.chat.transformation", + "GoogleAIStudioGeminiConfig", + ), + "VertexAIAnthropicConfig": ( + ".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", + "VertexAIAnthropicConfig", + ), + "VertexAILlama3Config": ( + ".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", + "VertexAILlama3Config", + ), + "VertexAIAi21Config": ( + ".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", + "VertexAIAi21Config", + ), + "AmazonCohereChatConfig": ( + ".llms.bedrock.chat.invoke_handler", + "AmazonCohereChatConfig", + ), + "AmazonBedrockGlobalConfig": ( + ".llms.bedrock.common_utils", + "AmazonBedrockGlobalConfig", + ), + "AmazonAI21Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", + "AmazonAI21Config", + ), + "AmazonInvokeNovaConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", + "AmazonInvokeNovaConfig", + ), + "AmazonQwen2Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", + "AmazonQwen2Config", + ), + "AmazonQwen3Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", + "AmazonQwen3Config", + ), # Aliases for backwards compatibility - "VertexAIConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"), # Alias - "GeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"), # Alias - "AmazonAnthropicConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", "AmazonAnthropicConfig"), - "AmazonAnthropicClaudeConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeConfig"), - "AmazonCohereConfig": (".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", "AmazonCohereConfig"), - "AmazonLlamaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", "AmazonLlamaConfig"), - "AmazonDeepSeekR1Config": (".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", "AmazonDeepSeekR1Config"), - "AmazonMistralConfig": (".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", "AmazonMistralConfig"), - "AmazonMoonshotConfig": (".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", "AmazonMoonshotConfig"), - "AmazonTitanConfig": (".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", "AmazonTitanConfig"), - "AmazonTwelveLabsPegasusConfig": (".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", "AmazonTwelveLabsPegasusConfig"), - "AmazonInvokeConfig": (".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", "AmazonInvokeConfig"), - "AmazonBedrockOpenAIConfig": (".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", "AmazonBedrockOpenAIConfig"), - "AmazonStabilityConfig": (".llms.bedrock.image_generation.amazon_stability1_transformation", "AmazonStabilityConfig"), - "AmazonStability3Config": (".llms.bedrock.image_generation.amazon_stability3_transformation", "AmazonStability3Config"), - "AmazonNovaCanvasConfig": (".llms.bedrock.image_generation.amazon_nova_canvas_transformation", "AmazonNovaCanvasConfig"), - "AmazonTitanG1Config": (".llms.bedrock.embed.amazon_titan_g1_transformation", "AmazonTitanG1Config"), - "AmazonTitanMultimodalEmbeddingG1Config": (".llms.bedrock.embed.amazon_titan_multimodal_transformation", "AmazonTitanMultimodalEmbeddingG1Config"), + "VertexAIConfig": ( + ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", + "VertexGeminiConfig", + ), # Alias + "GeminiConfig": ( + ".llms.gemini.chat.transformation", + "GoogleAIStudioGeminiConfig", + ), # Alias + "AmazonAnthropicConfig": ( + ".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", + "AmazonAnthropicConfig", + ), + "AmazonAnthropicClaudeConfig": ( + ".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", + "AmazonAnthropicClaudeConfig", + ), + "AmazonCohereConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", + "AmazonCohereConfig", + ), + "AmazonLlamaConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", + "AmazonLlamaConfig", + ), + "AmazonDeepSeekR1Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", + "AmazonDeepSeekR1Config", + ), + "AmazonMistralConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", + "AmazonMistralConfig", + ), + "AmazonMoonshotConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", + "AmazonMoonshotConfig", + ), + "AmazonTitanConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", + "AmazonTitanConfig", + ), + "AmazonTwelveLabsPegasusConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", + "AmazonTwelveLabsPegasusConfig", + ), + "AmazonInvokeConfig": ( + ".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", + "AmazonInvokeConfig", + ), + "AmazonBedrockOpenAIConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", + "AmazonBedrockOpenAIConfig", + ), + "AmazonStabilityConfig": ( + ".llms.bedrock.image_generation.amazon_stability1_transformation", + "AmazonStabilityConfig", + ), + "AmazonStability3Config": ( + ".llms.bedrock.image_generation.amazon_stability3_transformation", + "AmazonStability3Config", + ), + "AmazonNovaCanvasConfig": ( + ".llms.bedrock.image_generation.amazon_nova_canvas_transformation", + "AmazonNovaCanvasConfig", + ), + "AmazonTitanG1Config": ( + ".llms.bedrock.embed.amazon_titan_g1_transformation", + "AmazonTitanG1Config", + ), + "AmazonTitanMultimodalEmbeddingG1Config": ( + ".llms.bedrock.embed.amazon_titan_multimodal_transformation", + "AmazonTitanMultimodalEmbeddingG1Config", + ), "CohereV2ChatConfig": (".llms.cohere.chat.v2_transformation", "CohereV2ChatConfig"), - "BedrockCohereEmbeddingConfig": (".llms.bedrock.embed.cohere_transformation", "BedrockCohereEmbeddingConfig"), - "TwelveLabsMarengoEmbeddingConfig": (".llms.bedrock.embed.twelvelabs_marengo_transformation", "TwelveLabsMarengoEmbeddingConfig"), - "AmazonNovaEmbeddingConfig": (".llms.bedrock.embed.amazon_nova_transformation", "AmazonNovaEmbeddingConfig"), + "BedrockCohereEmbeddingConfig": ( + ".llms.bedrock.embed.cohere_transformation", + "BedrockCohereEmbeddingConfig", + ), + "TwelveLabsMarengoEmbeddingConfig": ( + ".llms.bedrock.embed.twelvelabs_marengo_transformation", + "TwelveLabsMarengoEmbeddingConfig", + ), + "AmazonNovaEmbeddingConfig": ( + ".llms.bedrock.embed.amazon_nova_transformation", + "AmazonNovaEmbeddingConfig", + ), "OpenAIConfig": (".llms.openai.openai", "OpenAIConfig"), "MistralEmbeddingConfig": (".llms.openai.openai", "MistralEmbeddingConfig"), - "OpenAIImageVariationConfig": (".llms.openai.image_variations.transformation", "OpenAIImageVariationConfig"), + "OpenAIImageVariationConfig": ( + ".llms.openai.image_variations.transformation", + "OpenAIImageVariationConfig", + ), "DeepInfraConfig": (".llms.deepinfra.chat.transformation", "DeepInfraConfig"), - "DeepgramAudioTranscriptionConfig": (".llms.deepgram.audio_transcription.transformation", "DeepgramAudioTranscriptionConfig"), - "TopazImageVariationConfig": (".llms.topaz.image_variations.transformation", "TopazImageVariationConfig"), - "OpenAITextCompletionConfig": ("litellm.llms.openai.completion.transformation", "OpenAITextCompletionConfig"), + "DeepgramAudioTranscriptionConfig": ( + ".llms.deepgram.audio_transcription.transformation", + "DeepgramAudioTranscriptionConfig", + ), + "TopazImageVariationConfig": ( + ".llms.topaz.image_variations.transformation", + "TopazImageVariationConfig", + ), + "OpenAITextCompletionConfig": ( + "litellm.llms.openai.completion.transformation", + "OpenAITextCompletionConfig", + ), "GroqChatConfig": (".llms.groq.chat.transformation", "GroqChatConfig"), - "GenAIHubOrchestrationConfig": (".llms.sap.chat.transformation", "GenAIHubOrchestrationConfig"), - "VoyageEmbeddingConfig": (".llms.voyage.embedding.transformation", "VoyageEmbeddingConfig"), - "VoyageContextualEmbeddingConfig": (".llms.voyage.embedding.transformation_contextual", "VoyageContextualEmbeddingConfig"), - "InfinityEmbeddingConfig": (".llms.infinity.embedding.transformation", "InfinityEmbeddingConfig"), - "AzureAIStudioConfig": (".llms.azure_ai.chat.transformation", "AzureAIStudioConfig"), + "GenAIHubOrchestrationConfig": ( + ".llms.sap.chat.transformation", + "GenAIHubOrchestrationConfig", + ), + "VoyageEmbeddingConfig": ( + ".llms.voyage.embedding.transformation", + "VoyageEmbeddingConfig", + ), + "VoyageContextualEmbeddingConfig": ( + ".llms.voyage.embedding.transformation_contextual", + "VoyageContextualEmbeddingConfig", + ), + "InfinityEmbeddingConfig": ( + ".llms.infinity.embedding.transformation", + "InfinityEmbeddingConfig", + ), + "AzureAIStudioConfig": ( + ".llms.azure_ai.chat.transformation", + "AzureAIStudioConfig", + ), "MistralConfig": (".llms.mistral.chat.transformation", "MistralConfig"), - "OpenAIResponsesAPIConfig": (".llms.openai.responses.transformation", "OpenAIResponsesAPIConfig"), - "AzureOpenAIResponsesAPIConfig": (".llms.azure.responses.transformation", "AzureOpenAIResponsesAPIConfig"), - "AzureOpenAIOSeriesResponsesAPIConfig": (".llms.azure.responses.o_series_transformation", "AzureOpenAIOSeriesResponsesAPIConfig"), - "XAIResponsesAPIConfig": (".llms.xai.responses.transformation", "XAIResponsesAPIConfig"), - "LiteLLMProxyResponsesAPIConfig": (".llms.litellm_proxy.responses.transformation", "LiteLLMProxyResponsesAPIConfig"), - "VolcEngineResponsesAPIConfig": (".llms.volcengine.responses.transformation", "VolcEngineResponsesAPIConfig"), - "ManusResponsesAPIConfig": (".llms.manus.responses.transformation", "ManusResponsesAPIConfig"), - "GoogleAIStudioInteractionsConfig": (".llms.gemini.interactions.transformation", "GoogleAIStudioInteractionsConfig"), - "OpenAIOSeriesConfig": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"), - "AnthropicSkillsConfig": (".llms.anthropic.skills.transformation", "AnthropicSkillsConfig"), - "BaseSkillsAPIConfig": (".llms.base_llm.skills.transformation", "BaseSkillsAPIConfig"), + "OpenAIResponsesAPIConfig": ( + ".llms.openai.responses.transformation", + "OpenAIResponsesAPIConfig", + ), + "AzureOpenAIResponsesAPIConfig": ( + ".llms.azure.responses.transformation", + "AzureOpenAIResponsesAPIConfig", + ), + "AzureOpenAIOSeriesResponsesAPIConfig": ( + ".llms.azure.responses.o_series_transformation", + "AzureOpenAIOSeriesResponsesAPIConfig", + ), + "XAIResponsesAPIConfig": ( + ".llms.xai.responses.transformation", + "XAIResponsesAPIConfig", + ), + "LiteLLMProxyResponsesAPIConfig": ( + ".llms.litellm_proxy.responses.transformation", + "LiteLLMProxyResponsesAPIConfig", + ), + "VolcEngineResponsesAPIConfig": ( + ".llms.volcengine.responses.transformation", + "VolcEngineResponsesAPIConfig", + ), + "ManusResponsesAPIConfig": ( + ".llms.manus.responses.transformation", + "ManusResponsesAPIConfig", + ), + "GoogleAIStudioInteractionsConfig": ( + ".llms.gemini.interactions.transformation", + "GoogleAIStudioInteractionsConfig", + ), + "OpenAIOSeriesConfig": ( + ".llms.openai.chat.o_series_transformation", + "OpenAIOSeriesConfig", + ), + "AnthropicSkillsConfig": ( + ".llms.anthropic.skills.transformation", + "AnthropicSkillsConfig", + ), + "BaseSkillsAPIConfig": ( + ".llms.base_llm.skills.transformation", + "BaseSkillsAPIConfig", + ), "GradientAIConfig": (".llms.gradient_ai.chat.transformation", "GradientAIConfig"), # Alias for backwards compatibility - "OpenAIO1Config": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"), # Alias + "OpenAIO1Config": ( + ".llms.openai.chat.o_series_transformation", + "OpenAIOSeriesConfig", + ), # Alias "OpenAIGPTConfig": (".llms.openai.chat.gpt_transformation", "OpenAIGPTConfig"), "OpenAIGPT5Config": (".llms.openai.chat.gpt_5_transformation", "OpenAIGPT5Config"), - "OpenAIWhisperAudioTranscriptionConfig": (".llms.openai.transcriptions.whisper_transformation", "OpenAIWhisperAudioTranscriptionConfig"), - "OpenAIGPTAudioTranscriptionConfig": (".llms.openai.transcriptions.gpt_transformation", "OpenAIGPTAudioTranscriptionConfig"), - "OpenAIGPTAudioConfig": (".llms.openai.chat.gpt_audio_transformation", "OpenAIGPTAudioConfig"), + "OpenAIWhisperAudioTranscriptionConfig": ( + ".llms.openai.transcriptions.whisper_transformation", + "OpenAIWhisperAudioTranscriptionConfig", + ), + "OpenAIGPTAudioTranscriptionConfig": ( + ".llms.openai.transcriptions.gpt_transformation", + "OpenAIGPTAudioTranscriptionConfig", + ), + "OpenAIGPTAudioConfig": ( + ".llms.openai.chat.gpt_audio_transformation", + "OpenAIGPTAudioConfig", + ), "NvidiaNimConfig": (".llms.nvidia_nim.chat.transformation", "NvidiaNimConfig"), "NvidiaNimEmbeddingConfig": (".llms.nvidia_nim.embed", "NvidiaNimEmbeddingConfig"), - "FeatherlessAIConfig": (".llms.featherless_ai.chat.transformation", "FeatherlessAIConfig"), + "FeatherlessAIConfig": ( + ".llms.featherless_ai.chat.transformation", + "FeatherlessAIConfig", + ), "CerebrasConfig": (".llms.cerebras.chat", "CerebrasConfig"), "BasetenConfig": (".llms.baseten.chat", "BasetenConfig"), "SambanovaConfig": (".llms.sambanova.chat", "SambanovaConfig"), - "SambaNovaEmbeddingConfig": (".llms.sambanova.embedding.transformation", "SambaNovaEmbeddingConfig"), - "FireworksAIConfig": (".llms.fireworks_ai.chat.transformation", "FireworksAIConfig"), - "FireworksAITextCompletionConfig": (".llms.fireworks_ai.completion.transformation", "FireworksAITextCompletionConfig"), - "FireworksAIAudioTranscriptionConfig": (".llms.fireworks_ai.audio_transcription.transformation", "FireworksAIAudioTranscriptionConfig"), - "FireworksAIEmbeddingConfig": (".llms.fireworks_ai.embed.fireworks_ai_transformation", "FireworksAIEmbeddingConfig"), - "FriendliaiChatConfig": (".llms.friendliai.chat.transformation", "FriendliaiChatConfig"), - "JinaAIEmbeddingConfig": (".llms.jina_ai.embedding.transformation", "JinaAIEmbeddingConfig"), + "SambaNovaEmbeddingConfig": ( + ".llms.sambanova.embedding.transformation", + "SambaNovaEmbeddingConfig", + ), + "FireworksAIConfig": ( + ".llms.fireworks_ai.chat.transformation", + "FireworksAIConfig", + ), + "FireworksAITextCompletionConfig": ( + ".llms.fireworks_ai.completion.transformation", + "FireworksAITextCompletionConfig", + ), + "FireworksAIAudioTranscriptionConfig": ( + ".llms.fireworks_ai.audio_transcription.transformation", + "FireworksAIAudioTranscriptionConfig", + ), + "FireworksAIEmbeddingConfig": ( + ".llms.fireworks_ai.embed.fireworks_ai_transformation", + "FireworksAIEmbeddingConfig", + ), + "FriendliaiChatConfig": ( + ".llms.friendliai.chat.transformation", + "FriendliaiChatConfig", + ), + "JinaAIEmbeddingConfig": ( + ".llms.jina_ai.embedding.transformation", + "JinaAIEmbeddingConfig", + ), "XAIChatConfig": (".llms.xai.chat.transformation", "XAIChatConfig"), "ZAIChatConfig": (".llms.zai.chat.transformation", "ZAIChatConfig"), "AIMLChatConfig": (".llms.aiml.chat.transformation", "AIMLChatConfig"), - "VolcEngineChatConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"), - "CodestralTextCompletionConfig": (".llms.codestral.completion.transformation", "CodestralTextCompletionConfig"), - "AzureOpenAIAssistantsAPIConfig": (".llms.azure.azure", "AzureOpenAIAssistantsAPIConfig"), + "VolcEngineChatConfig": ( + ".llms.volcengine.chat.transformation", + "VolcEngineChatConfig", + ), + "CodestralTextCompletionConfig": ( + ".llms.codestral.completion.transformation", + "CodestralTextCompletionConfig", + ), + "AzureOpenAIAssistantsAPIConfig": ( + ".llms.azure.azure", + "AzureOpenAIAssistantsAPIConfig", + ), "HerokuChatConfig": (".llms.heroku.chat.transformation", "HerokuChatConfig"), "CometAPIConfig": (".llms.cometapi.chat.transformation", "CometAPIConfig"), "AzureOpenAIConfig": (".llms.azure.chat.gpt_transformation", "AzureOpenAIConfig"), - "AzureOpenAIGPT5Config": (".llms.azure.chat.gpt_5_transformation", "AzureOpenAIGPT5Config"), - "AzureOpenAITextConfig": (".llms.azure.completion.transformation", "AzureOpenAITextConfig"), - "HostedVLLMChatConfig": (".llms.hosted_vllm.chat.transformation", "HostedVLLMChatConfig"), + "AzureOpenAIGPT5Config": ( + ".llms.azure.chat.gpt_5_transformation", + "AzureOpenAIGPT5Config", + ), + "AzureOpenAITextConfig": ( + ".llms.azure.completion.transformation", + "AzureOpenAITextConfig", + ), + "HostedVLLMChatConfig": ( + ".llms.hosted_vllm.chat.transformation", + "HostedVLLMChatConfig", + ), + "HostedVLLMEmbeddingConfig": ( + ".llms.hosted_vllm.embedding.transformation", + "HostedVLLMEmbeddingConfig", + ), # Alias for backwards compatibility - "VolcEngineConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"), # Alias - "LlamafileChatConfig": (".llms.llamafile.chat.transformation", "LlamafileChatConfig"), - "LiteLLMProxyChatConfig": (".llms.litellm_proxy.chat.transformation", "LiteLLMProxyChatConfig"), + "VolcEngineConfig": ( + ".llms.volcengine.chat.transformation", + "VolcEngineChatConfig", + ), # Alias + "LlamafileChatConfig": ( + ".llms.llamafile.chat.transformation", + "LlamafileChatConfig", + ), + "LiteLLMProxyChatConfig": ( + ".llms.litellm_proxy.chat.transformation", + "LiteLLMProxyChatConfig", + ), "VLLMConfig": (".llms.vllm.completion.transformation", "VLLMConfig"), "DeepSeekChatConfig": (".llms.deepseek.chat.transformation", "DeepSeekChatConfig"), "LMStudioChatConfig": (".llms.lm_studio.chat.transformation", "LMStudioChatConfig"), - "LmStudioEmbeddingConfig": (".llms.lm_studio.embed.transformation", "LmStudioEmbeddingConfig"), + "LmStudioEmbeddingConfig": ( + ".llms.lm_studio.embed.transformation", + "LmStudioEmbeddingConfig", + ), "NscaleConfig": (".llms.nscale.chat.transformation", "NscaleConfig"), - "PerplexityChatConfig": (".llms.perplexity.chat.transformation", "PerplexityChatConfig"), - "AzureOpenAIO1Config": (".llms.azure.chat.o_series_transformation", "AzureOpenAIO1Config"), - "IBMWatsonXAIConfig": (".llms.watsonx.completion.transformation", "IBMWatsonXAIConfig"), - "IBMWatsonXChatConfig": (".llms.watsonx.chat.transformation", "IBMWatsonXChatConfig"), - "IBMWatsonXEmbeddingConfig": (".llms.watsonx.embed.transformation", "IBMWatsonXEmbeddingConfig"), - "GenAIHubEmbeddingConfig": (".llms.sap.embed.transformation", "GenAIHubEmbeddingConfig"), - "IBMWatsonXAudioTranscriptionConfig": (".llms.watsonx.audio_transcription.transformation", "IBMWatsonXAudioTranscriptionConfig"), - "GithubCopilotConfig": (".llms.github_copilot.chat.transformation", "GithubCopilotConfig"), - "GithubCopilotResponsesAPIConfig": (".llms.github_copilot.responses.transformation", "GithubCopilotResponsesAPIConfig"), - "GithubCopilotEmbeddingConfig": (".llms.github_copilot.embedding.transformation", "GithubCopilotEmbeddingConfig"), + "PerplexityChatConfig": ( + ".llms.perplexity.chat.transformation", + "PerplexityChatConfig", + ), + "AzureOpenAIO1Config": ( + ".llms.azure.chat.o_series_transformation", + "AzureOpenAIO1Config", + ), + "IBMWatsonXAIConfig": ( + ".llms.watsonx.completion.transformation", + "IBMWatsonXAIConfig", + ), + "IBMWatsonXChatConfig": ( + ".llms.watsonx.chat.transformation", + "IBMWatsonXChatConfig", + ), + "IBMWatsonXEmbeddingConfig": ( + ".llms.watsonx.embed.transformation", + "IBMWatsonXEmbeddingConfig", + ), + "GenAIHubEmbeddingConfig": ( + ".llms.sap.embed.transformation", + "GenAIHubEmbeddingConfig", + ), + "IBMWatsonXAudioTranscriptionConfig": ( + ".llms.watsonx.audio_transcription.transformation", + "IBMWatsonXAudioTranscriptionConfig", + ), + "GithubCopilotConfig": ( + ".llms.github_copilot.chat.transformation", + "GithubCopilotConfig", + ), + "GithubCopilotResponsesAPIConfig": ( + ".llms.github_copilot.responses.transformation", + "GithubCopilotResponsesAPIConfig", + ), + "GithubCopilotEmbeddingConfig": ( + ".llms.github_copilot.embedding.transformation", + "GithubCopilotEmbeddingConfig", + ), "ChatGPTConfig": (".llms.chatgpt.chat.transformation", "ChatGPTConfig"), - "ChatGPTResponsesAPIConfig": (".llms.chatgpt.responses.transformation", "ChatGPTResponsesAPIConfig"), + "ChatGPTResponsesAPIConfig": ( + ".llms.chatgpt.responses.transformation", + "ChatGPTResponsesAPIConfig", + ), "NebiusConfig": (".llms.nebius.chat.transformation", "NebiusConfig"), "WandbConfig": (".llms.wandb.chat.transformation", "WandbConfig"), "GigaChatConfig": (".llms.gigachat.chat.transformation", "GigaChatConfig"), - "GigaChatEmbeddingConfig": (".llms.gigachat.embedding.transformation", "GigaChatEmbeddingConfig"), - "DashScopeChatConfig": (".llms.dashscope.chat.transformation", "DashScopeChatConfig"), + "GigaChatEmbeddingConfig": ( + ".llms.gigachat.embedding.transformation", + "GigaChatEmbeddingConfig", + ), + "DashScopeChatConfig": ( + ".llms.dashscope.chat.transformation", + "DashScopeChatConfig", + ), "MoonshotChatConfig": (".llms.moonshot.chat.transformation", "MoonshotChatConfig"), - "DockerModelRunnerChatConfig": (".llms.docker_model_runner.chat.transformation", "DockerModelRunnerChatConfig"), + "DockerModelRunnerChatConfig": ( + ".llms.docker_model_runner.chat.transformation", + "DockerModelRunnerChatConfig", + ), "V0ChatConfig": (".llms.v0.chat.transformation", "V0ChatConfig"), "OCIChatConfig": (".llms.oci.chat.transformation", "OCIChatConfig"), "MorphChatConfig": (".llms.morph.chat.transformation", "MorphChatConfig"), "RAGFlowConfig": (".llms.ragflow.chat.transformation", "RAGFlowConfig"), "LambdaAIChatConfig": (".llms.lambda_ai.chat.transformation", "LambdaAIChatConfig"), - "HyperbolicChatConfig": (".llms.hyperbolic.chat.transformation", "HyperbolicChatConfig"), - "VercelAIGatewayConfig": (".llms.vercel_ai_gateway.chat.transformation", "VercelAIGatewayConfig"), + "HyperbolicChatConfig": ( + ".llms.hyperbolic.chat.transformation", + "HyperbolicChatConfig", + ), + "VercelAIGatewayConfig": ( + ".llms.vercel_ai_gateway.chat.transformation", + "VercelAIGatewayConfig", + ), "OVHCloudChatConfig": (".llms.ovhcloud.chat.transformation", "OVHCloudChatConfig"), - "OVHCloudEmbeddingConfig": (".llms.ovhcloud.embedding.transformation", "OVHCloudEmbeddingConfig"), - "CometAPIEmbeddingConfig": (".llms.cometapi.embed.transformation", "CometAPIEmbeddingConfig"), + "OVHCloudEmbeddingConfig": ( + ".llms.ovhcloud.embedding.transformation", + "OVHCloudEmbeddingConfig", + ), + "CometAPIEmbeddingConfig": ( + ".llms.cometapi.embed.transformation", + "CometAPIEmbeddingConfig", + ), "LemonadeChatConfig": (".llms.lemonade.chat.transformation", "LemonadeChatConfig"), - "SnowflakeEmbeddingConfig": (".llms.snowflake.embedding.transformation", "SnowflakeEmbeddingConfig"), - "AmazonNovaChatConfig": (".llms.amazon_nova.chat.transformation", "AmazonNovaChatConfig"), + "SnowflakeEmbeddingConfig": ( + ".llms.snowflake.embedding.transformation", + "SnowflakeEmbeddingConfig", + ), + "AmazonNovaChatConfig": ( + ".llms.amazon_nova.chat.transformation", + "AmazonNovaChatConfig", + ), } # Import map for utils module lazy imports _UTILS_MODULE_IMPORT_MAP = { "encoding": ("litellm.main", "encoding"), - "BaseVectorStore": ("litellm.integrations.vector_store_integrations.base_vector_store", "BaseVectorStore"), - "CredentialAccessor": ("litellm.litellm_core_utils.credential_accessor", "CredentialAccessor"), - "exception_type": ("litellm.litellm_core_utils.exception_mapping_utils", "exception_type"), - "get_error_message": ("litellm.litellm_core_utils.exception_mapping_utils", "get_error_message"), - "_get_response_headers": ("litellm.litellm_core_utils.exception_mapping_utils", "_get_response_headers"), - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "_is_non_openai_azure_model": ("litellm.litellm_core_utils.get_llm_provider_logic", "_is_non_openai_azure_model"), - "get_supported_openai_params": ("litellm.litellm_core_utils.get_supported_openai_params", "get_supported_openai_params"), - "LiteLLMResponseObjectHandler": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "LiteLLMResponseObjectHandler"), - "_handle_invalid_parallel_tool_calls": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "_handle_invalid_parallel_tool_calls"), - "convert_to_model_response_object": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_model_response_object"), - "convert_to_streaming_response": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response"), - "convert_to_streaming_response_async": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response_async"), - "get_api_base": ("litellm.litellm_core_utils.llm_response_utils.get_api_base", "get_api_base"), - "ResponseMetadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "ResponseMetadata"), - "_parse_content_for_reasoning": ("litellm.litellm_core_utils.prompt_templates.common_utils", "_parse_content_for_reasoning"), - "LiteLLMLoggingObject": ("litellm.litellm_core_utils.redact_messages", "LiteLLMLoggingObject"), - "redact_message_input_output_from_logging": ("litellm.litellm_core_utils.redact_messages", "redact_message_input_output_from_logging"), - "CustomStreamWrapper": ("litellm.litellm_core_utils.streaming_handler", "CustomStreamWrapper"), - "BaseGoogleGenAIGenerateContentConfig": ("litellm.llms.base_llm.google_genai.transformation", "BaseGoogleGenAIGenerateContentConfig"), + "BaseVectorStore": ( + "litellm.integrations.vector_store_integrations.base_vector_store", + "BaseVectorStore", + ), + "CredentialAccessor": ( + "litellm.litellm_core_utils.credential_accessor", + "CredentialAccessor", + ), + "exception_type": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "exception_type", + ), + "get_error_message": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "get_error_message", + ), + "_get_response_headers": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "_get_response_headers", + ), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "_is_non_openai_azure_model": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "_is_non_openai_azure_model", + ), + "get_supported_openai_params": ( + "litellm.litellm_core_utils.get_supported_openai_params", + "get_supported_openai_params", + ), + "LiteLLMResponseObjectHandler": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "LiteLLMResponseObjectHandler", + ), + "_handle_invalid_parallel_tool_calls": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "_handle_invalid_parallel_tool_calls", + ), + "convert_to_model_response_object": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_model_response_object", + ), + "convert_to_streaming_response": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_streaming_response", + ), + "convert_to_streaming_response_async": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_streaming_response_async", + ), + "get_api_base": ( + "litellm.litellm_core_utils.llm_response_utils.get_api_base", + "get_api_base", + ), + "ResponseMetadata": ( + "litellm.litellm_core_utils.llm_response_utils.response_metadata", + "ResponseMetadata", + ), + "_parse_content_for_reasoning": ( + "litellm.litellm_core_utils.prompt_templates.common_utils", + "_parse_content_for_reasoning", + ), + "LiteLLMLoggingObject": ( + "litellm.litellm_core_utils.redact_messages", + "LiteLLMLoggingObject", + ), + "redact_message_input_output_from_logging": ( + "litellm.litellm_core_utils.redact_messages", + "redact_message_input_output_from_logging", + ), + "CustomStreamWrapper": ( + "litellm.litellm_core_utils.streaming_handler", + "CustomStreamWrapper", + ), + "BaseGoogleGenAIGenerateContentConfig": ( + "litellm.llms.base_llm.google_genai.transformation", + "BaseGoogleGenAIGenerateContentConfig", + ), "BaseOCRConfig": ("litellm.llms.base_llm.ocr.transformation", "BaseOCRConfig"), - "BaseSearchConfig": ("litellm.llms.base_llm.search.transformation", "BaseSearchConfig"), - "BaseTextToSpeechConfig": ("litellm.llms.base_llm.text_to_speech.transformation", "BaseTextToSpeechConfig"), + "BaseSearchConfig": ( + "litellm.llms.base_llm.search.transformation", + "BaseSearchConfig", + ), + "BaseTextToSpeechConfig": ( + "litellm.llms.base_llm.text_to_speech.transformation", + "BaseTextToSpeechConfig", + ), "BedrockModelInfo": ("litellm.llms.bedrock.common_utils", "BedrockModelInfo"), "CohereModelInfo": ("litellm.llms.cohere.common_utils", "CohereModelInfo"), "MistralOCRConfig": ("litellm.llms.mistral.ocr.transformation", "MistralOCRConfig"), "Rules": ("litellm.litellm_core_utils.rules", "Rules"), "AsyncHTTPHandler": ("litellm.llms.custom_httpx.http_handler", "AsyncHTTPHandler"), "HTTPHandler": ("litellm.llms.custom_httpx.http_handler", "HTTPHandler"), - "get_num_retries_from_retry_policy": ("litellm.router_utils.get_retry_from_policy", "get_num_retries_from_retry_policy"), - "reset_retry_policy": ("litellm.router_utils.get_retry_from_policy", "reset_retry_policy"), + "get_num_retries_from_retry_policy": ( + "litellm.router_utils.get_retry_from_policy", + "get_num_retries_from_retry_policy", + ), + "reset_retry_policy": ( + "litellm.router_utils.get_retry_from_policy", + "reset_retry_policy", + ), "get_secret": ("litellm.secret_managers.main", "get_secret"), - "get_coroutine_checker": ("litellm.litellm_core_utils.cached_imports", "get_coroutine_checker"), - "get_litellm_logging_class": ("litellm.litellm_core_utils.cached_imports", "get_litellm_logging_class"), - "get_set_callbacks": ("litellm.litellm_core_utils.cached_imports", "get_set_callbacks"), - "get_litellm_metadata_from_kwargs": ("litellm.litellm_core_utils.core_helpers", "get_litellm_metadata_from_kwargs"), - "map_finish_reason": ("litellm.litellm_core_utils.core_helpers", "map_finish_reason"), - "process_response_headers": ("litellm.litellm_core_utils.core_helpers", "process_response_headers"), - "delete_nested_value": ("litellm.litellm_core_utils.dot_notation_indexing", "delete_nested_value"), - "is_nested_path": ("litellm.litellm_core_utils.dot_notation_indexing", "is_nested_path"), - "_get_base_model_from_litellm_call_metadata": ("litellm.litellm_core_utils.get_litellm_params", "_get_base_model_from_litellm_call_metadata"), - "get_litellm_params": ("litellm.litellm_core_utils.get_litellm_params", "get_litellm_params"), - "_ensure_extra_body_is_safe": ("litellm.litellm_core_utils.llm_request_utils", "_ensure_extra_body_is_safe"), - "get_formatted_prompt": ("litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", "get_formatted_prompt"), - "get_response_headers": ("litellm.litellm_core_utils.llm_response_utils.get_headers", "get_response_headers"), - "update_response_metadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "update_response_metadata"), + "get_coroutine_checker": ( + "litellm.litellm_core_utils.cached_imports", + "get_coroutine_checker", + ), + "get_litellm_logging_class": ( + "litellm.litellm_core_utils.cached_imports", + "get_litellm_logging_class", + ), + "get_set_callbacks": ( + "litellm.litellm_core_utils.cached_imports", + "get_set_callbacks", + ), + "get_litellm_metadata_from_kwargs": ( + "litellm.litellm_core_utils.core_helpers", + "get_litellm_metadata_from_kwargs", + ), + "map_finish_reason": ( + "litellm.litellm_core_utils.core_helpers", + "map_finish_reason", + ), + "process_response_headers": ( + "litellm.litellm_core_utils.core_helpers", + "process_response_headers", + ), + "delete_nested_value": ( + "litellm.litellm_core_utils.dot_notation_indexing", + "delete_nested_value", + ), + "is_nested_path": ( + "litellm.litellm_core_utils.dot_notation_indexing", + "is_nested_path", + ), + "_get_base_model_from_litellm_call_metadata": ( + "litellm.litellm_core_utils.get_litellm_params", + "_get_base_model_from_litellm_call_metadata", + ), + "get_litellm_params": ( + "litellm.litellm_core_utils.get_litellm_params", + "get_litellm_params", + ), + "_ensure_extra_body_is_safe": ( + "litellm.litellm_core_utils.llm_request_utils", + "_ensure_extra_body_is_safe", + ), + "get_formatted_prompt": ( + "litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", + "get_formatted_prompt", + ), + "get_response_headers": ( + "litellm.litellm_core_utils.llm_response_utils.get_headers", + "get_response_headers", + ), + "update_response_metadata": ( + "litellm.litellm_core_utils.llm_response_utils.response_metadata", + "update_response_metadata", + ), "executor": ("litellm.litellm_core_utils.thread_pool_executor", "executor"), - "BaseAnthropicMessagesConfig": ("litellm.llms.base_llm.anthropic_messages.transformation", "BaseAnthropicMessagesConfig"), - "BaseAudioTranscriptionConfig": ("litellm.llms.base_llm.audio_transcription.transformation", "BaseAudioTranscriptionConfig"), - "BaseBatchesConfig": ("litellm.llms.base_llm.batches.transformation", "BaseBatchesConfig"), - "BaseContainerConfig": ("litellm.llms.base_llm.containers.transformation", "BaseContainerConfig"), - "BaseEmbeddingConfig": ("litellm.llms.base_llm.embedding.transformation", "BaseEmbeddingConfig"), - "BaseImageEditConfig": ("litellm.llms.base_llm.image_edit.transformation", "BaseImageEditConfig"), - "BaseImageGenerationConfig": ("litellm.llms.base_llm.image_generation.transformation", "BaseImageGenerationConfig"), - "BaseImageVariationConfig": ("litellm.llms.base_llm.image_variations.transformation", "BaseImageVariationConfig"), - "BasePassthroughConfig": ("litellm.llms.base_llm.passthrough.transformation", "BasePassthroughConfig"), - "BaseRealtimeConfig": ("litellm.llms.base_llm.realtime.transformation", "BaseRealtimeConfig"), - "BaseRerankConfig": ("litellm.llms.base_llm.rerank.transformation", "BaseRerankConfig"), - "BaseVectorStoreConfig": ("litellm.llms.base_llm.vector_store.transformation", "BaseVectorStoreConfig"), - "BaseVectorStoreFilesConfig": ("litellm.llms.base_llm.vector_store_files.transformation", "BaseVectorStoreFilesConfig"), - "BaseVideoConfig": ("litellm.llms.base_llm.videos.transformation", "BaseVideoConfig"), - "ANTHROPIC_API_ONLY_HEADERS": ("litellm.types.llms.anthropic", "ANTHROPIC_API_ONLY_HEADERS"), - "AnthropicThinkingParam": ("litellm.types.llms.anthropic", "AnthropicThinkingParam"), + "BaseAnthropicMessagesConfig": ( + "litellm.llms.base_llm.anthropic_messages.transformation", + "BaseAnthropicMessagesConfig", + ), + "BaseAudioTranscriptionConfig": ( + "litellm.llms.base_llm.audio_transcription.transformation", + "BaseAudioTranscriptionConfig", + ), + "BaseBatchesConfig": ( + "litellm.llms.base_llm.batches.transformation", + "BaseBatchesConfig", + ), + "BaseContainerConfig": ( + "litellm.llms.base_llm.containers.transformation", + "BaseContainerConfig", + ), + "BaseEmbeddingConfig": ( + "litellm.llms.base_llm.embedding.transformation", + "BaseEmbeddingConfig", + ), + "BaseImageEditConfig": ( + "litellm.llms.base_llm.image_edit.transformation", + "BaseImageEditConfig", + ), + "BaseImageGenerationConfig": ( + "litellm.llms.base_llm.image_generation.transformation", + "BaseImageGenerationConfig", + ), + "BaseImageVariationConfig": ( + "litellm.llms.base_llm.image_variations.transformation", + "BaseImageVariationConfig", + ), + "BasePassthroughConfig": ( + "litellm.llms.base_llm.passthrough.transformation", + "BasePassthroughConfig", + ), + "BaseRealtimeConfig": ( + "litellm.llms.base_llm.realtime.transformation", + "BaseRealtimeConfig", + ), + "BaseRerankConfig": ( + "litellm.llms.base_llm.rerank.transformation", + "BaseRerankConfig", + ), + "BaseVectorStoreConfig": ( + "litellm.llms.base_llm.vector_store.transformation", + "BaseVectorStoreConfig", + ), + "BaseVectorStoreFilesConfig": ( + "litellm.llms.base_llm.vector_store_files.transformation", + "BaseVectorStoreFilesConfig", + ), + "BaseVideoConfig": ( + "litellm.llms.base_llm.videos.transformation", + "BaseVideoConfig", + ), + "ANTHROPIC_API_ONLY_HEADERS": ( + "litellm.types.llms.anthropic", + "ANTHROPIC_API_ONLY_HEADERS", + ), + "AnthropicThinkingParam": ( + "litellm.types.llms.anthropic", + "AnthropicThinkingParam", + ), "RerankResponse": ("litellm.types.rerank", "RerankResponse"), - "ChatCompletionDeltaToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionDeltaToolCallChunk"), - "ChatCompletionToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallChunk"), - "ChatCompletionToolCallFunctionChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallFunctionChunk"), + "ChatCompletionDeltaToolCallChunk": ( + "litellm.types.llms.openai", + "ChatCompletionDeltaToolCallChunk", + ), + "ChatCompletionToolCallChunk": ( + "litellm.types.llms.openai", + "ChatCompletionToolCallChunk", + ), + "ChatCompletionToolCallFunctionChunk": ( + "litellm.types.llms.openai", + "ChatCompletionToolCallFunctionChunk", + ), "LiteLLM_Params": ("litellm.types.router", "LiteLLM_Params"), } diff --git a/litellm/llms/hosted_vllm/embedding/transformation.py b/litellm/llms/hosted_vllm/embedding/transformation.py new file mode 100644 index 00000000000..9c3e8c6c7cc --- /dev/null +++ b/litellm/llms/hosted_vllm/embedding/transformation.py @@ -0,0 +1,180 @@ +""" +Hosted VLLM Embedding API Configuration. + +This module provides the configuration for hosted VLLM's Embedding API. +VLLM is OpenAI-compatible and supports embeddings via the /v1/embeddings endpoint. + +Docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html +""" + +from typing import TYPE_CHECKING, Any, List, Optional, Union + +import httpx + +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig +from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues +from litellm.types.utils import EmbeddingResponse +from litellm.utils import convert_to_model_response_object + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + + +class HostedVLLMEmbeddingError(BaseLLMException): + """Exception class for Hosted VLLM Embedding errors.""" + + pass + + +class HostedVLLMEmbeddingConfig(BaseEmbeddingConfig): + """ + Configuration for Hosted VLLM's Embedding API. + + Reference: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html + """ + + def validate_environment( + self, + headers: dict, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + """ + Validate environment and set up headers for Hosted VLLM API. + """ + if api_key is None: + api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key" + + default_headers = { + "Content-Type": "application/json", + } + + # Only add Authorization header if api_key is not "fake-api-key" + if api_key and api_key != "fake-api-key": + default_headers["Authorization"] = f"Bearer {api_key}" + + # Merge with existing headers (user's headers take priority) + return {**default_headers, **headers} + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + """ + Get the complete URL for Hosted VLLM Embedding API endpoint. + """ + if api_base is None: + api_base = get_secret_str("HOSTED_VLLM_API_BASE") + if api_base is None: + raise ValueError("api_base is required for hosted_vllm embeddings") + + # Remove trailing slashes + api_base = api_base.rstrip("/") + + # Ensure the URL ends with /embeddings + if not api_base.endswith("/embeddings"): + api_base = f"{api_base}/embeddings" + + return api_base + + def transform_embedding_request( + self, + model: str, + input: AllEmbeddingInputValues, + optional_params: dict, + headers: dict, + ) -> dict: + """ + Transform embedding request to Hosted VLLM format (OpenAI-compatible). + """ + # Ensure input is a list + if isinstance(input, str): + input = [input] + + # Strip 'hosted_vllm/' prefix if present + if model.startswith("hosted_vllm/"): + model = model.replace("hosted_vllm/", "", 1) + + return { + "model": model, + "input": input, + **optional_params, + } + + def transform_embedding_response( + self, + model: str, + raw_response: httpx.Response, + model_response: EmbeddingResponse, + logging_obj: LiteLLMLoggingObj, + api_key: Optional[str], + request_data: dict, + optional_params: dict, + litellm_params: dict, + ) -> EmbeddingResponse: + """ + Transform embedding response from Hosted VLLM format (OpenAI-compatible). + """ + logging_obj.post_call(original_response=raw_response.text) + + # VLLM returns standard OpenAI-compatible embedding response + response_json = raw_response.json() + + return convert_to_model_response_object( + response_object=response_json, + model_response_object=model_response, + response_type="embedding", + ) + + def get_supported_openai_params(self, model: str) -> list: + """ + Get list of supported OpenAI parameters for Hosted VLLM embeddings. + """ + return [ + "timeout", + "dimensions", + "encoding_format", + "user", + ] + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + """ + Map OpenAI parameters to Hosted VLLM format. + """ + for param, value in non_default_params.items(): + if param in self.get_supported_openai_params(model): + optional_params[param] = value + return optional_params + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + """ + Get the error class for Hosted VLLM errors. + """ + return HostedVLLMEmbeddingError( + message=error_message, + status_code=status_code, + headers=headers, + ) diff --git a/litellm/main.py b/litellm/main.py index 5b8c569a390..35043e1ba0e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2373,6 +2373,33 @@ def completion( # type: ignore # noqa: PLR0915 or "https://api.minimax.io/v1" ) + response = base_llm_http_handler.completion( + model=model, + messages=messages, + api_base=api_base, + custom_llm_provider=custom_llm_provider, + model_response=model_response, + encoding=_get_encoding(), + logging_obj=logging, + optional_params=optional_params, + timeout=timeout, + litellm_params=litellm_params, + shared_session=shared_session, + acompletion=acompletion, + stream=stream, + api_key=api_key, + headers=headers, + client=client, + provider_config=provider_config, + ) + logging.post_call( + input=messages, api_key=api_key, original_response=response + ) + elif custom_llm_provider == "hosted_vllm": + api_base = ( + api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE") + ) + response = base_llm_http_handler.completion( model=model, messages=messages, @@ -3611,9 +3638,9 @@ def completion( # type: ignore # noqa: PLR0915 "aws_region_name" not in optional_params or optional_params["aws_region_name"] is None ): - optional_params[ - "aws_region_name" - ] = aws_bedrock_client.meta.region_name + optional_params["aws_region_name"] = ( + aws_bedrock_client.meta.region_name + ) bedrock_route = BedrockModelInfo.get_bedrock_route(model) if bedrock_route == "converse": @@ -4773,9 +4800,32 @@ def embedding( # noqa: PLR0915 client=client, aembedding=aembedding, ) + elif custom_llm_provider == "hosted_vllm": + api_base = ( + api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE") + ) + + # set API KEY + if api_key is None: + api_key = litellm.api_key or get_secret_str("HOSTED_VLLM_API_KEY") + + response = base_llm_http_handler.embedding( + model=model, + input=input, + custom_llm_provider=custom_llm_provider, + api_base=api_base, + api_key=api_key, + logging_obj=logging, + timeout=timeout, + model_response=EmbeddingResponse(), + optional_params=optional_params, + client=client, + aembedding=aembedding, + litellm_params=litellm_params_dict, + headers=headers or {}, + ) elif ( custom_llm_provider == "openai_like" - or custom_llm_provider == "hosted_vllm" or custom_llm_provider == "llamafile" or custom_llm_provider == "lm_studio" ): @@ -5948,9 +5998,9 @@ def adapter_completion( new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs) # type: ignore - translated_response: Optional[ - Union[BaseModel, AdapterCompletionStreamWrapper] - ] = None + translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = ( + None + ) if isinstance(response, ModelResponse): translated_response = translation_obj.translate_completion_output_params( response=response @@ -6655,9 +6705,9 @@ def speech( # noqa: PLR0915 ElevenLabsTextToSpeechConfig.ELEVENLABS_QUERY_PARAMS_KEY ] = query_params - litellm_params_dict[ - ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY - ] = voice_id + litellm_params_dict[ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY] = ( + voice_id + ) if api_base is not None: litellm_params_dict["api_base"] = api_base @@ -7163,9 +7213,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(content_chunks) > 0: - response["choices"][0]["message"][ - "content" - ] = processor.get_combined_content(content_chunks) + response["choices"][0]["message"]["content"] = ( + processor.get_combined_content(content_chunks) + ) thinking_blocks = [ chunk @@ -7176,9 +7226,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(thinking_blocks) > 0: - response["choices"][0]["message"][ - "thinking_blocks" - ] = processor.get_combined_thinking_content(thinking_blocks) + response["choices"][0]["message"]["thinking_blocks"] = ( + processor.get_combined_thinking_content(thinking_blocks) + ) reasoning_chunks = [ chunk @@ -7189,9 +7239,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(reasoning_chunks) > 0: - response["choices"][0]["message"][ - "reasoning_content" - ] = processor.get_combined_reasoning_content(reasoning_chunks) + response["choices"][0]["message"]["reasoning_content"] = ( + processor.get_combined_reasoning_content(reasoning_chunks) + ) annotation_chunks = [ chunk diff --git a/litellm/utils.py b/litellm/utils.py index d7fb4855a48..61a446564dc 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -771,13 +771,15 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## LAZY LOAD COROUTINE CHECKER ## - get_coroutine_checker_fn = getattr(sys.modules[__name__], "get_coroutine_checker") + get_coroutine_checker_fn = getattr( + sys.modules[__name__], "get_coroutine_checker" + ) coroutine_checker = get_coroutine_checker_fn() ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, "CustomLogger"]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, "CustomLogger"]]] = ( + kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1660,9 +1662,9 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -1709,9 +1711,9 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -3640,10 +3642,10 @@ def pre_process_non_default_params( if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -3772,16 +3774,16 @@ def pre_process_optional_params( True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -4937,9 +4939,9 @@ def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) return delta if isinstance(delta, str) else "" # Handle standard ModelResponse and ModelResponseStream - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) # Use list accumulation to avoid O(n^2) string concatenation across choices response_parts: List[str] = [] @@ -7714,25 +7716,29 @@ def validate_chat_completion_tool_choice( f"Invalid tool choice, tool_choice={tool_choice}. Got={type(tool_choice)}. Expecting str, or dict. Please ensure tool_choice follows the OpenAI tool_choice spec" ) -def validate_openai_optional_params( - stop: Optional[Union[str, List[str]]] = None, - **kwargs -) -> Optional[Union[str, List[str]]]: - """ - Validates and fixes OpenAI optional parameters. - - Args: - stop: Stop sequences (string or list of strings) - **kwargs: Additional optional parameters - - Returns: - Validated stop parameter (truncated to 4 elements if needed) - """ - if stop is not None and isinstance(stop, list) and not litellm.disable_stop_sequence_limit: + +def validate_openai_optional_params( + stop: Optional[Union[str, List[str]]] = None, **kwargs +) -> Optional[Union[str, List[str]]]: + """ + Validates and fixes OpenAI optional parameters. + + Args: + stop: Stop sequences (string or list of strings) + **kwargs: Additional optional parameters + + Returns: + Validated stop parameter (truncated to 4 elements if needed) + """ + if ( + stop is not None + and isinstance(stop, list) + and not litellm.disable_stop_sequence_limit + ): # Truncate to 4 elements if more are provided as openai only supports up to 4 stop sequences - if len(stop) > 4: - stop = stop[:4] - + if len(stop) > 4: + stop = stop[:4] + return stop @@ -8061,6 +8067,8 @@ def get_provider_embedding_config( return VercelAIGatewayEmbeddingConfig() elif litellm.LlmProviders.GIGACHAT == provider: return litellm.GigaChatEmbeddingConfig() + elif litellm.LlmProviders.HOSTED_VLLM == provider: + return litellm.HostedVLLMEmbeddingConfig() elif litellm.LlmProviders.SAGEMAKER == provider: from litellm.llms.sagemaker.embedding.transformation import ( SagemakerEmbeddingConfig, diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py index 9b3b6aeaea1..ddc01db0ec7 100644 --- a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py +++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py @@ -1,10 +1,7 @@ import json import os import sys -from unittest.mock import AsyncMock, MagicMock, patch - -import httpx -import pytest +from unittest.mock import MagicMock, patch sys.path.insert( 0, os.path.abspath("../../../../..") @@ -47,15 +44,34 @@ def test_hosted_vllm_chat_transformation_file_url(): def test_hosted_vllm_chat_transformation_with_audio_url(): from litellm import completion - from litellm.llms.custom_httpx.http_handler import HTTPHandler - - client = MagicMock() - with patch.object( - client.chat.completions.with_raw_response, "create", return_value=MagicMock() - ) as mock_post: + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "llama-3.1-70b-instruct", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Test response"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + mock_response.text = json.dumps(mock_response.json.return_value) + mock_client.post.return_value = mock_response + + with patch( + "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client", + return_value=mock_client, + ): try: - response = completion( + completion( model="hosted_vllm/llama-3.1-70b-instruct", messages=[ { @@ -68,14 +84,15 @@ def test_hosted_vllm_chat_transformation_with_audio_url(): ], }, ], - client=client, + api_base="https://test-vllm.example.com/v1", ) - except Exception as e: - print(f"Error: {e}") + except Exception: + pass - mock_post.assert_called_once() - print(f"mock_post.call_args.kwargs: {mock_post.call_args.kwargs}") - assert mock_post.call_args.kwargs["messages"] == [ + mock_client.post.assert_called_once() + call_kwargs = mock_client.post.call_args[1] + request_data = json.loads(call_kwargs["data"]) + assert request_data["messages"] == [ { "role": "user", "content": [ diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py new file mode 100644 index 00000000000..8f98b3ca8f1 --- /dev/null +++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py @@ -0,0 +1,152 @@ +""" +Test SSL verification for hosted_vllm provider. + +This test ensures that the ssl_verify parameter is properly passed through +to the HTTP client when using the hosted_vllm provider. + +Issue: ssl_verify parameter was being ignored because hosted_vllm fell through +to the OpenAI catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client. +""" + +import os +import sys +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +import litellm + + +class TestHostedVLLMSSLVerify: + """Test suite for SSL verification in hosted_vllm provider.""" + + @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client") + def test_hosted_vllm_ssl_verify_false_sync(self, mock_get_httpx_client): + """Test that ssl_verify=False is passed to the HTTP client for sync calls.""" + # Setup mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test response", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + } + mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}' + mock_client.post.return_value = mock_response + mock_get_httpx_client.return_value = mock_client + + try: + litellm.completion( + model="hosted_vllm/test-model", + messages=[{"role": "user", "content": "Hello"}], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify _get_httpx_client was called with ssl_verify=False + mock_get_httpx_client.assert_called() + call_args = mock_get_httpx_client.call_args + + # Check that params contains ssl_verify=False + if call_args[0]: + # Positional argument + params = call_args[0][0] + else: + # Keyword argument + params = call_args[1].get("params", {}) + + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client") + @pytest.mark.asyncio + async def test_hosted_vllm_ssl_verify_false_async( + self, mock_get_async_httpx_client + ): + """Test that ssl_verify=False is passed to the HTTP client for async calls.""" + # Setup mock async client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test response", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + } + mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}' + + async def mock_post(*args, **kwargs): + return mock_response + + mock_client.post = mock_post + mock_get_async_httpx_client.return_value = mock_client + + try: + await litellm.acompletion( + model="hosted_vllm/test-model", + messages=[{"role": "user", "content": "Hello"}], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify get_async_httpx_client was called with ssl_verify=False + mock_get_async_httpx_client.assert_called() + call_kwargs = mock_get_async_httpx_client.call_args[1] + + # Check that params contains ssl_verify=False + params = call_kwargs.get("params", {}) + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"]) diff --git a/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py new file mode 100644 index 00000000000..bb911814c23 --- /dev/null +++ b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py @@ -0,0 +1,140 @@ +""" +Test SSL verification for hosted_vllm provider embeddings. + +This test ensures that the ssl_verify parameter is properly passed through +to the HTTP client when using the hosted_vllm provider for embeddings. + +Issue: ssl_verify parameter was being ignored because hosted_vllm fell through +to the openai_like catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client. +""" + +import os +import sys +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +import litellm + + +class TestHostedVLLMEmbeddingSSLVerify: + """Test suite for SSL verification in hosted_vllm provider embeddings.""" + + @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client") + def test_hosted_vllm_embedding_ssl_verify_false_sync(self, mock_get_httpx_client): + """Test that ssl_verify=False is passed to the HTTP client for sync embedding calls.""" + # Setup mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.1, 0.2, 0.3, 0.4, 0.5], + } + ], + "model": "text-embedding-model", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5, + }, + } + mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}' + mock_client.post.return_value = mock_response + mock_get_httpx_client.return_value = mock_client + + try: + litellm.embedding( + model="hosted_vllm/text-embedding-model", + input=["hello world"], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify _get_httpx_client was called with ssl_verify=False + mock_get_httpx_client.assert_called() + call_args = mock_get_httpx_client.call_args + + # Check that params contains ssl_verify=False + if call_args[0]: + # Positional argument + params = call_args[0][0] + else: + # Keyword argument + params = call_args[1].get("params", {}) + + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client") + @pytest.mark.asyncio + async def test_hosted_vllm_embedding_ssl_verify_false_async( + self, mock_get_async_httpx_client + ): + """Test that ssl_verify=False is passed to the HTTP client for async embedding calls.""" + # Setup mock async client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.1, 0.2, 0.3, 0.4, 0.5], + } + ], + "model": "text-embedding-model", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5, + }, + } + mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}' + + async def mock_post(*args, **kwargs): + return mock_response + + mock_client.post = mock_post + mock_get_async_httpx_client.return_value = mock_client + + try: + await litellm.aembedding( + model="hosted_vllm/text-embedding-model", + input=["hello world"], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify get_async_httpx_client was called with ssl_verify=False + mock_get_async_httpx_client.assert_called() + call_kwargs = mock_get_async_httpx_client.call_args[1] + + # Check that params contains ssl_verify=False + params = call_kwargs.get("params", {}) + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"])