diff --git a/litellm/__init__.py b/litellm/__init__.py
index e5c09702b9b..9d39e4ca11d 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1467,6 +1467,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
     from .llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config as AzureOpenAIGPT5Config
     from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig
     from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig
+    from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig
     from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig
     from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig
     from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig
diff --git a/litellm/_lazy_imports_registry.py b/litellm/_lazy_imports_registry.py
index a92c6f95b0e..0e52e9a59eb 100644
--- a/litellm/_lazy_imports_registry.py
+++ b/litellm/_lazy_imports_registry.py
@@ -20,25 +20,53 @@
 
 # Utils names that support lazy loading via _lazy_import_utils
 UTILS_NAMES = (
-    "exception_type", "get_optional_params", "get_response_string", "token_counter",
-    "create_pretrained_tokenizer", "create_tokenizer", "supports_function_calling",
-    "supports_web_search", "supports_url_context", "supports_response_schema",
-    "supports_parallel_function_calling", "supports_vision", "supports_audio_input",
-    "supports_audio_output", "supports_system_messages", "supports_reasoning",
-    "get_litellm_params", "acreate", "get_max_tokens", "get_model_info",
-    "register_prompt_template", "validate_environment", "check_valid_key",
-    "register_model", "encode", "decode", "_calculate_retry_after", "_should_retry",
-    "get_supported_openai_params", "get_api_base", "get_first_chars_messages",
-    "ModelResponse", "ModelResponseStream", "EmbeddingResponse", "ImageResponse",
-    "TranscriptionResponse", "TextCompletionResponse", "get_provider_fields",
-    "ModelResponseListIterator", "get_valid_models", "timeout",
-    "get_llm_provider", "remove_index_from_tool_calls",
+    "exception_type",
+    "get_optional_params",
+    "get_response_string",
+    "token_counter",
+    "create_pretrained_tokenizer",
+    "create_tokenizer",
+    "supports_function_calling",
+    "supports_web_search",
+    "supports_url_context",
+    "supports_response_schema",
+    "supports_parallel_function_calling",
+    "supports_vision",
+    "supports_audio_input",
+    "supports_audio_output",
+    "supports_system_messages",
+    "supports_reasoning",
+    "get_litellm_params",
+    "acreate",
+    "get_max_tokens",
+    "get_model_info",
+    "register_prompt_template",
+    "validate_environment",
+    "check_valid_key",
+    "register_model",
+    "encode",
+    "decode",
+    "_calculate_retry_after",
+    "_should_retry",
+    "get_supported_openai_params",
+    "get_api_base",
+    "get_first_chars_messages",
+    "ModelResponse",
+    "ModelResponseStream",
+    "EmbeddingResponse",
+    "ImageResponse",
+    "TranscriptionResponse",
+    "TextCompletionResponse",
+    "get_provider_fields",
+    "ModelResponseListIterator",
+    "get_valid_models",
+    "timeout",
+    "get_llm_provider",
+    "remove_index_from_tool_calls",
 )
 
 # Token counter names that support lazy loading via _lazy_import_token_counter
-TOKEN_COUNTER_NAMES = (
-    "get_modified_max_tokens",
-)
+TOKEN_COUNTER_NAMES = ("get_modified_max_tokens",)
 
 # LLM client cache names that support lazy loading via _lazy_import_llm_client_cache
 LLM_CLIENT_CACHE_NAMES = (
@@ -47,9 +75,7 @@
 )
 
 # Bedrock type names that support lazy loading via _lazy_import_bedrock_types
-BEDROCK_TYPES_NAMES = (
-    "COHERE_EMBEDDING_INPUT_TYPES",
-)
+BEDROCK_TYPES_NAMES = ("COHERE_EMBEDDING_INPUT_TYPES",)
 
 # Common types from litellm.types.utils that support lazy loading via
 # _lazy_import_types_utils
@@ -236,6 +262,7 @@
     "AzureOpenAIGPT5Config",
     "AzureOpenAITextConfig",
     "HostedVLLMChatConfig",
+    "HostedVLLMEmbeddingConfig",
     # Alias for backwards compatibility
     "VolcEngineConfig",  # Alias for VolcEngineChatConfig
     "LlamafileChatConfig",
@@ -388,7 +415,10 @@
     "supports_web_search": (".utils", "supports_web_search"),
     "supports_url_context": (".utils", "supports_url_context"),
     "supports_response_schema": (".utils", "supports_response_schema"),
-    "supports_parallel_function_calling": (".utils", "supports_parallel_function_calling"),
+    "supports_parallel_function_calling": (
+        ".utils",
+        "supports_parallel_function_calling",
+    ),
     "supports_vision": (".utils", "supports_vision"),
     "supports_audio_input": (".utils", "supports_audio_input"),
     "supports_audio_output": (".utils", "supports_audio_output"),
@@ -419,8 +449,14 @@
     "ModelResponseListIterator": (".utils", "ModelResponseListIterator"),
     "get_valid_models": (".utils", "get_valid_models"),
     "timeout": (".timeout", "timeout"),
-    "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"),
-    "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"),
+    "get_llm_provider": (
+        "litellm.litellm_core_utils.get_llm_provider_logic",
+        "get_llm_provider",
+    ),
+    "remove_index_from_tool_calls": (
+        "litellm.litellm_core_utils.core_helpers",
+        "remove_index_from_tool_calls",
+    ),
 }
 
 _COST_CALCULATOR_IMPORT_MAP = {
@@ -442,11 +478,17 @@
 }
 
 _TOKEN_COUNTER_IMPORT_MAP = {
-    "get_modified_max_tokens": ("litellm.litellm_core_utils.token_counter", "get_modified_max_tokens"),
+    "get_modified_max_tokens": (
+        "litellm.litellm_core_utils.token_counter",
+        "get_modified_max_tokens",
+    ),
 }
 
 _BEDROCK_TYPES_IMPORT_MAP = {
-    "COHERE_EMBEDDING_INPUT_TYPES": ("litellm.types.llms.bedrock", "COHERE_EMBEDDING_INPUT_TYPES"),
+    "COHERE_EMBEDDING_INPUT_TYPES": (
+        "litellm.types.llms.bedrock",
+        "COHERE_EMBEDDING_INPUT_TYPES",
+    ),
 }
 
 _CACHING_IMPORT_MAP = {
@@ -458,294 +500,868 @@
 
 _LITELLM_LOGGING_IMPORT_MAP = {
     "Logging": ("litellm.litellm_core_utils.litellm_logging", "Logging"),
-    "modify_integration": ("litellm.litellm_core_utils.litellm_logging", "modify_integration"),
+    "modify_integration": (
+        "litellm.litellm_core_utils.litellm_logging",
+        "modify_integration",
+    ),
 }
 
 _DOTPROMPT_IMPORT_MAP = {
-    "global_prompt_manager": ("litellm.integrations.dotprompt", "global_prompt_manager"),
-    "global_prompt_directory": ("litellm.integrations.dotprompt", "global_prompt_directory"),
-    "set_global_prompt_directory": ("litellm.integrations.dotprompt", "set_global_prompt_directory"),
+    "global_prompt_manager": (
+        "litellm.integrations.dotprompt",
+        "global_prompt_manager",
+    ),
+    "global_prompt_directory": (
+        "litellm.integrations.dotprompt",
+        "global_prompt_directory",
+    ),
+    "set_global_prompt_directory": (
+        "litellm.integrations.dotprompt",
+        "set_global_prompt_directory",
+    ),
 }
 
 _TYPES_IMPORT_MAP = {
     "GuardrailItem": ("litellm.types.guardrails", "GuardrailItem"),
-    "DefaultTeamSSOParams": ("litellm.types.proxy.management_endpoints.ui_sso", "DefaultTeamSSOParams"),
-    "LiteLLM_UpperboundKeyGenerateParams": ("litellm.types.proxy.management_endpoints.ui_sso", "LiteLLM_UpperboundKeyGenerateParams"),
-    "KeyManagementSystem": ("litellm.types.secret_managers.main", "KeyManagementSystem"),
-    "PriorityReservationSettings": ("litellm.types.utils", "PriorityReservationSettings"),
+    "DefaultTeamSSOParams": (
+        "litellm.types.proxy.management_endpoints.ui_sso",
+        "DefaultTeamSSOParams",
+    ),
+    "LiteLLM_UpperboundKeyGenerateParams": (
+        "litellm.types.proxy.management_endpoints.ui_sso",
+        "LiteLLM_UpperboundKeyGenerateParams",
+    ),
+    "KeyManagementSystem": (
+        "litellm.types.secret_managers.main",
+        "KeyManagementSystem",
+    ),
+    "PriorityReservationSettings": (
+        "litellm.types.utils",
+        "PriorityReservationSettings",
+    ),
     "CustomLogger": ("litellm.integrations.custom_logger", "CustomLogger"),
-    "LoggingCallbackManager": ("litellm.litellm_core_utils.logging_callback_manager", "LoggingCallbackManager"),
-    "DatadogLLMObsInitParams": ("litellm.types.integrations.datadog_llm_obs", "DatadogLLMObsInitParams"),
+    "LoggingCallbackManager": (
+        "litellm.litellm_core_utils.logging_callback_manager",
+        "LoggingCallbackManager",
+    ),
+    "DatadogLLMObsInitParams": (
+        "litellm.types.integrations.datadog_llm_obs",
+        "DatadogLLMObsInitParams",
+    ),
 }
 
 _LLM_PROVIDER_LOGIC_IMPORT_MAP = {
-    "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"),
-    "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"),
+    "get_llm_provider": (
+        "litellm.litellm_core_utils.get_llm_provider_logic",
+        "get_llm_provider",
+    ),
+    "remove_index_from_tool_calls": (
+        "litellm.litellm_core_utils.core_helpers",
+        "remove_index_from_tool_calls",
+    ),
 }
 
 _LLM_CONFIGS_IMPORT_MAP = {
-    "AmazonConverseConfig": (".llms.bedrock.chat.converse_transformation", "AmazonConverseConfig"),
+    "AmazonConverseConfig": (
+        ".llms.bedrock.chat.converse_transformation",
+        "AmazonConverseConfig",
+    ),
     "OpenAILikeChatConfig": (".llms.openai_like.chat.handler", "OpenAILikeChatConfig"),
-    "GaladrielChatConfig": (".llms.galadriel.chat.transformation", "GaladrielChatConfig"),
+    "GaladrielChatConfig": (
+        ".llms.galadriel.chat.transformation",
+        "GaladrielChatConfig",
+    ),
     "GithubChatConfig": (".llms.github.chat.transformation", "GithubChatConfig"),
-    "AzureAnthropicConfig": (".llms.azure_ai.anthropic.transformation", "AzureAnthropicConfig"),
+    "AzureAnthropicConfig": (
+        ".llms.azure_ai.anthropic.transformation",
+        "AzureAnthropicConfig",
+    ),
     "BytezChatConfig": (".llms.bytez.chat.transformation", "BytezChatConfig"),
-    "CompactifAIChatConfig": (".llms.compactifai.chat.transformation", "CompactifAIChatConfig"),
+    "CompactifAIChatConfig": (
+        ".llms.compactifai.chat.transformation",
+        "CompactifAIChatConfig",
+    ),
     "EmpowerChatConfig": (".llms.empower.chat.transformation", "EmpowerChatConfig"),
     "MinimaxChatConfig": (".llms.minimax.chat.transformation", "MinimaxChatConfig"),
-    "AiohttpOpenAIChatConfig": (".llms.aiohttp_openai.chat.transformation", "AiohttpOpenAIChatConfig"),
-    "HuggingFaceChatConfig": (".llms.huggingface.chat.transformation", "HuggingFaceChatConfig"),
-    "HuggingFaceEmbeddingConfig": (".llms.huggingface.embedding.transformation", "HuggingFaceEmbeddingConfig"),
+    "AiohttpOpenAIChatConfig": (
+        ".llms.aiohttp_openai.chat.transformation",
+        "AiohttpOpenAIChatConfig",
+    ),
+    "HuggingFaceChatConfig": (
+        ".llms.huggingface.chat.transformation",
+        "HuggingFaceChatConfig",
+    ),
+    "HuggingFaceEmbeddingConfig": (
+        ".llms.huggingface.embedding.transformation",
+        "HuggingFaceEmbeddingConfig",
+    ),
     "OobaboogaConfig": (".llms.oobabooga.chat.transformation", "OobaboogaConfig"),
     "MaritalkConfig": (".llms.maritalk", "MaritalkConfig"),
     "OpenrouterConfig": (".llms.openrouter.chat.transformation", "OpenrouterConfig"),
     "DataRobotConfig": (".llms.datarobot.chat.transformation", "DataRobotConfig"),
     "AnthropicConfig": (".llms.anthropic.chat.transformation", "AnthropicConfig"),
-    "AnthropicTextConfig": (".llms.anthropic.completion.transformation", "AnthropicTextConfig"),
+    "AnthropicTextConfig": (
+        ".llms.anthropic.completion.transformation",
+        "AnthropicTextConfig",
+    ),
     "GroqSTTConfig": (".llms.groq.stt.transformation", "GroqSTTConfig"),
     "TritonConfig": (".llms.triton.completion.transformation", "TritonConfig"),
-    "TritonGenerateConfig": (".llms.triton.completion.transformation", "TritonGenerateConfig"),
-    "TritonInferConfig": (".llms.triton.completion.transformation", "TritonInferConfig"),
-    "TritonEmbeddingConfig": (".llms.triton.embedding.transformation", "TritonEmbeddingConfig"),
-    "HuggingFaceRerankConfig": (".llms.huggingface.rerank.transformation", "HuggingFaceRerankConfig"),
+    "TritonGenerateConfig": (
+        ".llms.triton.completion.transformation",
+        "TritonGenerateConfig",
+    ),
+    "TritonInferConfig": (
+        ".llms.triton.completion.transformation",
+        "TritonInferConfig",
+    ),
+    "TritonEmbeddingConfig": (
+        ".llms.triton.embedding.transformation",
+        "TritonEmbeddingConfig",
+    ),
+    "HuggingFaceRerankConfig": (
+        ".llms.huggingface.rerank.transformation",
+        "HuggingFaceRerankConfig",
+    ),
     "DatabricksConfig": (".llms.databricks.chat.transformation", "DatabricksConfig"),
-    "DatabricksEmbeddingConfig": (".llms.databricks.embed.transformation", "DatabricksEmbeddingConfig"),
+    "DatabricksEmbeddingConfig": (
+        ".llms.databricks.embed.transformation",
+        "DatabricksEmbeddingConfig",
+    ),
     "PredibaseConfig": (".llms.predibase.chat.transformation", "PredibaseConfig"),
     "ReplicateConfig": (".llms.replicate.chat.transformation", "ReplicateConfig"),
     "SnowflakeConfig": (".llms.snowflake.chat.transformation", "SnowflakeConfig"),
     "CohereRerankConfig": (".llms.cohere.rerank.transformation", "CohereRerankConfig"),
-    "CohereRerankV2Config": (".llms.cohere.rerank_v2.transformation", "CohereRerankV2Config"),
-    "AzureAIRerankConfig": (".llms.azure_ai.rerank.transformation", "AzureAIRerankConfig"),
-    "InfinityRerankConfig": (".llms.infinity.rerank.transformation", "InfinityRerankConfig"),
+    "CohereRerankV2Config": (
+        ".llms.cohere.rerank_v2.transformation",
+        "CohereRerankV2Config",
+    ),
+    "AzureAIRerankConfig": (
+        ".llms.azure_ai.rerank.transformation",
+        "AzureAIRerankConfig",
+    ),
+    "InfinityRerankConfig": (
+        ".llms.infinity.rerank.transformation",
+        "InfinityRerankConfig",
+    ),
     "JinaAIRerankConfig": (".llms.jina_ai.rerank.transformation", "JinaAIRerankConfig"),
-    "DeepinfraRerankConfig": (".llms.deepinfra.rerank.transformation", "DeepinfraRerankConfig"),
-    "HostedVLLMRerankConfig": (".llms.hosted_vllm.rerank.transformation", "HostedVLLMRerankConfig"),
-    "NvidiaNimRerankConfig": (".llms.nvidia_nim.rerank.transformation", "NvidiaNimRerankConfig"),
-    "NvidiaNimRankingConfig": (".llms.nvidia_nim.rerank.ranking_transformation", "NvidiaNimRankingConfig"),
-    "VertexAIRerankConfig": (".llms.vertex_ai.rerank.transformation", "VertexAIRerankConfig"),
-    "FireworksAIRerankConfig": (".llms.fireworks_ai.rerank.transformation", "FireworksAIRerankConfig"),
+    "DeepinfraRerankConfig": (
+        ".llms.deepinfra.rerank.transformation",
+        "DeepinfraRerankConfig",
+    ),
+    "HostedVLLMRerankConfig": (
+        ".llms.hosted_vllm.rerank.transformation",
+        "HostedVLLMRerankConfig",
+    ),
+    "NvidiaNimRerankConfig": (
+        ".llms.nvidia_nim.rerank.transformation",
+        "NvidiaNimRerankConfig",
+    ),
+    "NvidiaNimRankingConfig": (
+        ".llms.nvidia_nim.rerank.ranking_transformation",
+        "NvidiaNimRankingConfig",
+    ),
+    "VertexAIRerankConfig": (
+        ".llms.vertex_ai.rerank.transformation",
+        "VertexAIRerankConfig",
+    ),
+    "FireworksAIRerankConfig": (
+        ".llms.fireworks_ai.rerank.transformation",
+        "FireworksAIRerankConfig",
+    ),
     "VoyageRerankConfig": (".llms.voyage.rerank.transformation", "VoyageRerankConfig"),
     "ClarifaiConfig": (".llms.clarifai.chat.transformation", "ClarifaiConfig"),
     "AI21ChatConfig": (".llms.ai21.chat.transformation", "AI21ChatConfig"),
     "LlamaAPIConfig": (".llms.meta_llama.chat.transformation", "LlamaAPIConfig"),
-    "TogetherAITextCompletionConfig": (".llms.together_ai.completion.transformation", "TogetherAITextCompletionConfig"),
-    "CloudflareChatConfig": (".llms.cloudflare.chat.transformation", "CloudflareChatConfig"),
+    "TogetherAITextCompletionConfig": (
+        ".llms.together_ai.completion.transformation",
+        "TogetherAITextCompletionConfig",
+    ),
+    "CloudflareChatConfig": (
+        ".llms.cloudflare.chat.transformation",
+        "CloudflareChatConfig",
+    ),
     "NovitaConfig": (".llms.novita.chat.transformation", "NovitaConfig"),
     "PetalsConfig": (".llms.petals.completion.transformation", "PetalsConfig"),
     "OllamaChatConfig": (".llms.ollama.chat.transformation", "OllamaChatConfig"),
     "OllamaConfig": (".llms.ollama.completion.transformation", "OllamaConfig"),
     "SagemakerConfig": (".llms.sagemaker.completion.transformation", "SagemakerConfig"),
-    "SagemakerChatConfig": (".llms.sagemaker.chat.transformation", "SagemakerChatConfig"),
+    "SagemakerChatConfig": (
+        ".llms.sagemaker.chat.transformation",
+        "SagemakerChatConfig",
+    ),
     "CohereChatConfig": (".llms.cohere.chat.transformation", "CohereChatConfig"),
-    "AnthropicMessagesConfig": (".llms.anthropic.experimental_pass_through.messages.transformation", "AnthropicMessagesConfig"),
-    "AmazonAnthropicClaudeMessagesConfig": (".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeMessagesConfig"),
+    "AnthropicMessagesConfig": (
+        ".llms.anthropic.experimental_pass_through.messages.transformation",
+        "AnthropicMessagesConfig",
+    ),
+    "AmazonAnthropicClaudeMessagesConfig": (
+        ".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation",
+        "AmazonAnthropicClaudeMessagesConfig",
+    ),
     "TogetherAIConfig": (".llms.together_ai.chat", "TogetherAIConfig"),
     "NLPCloudConfig": (".llms.nlp_cloud.chat.handler", "NLPCloudConfig"),
-    "VertexGeminiConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"),
-    "GoogleAIStudioGeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"),
-    "VertexAIAnthropicConfig": (".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", "VertexAIAnthropicConfig"),
-    "VertexAILlama3Config": (".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", "VertexAILlama3Config"),
-    "VertexAIAi21Config": (".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", "VertexAIAi21Config"),
-    "AmazonCohereChatConfig": (".llms.bedrock.chat.invoke_handler", "AmazonCohereChatConfig"),
-    "AmazonBedrockGlobalConfig": (".llms.bedrock.common_utils", "AmazonBedrockGlobalConfig"),
-    "AmazonAI21Config": (".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", "AmazonAI21Config"),
-    "AmazonInvokeNovaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", "AmazonInvokeNovaConfig"),
-    "AmazonQwen2Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", "AmazonQwen2Config"),
-    "AmazonQwen3Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", "AmazonQwen3Config"),
+    "VertexGeminiConfig": (
+        ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini",
+        "VertexGeminiConfig",
+    ),
+    "GoogleAIStudioGeminiConfig": (
+        ".llms.gemini.chat.transformation",
+        "GoogleAIStudioGeminiConfig",
+    ),
+    "VertexAIAnthropicConfig": (
+        ".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation",
+        "VertexAIAnthropicConfig",
+    ),
+    "VertexAILlama3Config": (
+        ".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation",
+        "VertexAILlama3Config",
+    ),
+    "VertexAIAi21Config": (
+        ".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation",
+        "VertexAIAi21Config",
+    ),
+    "AmazonCohereChatConfig": (
+        ".llms.bedrock.chat.invoke_handler",
+        "AmazonCohereChatConfig",
+    ),
+    "AmazonBedrockGlobalConfig": (
+        ".llms.bedrock.common_utils",
+        "AmazonBedrockGlobalConfig",
+    ),
+    "AmazonAI21Config": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation",
+        "AmazonAI21Config",
+    ),
+    "AmazonInvokeNovaConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation",
+        "AmazonInvokeNovaConfig",
+    ),
+    "AmazonQwen2Config": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation",
+        "AmazonQwen2Config",
+    ),
+    "AmazonQwen3Config": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation",
+        "AmazonQwen3Config",
+    ),
     # Aliases for backwards compatibility
-    "VertexAIConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"),  # Alias
-    "GeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"),  # Alias
-    "AmazonAnthropicConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", "AmazonAnthropicConfig"),
-    "AmazonAnthropicClaudeConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeConfig"),
-    "AmazonCohereConfig": (".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", "AmazonCohereConfig"),
-    "AmazonLlamaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", "AmazonLlamaConfig"),
-    "AmazonDeepSeekR1Config": (".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", "AmazonDeepSeekR1Config"),
-    "AmazonMistralConfig": (".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", "AmazonMistralConfig"),
-    "AmazonMoonshotConfig": (".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", "AmazonMoonshotConfig"),
-    "AmazonTitanConfig": (".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", "AmazonTitanConfig"),
-    "AmazonTwelveLabsPegasusConfig": (".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", "AmazonTwelveLabsPegasusConfig"),
-    "AmazonInvokeConfig": (".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", "AmazonInvokeConfig"),
-    "AmazonBedrockOpenAIConfig": (".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", "AmazonBedrockOpenAIConfig"),
-    "AmazonStabilityConfig": (".llms.bedrock.image_generation.amazon_stability1_transformation", "AmazonStabilityConfig"),
-    "AmazonStability3Config": (".llms.bedrock.image_generation.amazon_stability3_transformation", "AmazonStability3Config"),
-    "AmazonNovaCanvasConfig": (".llms.bedrock.image_generation.amazon_nova_canvas_transformation", "AmazonNovaCanvasConfig"),
-    "AmazonTitanG1Config": (".llms.bedrock.embed.amazon_titan_g1_transformation", "AmazonTitanG1Config"),
-    "AmazonTitanMultimodalEmbeddingG1Config": (".llms.bedrock.embed.amazon_titan_multimodal_transformation", "AmazonTitanMultimodalEmbeddingG1Config"),
+    "VertexAIConfig": (
+        ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini",
+        "VertexGeminiConfig",
+    ),  # Alias for VertexGeminiConfig
+    "GeminiConfig": (
+        ".llms.gemini.chat.transformation",
+        "GoogleAIStudioGeminiConfig",
+    ),  # Alias for GoogleAIStudioGeminiConfig
+    "AmazonAnthropicConfig": (
+        ".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation",
+        "AmazonAnthropicConfig",
+    ),
+    "AmazonAnthropicClaudeConfig": (
+        ".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation",
+        "AmazonAnthropicClaudeConfig",
+    ),
+    "AmazonCohereConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation",
+        "AmazonCohereConfig",
+    ),
+    "AmazonLlamaConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation",
+        "AmazonLlamaConfig",
+    ),
+    "AmazonDeepSeekR1Config": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation",
+        "AmazonDeepSeekR1Config",
+    ),
+    "AmazonMistralConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation",
+        "AmazonMistralConfig",
+    ),
+    "AmazonMoonshotConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation",
+        "AmazonMoonshotConfig",
+    ),
+    "AmazonTitanConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation",
+        "AmazonTitanConfig",
+    ),
+    "AmazonTwelveLabsPegasusConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation",
+        "AmazonTwelveLabsPegasusConfig",
+    ),
+    "AmazonInvokeConfig": (
+        ".llms.bedrock.chat.invoke_transformations.base_invoke_transformation",
+        "AmazonInvokeConfig",
+    ),
+    "AmazonBedrockOpenAIConfig": (
+        ".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation",
+        "AmazonBedrockOpenAIConfig",
+    ),
+    "AmazonStabilityConfig": (
+        ".llms.bedrock.image_generation.amazon_stability1_transformation",
+        "AmazonStabilityConfig",
+    ),
+    "AmazonStability3Config": (
+        ".llms.bedrock.image_generation.amazon_stability3_transformation",
+        "AmazonStability3Config",
+    ),
+    "AmazonNovaCanvasConfig": (
+        ".llms.bedrock.image_generation.amazon_nova_canvas_transformation",
+        "AmazonNovaCanvasConfig",
+    ),
+    "AmazonTitanG1Config": (
+        ".llms.bedrock.embed.amazon_titan_g1_transformation",
+        "AmazonTitanG1Config",
+    ),
+    "AmazonTitanMultimodalEmbeddingG1Config": (
+        ".llms.bedrock.embed.amazon_titan_multimodal_transformation",
+        "AmazonTitanMultimodalEmbeddingG1Config",
+    ),
     "CohereV2ChatConfig": (".llms.cohere.chat.v2_transformation", "CohereV2ChatConfig"),
-    "BedrockCohereEmbeddingConfig": (".llms.bedrock.embed.cohere_transformation", "BedrockCohereEmbeddingConfig"),
-    "TwelveLabsMarengoEmbeddingConfig": (".llms.bedrock.embed.twelvelabs_marengo_transformation", "TwelveLabsMarengoEmbeddingConfig"),
-    "AmazonNovaEmbeddingConfig": (".llms.bedrock.embed.amazon_nova_transformation", "AmazonNovaEmbeddingConfig"),
+    "BedrockCohereEmbeddingConfig": (
+        ".llms.bedrock.embed.cohere_transformation",
+        "BedrockCohereEmbeddingConfig",
+    ),
+    "TwelveLabsMarengoEmbeddingConfig": (
+        ".llms.bedrock.embed.twelvelabs_marengo_transformation",
+        "TwelveLabsMarengoEmbeddingConfig",
+    ),
+    "AmazonNovaEmbeddingConfig": (
+        ".llms.bedrock.embed.amazon_nova_transformation",
+        "AmazonNovaEmbeddingConfig",
+    ),
     "OpenAIConfig": (".llms.openai.openai", "OpenAIConfig"),
     "MistralEmbeddingConfig": (".llms.openai.openai", "MistralEmbeddingConfig"),
-    "OpenAIImageVariationConfig": (".llms.openai.image_variations.transformation", "OpenAIImageVariationConfig"),
+    "OpenAIImageVariationConfig": (
+        ".llms.openai.image_variations.transformation",
+        "OpenAIImageVariationConfig",
+    ),
     "DeepInfraConfig": (".llms.deepinfra.chat.transformation", "DeepInfraConfig"),
-    "DeepgramAudioTranscriptionConfig": (".llms.deepgram.audio_transcription.transformation", "DeepgramAudioTranscriptionConfig"),
-    "TopazImageVariationConfig": (".llms.topaz.image_variations.transformation", "TopazImageVariationConfig"),
-    "OpenAITextCompletionConfig": ("litellm.llms.openai.completion.transformation", "OpenAITextCompletionConfig"),
+    "DeepgramAudioTranscriptionConfig": (
+        ".llms.deepgram.audio_transcription.transformation",
+        "DeepgramAudioTranscriptionConfig",
+    ),
+    "TopazImageVariationConfig": (
+        ".llms.topaz.image_variations.transformation",
+        "TopazImageVariationConfig",
+    ),
+    "OpenAITextCompletionConfig": (
+        "litellm.llms.openai.completion.transformation",
+        "OpenAITextCompletionConfig",
+    ),
     "GroqChatConfig": (".llms.groq.chat.transformation", "GroqChatConfig"),
-    "GenAIHubOrchestrationConfig": (".llms.sap.chat.transformation", "GenAIHubOrchestrationConfig"),
-    "VoyageEmbeddingConfig": (".llms.voyage.embedding.transformation", "VoyageEmbeddingConfig"),
-    "VoyageContextualEmbeddingConfig": (".llms.voyage.embedding.transformation_contextual", "VoyageContextualEmbeddingConfig"),
-    "InfinityEmbeddingConfig": (".llms.infinity.embedding.transformation", "InfinityEmbeddingConfig"),
-    "AzureAIStudioConfig": (".llms.azure_ai.chat.transformation", "AzureAIStudioConfig"),
+    "GenAIHubOrchestrationConfig": (
+        ".llms.sap.chat.transformation",
+        "GenAIHubOrchestrationConfig",
+    ),
+    "VoyageEmbeddingConfig": (
+        ".llms.voyage.embedding.transformation",
+        "VoyageEmbeddingConfig",
+    ),
+    "VoyageContextualEmbeddingConfig": (
+        ".llms.voyage.embedding.transformation_contextual",
+        "VoyageContextualEmbeddingConfig",
+    ),
+    "InfinityEmbeddingConfig": (
+        ".llms.infinity.embedding.transformation",
+        "InfinityEmbeddingConfig",
+    ),
+    "AzureAIStudioConfig": (
+        ".llms.azure_ai.chat.transformation",
+        "AzureAIStudioConfig",
+    ),
     "MistralConfig": (".llms.mistral.chat.transformation", "MistralConfig"),
-    "OpenAIResponsesAPIConfig": (".llms.openai.responses.transformation", "OpenAIResponsesAPIConfig"),
-    "AzureOpenAIResponsesAPIConfig": (".llms.azure.responses.transformation", "AzureOpenAIResponsesAPIConfig"),
-    "AzureOpenAIOSeriesResponsesAPIConfig": (".llms.azure.responses.o_series_transformation", "AzureOpenAIOSeriesResponsesAPIConfig"),
-    "XAIResponsesAPIConfig": (".llms.xai.responses.transformation", "XAIResponsesAPIConfig"),
-    "LiteLLMProxyResponsesAPIConfig": (".llms.litellm_proxy.responses.transformation", "LiteLLMProxyResponsesAPIConfig"),
-    "VolcEngineResponsesAPIConfig": (".llms.volcengine.responses.transformation", "VolcEngineResponsesAPIConfig"),
-    "ManusResponsesAPIConfig": (".llms.manus.responses.transformation", "ManusResponsesAPIConfig"),
-    "GoogleAIStudioInteractionsConfig": (".llms.gemini.interactions.transformation", "GoogleAIStudioInteractionsConfig"),
-    "OpenAIOSeriesConfig": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"),
-    "AnthropicSkillsConfig": (".llms.anthropic.skills.transformation", "AnthropicSkillsConfig"),
-    "BaseSkillsAPIConfig": (".llms.base_llm.skills.transformation", "BaseSkillsAPIConfig"),
+    "OpenAIResponsesAPIConfig": (
+        ".llms.openai.responses.transformation",
+        "OpenAIResponsesAPIConfig",
+    ),
+    "AzureOpenAIResponsesAPIConfig": (
+        ".llms.azure.responses.transformation",
+        "AzureOpenAIResponsesAPIConfig",
+    ),
+    "AzureOpenAIOSeriesResponsesAPIConfig": (
+        ".llms.azure.responses.o_series_transformation",
+        "AzureOpenAIOSeriesResponsesAPIConfig",
+    ),
+    "XAIResponsesAPIConfig": (
+        ".llms.xai.responses.transformation",
+        "XAIResponsesAPIConfig",
+    ),
+    "LiteLLMProxyResponsesAPIConfig": (
+        ".llms.litellm_proxy.responses.transformation",
+        "LiteLLMProxyResponsesAPIConfig",
+    ),
+    "VolcEngineResponsesAPIConfig": (
+        ".llms.volcengine.responses.transformation",
+        "VolcEngineResponsesAPIConfig",
+    ),
+    "ManusResponsesAPIConfig": (
+        ".llms.manus.responses.transformation",
+        "ManusResponsesAPIConfig",
+    ),
+    "GoogleAIStudioInteractionsConfig": (
+        ".llms.gemini.interactions.transformation",
+        "GoogleAIStudioInteractionsConfig",
+    ),
+    "OpenAIOSeriesConfig": (
+        ".llms.openai.chat.o_series_transformation",
+        "OpenAIOSeriesConfig",
+    ),
+    "AnthropicSkillsConfig": (
+        ".llms.anthropic.skills.transformation",
+        "AnthropicSkillsConfig",
+    ),
+    "BaseSkillsAPIConfig": (
+        ".llms.base_llm.skills.transformation",
+        "BaseSkillsAPIConfig",
+    ),
     "GradientAIConfig": (".llms.gradient_ai.chat.transformation", "GradientAIConfig"),
     # Alias for backwards compatibility
-    "OpenAIO1Config": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"),  # Alias
+    "OpenAIO1Config": (
+        ".llms.openai.chat.o_series_transformation",
+        "OpenAIOSeriesConfig",
+    ),  # Alias
     "OpenAIGPTConfig": (".llms.openai.chat.gpt_transformation", "OpenAIGPTConfig"),
     "OpenAIGPT5Config": (".llms.openai.chat.gpt_5_transformation", "OpenAIGPT5Config"),
-    "OpenAIWhisperAudioTranscriptionConfig": (".llms.openai.transcriptions.whisper_transformation", "OpenAIWhisperAudioTranscriptionConfig"),
-    "OpenAIGPTAudioTranscriptionConfig": (".llms.openai.transcriptions.gpt_transformation", "OpenAIGPTAudioTranscriptionConfig"),
-    "OpenAIGPTAudioConfig": (".llms.openai.chat.gpt_audio_transformation", "OpenAIGPTAudioConfig"),
+    "OpenAIWhisperAudioTranscriptionConfig": (
+        ".llms.openai.transcriptions.whisper_transformation",
+        "OpenAIWhisperAudioTranscriptionConfig",
+    ),
+    "OpenAIGPTAudioTranscriptionConfig": (
+        ".llms.openai.transcriptions.gpt_transformation",
+        "OpenAIGPTAudioTranscriptionConfig",
+    ),
+    "OpenAIGPTAudioConfig": (
+        ".llms.openai.chat.gpt_audio_transformation",
+        "OpenAIGPTAudioConfig",
+    ),
     "NvidiaNimConfig": (".llms.nvidia_nim.chat.transformation", "NvidiaNimConfig"),
     "NvidiaNimEmbeddingConfig": (".llms.nvidia_nim.embed", "NvidiaNimEmbeddingConfig"),
-    "FeatherlessAIConfig": (".llms.featherless_ai.chat.transformation", "FeatherlessAIConfig"),
+    "FeatherlessAIConfig": (
+        ".llms.featherless_ai.chat.transformation",
+        "FeatherlessAIConfig",
+    ),
     "CerebrasConfig": (".llms.cerebras.chat", "CerebrasConfig"),
     "BasetenConfig": (".llms.baseten.chat", "BasetenConfig"),
     "SambanovaConfig": (".llms.sambanova.chat", "SambanovaConfig"),
-    "SambaNovaEmbeddingConfig": (".llms.sambanova.embedding.transformation", "SambaNovaEmbeddingConfig"),
-    "FireworksAIConfig": (".llms.fireworks_ai.chat.transformation", "FireworksAIConfig"),
-    "FireworksAITextCompletionConfig": (".llms.fireworks_ai.completion.transformation", "FireworksAITextCompletionConfig"),
-    "FireworksAIAudioTranscriptionConfig": (".llms.fireworks_ai.audio_transcription.transformation", "FireworksAIAudioTranscriptionConfig"),
-    "FireworksAIEmbeddingConfig": (".llms.fireworks_ai.embed.fireworks_ai_transformation", "FireworksAIEmbeddingConfig"),
-    "FriendliaiChatConfig": (".llms.friendliai.chat.transformation", "FriendliaiChatConfig"),
-    "JinaAIEmbeddingConfig": (".llms.jina_ai.embedding.transformation", "JinaAIEmbeddingConfig"),
+    "SambaNovaEmbeddingConfig": (
+        ".llms.sambanova.embedding.transformation",
+        "SambaNovaEmbeddingConfig",
+    ),
+    "FireworksAIConfig": (
+        ".llms.fireworks_ai.chat.transformation",
+        "FireworksAIConfig",
+    ),
+    "FireworksAITextCompletionConfig": (
+        ".llms.fireworks_ai.completion.transformation",
+        "FireworksAITextCompletionConfig",
+    ),
+    "FireworksAIAudioTranscriptionConfig": (
+        ".llms.fireworks_ai.audio_transcription.transformation",
+        "FireworksAIAudioTranscriptionConfig",
+    ),
+    "FireworksAIEmbeddingConfig": (
+        ".llms.fireworks_ai.embed.fireworks_ai_transformation",
+        "FireworksAIEmbeddingConfig",
+    ),
+    "FriendliaiChatConfig": (
+        ".llms.friendliai.chat.transformation",
+        "FriendliaiChatConfig",
+    ),
+    "JinaAIEmbeddingConfig": (
+        ".llms.jina_ai.embedding.transformation",
+        "JinaAIEmbeddingConfig",
+    ),
     "XAIChatConfig": (".llms.xai.chat.transformation", "XAIChatConfig"),
     "ZAIChatConfig": (".llms.zai.chat.transformation", "ZAIChatConfig"),
     "AIMLChatConfig": (".llms.aiml.chat.transformation", "AIMLChatConfig"),
-    "VolcEngineChatConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"),
-    "CodestralTextCompletionConfig": (".llms.codestral.completion.transformation", "CodestralTextCompletionConfig"),
-    "AzureOpenAIAssistantsAPIConfig": (".llms.azure.azure", "AzureOpenAIAssistantsAPIConfig"),
+    "VolcEngineChatConfig": (
+        ".llms.volcengine.chat.transformation",
+        "VolcEngineChatConfig",
+    ),
+    "CodestralTextCompletionConfig": (
+        ".llms.codestral.completion.transformation",
+        "CodestralTextCompletionConfig",
+    ),
+    "AzureOpenAIAssistantsAPIConfig": (
+        ".llms.azure.azure",
+        "AzureOpenAIAssistantsAPIConfig",
+    ),
     "HerokuChatConfig": (".llms.heroku.chat.transformation", "HerokuChatConfig"),
     "CometAPIConfig": (".llms.cometapi.chat.transformation", "CometAPIConfig"),
     "AzureOpenAIConfig": (".llms.azure.chat.gpt_transformation", "AzureOpenAIConfig"),
-    "AzureOpenAIGPT5Config": (".llms.azure.chat.gpt_5_transformation", "AzureOpenAIGPT5Config"),
-    "AzureOpenAITextConfig": (".llms.azure.completion.transformation", "AzureOpenAITextConfig"),
-    "HostedVLLMChatConfig": (".llms.hosted_vllm.chat.transformation", "HostedVLLMChatConfig"),
+    "AzureOpenAIGPT5Config": (
+        ".llms.azure.chat.gpt_5_transformation",
+        "AzureOpenAIGPT5Config",
+    ),
+    "AzureOpenAITextConfig": (
+        ".llms.azure.completion.transformation",
+        "AzureOpenAITextConfig",
+    ),
+    "HostedVLLMChatConfig": (
+        ".llms.hosted_vllm.chat.transformation",
+        "HostedVLLMChatConfig",
+    ),
+    "HostedVLLMEmbeddingConfig": (
+        ".llms.hosted_vllm.embedding.transformation",
+        "HostedVLLMEmbeddingConfig",
+    ),
     # Alias for backwards compatibility
-    "VolcEngineConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"),  # Alias
-    "LlamafileChatConfig": (".llms.llamafile.chat.transformation", "LlamafileChatConfig"),
-    "LiteLLMProxyChatConfig": (".llms.litellm_proxy.chat.transformation", "LiteLLMProxyChatConfig"),
+    "VolcEngineConfig": (
+        ".llms.volcengine.chat.transformation",
+        "VolcEngineChatConfig",
+    ),  # Alias
+    "LlamafileChatConfig": (
+        ".llms.llamafile.chat.transformation",
+        "LlamafileChatConfig",
+    ),
+    "LiteLLMProxyChatConfig": (
+        ".llms.litellm_proxy.chat.transformation",
+        "LiteLLMProxyChatConfig",
+    ),
     "VLLMConfig": (".llms.vllm.completion.transformation", "VLLMConfig"),
     "DeepSeekChatConfig": (".llms.deepseek.chat.transformation", "DeepSeekChatConfig"),
     "LMStudioChatConfig": (".llms.lm_studio.chat.transformation", "LMStudioChatConfig"),
-    "LmStudioEmbeddingConfig": (".llms.lm_studio.embed.transformation", "LmStudioEmbeddingConfig"),
+    "LmStudioEmbeddingConfig": (
+        ".llms.lm_studio.embed.transformation",
+        "LmStudioEmbeddingConfig",
+    ),
     "NscaleConfig": (".llms.nscale.chat.transformation", "NscaleConfig"),
-    "PerplexityChatConfig": (".llms.perplexity.chat.transformation", "PerplexityChatConfig"),
-    "AzureOpenAIO1Config": (".llms.azure.chat.o_series_transformation", "AzureOpenAIO1Config"),
-    "IBMWatsonXAIConfig": (".llms.watsonx.completion.transformation", "IBMWatsonXAIConfig"),
-    "IBMWatsonXChatConfig": (".llms.watsonx.chat.transformation", "IBMWatsonXChatConfig"),
-    "IBMWatsonXEmbeddingConfig": (".llms.watsonx.embed.transformation", "IBMWatsonXEmbeddingConfig"),
-    "GenAIHubEmbeddingConfig": (".llms.sap.embed.transformation", "GenAIHubEmbeddingConfig"),
-    "IBMWatsonXAudioTranscriptionConfig": (".llms.watsonx.audio_transcription.transformation", "IBMWatsonXAudioTranscriptionConfig"),
-    "GithubCopilotConfig": (".llms.github_copilot.chat.transformation", "GithubCopilotConfig"),
-    "GithubCopilotResponsesAPIConfig": (".llms.github_copilot.responses.transformation", "GithubCopilotResponsesAPIConfig"),
-    "GithubCopilotEmbeddingConfig": (".llms.github_copilot.embedding.transformation", "GithubCopilotEmbeddingConfig"),
+    "PerplexityChatConfig": (
+        ".llms.perplexity.chat.transformation",
+        "PerplexityChatConfig",
+    ),
+    "AzureOpenAIO1Config": (
+        ".llms.azure.chat.o_series_transformation",
+        "AzureOpenAIO1Config",
+    ),
+    "IBMWatsonXAIConfig": (
+        ".llms.watsonx.completion.transformation",
+        "IBMWatsonXAIConfig",
+    ),
+    "IBMWatsonXChatConfig": (
+        ".llms.watsonx.chat.transformation",
+        "IBMWatsonXChatConfig",
+    ),
+    "IBMWatsonXEmbeddingConfig": (
+        ".llms.watsonx.embed.transformation",
+        "IBMWatsonXEmbeddingConfig",
+    ),
+    "GenAIHubEmbeddingConfig": (
+        ".llms.sap.embed.transformation",
+        "GenAIHubEmbeddingConfig",
+    ),
+    "IBMWatsonXAudioTranscriptionConfig": (
+        ".llms.watsonx.audio_transcription.transformation",
+        "IBMWatsonXAudioTranscriptionConfig",
+    ),
+    "GithubCopilotConfig": (
+        ".llms.github_copilot.chat.transformation",
+        "GithubCopilotConfig",
+    ),
+    "GithubCopilotResponsesAPIConfig": (
+        ".llms.github_copilot.responses.transformation",
+        "GithubCopilotResponsesAPIConfig",
+    ),
+    "GithubCopilotEmbeddingConfig": (
+        ".llms.github_copilot.embedding.transformation",
+        "GithubCopilotEmbeddingConfig",
+    ),
     "ChatGPTConfig": (".llms.chatgpt.chat.transformation", "ChatGPTConfig"),
-    "ChatGPTResponsesAPIConfig": (".llms.chatgpt.responses.transformation", "ChatGPTResponsesAPIConfig"),
+    "ChatGPTResponsesAPIConfig": (
+        ".llms.chatgpt.responses.transformation",
+        "ChatGPTResponsesAPIConfig",
+    ),
     "NebiusConfig": (".llms.nebius.chat.transformation", "NebiusConfig"),
     "WandbConfig": (".llms.wandb.chat.transformation", "WandbConfig"),
     "GigaChatConfig": (".llms.gigachat.chat.transformation", "GigaChatConfig"),
-    "GigaChatEmbeddingConfig": (".llms.gigachat.embedding.transformation", "GigaChatEmbeddingConfig"),
-    "DashScopeChatConfig": (".llms.dashscope.chat.transformation", "DashScopeChatConfig"),
+    "GigaChatEmbeddingConfig": (
+        ".llms.gigachat.embedding.transformation",
+        "GigaChatEmbeddingConfig",
+    ),
+    "DashScopeChatConfig": (
+        ".llms.dashscope.chat.transformation",
+        "DashScopeChatConfig",
+    ),
     "MoonshotChatConfig": (".llms.moonshot.chat.transformation", "MoonshotChatConfig"),
-    "DockerModelRunnerChatConfig": (".llms.docker_model_runner.chat.transformation", "DockerModelRunnerChatConfig"),
+    "DockerModelRunnerChatConfig": (
+        ".llms.docker_model_runner.chat.transformation",
+        "DockerModelRunnerChatConfig",
+    ),
     "V0ChatConfig": (".llms.v0.chat.transformation", "V0ChatConfig"),
     "OCIChatConfig": (".llms.oci.chat.transformation", "OCIChatConfig"),
     "MorphChatConfig": (".llms.morph.chat.transformation", "MorphChatConfig"),
     "RAGFlowConfig": (".llms.ragflow.chat.transformation", "RAGFlowConfig"),
     "LambdaAIChatConfig": (".llms.lambda_ai.chat.transformation", "LambdaAIChatConfig"),
-    "HyperbolicChatConfig": (".llms.hyperbolic.chat.transformation", "HyperbolicChatConfig"),
-    "VercelAIGatewayConfig": (".llms.vercel_ai_gateway.chat.transformation", "VercelAIGatewayConfig"),
+    "HyperbolicChatConfig": (
+        ".llms.hyperbolic.chat.transformation",
+        "HyperbolicChatConfig",
+    ),
+    "VercelAIGatewayConfig": (
+        ".llms.vercel_ai_gateway.chat.transformation",
+        "VercelAIGatewayConfig",
+    ),
     "OVHCloudChatConfig": (".llms.ovhcloud.chat.transformation", "OVHCloudChatConfig"),
-    "OVHCloudEmbeddingConfig": (".llms.ovhcloud.embedding.transformation", "OVHCloudEmbeddingConfig"),
-    "CometAPIEmbeddingConfig": (".llms.cometapi.embed.transformation", "CometAPIEmbeddingConfig"),
+    "OVHCloudEmbeddingConfig": (
+        ".llms.ovhcloud.embedding.transformation",
+        "OVHCloudEmbeddingConfig",
+    ),
+    "CometAPIEmbeddingConfig": (
+        ".llms.cometapi.embed.transformation",
+        "CometAPIEmbeddingConfig",
+    ),
     "LemonadeChatConfig": (".llms.lemonade.chat.transformation", "LemonadeChatConfig"),
-    "SnowflakeEmbeddingConfig": (".llms.snowflake.embedding.transformation", "SnowflakeEmbeddingConfig"),
-    "AmazonNovaChatConfig": (".llms.amazon_nova.chat.transformation", "AmazonNovaChatConfig"),
+    "SnowflakeEmbeddingConfig": (
+        ".llms.snowflake.embedding.transformation",
+        "SnowflakeEmbeddingConfig",
+    ),
+    "AmazonNovaChatConfig": (
+        ".llms.amazon_nova.chat.transformation",
+        "AmazonNovaChatConfig",
+    ),
 }
 
 # Import map for utils module lazy imports
 _UTILS_MODULE_IMPORT_MAP = {
     "encoding": ("litellm.main", "encoding"),
-    "BaseVectorStore": ("litellm.integrations.vector_store_integrations.base_vector_store", "BaseVectorStore"),
-    "CredentialAccessor": ("litellm.litellm_core_utils.credential_accessor", "CredentialAccessor"),
-    "exception_type": ("litellm.litellm_core_utils.exception_mapping_utils", "exception_type"),
-    "get_error_message": ("litellm.litellm_core_utils.exception_mapping_utils", "get_error_message"),
-    "_get_response_headers": ("litellm.litellm_core_utils.exception_mapping_utils", "_get_response_headers"),
-    "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"),
-    "_is_non_openai_azure_model": ("litellm.litellm_core_utils.get_llm_provider_logic", "_is_non_openai_azure_model"),
-    "get_supported_openai_params": ("litellm.litellm_core_utils.get_supported_openai_params", "get_supported_openai_params"),
-    "LiteLLMResponseObjectHandler": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "LiteLLMResponseObjectHandler"),
-    "_handle_invalid_parallel_tool_calls": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "_handle_invalid_parallel_tool_calls"),
-    "convert_to_model_response_object": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_model_response_object"),
-    "convert_to_streaming_response": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response"),
-    "convert_to_streaming_response_async": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response_async"),
-    "get_api_base": ("litellm.litellm_core_utils.llm_response_utils.get_api_base", "get_api_base"),
-    "ResponseMetadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "ResponseMetadata"),
-    "_parse_content_for_reasoning": ("litellm.litellm_core_utils.prompt_templates.common_utils", "_parse_content_for_reasoning"),
-    "LiteLLMLoggingObject": ("litellm.litellm_core_utils.redact_messages", "LiteLLMLoggingObject"),
-    "redact_message_input_output_from_logging": ("litellm.litellm_core_utils.redact_messages", "redact_message_input_output_from_logging"),
-    "CustomStreamWrapper": ("litellm.litellm_core_utils.streaming_handler", "CustomStreamWrapper"),
-    "BaseGoogleGenAIGenerateContentConfig": ("litellm.llms.base_llm.google_genai.transformation", "BaseGoogleGenAIGenerateContentConfig"),
+    "BaseVectorStore": (
+        "litellm.integrations.vector_store_integrations.base_vector_store",
+        "BaseVectorStore",
+    ),
+    "CredentialAccessor": (
+        "litellm.litellm_core_utils.credential_accessor",
+        "CredentialAccessor",
+    ),
+    "exception_type": (
+        "litellm.litellm_core_utils.exception_mapping_utils",
+        "exception_type",
+    ),
+    "get_error_message": (
+        "litellm.litellm_core_utils.exception_mapping_utils",
+        "get_error_message",
+    ),
+    "_get_response_headers": (
+        "litellm.litellm_core_utils.exception_mapping_utils",
+        "_get_response_headers",
+    ),
+    "get_llm_provider": (
+        "litellm.litellm_core_utils.get_llm_provider_logic",
+        "get_llm_provider",
+    ),
+    "_is_non_openai_azure_model": (
+        "litellm.litellm_core_utils.get_llm_provider_logic",
+        "_is_non_openai_azure_model",
+    ),
+    "get_supported_openai_params": (
+        "litellm.litellm_core_utils.get_supported_openai_params",
+        "get_supported_openai_params",
+    ),
+    "LiteLLMResponseObjectHandler": (
+        "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response",
+        "LiteLLMResponseObjectHandler",
+    ),
+    "_handle_invalid_parallel_tool_calls": (
+        "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response",
+        "_handle_invalid_parallel_tool_calls",
+    ),
+    "convert_to_model_response_object": (
+        "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response",
+        "convert_to_model_response_object",
+    ),
+    "convert_to_streaming_response": (
+        "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response",
+        "convert_to_streaming_response",
+    ),
+    "convert_to_streaming_response_async": (
+        "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response",
+        "convert_to_streaming_response_async",
+    ),
+    "get_api_base": (
+        "litellm.litellm_core_utils.llm_response_utils.get_api_base",
+        "get_api_base",
+    ),
+    "ResponseMetadata": (
+        "litellm.litellm_core_utils.llm_response_utils.response_metadata",
+        "ResponseMetadata",
+    ),
+    "_parse_content_for_reasoning": (
+        "litellm.litellm_core_utils.prompt_templates.common_utils",
+        "_parse_content_for_reasoning",
+    ),
+    "LiteLLMLoggingObject": (
+        "litellm.litellm_core_utils.redact_messages",
+        "LiteLLMLoggingObject",
+    ),
+    "redact_message_input_output_from_logging": (
+        "litellm.litellm_core_utils.redact_messages",
+        "redact_message_input_output_from_logging",
+    ),
+    "CustomStreamWrapper": (
+        "litellm.litellm_core_utils.streaming_handler",
+        "CustomStreamWrapper",
+    ),
+    "BaseGoogleGenAIGenerateContentConfig": (
+        "litellm.llms.base_llm.google_genai.transformation",
+        "BaseGoogleGenAIGenerateContentConfig",
+    ),
     "BaseOCRConfig": ("litellm.llms.base_llm.ocr.transformation", "BaseOCRConfig"),
-    "BaseSearchConfig": ("litellm.llms.base_llm.search.transformation", "BaseSearchConfig"),
-    "BaseTextToSpeechConfig": ("litellm.llms.base_llm.text_to_speech.transformation", "BaseTextToSpeechConfig"),
+    "BaseSearchConfig": (
+        "litellm.llms.base_llm.search.transformation",
+        "BaseSearchConfig",
+    ),
+    "BaseTextToSpeechConfig": (
+        "litellm.llms.base_llm.text_to_speech.transformation",
+        "BaseTextToSpeechConfig",
+    ),
     "BedrockModelInfo": ("litellm.llms.bedrock.common_utils", "BedrockModelInfo"),
     "CohereModelInfo": ("litellm.llms.cohere.common_utils", "CohereModelInfo"),
     "MistralOCRConfig": ("litellm.llms.mistral.ocr.transformation", "MistralOCRConfig"),
     "Rules": ("litellm.litellm_core_utils.rules", "Rules"),
     "AsyncHTTPHandler": ("litellm.llms.custom_httpx.http_handler", "AsyncHTTPHandler"),
     "HTTPHandler": ("litellm.llms.custom_httpx.http_handler", "HTTPHandler"),
-    "get_num_retries_from_retry_policy": ("litellm.router_utils.get_retry_from_policy", "get_num_retries_from_retry_policy"),
-    "reset_retry_policy": ("litellm.router_utils.get_retry_from_policy", "reset_retry_policy"),
+    "get_num_retries_from_retry_policy": (
+        "litellm.router_utils.get_retry_from_policy",
+        "get_num_retries_from_retry_policy",
+    ),
+    "reset_retry_policy": (
+        "litellm.router_utils.get_retry_from_policy",
+        "reset_retry_policy",
+    ),
     "get_secret": ("litellm.secret_managers.main", "get_secret"),
-    "get_coroutine_checker": ("litellm.litellm_core_utils.cached_imports", "get_coroutine_checker"),
-    "get_litellm_logging_class": ("litellm.litellm_core_utils.cached_imports", "get_litellm_logging_class"),
-    "get_set_callbacks": ("litellm.litellm_core_utils.cached_imports", "get_set_callbacks"),
-    "get_litellm_metadata_from_kwargs": ("litellm.litellm_core_utils.core_helpers", "get_litellm_metadata_from_kwargs"),
-    "map_finish_reason": ("litellm.litellm_core_utils.core_helpers", "map_finish_reason"),
-    "process_response_headers": ("litellm.litellm_core_utils.core_helpers", "process_response_headers"),
-    "delete_nested_value": ("litellm.litellm_core_utils.dot_notation_indexing", "delete_nested_value"),
-    "is_nested_path": ("litellm.litellm_core_utils.dot_notation_indexing", "is_nested_path"),
-    "_get_base_model_from_litellm_call_metadata": ("litellm.litellm_core_utils.get_litellm_params", "_get_base_model_from_litellm_call_metadata"),
-    "get_litellm_params": ("litellm.litellm_core_utils.get_litellm_params", "get_litellm_params"),
-    "_ensure_extra_body_is_safe": ("litellm.litellm_core_utils.llm_request_utils", "_ensure_extra_body_is_safe"),
-    "get_formatted_prompt": ("litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", "get_formatted_prompt"),
-    "get_response_headers": ("litellm.litellm_core_utils.llm_response_utils.get_headers", "get_response_headers"),
-    "update_response_metadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "update_response_metadata"),
+    "get_coroutine_checker": (
+        "litellm.litellm_core_utils.cached_imports",
+        "get_coroutine_checker",
+    ),
+    "get_litellm_logging_class": (
+        "litellm.litellm_core_utils.cached_imports",
+        "get_litellm_logging_class",
+    ),
+    "get_set_callbacks": (
+        "litellm.litellm_core_utils.cached_imports",
+        "get_set_callbacks",
+    ),
+    "get_litellm_metadata_from_kwargs": (
+        "litellm.litellm_core_utils.core_helpers",
+        "get_litellm_metadata_from_kwargs",
+    ),
+    "map_finish_reason": (
+        "litellm.litellm_core_utils.core_helpers",
+        "map_finish_reason",
+    ),
+    "process_response_headers": (
+        "litellm.litellm_core_utils.core_helpers",
+        "process_response_headers",
+    ),
+    "delete_nested_value": (
+        "litellm.litellm_core_utils.dot_notation_indexing",
+        "delete_nested_value",
+    ),
+    "is_nested_path": (
+        "litellm.litellm_core_utils.dot_notation_indexing",
+        "is_nested_path",
+    ),
+    "_get_base_model_from_litellm_call_metadata": (
+        "litellm.litellm_core_utils.get_litellm_params",
+        "_get_base_model_from_litellm_call_metadata",
+    ),
+    "get_litellm_params": (
+        "litellm.litellm_core_utils.get_litellm_params",
+        "get_litellm_params",
+    ),
+    "_ensure_extra_body_is_safe": (
+        "litellm.litellm_core_utils.llm_request_utils",
+        "_ensure_extra_body_is_safe",
+    ),
+    "get_formatted_prompt": (
+        "litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt",
+        "get_formatted_prompt",
+    ),
+    "get_response_headers": (
+        "litellm.litellm_core_utils.llm_response_utils.get_headers",
+        "get_response_headers",
+    ),
+    "update_response_metadata": (
+        "litellm.litellm_core_utils.llm_response_utils.response_metadata",
+        "update_response_metadata",
+    ),
     "executor": ("litellm.litellm_core_utils.thread_pool_executor", "executor"),
-    "BaseAnthropicMessagesConfig": ("litellm.llms.base_llm.anthropic_messages.transformation", "BaseAnthropicMessagesConfig"),
-    "BaseAudioTranscriptionConfig": ("litellm.llms.base_llm.audio_transcription.transformation", "BaseAudioTranscriptionConfig"),
-    "BaseBatchesConfig": ("litellm.llms.base_llm.batches.transformation", "BaseBatchesConfig"),
-    "BaseContainerConfig": ("litellm.llms.base_llm.containers.transformation", "BaseContainerConfig"),
-    "BaseEmbeddingConfig": ("litellm.llms.base_llm.embedding.transformation", "BaseEmbeddingConfig"),
-    "BaseImageEditConfig": ("litellm.llms.base_llm.image_edit.transformation", "BaseImageEditConfig"),
-    "BaseImageGenerationConfig": ("litellm.llms.base_llm.image_generation.transformation", "BaseImageGenerationConfig"),
-    "BaseImageVariationConfig": ("litellm.llms.base_llm.image_variations.transformation", "BaseImageVariationConfig"),
-    "BasePassthroughConfig": ("litellm.llms.base_llm.passthrough.transformation", "BasePassthroughConfig"),
-    "BaseRealtimeConfig": ("litellm.llms.base_llm.realtime.transformation", "BaseRealtimeConfig"),
-    "BaseRerankConfig": ("litellm.llms.base_llm.rerank.transformation", "BaseRerankConfig"),
-    "BaseVectorStoreConfig": ("litellm.llms.base_llm.vector_store.transformation", "BaseVectorStoreConfig"),
-    "BaseVectorStoreFilesConfig": ("litellm.llms.base_llm.vector_store_files.transformation", "BaseVectorStoreFilesConfig"),
-    "BaseVideoConfig": ("litellm.llms.base_llm.videos.transformation", "BaseVideoConfig"),
-    "ANTHROPIC_API_ONLY_HEADERS": ("litellm.types.llms.anthropic", "ANTHROPIC_API_ONLY_HEADERS"),
-    "AnthropicThinkingParam": ("litellm.types.llms.anthropic", "AnthropicThinkingParam"),
+    "BaseAnthropicMessagesConfig": (
+        "litellm.llms.base_llm.anthropic_messages.transformation",
+        "BaseAnthropicMessagesConfig",
+    ),
+    "BaseAudioTranscriptionConfig": (
+        "litellm.llms.base_llm.audio_transcription.transformation",
+        "BaseAudioTranscriptionConfig",
+    ),
+    "BaseBatchesConfig": (
+        "litellm.llms.base_llm.batches.transformation",
+        "BaseBatchesConfig",
+    ),
+    "BaseContainerConfig": (
+        "litellm.llms.base_llm.containers.transformation",
+        "BaseContainerConfig",
+    ),
+    "BaseEmbeddingConfig": (
+        "litellm.llms.base_llm.embedding.transformation",
+        "BaseEmbeddingConfig",
+    ),
+    "BaseImageEditConfig": (
+        "litellm.llms.base_llm.image_edit.transformation",
+        "BaseImageEditConfig",
+    ),
+    "BaseImageGenerationConfig": (
+        "litellm.llms.base_llm.image_generation.transformation",
+        "BaseImageGenerationConfig",
+    ),
+    "BaseImageVariationConfig": (
+        "litellm.llms.base_llm.image_variations.transformation",
+        "BaseImageVariationConfig",
+    ),
+    "BasePassthroughConfig": (
+        "litellm.llms.base_llm.passthrough.transformation",
+        "BasePassthroughConfig",
+    ),
+    "BaseRealtimeConfig": (
+        "litellm.llms.base_llm.realtime.transformation",
+        "BaseRealtimeConfig",
+    ),
+    "BaseRerankConfig": (
+        "litellm.llms.base_llm.rerank.transformation",
+        "BaseRerankConfig",
+    ),
+    "BaseVectorStoreConfig": (
+        "litellm.llms.base_llm.vector_store.transformation",
+        "BaseVectorStoreConfig",
+    ),
+    "BaseVectorStoreFilesConfig": (
+        "litellm.llms.base_llm.vector_store_files.transformation",
+        "BaseVectorStoreFilesConfig",
+    ),
+    "BaseVideoConfig": (
+        "litellm.llms.base_llm.videos.transformation",
+        "BaseVideoConfig",
+    ),
+    "ANTHROPIC_API_ONLY_HEADERS": (
+        "litellm.types.llms.anthropic",
+        "ANTHROPIC_API_ONLY_HEADERS",
+    ),
+    "AnthropicThinkingParam": (
+        "litellm.types.llms.anthropic",
+        "AnthropicThinkingParam",
+    ),
     "RerankResponse": ("litellm.types.rerank", "RerankResponse"),
-    "ChatCompletionDeltaToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionDeltaToolCallChunk"),
-    "ChatCompletionToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallChunk"),
-    "ChatCompletionToolCallFunctionChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallFunctionChunk"),
+    "ChatCompletionDeltaToolCallChunk": (
+        "litellm.types.llms.openai",
+        "ChatCompletionDeltaToolCallChunk",
+    ),
+    "ChatCompletionToolCallChunk": (
+        "litellm.types.llms.openai",
+        "ChatCompletionToolCallChunk",
+    ),
+    "ChatCompletionToolCallFunctionChunk": (
+        "litellm.types.llms.openai",
+        "ChatCompletionToolCallFunctionChunk",
+    ),
     "LiteLLM_Params": ("litellm.types.router", "LiteLLM_Params"),
 }
 
diff --git a/litellm/llms/hosted_vllm/embedding/transformation.py b/litellm/llms/hosted_vllm/embedding/transformation.py
new file mode 100644
index 00000000000..9c3e8c6c7cc
--- /dev/null
+++ b/litellm/llms/hosted_vllm/embedding/transformation.py
@@ -0,0 +1,180 @@
+"""
+Hosted VLLM Embedding API Configuration.
+
+This module provides the configuration for hosted VLLM's Embedding API.
+VLLM is OpenAI-compatible and supports embeddings via the /v1/embeddings endpoint.
+
+Docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+"""
+
+from typing import TYPE_CHECKING, Any, List, Optional, Union
+
+import httpx
+
+from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues
+from litellm.types.utils import EmbeddingResponse
+from litellm.utils import convert_to_model_response_object
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any
+
+
+class HostedVLLMEmbeddingError(BaseLLMException):
+    """Exception class for Hosted VLLM Embedding errors."""
+
+    pass
+
+
+class HostedVLLMEmbeddingConfig(BaseEmbeddingConfig):
+    """
+    Configuration for Hosted VLLM's Embedding API.
+
+    Reference: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+    """
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+    ) -> dict:
+        """
+        Build the request headers for the Hosted VLLM API (nothing is validated).
+        """
+        if api_key is None:
+            api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
+
+        default_headers = {
+            "Content-Type": "application/json",
+        }
+
+        # Only add Authorization header if api_key is not "fake-api-key"
+        if api_key and api_key != "fake-api-key":
+            default_headers["Authorization"] = f"Bearer {api_key}"
+
+        # Merge with existing headers (user's headers take priority)
+        return {**default_headers, **headers}
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        api_key: Optional[str],
+        model: str,
+        optional_params: dict,
+        litellm_params: dict,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """
+        Get the complete URL for Hosted VLLM Embedding API endpoint.
+        """
+        if api_base is None:
+            api_base = get_secret_str("HOSTED_VLLM_API_BASE")
+            if api_base is None:
+                raise ValueError("api_base is required for hosted_vllm embeddings")
+
+        # Remove trailing slashes
+        api_base = api_base.rstrip("/")
+
+        # Ensure the URL ends with /embeddings
+        if not api_base.endswith("/embeddings"):
+            api_base = f"{api_base}/embeddings"
+
+        return api_base
+
+    def transform_embedding_request(
+        self,
+        model: str,
+        input: AllEmbeddingInputValues,
+        optional_params: dict,
+        headers: dict,
+    ) -> dict:
+        """
+        Transform embedding request to Hosted VLLM format (OpenAI-compatible).
+        """
+        # Wrap a single string in a list; other input types pass through unchanged
+        if isinstance(input, str):
+            input = [input]
+
+        # Strip 'hosted_vllm/' prefix if present
+        if model.startswith("hosted_vllm/"):
+            model = model.replace("hosted_vllm/", "", 1)
+
+        return {
+            "model": model,
+            "input": input,
+            **optional_params,
+        }
+
+    def transform_embedding_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: EmbeddingResponse,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str],
+        request_data: dict,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> EmbeddingResponse:
+        """
+        Transform embedding response from Hosted VLLM format (OpenAI-compatible).
+        """
+        logging_obj.post_call(original_response=raw_response.text)
+
+        # VLLM returns a standard OpenAI-compatible embedding response
+        response_json = raw_response.json()
+
+        return convert_to_model_response_object(
+            response_object=response_json,
+            model_response_object=model_response,
+            response_type="embedding",
+        )
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get list of supported OpenAI parameters for Hosted VLLM embeddings.
+        """
+        return [
+            "timeout",
+            "dimensions",
+            "encoding_format",
+            "user",
+        ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        Copy supported OpenAI parameters into optional_params; others are skipped.
+        """
+        for param, value in non_default_params.items():
+            if param in self.get_supported_openai_params(model):
+                optional_params[param] = value
+        return optional_params
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        """
+        Build the exception instance for a Hosted VLLM embedding error.
+        """
+        return HostedVLLMEmbeddingError(
+            message=error_message,
+            status_code=status_code,
+            headers=headers,
+        )
diff --git a/litellm/main.py b/litellm/main.py
index 5b8c569a390..35043e1ba0e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2373,6 +2373,33 @@ def completion(  # type: ignore # noqa: PLR0915
                 or "https://api.minimax.io/v1"
             )
 
+            response = base_llm_http_handler.completion(
+                model=model,
+                messages=messages,
+                api_base=api_base,
+                custom_llm_provider=custom_llm_provider,
+                model_response=model_response,
+                encoding=_get_encoding(),
+                logging_obj=logging,
+                optional_params=optional_params,
+                timeout=timeout,
+                litellm_params=litellm_params,
+                shared_session=shared_session,
+                acompletion=acompletion,
+                stream=stream,
+                api_key=api_key,
+                headers=headers,
+                client=client,
+                provider_config=provider_config,
+            )
+            logging.post_call(
+                input=messages, api_key=api_key, original_response=response
+            )
+        elif custom_llm_provider == "hosted_vllm":
+            api_base = (
+                api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE")
+            )
+
             response = base_llm_http_handler.completion(
                 model=model,
                 messages=messages,
@@ -3611,9 +3638,9 @@ def completion(  # type: ignore # noqa: PLR0915
                     "aws_region_name" not in optional_params
                     or optional_params["aws_region_name"] is None
                 ):
-                    optional_params[
-                        "aws_region_name"
-                    ] = aws_bedrock_client.meta.region_name
+                    optional_params["aws_region_name"] = (
+                        aws_bedrock_client.meta.region_name
+                    )
 
             bedrock_route = BedrockModelInfo.get_bedrock_route(model)
             if bedrock_route == "converse":
@@ -4773,9 +4800,32 @@ def embedding(  # noqa: PLR0915
                 client=client,
                 aembedding=aembedding,
             )
+        elif custom_llm_provider == "hosted_vllm":
+            api_base = (
+                api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE")
+            )
+
+            # set API KEY
+            if api_key is None:
+                api_key = litellm.api_key or get_secret_str("HOSTED_VLLM_API_KEY")
+
+            response = base_llm_http_handler.embedding(
+                model=model,
+                input=input,
+                custom_llm_provider=custom_llm_provider,
+                api_base=api_base,
+                api_key=api_key,
+                logging_obj=logging,
+                timeout=timeout,
+                model_response=EmbeddingResponse(),
+                optional_params=optional_params,
+                client=client,
+                aembedding=aembedding,
+                litellm_params=litellm_params_dict,
+                headers=headers or {},
+            )
         elif (
             custom_llm_provider == "openai_like"
-            or custom_llm_provider == "hosted_vllm"
             or custom_llm_provider == "llamafile"
             or custom_llm_provider == "lm_studio"
         ):
@@ -5948,9 +5998,9 @@ def adapter_completion(
     new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
 
     response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs)  # type: ignore
-    translated_response: Optional[
-        Union[BaseModel, AdapterCompletionStreamWrapper]
-    ] = None
+    translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
+        None
+    )
     if isinstance(response, ModelResponse):
         translated_response = translation_obj.translate_completion_output_params(
             response=response
@@ -6655,9 +6705,9 @@ def speech(  # noqa: PLR0915
                 ElevenLabsTextToSpeechConfig.ELEVENLABS_QUERY_PARAMS_KEY
             ] = query_params
 
-        litellm_params_dict[
-            ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY
-        ] = voice_id
+        litellm_params_dict[ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY] = (
+            voice_id
+        )
 
         if api_base is not None:
             litellm_params_dict["api_base"] = api_base
@@ -7163,9 +7213,9 @@ def stream_chunk_builder(  # noqa: PLR0915
         ]
 
         if len(content_chunks) > 0:
-            response["choices"][0]["message"][
-                "content"
-            ] = processor.get_combined_content(content_chunks)
+            response["choices"][0]["message"]["content"] = (
+                processor.get_combined_content(content_chunks)
+            )
 
         thinking_blocks = [
             chunk
@@ -7176,9 +7226,9 @@ def stream_chunk_builder(  # noqa: PLR0915
         ]
 
         if len(thinking_blocks) > 0:
-            response["choices"][0]["message"][
-                "thinking_blocks"
-            ] = processor.get_combined_thinking_content(thinking_blocks)
+            response["choices"][0]["message"]["thinking_blocks"] = (
+                processor.get_combined_thinking_content(thinking_blocks)
+            )
 
         reasoning_chunks = [
             chunk
@@ -7189,9 +7239,9 @@ def stream_chunk_builder(  # noqa: PLR0915
         ]
 
         if len(reasoning_chunks) > 0:
-            response["choices"][0]["message"][
-                "reasoning_content"
-            ] = processor.get_combined_reasoning_content(reasoning_chunks)
+            response["choices"][0]["message"]["reasoning_content"] = (
+                processor.get_combined_reasoning_content(reasoning_chunks)
+            )
 
         annotation_chunks = [
             chunk
diff --git a/litellm/utils.py b/litellm/utils.py
index d7fb4855a48..61a446564dc 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -771,13 +771,15 @@ def function_setup(  # noqa: PLR0915
         function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
 
         ## LAZY LOAD COROUTINE CHECKER ##
-        get_coroutine_checker_fn = getattr(sys.modules[__name__], "get_coroutine_checker")
+        get_coroutine_checker_fn = getattr(
+            sys.modules[__name__], "get_coroutine_checker"
+        )
         coroutine_checker = get_coroutine_checker_fn()
 
         ## DYNAMIC CALLBACKS ##
-        dynamic_callbacks: Optional[
-            List[Union[str, Callable, "CustomLogger"]]
-        ] = kwargs.pop("callbacks", None)
+        dynamic_callbacks: Optional[List[Union[str, Callable, "CustomLogger"]]] = (
+            kwargs.pop("callbacks", None)
+        )
         all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
 
         if len(all_callbacks) > 0:
@@ -1660,9 +1662,9 @@ def wrapper(*args, **kwargs):  # noqa: PLR0915
                         exception=e,
                         retry_policy=kwargs.get("retry_policy"),
                     )
-                    kwargs[
-                        "retry_policy"
-                    ] = reset_retry_policy()  # prevent infinite loops
+                    kwargs["retry_policy"] = (
+                        reset_retry_policy()
+                    )  # prevent infinite loops
                 litellm.num_retries = (
                     None  # set retries to None to prevent infinite loops
                 )
@@ -1709,9 +1711,9 @@ def wrapper(*args, **kwargs):  # noqa: PLR0915
                         exception=e,
                         retry_policy=kwargs.get("retry_policy"),
                     )
-                    kwargs[
-                        "retry_policy"
-                    ] = reset_retry_policy()  # prevent infinite loops
+                    kwargs["retry_policy"] = (
+                        reset_retry_policy()
+                    )  # prevent infinite loops
                 litellm.num_retries = (
                     None  # set retries to None to prevent infinite loops
                 )
@@ -3640,10 +3642,10 @@ def pre_process_non_default_params(
 
     if "response_format" in non_default_params:
         if provider_config is not None:
-            non_default_params[
-                "response_format"
-            ] = provider_config.get_json_schema_from_pydantic_object(
-                response_format=non_default_params["response_format"]
+            non_default_params["response_format"] = (
+                provider_config.get_json_schema_from_pydantic_object(
+                    response_format=non_default_params["response_format"]
+                )
             )
         else:
             non_default_params["response_format"] = type_to_response_format_param(
@@ -3772,16 +3774,16 @@ def pre_process_optional_params(
                     True  # so that main.py adds the function call to the prompt
                 )
                 if "tools" in non_default_params:
-                    optional_params[
-                        "functions_unsupported_model"
-                    ] = non_default_params.pop("tools")
+                    optional_params["functions_unsupported_model"] = (
+                        non_default_params.pop("tools")
+                    )
                     non_default_params.pop(
                         "tool_choice", None
                     )  # causes ollama requests to hang
                 elif "functions" in non_default_params:
-                    optional_params[
-                        "functions_unsupported_model"
-                    ] = non_default_params.pop("functions")
+                    optional_params["functions_unsupported_model"] = (
+                        non_default_params.pop("functions")
+                    )
             elif (
                 litellm.add_function_to_prompt
             ):  # if user opts to add it to prompt instead
@@ -4937,9 +4939,9 @@ def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream])
             return delta if isinstance(delta, str) else ""
 
     # Handle standard ModelResponse and ModelResponseStream
-    _choices: Union[
-        List[Union[Choices, StreamingChoices]], List[StreamingChoices]
-    ] = response_obj.choices
+    _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
+        response_obj.choices
+    )
 
     # Use list accumulation to avoid O(n^2) string concatenation across choices
     response_parts: List[str] = []
@@ -7714,25 +7716,29 @@ def validate_chat_completion_tool_choice(
         f"Invalid tool choice, tool_choice={tool_choice}. Got={type(tool_choice)}. Expecting str, or dict. Please ensure tool_choice follows the OpenAI tool_choice spec"
     )
 
-def validate_openai_optional_params(  
-    stop: Optional[Union[str, List[str]]] = None,  
-    **kwargs  
-) -> Optional[Union[str, List[str]]]:  
-    """  
-    Validates and fixes OpenAI optional parameters.  
-      
-    Args:  
-        stop: Stop sequences (string or list of strings)  
-        **kwargs: Additional optional parameters  
-          
-    Returns:  
-        Validated stop parameter (truncated to 4 elements if needed)  
-    """  
-    if stop is not None and isinstance(stop, list) and not litellm.disable_stop_sequence_limit:  
+
+def validate_openai_optional_params(
+    stop: Optional[Union[str, List[str]]] = None, **kwargs
+) -> Optional[Union[str, List[str]]]:
+    """
+    Validates and fixes OpenAI optional parameters.
+
+    Args:
+        stop: Stop sequences (string or list of strings)
+        **kwargs: Additional optional parameters (accepted but not used)
+
+    Returns:
+        Validated stop parameter (truncated to 4 elements if needed)
+    """
+    if (
+        stop is not None
+        and isinstance(stop, list)
+        and not litellm.disable_stop_sequence_limit
+    ):
         # Truncate to 4 elements if more are provided as openai only supports up to 4 stop sequences
-        if len(stop) > 4:  
-            stop = stop[:4]  
-      
+        if len(stop) > 4:
+            stop = stop[:4]
+
     return stop
 
 
@@ -8061,6 +8067,8 @@ def get_provider_embedding_config(
             return VercelAIGatewayEmbeddingConfig()
         elif litellm.LlmProviders.GIGACHAT == provider:
             return litellm.GigaChatEmbeddingConfig()
+        elif litellm.LlmProviders.HOSTED_VLLM == provider:
+            return litellm.HostedVLLMEmbeddingConfig()
         elif litellm.LlmProviders.SAGEMAKER == provider:
             from litellm.llms.sagemaker.embedding.transformation import (
                 SagemakerEmbeddingConfig,
diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py
index 9b3b6aeaea1..ddc01db0ec7 100644
--- a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py
+++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py
@@ -1,10 +1,7 @@
 import json
 import os
 import sys
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import httpx
-import pytest
+from unittest.mock import MagicMock, patch
 
 sys.path.insert(
     0, os.path.abspath("../../../../..")
@@ -47,15 +44,34 @@ def test_hosted_vllm_chat_transformation_file_url():
 
 def test_hosted_vllm_chat_transformation_with_audio_url():
     from litellm import completion
-    from litellm.llms.custom_httpx.http_handler import HTTPHandler
-
-    client = MagicMock()
 
-    with patch.object(
-        client.chat.completions.with_raw_response, "create", return_value=MagicMock()
-    ) as mock_post:
+    mock_client = MagicMock()
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.json.return_value = {
+        "id": "chatcmpl-test",
+        "object": "chat.completion",
+        "created": 1234567890,
+        "model": "llama-3.1-70b-instruct",
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": "Test response"},
+                "finish_reason": "stop",
+            }
+        ],
+        "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
+    }
+    mock_response.text = json.dumps(mock_response.json.return_value)
+    mock_client.post.return_value = mock_response
+
+    with patch(
+        "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
+        return_value=mock_client,
+    ):
         try:
-            response = completion(
+            completion(
                 model="hosted_vllm/llama-3.1-70b-instruct",
                 messages=[
                     {
@@ -68,14 +84,15 @@ def test_hosted_vllm_chat_transformation_with_audio_url():
                         ],
                     },
                 ],
-                client=client,
+                api_base="https://test-vllm.example.com/v1",
             )
-        except Exception as e:
-            print(f"Error: {e}")
+        except Exception:
+            pass
 
-        mock_post.assert_called_once()
-        print(f"mock_post.call_args.kwargs: {mock_post.call_args.kwargs}")
-        assert mock_post.call_args.kwargs["messages"] == [
+        mock_client.post.assert_called_once()
+        call_kwargs = mock_client.post.call_args[1]
+        request_data = json.loads(call_kwargs["data"])
+        assert request_data["messages"] == [
             {
                 "role": "user",
                 "content": [
diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py
new file mode 100644
index 00000000000..8f98b3ca8f1
--- /dev/null
+++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py
@@ -0,0 +1,152 @@
+"""
+Test SSL verification for hosted_vllm provider.
+
+This test ensures that the ssl_verify parameter is properly passed through
+to the HTTP client when using the hosted_vllm provider.
+
+Issue: ssl_verify parameter was being ignored because hosted_vllm fell through
+to the OpenAI catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client.
+"""
+
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+
+
+class TestHostedVLLMSSLVerify:
+    """Check that hosted_vllm completions forward ssl_verify to the HTTP client."""
+
+    @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client")
+    def test_hosted_vllm_ssl_verify_false_sync(self, mock_get_httpx_client):
+        """Test that ssl_verify=False is passed to the HTTP client for sync calls."""
+        # Mock client whose post() returns a canned 200 chat-completion response
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"content-type": "application/json"}
+        mock_response.json.return_value = {
+            "id": "chatcmpl-test",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "test-model",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "Test response",
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 5,
+                "total_tokens": 15,
+            },
+        }
+        mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}'
+        mock_client.post.return_value = mock_response
+        mock_get_httpx_client.return_value = mock_client
+
+        try:
+            litellm.completion(
+                model="hosted_vllm/test-model",
+                messages=[{"role": "user", "content": "Hello"}],
+                api_base="https://test-vllm.example.com/v1",
+                ssl_verify=False,
+            )
+        except Exception:
+            # Even if the response parsing fails, we just need to verify
+            # that the mock was called with the correct ssl_verify parameter
+            pass
+
+        # The client getter must have been called; call_args is its most recent call
+        mock_get_httpx_client.assert_called()
+        call_args = mock_get_httpx_client.call_args
+
+        # params may arrive positionally or by keyword; accept either form
+        if call_args[0]:
+            # Positional: params is the first argument
+            params = call_args[0][0]
+        else:
+            # Keyword: fall back to {} when "params" is absent
+            params = call_args[1].get("params", {})
+
+        assert (
+            params.get("ssl_verify") is False
+        ), f"Expected ssl_verify=False in params, got {params}"
+
+    @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client")
+    @pytest.mark.asyncio
+    async def test_hosted_vllm_ssl_verify_false_async(
+        self, mock_get_async_httpx_client
+    ):
+        """Test that ssl_verify=False is passed to the HTTP client for async calls."""
+        # Mock client; post is replaced below with a coroutine function
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"content-type": "application/json"}
+        mock_response.json.return_value = {
+            "id": "chatcmpl-test",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "test-model",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "Test response",
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 10,
+                "completion_tokens": 5,
+                "total_tokens": 15,
+            },
+        }
+        mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}'
+
+        async def mock_post(*args, **kwargs):
+            return mock_response
+
+        mock_client.post = mock_post
+        mock_get_async_httpx_client.return_value = mock_client
+
+        try:
+            await litellm.acompletion(
+                model="hosted_vllm/test-model",
+                messages=[{"role": "user", "content": "Hello"}],
+                api_base="https://test-vllm.example.com/v1",
+                ssl_verify=False,
+            )
+        except Exception:
+            # Even if the response parsing fails, we just need to verify
+            # that the mock was called with the correct ssl_verify parameter
+            pass
+
+        # The async client getter must have been called; inspect its most recent call
+        mock_get_async_httpx_client.assert_called()
+        call_kwargs = mock_get_async_httpx_client.call_args[1]
+
+        # Only keyword arguments are inspected; a missing "params" falls back to {}
+        params = call_kwargs.get("params", {})
+        assert (
+            params.get("ssl_verify") is False
+        ), f"Expected ssl_verify=False in params, got {params}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])
diff --git a/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py
new file mode 100644
index 00000000000..bb911814c23
--- /dev/null
+++ b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py
@@ -0,0 +1,140 @@
+"""
+Test SSL verification for hosted_vllm provider embeddings.
+
+This test ensures that the ssl_verify parameter is properly passed through
+to the HTTP client when using the hosted_vllm provider for embeddings.
+
+Issue: ssl_verify parameter was being ignored because hosted_vllm fell through
+to the openai_like catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client.
+"""
+
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+import litellm
+
+
+class TestHostedVLLMEmbeddingSSLVerify:
+    """Check that hosted_vllm embeddings forward ssl_verify to the HTTP client."""
+
+    @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client")
+    def test_hosted_vllm_embedding_ssl_verify_false_sync(self, mock_get_httpx_client):
+        """Test that ssl_verify=False is passed to the HTTP client for sync embedding calls."""
+        # Mock client whose post() returns a canned 200 embedding response
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"content-type": "application/json"}
+        mock_response.json.return_value = {
+            "object": "list",
+            "data": [
+                {
+                    "object": "embedding",
+                    "index": 0,
+                    "embedding": [0.1, 0.2, 0.3, 0.4, 0.5],
+                }
+            ],
+            "model": "text-embedding-model",
+            "usage": {
+                "prompt_tokens": 5,
+                "total_tokens": 5,
+            },
+        }
+        mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}'
+        mock_client.post.return_value = mock_response
+        mock_get_httpx_client.return_value = mock_client
+
+        try:
+            litellm.embedding(
+                model="hosted_vllm/text-embedding-model",
+                input=["hello world"],
+                api_base="https://test-vllm.example.com/v1",
+                ssl_verify=False,
+            )
+        except Exception:
+            # Even if the response parsing fails, we just need to verify
+            # that the mock was called with the correct ssl_verify parameter
+            pass
+
+        # The client getter must have been called; call_args is its most recent call
+        mock_get_httpx_client.assert_called()
+        call_args = mock_get_httpx_client.call_args
+
+        # params may arrive positionally or by keyword; accept either form
+        if call_args[0]:
+            # Positional: params is the first argument
+            params = call_args[0][0]
+        else:
+            # Keyword: fall back to {} when "params" is absent
+            params = call_args[1].get("params", {})
+
+        assert (
+            params.get("ssl_verify") is False
+        ), f"Expected ssl_verify=False in params, got {params}"
+
+    @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client")
+    @pytest.mark.asyncio
+    async def test_hosted_vllm_embedding_ssl_verify_false_async(
+        self, mock_get_async_httpx_client
+    ):
+        """Test that ssl_verify=False is passed to the HTTP client for async embedding calls."""
+        # Mock client; post is replaced below with a coroutine function
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"content-type": "application/json"}
+        mock_response.json.return_value = {
+            "object": "list",
+            "data": [
+                {
+                    "object": "embedding",
+                    "index": 0,
+                    "embedding": [0.1, 0.2, 0.3, 0.4, 0.5],
+                }
+            ],
+            "model": "text-embedding-model",
+            "usage": {
+                "prompt_tokens": 5,
+                "total_tokens": 5,
+            },
+        }
+        mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}'
+
+        async def mock_post(*args, **kwargs):
+            return mock_response
+
+        mock_client.post = mock_post
+        mock_get_async_httpx_client.return_value = mock_client
+
+        try:
+            await litellm.aembedding(
+                model="hosted_vllm/text-embedding-model",
+                input=["hello world"],
+                api_base="https://test-vllm.example.com/v1",
+                ssl_verify=False,
+            )
+        except Exception:
+            # Even if the response parsing fails, we just need to verify
+            # that the mock was called with the correct ssl_verify parameter
+            pass
+
+        # The async client getter must have been called; inspect its most recent call
+        mock_get_async_httpx_client.assert_called()
+        call_kwargs = mock_get_async_httpx_client.call_args[1]
+
+        # Only keyword arguments are inspected; a missing "params" falls back to {}
+        params = call_kwargs.get("params", {})
+        assert (
+            params.get("ssl_verify") is False
+        ), f"Expected ssl_verify=False in params, got {params}"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])