diff --git a/Dockerfile b/Dockerfile index 2c54e2dec28..2987a44b394 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,8 +46,8 @@ FROM $LITELLM_RUNTIME_IMAGE AS runtime # Ensure runtime stage runs as root USER root -# Install runtime dependencies -RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip +# Install runtime dependencies (libsndfile needed for audio processing on ARM64) +RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile WORKDIR /app # Copy the current directory contents into the container at /app diff --git a/litellm/__init__.py b/litellm/__init__.py index f5db57f76fd..9e88d3282ff 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -1469,6 +1469,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None: from .llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config as AzureOpenAIGPT5Config from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig + from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig diff --git a/litellm/_lazy_imports_registry.py b/litellm/_lazy_imports_registry.py index a92c6f95b0e..0e52e9a59eb 100644 --- a/litellm/_lazy_imports_registry.py +++ b/litellm/_lazy_imports_registry.py @@ -20,25 +20,53 @@ # Utils names that support lazy loading via _lazy_import_utils UTILS_NAMES = ( - "exception_type", "get_optional_params", "get_response_string", "token_counter", - "create_pretrained_tokenizer", "create_tokenizer", "supports_function_calling", - 
"supports_web_search", "supports_url_context", "supports_response_schema", - "supports_parallel_function_calling", "supports_vision", "supports_audio_input", - "supports_audio_output", "supports_system_messages", "supports_reasoning", - "get_litellm_params", "acreate", "get_max_tokens", "get_model_info", - "register_prompt_template", "validate_environment", "check_valid_key", - "register_model", "encode", "decode", "_calculate_retry_after", "_should_retry", - "get_supported_openai_params", "get_api_base", "get_first_chars_messages", - "ModelResponse", "ModelResponseStream", "EmbeddingResponse", "ImageResponse", - "TranscriptionResponse", "TextCompletionResponse", "get_provider_fields", - "ModelResponseListIterator", "get_valid_models", "timeout", - "get_llm_provider", "remove_index_from_tool_calls", + "exception_type", + "get_optional_params", + "get_response_string", + "token_counter", + "create_pretrained_tokenizer", + "create_tokenizer", + "supports_function_calling", + "supports_web_search", + "supports_url_context", + "supports_response_schema", + "supports_parallel_function_calling", + "supports_vision", + "supports_audio_input", + "supports_audio_output", + "supports_system_messages", + "supports_reasoning", + "get_litellm_params", + "acreate", + "get_max_tokens", + "get_model_info", + "register_prompt_template", + "validate_environment", + "check_valid_key", + "register_model", + "encode", + "decode", + "_calculate_retry_after", + "_should_retry", + "get_supported_openai_params", + "get_api_base", + "get_first_chars_messages", + "ModelResponse", + "ModelResponseStream", + "EmbeddingResponse", + "ImageResponse", + "TranscriptionResponse", + "TextCompletionResponse", + "get_provider_fields", + "ModelResponseListIterator", + "get_valid_models", + "timeout", + "get_llm_provider", + "remove_index_from_tool_calls", ) # Token counter names that support lazy loading via _lazy_import_token_counter -TOKEN_COUNTER_NAMES = ( - "get_modified_max_tokens", -) 
+TOKEN_COUNTER_NAMES = ("get_modified_max_tokens",) # LLM client cache names that support lazy loading via _lazy_import_llm_client_cache LLM_CLIENT_CACHE_NAMES = ( @@ -47,9 +75,7 @@ ) # Bedrock type names that support lazy loading via _lazy_import_bedrock_types -BEDROCK_TYPES_NAMES = ( - "COHERE_EMBEDDING_INPUT_TYPES", -) +BEDROCK_TYPES_NAMES = ("COHERE_EMBEDDING_INPUT_TYPES",) # Common types from litellm.types.utils that support lazy loading via # _lazy_import_types_utils @@ -236,6 +262,7 @@ "AzureOpenAIGPT5Config", "AzureOpenAITextConfig", "HostedVLLMChatConfig", + "HostedVLLMEmbeddingConfig", # Alias for backwards compatibility "VolcEngineConfig", # Alias for VolcEngineChatConfig "LlamafileChatConfig", @@ -388,7 +415,10 @@ "supports_web_search": (".utils", "supports_web_search"), "supports_url_context": (".utils", "supports_url_context"), "supports_response_schema": (".utils", "supports_response_schema"), - "supports_parallel_function_calling": (".utils", "supports_parallel_function_calling"), + "supports_parallel_function_calling": ( + ".utils", + "supports_parallel_function_calling", + ), "supports_vision": (".utils", "supports_vision"), "supports_audio_input": (".utils", "supports_audio_input"), "supports_audio_output": (".utils", "supports_audio_output"), @@ -419,8 +449,14 @@ "ModelResponseListIterator": (".utils", "ModelResponseListIterator"), "get_valid_models": (".utils", "get_valid_models"), "timeout": (".timeout", "timeout"), - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "remove_index_from_tool_calls": ( + "litellm.litellm_core_utils.core_helpers", + "remove_index_from_tool_calls", + ), } _COST_CALCULATOR_IMPORT_MAP = { @@ -442,11 +478,17 @@ } _TOKEN_COUNTER_IMPORT_MAP = { - 
"get_modified_max_tokens": ("litellm.litellm_core_utils.token_counter", "get_modified_max_tokens"), + "get_modified_max_tokens": ( + "litellm.litellm_core_utils.token_counter", + "get_modified_max_tokens", + ), } _BEDROCK_TYPES_IMPORT_MAP = { - "COHERE_EMBEDDING_INPUT_TYPES": ("litellm.types.llms.bedrock", "COHERE_EMBEDDING_INPUT_TYPES"), + "COHERE_EMBEDDING_INPUT_TYPES": ( + "litellm.types.llms.bedrock", + "COHERE_EMBEDDING_INPUT_TYPES", + ), } _CACHING_IMPORT_MAP = { @@ -458,294 +500,868 @@ _LITELLM_LOGGING_IMPORT_MAP = { "Logging": ("litellm.litellm_core_utils.litellm_logging", "Logging"), - "modify_integration": ("litellm.litellm_core_utils.litellm_logging", "modify_integration"), + "modify_integration": ( + "litellm.litellm_core_utils.litellm_logging", + "modify_integration", + ), } _DOTPROMPT_IMPORT_MAP = { - "global_prompt_manager": ("litellm.integrations.dotprompt", "global_prompt_manager"), - "global_prompt_directory": ("litellm.integrations.dotprompt", "global_prompt_directory"), - "set_global_prompt_directory": ("litellm.integrations.dotprompt", "set_global_prompt_directory"), + "global_prompt_manager": ( + "litellm.integrations.dotprompt", + "global_prompt_manager", + ), + "global_prompt_directory": ( + "litellm.integrations.dotprompt", + "global_prompt_directory", + ), + "set_global_prompt_directory": ( + "litellm.integrations.dotprompt", + "set_global_prompt_directory", + ), } _TYPES_IMPORT_MAP = { "GuardrailItem": ("litellm.types.guardrails", "GuardrailItem"), - "DefaultTeamSSOParams": ("litellm.types.proxy.management_endpoints.ui_sso", "DefaultTeamSSOParams"), - "LiteLLM_UpperboundKeyGenerateParams": ("litellm.types.proxy.management_endpoints.ui_sso", "LiteLLM_UpperboundKeyGenerateParams"), - "KeyManagementSystem": ("litellm.types.secret_managers.main", "KeyManagementSystem"), - "PriorityReservationSettings": ("litellm.types.utils", "PriorityReservationSettings"), + "DefaultTeamSSOParams": ( + "litellm.types.proxy.management_endpoints.ui_sso", + 
"DefaultTeamSSOParams", + ), + "LiteLLM_UpperboundKeyGenerateParams": ( + "litellm.types.proxy.management_endpoints.ui_sso", + "LiteLLM_UpperboundKeyGenerateParams", + ), + "KeyManagementSystem": ( + "litellm.types.secret_managers.main", + "KeyManagementSystem", + ), + "PriorityReservationSettings": ( + "litellm.types.utils", + "PriorityReservationSettings", + ), "CustomLogger": ("litellm.integrations.custom_logger", "CustomLogger"), - "LoggingCallbackManager": ("litellm.litellm_core_utils.logging_callback_manager", "LoggingCallbackManager"), - "DatadogLLMObsInitParams": ("litellm.types.integrations.datadog_llm_obs", "DatadogLLMObsInitParams"), + "LoggingCallbackManager": ( + "litellm.litellm_core_utils.logging_callback_manager", + "LoggingCallbackManager", + ), + "DatadogLLMObsInitParams": ( + "litellm.types.integrations.datadog_llm_obs", + "DatadogLLMObsInitParams", + ), } _LLM_PROVIDER_LOGIC_IMPORT_MAP = { - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "remove_index_from_tool_calls": ("litellm.litellm_core_utils.core_helpers", "remove_index_from_tool_calls"), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "remove_index_from_tool_calls": ( + "litellm.litellm_core_utils.core_helpers", + "remove_index_from_tool_calls", + ), } _LLM_CONFIGS_IMPORT_MAP = { - "AmazonConverseConfig": (".llms.bedrock.chat.converse_transformation", "AmazonConverseConfig"), + "AmazonConverseConfig": ( + ".llms.bedrock.chat.converse_transformation", + "AmazonConverseConfig", + ), "OpenAILikeChatConfig": (".llms.openai_like.chat.handler", "OpenAILikeChatConfig"), - "GaladrielChatConfig": (".llms.galadriel.chat.transformation", "GaladrielChatConfig"), + "GaladrielChatConfig": ( + ".llms.galadriel.chat.transformation", + "GaladrielChatConfig", + ), "GithubChatConfig": (".llms.github.chat.transformation", "GithubChatConfig"), - "AzureAnthropicConfig": 
(".llms.azure_ai.anthropic.transformation", "AzureAnthropicConfig"), + "AzureAnthropicConfig": ( + ".llms.azure_ai.anthropic.transformation", + "AzureAnthropicConfig", + ), "BytezChatConfig": (".llms.bytez.chat.transformation", "BytezChatConfig"), - "CompactifAIChatConfig": (".llms.compactifai.chat.transformation", "CompactifAIChatConfig"), + "CompactifAIChatConfig": ( + ".llms.compactifai.chat.transformation", + "CompactifAIChatConfig", + ), "EmpowerChatConfig": (".llms.empower.chat.transformation", "EmpowerChatConfig"), "MinimaxChatConfig": (".llms.minimax.chat.transformation", "MinimaxChatConfig"), - "AiohttpOpenAIChatConfig": (".llms.aiohttp_openai.chat.transformation", "AiohttpOpenAIChatConfig"), - "HuggingFaceChatConfig": (".llms.huggingface.chat.transformation", "HuggingFaceChatConfig"), - "HuggingFaceEmbeddingConfig": (".llms.huggingface.embedding.transformation", "HuggingFaceEmbeddingConfig"), + "AiohttpOpenAIChatConfig": ( + ".llms.aiohttp_openai.chat.transformation", + "AiohttpOpenAIChatConfig", + ), + "HuggingFaceChatConfig": ( + ".llms.huggingface.chat.transformation", + "HuggingFaceChatConfig", + ), + "HuggingFaceEmbeddingConfig": ( + ".llms.huggingface.embedding.transformation", + "HuggingFaceEmbeddingConfig", + ), "OobaboogaConfig": (".llms.oobabooga.chat.transformation", "OobaboogaConfig"), "MaritalkConfig": (".llms.maritalk", "MaritalkConfig"), "OpenrouterConfig": (".llms.openrouter.chat.transformation", "OpenrouterConfig"), "DataRobotConfig": (".llms.datarobot.chat.transformation", "DataRobotConfig"), "AnthropicConfig": (".llms.anthropic.chat.transformation", "AnthropicConfig"), - "AnthropicTextConfig": (".llms.anthropic.completion.transformation", "AnthropicTextConfig"), + "AnthropicTextConfig": ( + ".llms.anthropic.completion.transformation", + "AnthropicTextConfig", + ), "GroqSTTConfig": (".llms.groq.stt.transformation", "GroqSTTConfig"), "TritonConfig": (".llms.triton.completion.transformation", "TritonConfig"), - "TritonGenerateConfig": 
(".llms.triton.completion.transformation", "TritonGenerateConfig"), - "TritonInferConfig": (".llms.triton.completion.transformation", "TritonInferConfig"), - "TritonEmbeddingConfig": (".llms.triton.embedding.transformation", "TritonEmbeddingConfig"), - "HuggingFaceRerankConfig": (".llms.huggingface.rerank.transformation", "HuggingFaceRerankConfig"), + "TritonGenerateConfig": ( + ".llms.triton.completion.transformation", + "TritonGenerateConfig", + ), + "TritonInferConfig": ( + ".llms.triton.completion.transformation", + "TritonInferConfig", + ), + "TritonEmbeddingConfig": ( + ".llms.triton.embedding.transformation", + "TritonEmbeddingConfig", + ), + "HuggingFaceRerankConfig": ( + ".llms.huggingface.rerank.transformation", + "HuggingFaceRerankConfig", + ), "DatabricksConfig": (".llms.databricks.chat.transformation", "DatabricksConfig"), - "DatabricksEmbeddingConfig": (".llms.databricks.embed.transformation", "DatabricksEmbeddingConfig"), + "DatabricksEmbeddingConfig": ( + ".llms.databricks.embed.transformation", + "DatabricksEmbeddingConfig", + ), "PredibaseConfig": (".llms.predibase.chat.transformation", "PredibaseConfig"), "ReplicateConfig": (".llms.replicate.chat.transformation", "ReplicateConfig"), "SnowflakeConfig": (".llms.snowflake.chat.transformation", "SnowflakeConfig"), "CohereRerankConfig": (".llms.cohere.rerank.transformation", "CohereRerankConfig"), - "CohereRerankV2Config": (".llms.cohere.rerank_v2.transformation", "CohereRerankV2Config"), - "AzureAIRerankConfig": (".llms.azure_ai.rerank.transformation", "AzureAIRerankConfig"), - "InfinityRerankConfig": (".llms.infinity.rerank.transformation", "InfinityRerankConfig"), + "CohereRerankV2Config": ( + ".llms.cohere.rerank_v2.transformation", + "CohereRerankV2Config", + ), + "AzureAIRerankConfig": ( + ".llms.azure_ai.rerank.transformation", + "AzureAIRerankConfig", + ), + "InfinityRerankConfig": ( + ".llms.infinity.rerank.transformation", + "InfinityRerankConfig", + ), "JinaAIRerankConfig": 
(".llms.jina_ai.rerank.transformation", "JinaAIRerankConfig"), - "DeepinfraRerankConfig": (".llms.deepinfra.rerank.transformation", "DeepinfraRerankConfig"), - "HostedVLLMRerankConfig": (".llms.hosted_vllm.rerank.transformation", "HostedVLLMRerankConfig"), - "NvidiaNimRerankConfig": (".llms.nvidia_nim.rerank.transformation", "NvidiaNimRerankConfig"), - "NvidiaNimRankingConfig": (".llms.nvidia_nim.rerank.ranking_transformation", "NvidiaNimRankingConfig"), - "VertexAIRerankConfig": (".llms.vertex_ai.rerank.transformation", "VertexAIRerankConfig"), - "FireworksAIRerankConfig": (".llms.fireworks_ai.rerank.transformation", "FireworksAIRerankConfig"), + "DeepinfraRerankConfig": ( + ".llms.deepinfra.rerank.transformation", + "DeepinfraRerankConfig", + ), + "HostedVLLMRerankConfig": ( + ".llms.hosted_vllm.rerank.transformation", + "HostedVLLMRerankConfig", + ), + "NvidiaNimRerankConfig": ( + ".llms.nvidia_nim.rerank.transformation", + "NvidiaNimRerankConfig", + ), + "NvidiaNimRankingConfig": ( + ".llms.nvidia_nim.rerank.ranking_transformation", + "NvidiaNimRankingConfig", + ), + "VertexAIRerankConfig": ( + ".llms.vertex_ai.rerank.transformation", + "VertexAIRerankConfig", + ), + "FireworksAIRerankConfig": ( + ".llms.fireworks_ai.rerank.transformation", + "FireworksAIRerankConfig", + ), "VoyageRerankConfig": (".llms.voyage.rerank.transformation", "VoyageRerankConfig"), "ClarifaiConfig": (".llms.clarifai.chat.transformation", "ClarifaiConfig"), "AI21ChatConfig": (".llms.ai21.chat.transformation", "AI21ChatConfig"), "LlamaAPIConfig": (".llms.meta_llama.chat.transformation", "LlamaAPIConfig"), - "TogetherAITextCompletionConfig": (".llms.together_ai.completion.transformation", "TogetherAITextCompletionConfig"), - "CloudflareChatConfig": (".llms.cloudflare.chat.transformation", "CloudflareChatConfig"), + "TogetherAITextCompletionConfig": ( + ".llms.together_ai.completion.transformation", + "TogetherAITextCompletionConfig", + ), + "CloudflareChatConfig": ( + 
".llms.cloudflare.chat.transformation", + "CloudflareChatConfig", + ), "NovitaConfig": (".llms.novita.chat.transformation", "NovitaConfig"), "PetalsConfig": (".llms.petals.completion.transformation", "PetalsConfig"), "OllamaChatConfig": (".llms.ollama.chat.transformation", "OllamaChatConfig"), "OllamaConfig": (".llms.ollama.completion.transformation", "OllamaConfig"), "SagemakerConfig": (".llms.sagemaker.completion.transformation", "SagemakerConfig"), - "SagemakerChatConfig": (".llms.sagemaker.chat.transformation", "SagemakerChatConfig"), + "SagemakerChatConfig": ( + ".llms.sagemaker.chat.transformation", + "SagemakerChatConfig", + ), "CohereChatConfig": (".llms.cohere.chat.transformation", "CohereChatConfig"), - "AnthropicMessagesConfig": (".llms.anthropic.experimental_pass_through.messages.transformation", "AnthropicMessagesConfig"), - "AmazonAnthropicClaudeMessagesConfig": (".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeMessagesConfig"), + "AnthropicMessagesConfig": ( + ".llms.anthropic.experimental_pass_through.messages.transformation", + "AnthropicMessagesConfig", + ), + "AmazonAnthropicClaudeMessagesConfig": ( + ".llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation", + "AmazonAnthropicClaudeMessagesConfig", + ), "TogetherAIConfig": (".llms.together_ai.chat", "TogetherAIConfig"), "NLPCloudConfig": (".llms.nlp_cloud.chat.handler", "NLPCloudConfig"), - "VertexGeminiConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"), - "GoogleAIStudioGeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"), - "VertexAIAnthropicConfig": (".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", "VertexAIAnthropicConfig"), - "VertexAILlama3Config": (".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", "VertexAILlama3Config"), - "VertexAIAi21Config": 
(".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", "VertexAIAi21Config"), - "AmazonCohereChatConfig": (".llms.bedrock.chat.invoke_handler", "AmazonCohereChatConfig"), - "AmazonBedrockGlobalConfig": (".llms.bedrock.common_utils", "AmazonBedrockGlobalConfig"), - "AmazonAI21Config": (".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", "AmazonAI21Config"), - "AmazonInvokeNovaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", "AmazonInvokeNovaConfig"), - "AmazonQwen2Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", "AmazonQwen2Config"), - "AmazonQwen3Config": (".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", "AmazonQwen3Config"), + "VertexGeminiConfig": ( + ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", + "VertexGeminiConfig", + ), + "GoogleAIStudioGeminiConfig": ( + ".llms.gemini.chat.transformation", + "GoogleAIStudioGeminiConfig", + ), + "VertexAIAnthropicConfig": ( + ".llms.vertex_ai.vertex_ai_partner_models.anthropic.transformation", + "VertexAIAnthropicConfig", + ), + "VertexAILlama3Config": ( + ".llms.vertex_ai.vertex_ai_partner_models.llama3.transformation", + "VertexAILlama3Config", + ), + "VertexAIAi21Config": ( + ".llms.vertex_ai.vertex_ai_partner_models.ai21.transformation", + "VertexAIAi21Config", + ), + "AmazonCohereChatConfig": ( + ".llms.bedrock.chat.invoke_handler", + "AmazonCohereChatConfig", + ), + "AmazonBedrockGlobalConfig": ( + ".llms.bedrock.common_utils", + "AmazonBedrockGlobalConfig", + ), + "AmazonAI21Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_ai21_transformation", + "AmazonAI21Config", + ), + "AmazonInvokeNovaConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_nova_transformation", + "AmazonInvokeNovaConfig", + ), + "AmazonQwen2Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_qwen2_transformation", + "AmazonQwen2Config", + ), + "AmazonQwen3Config": ( + 
".llms.bedrock.chat.invoke_transformations.amazon_qwen3_transformation", + "AmazonQwen3Config", + ), # Aliases for backwards compatibility - "VertexAIConfig": (".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", "VertexGeminiConfig"), # Alias - "GeminiConfig": (".llms.gemini.chat.transformation", "GoogleAIStudioGeminiConfig"), # Alias - "AmazonAnthropicConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", "AmazonAnthropicConfig"), - "AmazonAnthropicClaudeConfig": (".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", "AmazonAnthropicClaudeConfig"), - "AmazonCohereConfig": (".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", "AmazonCohereConfig"), - "AmazonLlamaConfig": (".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", "AmazonLlamaConfig"), - "AmazonDeepSeekR1Config": (".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", "AmazonDeepSeekR1Config"), - "AmazonMistralConfig": (".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", "AmazonMistralConfig"), - "AmazonMoonshotConfig": (".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", "AmazonMoonshotConfig"), - "AmazonTitanConfig": (".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", "AmazonTitanConfig"), - "AmazonTwelveLabsPegasusConfig": (".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", "AmazonTwelveLabsPegasusConfig"), - "AmazonInvokeConfig": (".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", "AmazonInvokeConfig"), - "AmazonBedrockOpenAIConfig": (".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", "AmazonBedrockOpenAIConfig"), - "AmazonStabilityConfig": (".llms.bedrock.image_generation.amazon_stability1_transformation", "AmazonStabilityConfig"), - "AmazonStability3Config": (".llms.bedrock.image_generation.amazon_stability3_transformation", 
"AmazonStability3Config"), - "AmazonNovaCanvasConfig": (".llms.bedrock.image_generation.amazon_nova_canvas_transformation", "AmazonNovaCanvasConfig"), - "AmazonTitanG1Config": (".llms.bedrock.embed.amazon_titan_g1_transformation", "AmazonTitanG1Config"), - "AmazonTitanMultimodalEmbeddingG1Config": (".llms.bedrock.embed.amazon_titan_multimodal_transformation", "AmazonTitanMultimodalEmbeddingG1Config"), + "VertexAIConfig": ( + ".llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini", + "VertexGeminiConfig", + ), # Alias + "GeminiConfig": ( + ".llms.gemini.chat.transformation", + "GoogleAIStudioGeminiConfig", + ), # Alias + "AmazonAnthropicConfig": ( + ".llms.bedrock.chat.invoke_transformations.anthropic_claude2_transformation", + "AmazonAnthropicConfig", + ), + "AmazonAnthropicClaudeConfig": ( + ".llms.bedrock.chat.invoke_transformations.anthropic_claude3_transformation", + "AmazonAnthropicClaudeConfig", + ), + "AmazonCohereConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_cohere_transformation", + "AmazonCohereConfig", + ), + "AmazonLlamaConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_llama_transformation", + "AmazonLlamaConfig", + ), + "AmazonDeepSeekR1Config": ( + ".llms.bedrock.chat.invoke_transformations.amazon_deepseek_transformation", + "AmazonDeepSeekR1Config", + ), + "AmazonMistralConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_mistral_transformation", + "AmazonMistralConfig", + ), + "AmazonMoonshotConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_moonshot_transformation", + "AmazonMoonshotConfig", + ), + "AmazonTitanConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_titan_transformation", + "AmazonTitanConfig", + ), + "AmazonTwelveLabsPegasusConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation", + "AmazonTwelveLabsPegasusConfig", + ), + "AmazonInvokeConfig": ( + ".llms.bedrock.chat.invoke_transformations.base_invoke_transformation", + 
"AmazonInvokeConfig", + ), + "AmazonBedrockOpenAIConfig": ( + ".llms.bedrock.chat.invoke_transformations.amazon_openai_transformation", + "AmazonBedrockOpenAIConfig", + ), + "AmazonStabilityConfig": ( + ".llms.bedrock.image_generation.amazon_stability1_transformation", + "AmazonStabilityConfig", + ), + "AmazonStability3Config": ( + ".llms.bedrock.image_generation.amazon_stability3_transformation", + "AmazonStability3Config", + ), + "AmazonNovaCanvasConfig": ( + ".llms.bedrock.image_generation.amazon_nova_canvas_transformation", + "AmazonNovaCanvasConfig", + ), + "AmazonTitanG1Config": ( + ".llms.bedrock.embed.amazon_titan_g1_transformation", + "AmazonTitanG1Config", + ), + "AmazonTitanMultimodalEmbeddingG1Config": ( + ".llms.bedrock.embed.amazon_titan_multimodal_transformation", + "AmazonTitanMultimodalEmbeddingG1Config", + ), "CohereV2ChatConfig": (".llms.cohere.chat.v2_transformation", "CohereV2ChatConfig"), - "BedrockCohereEmbeddingConfig": (".llms.bedrock.embed.cohere_transformation", "BedrockCohereEmbeddingConfig"), - "TwelveLabsMarengoEmbeddingConfig": (".llms.bedrock.embed.twelvelabs_marengo_transformation", "TwelveLabsMarengoEmbeddingConfig"), - "AmazonNovaEmbeddingConfig": (".llms.bedrock.embed.amazon_nova_transformation", "AmazonNovaEmbeddingConfig"), + "BedrockCohereEmbeddingConfig": ( + ".llms.bedrock.embed.cohere_transformation", + "BedrockCohereEmbeddingConfig", + ), + "TwelveLabsMarengoEmbeddingConfig": ( + ".llms.bedrock.embed.twelvelabs_marengo_transformation", + "TwelveLabsMarengoEmbeddingConfig", + ), + "AmazonNovaEmbeddingConfig": ( + ".llms.bedrock.embed.amazon_nova_transformation", + "AmazonNovaEmbeddingConfig", + ), "OpenAIConfig": (".llms.openai.openai", "OpenAIConfig"), "MistralEmbeddingConfig": (".llms.openai.openai", "MistralEmbeddingConfig"), - "OpenAIImageVariationConfig": (".llms.openai.image_variations.transformation", "OpenAIImageVariationConfig"), + "OpenAIImageVariationConfig": ( + ".llms.openai.image_variations.transformation", + 
"OpenAIImageVariationConfig", + ), "DeepInfraConfig": (".llms.deepinfra.chat.transformation", "DeepInfraConfig"), - "DeepgramAudioTranscriptionConfig": (".llms.deepgram.audio_transcription.transformation", "DeepgramAudioTranscriptionConfig"), - "TopazImageVariationConfig": (".llms.topaz.image_variations.transformation", "TopazImageVariationConfig"), - "OpenAITextCompletionConfig": ("litellm.llms.openai.completion.transformation", "OpenAITextCompletionConfig"), + "DeepgramAudioTranscriptionConfig": ( + ".llms.deepgram.audio_transcription.transformation", + "DeepgramAudioTranscriptionConfig", + ), + "TopazImageVariationConfig": ( + ".llms.topaz.image_variations.transformation", + "TopazImageVariationConfig", + ), + "OpenAITextCompletionConfig": ( + "litellm.llms.openai.completion.transformation", + "OpenAITextCompletionConfig", + ), "GroqChatConfig": (".llms.groq.chat.transformation", "GroqChatConfig"), - "GenAIHubOrchestrationConfig": (".llms.sap.chat.transformation", "GenAIHubOrchestrationConfig"), - "VoyageEmbeddingConfig": (".llms.voyage.embedding.transformation", "VoyageEmbeddingConfig"), - "VoyageContextualEmbeddingConfig": (".llms.voyage.embedding.transformation_contextual", "VoyageContextualEmbeddingConfig"), - "InfinityEmbeddingConfig": (".llms.infinity.embedding.transformation", "InfinityEmbeddingConfig"), - "AzureAIStudioConfig": (".llms.azure_ai.chat.transformation", "AzureAIStudioConfig"), + "GenAIHubOrchestrationConfig": ( + ".llms.sap.chat.transformation", + "GenAIHubOrchestrationConfig", + ), + "VoyageEmbeddingConfig": ( + ".llms.voyage.embedding.transformation", + "VoyageEmbeddingConfig", + ), + "VoyageContextualEmbeddingConfig": ( + ".llms.voyage.embedding.transformation_contextual", + "VoyageContextualEmbeddingConfig", + ), + "InfinityEmbeddingConfig": ( + ".llms.infinity.embedding.transformation", + "InfinityEmbeddingConfig", + ), + "AzureAIStudioConfig": ( + ".llms.azure_ai.chat.transformation", + "AzureAIStudioConfig", + ), "MistralConfig": 
(".llms.mistral.chat.transformation", "MistralConfig"), - "OpenAIResponsesAPIConfig": (".llms.openai.responses.transformation", "OpenAIResponsesAPIConfig"), - "AzureOpenAIResponsesAPIConfig": (".llms.azure.responses.transformation", "AzureOpenAIResponsesAPIConfig"), - "AzureOpenAIOSeriesResponsesAPIConfig": (".llms.azure.responses.o_series_transformation", "AzureOpenAIOSeriesResponsesAPIConfig"), - "XAIResponsesAPIConfig": (".llms.xai.responses.transformation", "XAIResponsesAPIConfig"), - "LiteLLMProxyResponsesAPIConfig": (".llms.litellm_proxy.responses.transformation", "LiteLLMProxyResponsesAPIConfig"), - "VolcEngineResponsesAPIConfig": (".llms.volcengine.responses.transformation", "VolcEngineResponsesAPIConfig"), - "ManusResponsesAPIConfig": (".llms.manus.responses.transformation", "ManusResponsesAPIConfig"), - "GoogleAIStudioInteractionsConfig": (".llms.gemini.interactions.transformation", "GoogleAIStudioInteractionsConfig"), - "OpenAIOSeriesConfig": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"), - "AnthropicSkillsConfig": (".llms.anthropic.skills.transformation", "AnthropicSkillsConfig"), - "BaseSkillsAPIConfig": (".llms.base_llm.skills.transformation", "BaseSkillsAPIConfig"), + "OpenAIResponsesAPIConfig": ( + ".llms.openai.responses.transformation", + "OpenAIResponsesAPIConfig", + ), + "AzureOpenAIResponsesAPIConfig": ( + ".llms.azure.responses.transformation", + "AzureOpenAIResponsesAPIConfig", + ), + "AzureOpenAIOSeriesResponsesAPIConfig": ( + ".llms.azure.responses.o_series_transformation", + "AzureOpenAIOSeriesResponsesAPIConfig", + ), + "XAIResponsesAPIConfig": ( + ".llms.xai.responses.transformation", + "XAIResponsesAPIConfig", + ), + "LiteLLMProxyResponsesAPIConfig": ( + ".llms.litellm_proxy.responses.transformation", + "LiteLLMProxyResponsesAPIConfig", + ), + "VolcEngineResponsesAPIConfig": ( + ".llms.volcengine.responses.transformation", + "VolcEngineResponsesAPIConfig", + ), + "ManusResponsesAPIConfig": ( + 
".llms.manus.responses.transformation", + "ManusResponsesAPIConfig", + ), + "GoogleAIStudioInteractionsConfig": ( + ".llms.gemini.interactions.transformation", + "GoogleAIStudioInteractionsConfig", + ), + "OpenAIOSeriesConfig": ( + ".llms.openai.chat.o_series_transformation", + "OpenAIOSeriesConfig", + ), + "AnthropicSkillsConfig": ( + ".llms.anthropic.skills.transformation", + "AnthropicSkillsConfig", + ), + "BaseSkillsAPIConfig": ( + ".llms.base_llm.skills.transformation", + "BaseSkillsAPIConfig", + ), "GradientAIConfig": (".llms.gradient_ai.chat.transformation", "GradientAIConfig"), # Alias for backwards compatibility - "OpenAIO1Config": (".llms.openai.chat.o_series_transformation", "OpenAIOSeriesConfig"), # Alias + "OpenAIO1Config": ( + ".llms.openai.chat.o_series_transformation", + "OpenAIOSeriesConfig", + ), # Alias "OpenAIGPTConfig": (".llms.openai.chat.gpt_transformation", "OpenAIGPTConfig"), "OpenAIGPT5Config": (".llms.openai.chat.gpt_5_transformation", "OpenAIGPT5Config"), - "OpenAIWhisperAudioTranscriptionConfig": (".llms.openai.transcriptions.whisper_transformation", "OpenAIWhisperAudioTranscriptionConfig"), - "OpenAIGPTAudioTranscriptionConfig": (".llms.openai.transcriptions.gpt_transformation", "OpenAIGPTAudioTranscriptionConfig"), - "OpenAIGPTAudioConfig": (".llms.openai.chat.gpt_audio_transformation", "OpenAIGPTAudioConfig"), + "OpenAIWhisperAudioTranscriptionConfig": ( + ".llms.openai.transcriptions.whisper_transformation", + "OpenAIWhisperAudioTranscriptionConfig", + ), + "OpenAIGPTAudioTranscriptionConfig": ( + ".llms.openai.transcriptions.gpt_transformation", + "OpenAIGPTAudioTranscriptionConfig", + ), + "OpenAIGPTAudioConfig": ( + ".llms.openai.chat.gpt_audio_transformation", + "OpenAIGPTAudioConfig", + ), "NvidiaNimConfig": (".llms.nvidia_nim.chat.transformation", "NvidiaNimConfig"), "NvidiaNimEmbeddingConfig": (".llms.nvidia_nim.embed", "NvidiaNimEmbeddingConfig"), - "FeatherlessAIConfig": (".llms.featherless_ai.chat.transformation", 
"FeatherlessAIConfig"), + "FeatherlessAIConfig": ( + ".llms.featherless_ai.chat.transformation", + "FeatherlessAIConfig", + ), "CerebrasConfig": (".llms.cerebras.chat", "CerebrasConfig"), "BasetenConfig": (".llms.baseten.chat", "BasetenConfig"), "SambanovaConfig": (".llms.sambanova.chat", "SambanovaConfig"), - "SambaNovaEmbeddingConfig": (".llms.sambanova.embedding.transformation", "SambaNovaEmbeddingConfig"), - "FireworksAIConfig": (".llms.fireworks_ai.chat.transformation", "FireworksAIConfig"), - "FireworksAITextCompletionConfig": (".llms.fireworks_ai.completion.transformation", "FireworksAITextCompletionConfig"), - "FireworksAIAudioTranscriptionConfig": (".llms.fireworks_ai.audio_transcription.transformation", "FireworksAIAudioTranscriptionConfig"), - "FireworksAIEmbeddingConfig": (".llms.fireworks_ai.embed.fireworks_ai_transformation", "FireworksAIEmbeddingConfig"), - "FriendliaiChatConfig": (".llms.friendliai.chat.transformation", "FriendliaiChatConfig"), - "JinaAIEmbeddingConfig": (".llms.jina_ai.embedding.transformation", "JinaAIEmbeddingConfig"), + "SambaNovaEmbeddingConfig": ( + ".llms.sambanova.embedding.transformation", + "SambaNovaEmbeddingConfig", + ), + "FireworksAIConfig": ( + ".llms.fireworks_ai.chat.transformation", + "FireworksAIConfig", + ), + "FireworksAITextCompletionConfig": ( + ".llms.fireworks_ai.completion.transformation", + "FireworksAITextCompletionConfig", + ), + "FireworksAIAudioTranscriptionConfig": ( + ".llms.fireworks_ai.audio_transcription.transformation", + "FireworksAIAudioTranscriptionConfig", + ), + "FireworksAIEmbeddingConfig": ( + ".llms.fireworks_ai.embed.fireworks_ai_transformation", + "FireworksAIEmbeddingConfig", + ), + "FriendliaiChatConfig": ( + ".llms.friendliai.chat.transformation", + "FriendliaiChatConfig", + ), + "JinaAIEmbeddingConfig": ( + ".llms.jina_ai.embedding.transformation", + "JinaAIEmbeddingConfig", + ), "XAIChatConfig": (".llms.xai.chat.transformation", "XAIChatConfig"), "ZAIChatConfig": 
(".llms.zai.chat.transformation", "ZAIChatConfig"), "AIMLChatConfig": (".llms.aiml.chat.transformation", "AIMLChatConfig"), - "VolcEngineChatConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"), - "CodestralTextCompletionConfig": (".llms.codestral.completion.transformation", "CodestralTextCompletionConfig"), - "AzureOpenAIAssistantsAPIConfig": (".llms.azure.azure", "AzureOpenAIAssistantsAPIConfig"), + "VolcEngineChatConfig": ( + ".llms.volcengine.chat.transformation", + "VolcEngineChatConfig", + ), + "CodestralTextCompletionConfig": ( + ".llms.codestral.completion.transformation", + "CodestralTextCompletionConfig", + ), + "AzureOpenAIAssistantsAPIConfig": ( + ".llms.azure.azure", + "AzureOpenAIAssistantsAPIConfig", + ), "HerokuChatConfig": (".llms.heroku.chat.transformation", "HerokuChatConfig"), "CometAPIConfig": (".llms.cometapi.chat.transformation", "CometAPIConfig"), "AzureOpenAIConfig": (".llms.azure.chat.gpt_transformation", "AzureOpenAIConfig"), - "AzureOpenAIGPT5Config": (".llms.azure.chat.gpt_5_transformation", "AzureOpenAIGPT5Config"), - "AzureOpenAITextConfig": (".llms.azure.completion.transformation", "AzureOpenAITextConfig"), - "HostedVLLMChatConfig": (".llms.hosted_vllm.chat.transformation", "HostedVLLMChatConfig"), + "AzureOpenAIGPT5Config": ( + ".llms.azure.chat.gpt_5_transformation", + "AzureOpenAIGPT5Config", + ), + "AzureOpenAITextConfig": ( + ".llms.azure.completion.transformation", + "AzureOpenAITextConfig", + ), + "HostedVLLMChatConfig": ( + ".llms.hosted_vllm.chat.transformation", + "HostedVLLMChatConfig", + ), + "HostedVLLMEmbeddingConfig": ( + ".llms.hosted_vllm.embedding.transformation", + "HostedVLLMEmbeddingConfig", + ), # Alias for backwards compatibility - "VolcEngineConfig": (".llms.volcengine.chat.transformation", "VolcEngineChatConfig"), # Alias - "LlamafileChatConfig": (".llms.llamafile.chat.transformation", "LlamafileChatConfig"), - "LiteLLMProxyChatConfig": (".llms.litellm_proxy.chat.transformation", 
"LiteLLMProxyChatConfig"), + "VolcEngineConfig": ( + ".llms.volcengine.chat.transformation", + "VolcEngineChatConfig", + ), # Alias + "LlamafileChatConfig": ( + ".llms.llamafile.chat.transformation", + "LlamafileChatConfig", + ), + "LiteLLMProxyChatConfig": ( + ".llms.litellm_proxy.chat.transformation", + "LiteLLMProxyChatConfig", + ), "VLLMConfig": (".llms.vllm.completion.transformation", "VLLMConfig"), "DeepSeekChatConfig": (".llms.deepseek.chat.transformation", "DeepSeekChatConfig"), "LMStudioChatConfig": (".llms.lm_studio.chat.transformation", "LMStudioChatConfig"), - "LmStudioEmbeddingConfig": (".llms.lm_studio.embed.transformation", "LmStudioEmbeddingConfig"), + "LmStudioEmbeddingConfig": ( + ".llms.lm_studio.embed.transformation", + "LmStudioEmbeddingConfig", + ), "NscaleConfig": (".llms.nscale.chat.transformation", "NscaleConfig"), - "PerplexityChatConfig": (".llms.perplexity.chat.transformation", "PerplexityChatConfig"), - "AzureOpenAIO1Config": (".llms.azure.chat.o_series_transformation", "AzureOpenAIO1Config"), - "IBMWatsonXAIConfig": (".llms.watsonx.completion.transformation", "IBMWatsonXAIConfig"), - "IBMWatsonXChatConfig": (".llms.watsonx.chat.transformation", "IBMWatsonXChatConfig"), - "IBMWatsonXEmbeddingConfig": (".llms.watsonx.embed.transformation", "IBMWatsonXEmbeddingConfig"), - "GenAIHubEmbeddingConfig": (".llms.sap.embed.transformation", "GenAIHubEmbeddingConfig"), - "IBMWatsonXAudioTranscriptionConfig": (".llms.watsonx.audio_transcription.transformation", "IBMWatsonXAudioTranscriptionConfig"), - "GithubCopilotConfig": (".llms.github_copilot.chat.transformation", "GithubCopilotConfig"), - "GithubCopilotResponsesAPIConfig": (".llms.github_copilot.responses.transformation", "GithubCopilotResponsesAPIConfig"), - "GithubCopilotEmbeddingConfig": (".llms.github_copilot.embedding.transformation", "GithubCopilotEmbeddingConfig"), + "PerplexityChatConfig": ( + ".llms.perplexity.chat.transformation", + "PerplexityChatConfig", + ), + 
"AzureOpenAIO1Config": ( + ".llms.azure.chat.o_series_transformation", + "AzureOpenAIO1Config", + ), + "IBMWatsonXAIConfig": ( + ".llms.watsonx.completion.transformation", + "IBMWatsonXAIConfig", + ), + "IBMWatsonXChatConfig": ( + ".llms.watsonx.chat.transformation", + "IBMWatsonXChatConfig", + ), + "IBMWatsonXEmbeddingConfig": ( + ".llms.watsonx.embed.transformation", + "IBMWatsonXEmbeddingConfig", + ), + "GenAIHubEmbeddingConfig": ( + ".llms.sap.embed.transformation", + "GenAIHubEmbeddingConfig", + ), + "IBMWatsonXAudioTranscriptionConfig": ( + ".llms.watsonx.audio_transcription.transformation", + "IBMWatsonXAudioTranscriptionConfig", + ), + "GithubCopilotConfig": ( + ".llms.github_copilot.chat.transformation", + "GithubCopilotConfig", + ), + "GithubCopilotResponsesAPIConfig": ( + ".llms.github_copilot.responses.transformation", + "GithubCopilotResponsesAPIConfig", + ), + "GithubCopilotEmbeddingConfig": ( + ".llms.github_copilot.embedding.transformation", + "GithubCopilotEmbeddingConfig", + ), "ChatGPTConfig": (".llms.chatgpt.chat.transformation", "ChatGPTConfig"), - "ChatGPTResponsesAPIConfig": (".llms.chatgpt.responses.transformation", "ChatGPTResponsesAPIConfig"), + "ChatGPTResponsesAPIConfig": ( + ".llms.chatgpt.responses.transformation", + "ChatGPTResponsesAPIConfig", + ), "NebiusConfig": (".llms.nebius.chat.transformation", "NebiusConfig"), "WandbConfig": (".llms.wandb.chat.transformation", "WandbConfig"), "GigaChatConfig": (".llms.gigachat.chat.transformation", "GigaChatConfig"), - "GigaChatEmbeddingConfig": (".llms.gigachat.embedding.transformation", "GigaChatEmbeddingConfig"), - "DashScopeChatConfig": (".llms.dashscope.chat.transformation", "DashScopeChatConfig"), + "GigaChatEmbeddingConfig": ( + ".llms.gigachat.embedding.transformation", + "GigaChatEmbeddingConfig", + ), + "DashScopeChatConfig": ( + ".llms.dashscope.chat.transformation", + "DashScopeChatConfig", + ), "MoonshotChatConfig": (".llms.moonshot.chat.transformation", "MoonshotChatConfig"), - 
"DockerModelRunnerChatConfig": (".llms.docker_model_runner.chat.transformation", "DockerModelRunnerChatConfig"), + "DockerModelRunnerChatConfig": ( + ".llms.docker_model_runner.chat.transformation", + "DockerModelRunnerChatConfig", + ), "V0ChatConfig": (".llms.v0.chat.transformation", "V0ChatConfig"), "OCIChatConfig": (".llms.oci.chat.transformation", "OCIChatConfig"), "MorphChatConfig": (".llms.morph.chat.transformation", "MorphChatConfig"), "RAGFlowConfig": (".llms.ragflow.chat.transformation", "RAGFlowConfig"), "LambdaAIChatConfig": (".llms.lambda_ai.chat.transformation", "LambdaAIChatConfig"), - "HyperbolicChatConfig": (".llms.hyperbolic.chat.transformation", "HyperbolicChatConfig"), - "VercelAIGatewayConfig": (".llms.vercel_ai_gateway.chat.transformation", "VercelAIGatewayConfig"), + "HyperbolicChatConfig": ( + ".llms.hyperbolic.chat.transformation", + "HyperbolicChatConfig", + ), + "VercelAIGatewayConfig": ( + ".llms.vercel_ai_gateway.chat.transformation", + "VercelAIGatewayConfig", + ), "OVHCloudChatConfig": (".llms.ovhcloud.chat.transformation", "OVHCloudChatConfig"), - "OVHCloudEmbeddingConfig": (".llms.ovhcloud.embedding.transformation", "OVHCloudEmbeddingConfig"), - "CometAPIEmbeddingConfig": (".llms.cometapi.embed.transformation", "CometAPIEmbeddingConfig"), + "OVHCloudEmbeddingConfig": ( + ".llms.ovhcloud.embedding.transformation", + "OVHCloudEmbeddingConfig", + ), + "CometAPIEmbeddingConfig": ( + ".llms.cometapi.embed.transformation", + "CometAPIEmbeddingConfig", + ), "LemonadeChatConfig": (".llms.lemonade.chat.transformation", "LemonadeChatConfig"), - "SnowflakeEmbeddingConfig": (".llms.snowflake.embedding.transformation", "SnowflakeEmbeddingConfig"), - "AmazonNovaChatConfig": (".llms.amazon_nova.chat.transformation", "AmazonNovaChatConfig"), + "SnowflakeEmbeddingConfig": ( + ".llms.snowflake.embedding.transformation", + "SnowflakeEmbeddingConfig", + ), + "AmazonNovaChatConfig": ( + ".llms.amazon_nova.chat.transformation", + "AmazonNovaChatConfig", + 
), } # Import map for utils module lazy imports _UTILS_MODULE_IMPORT_MAP = { "encoding": ("litellm.main", "encoding"), - "BaseVectorStore": ("litellm.integrations.vector_store_integrations.base_vector_store", "BaseVectorStore"), - "CredentialAccessor": ("litellm.litellm_core_utils.credential_accessor", "CredentialAccessor"), - "exception_type": ("litellm.litellm_core_utils.exception_mapping_utils", "exception_type"), - "get_error_message": ("litellm.litellm_core_utils.exception_mapping_utils", "get_error_message"), - "_get_response_headers": ("litellm.litellm_core_utils.exception_mapping_utils", "_get_response_headers"), - "get_llm_provider": ("litellm.litellm_core_utils.get_llm_provider_logic", "get_llm_provider"), - "_is_non_openai_azure_model": ("litellm.litellm_core_utils.get_llm_provider_logic", "_is_non_openai_azure_model"), - "get_supported_openai_params": ("litellm.litellm_core_utils.get_supported_openai_params", "get_supported_openai_params"), - "LiteLLMResponseObjectHandler": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "LiteLLMResponseObjectHandler"), - "_handle_invalid_parallel_tool_calls": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "_handle_invalid_parallel_tool_calls"), - "convert_to_model_response_object": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_model_response_object"), - "convert_to_streaming_response": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response"), - "convert_to_streaming_response_async": ("litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", "convert_to_streaming_response_async"), - "get_api_base": ("litellm.litellm_core_utils.llm_response_utils.get_api_base", "get_api_base"), - "ResponseMetadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "ResponseMetadata"), - "_parse_content_for_reasoning": 
("litellm.litellm_core_utils.prompt_templates.common_utils", "_parse_content_for_reasoning"), - "LiteLLMLoggingObject": ("litellm.litellm_core_utils.redact_messages", "LiteLLMLoggingObject"), - "redact_message_input_output_from_logging": ("litellm.litellm_core_utils.redact_messages", "redact_message_input_output_from_logging"), - "CustomStreamWrapper": ("litellm.litellm_core_utils.streaming_handler", "CustomStreamWrapper"), - "BaseGoogleGenAIGenerateContentConfig": ("litellm.llms.base_llm.google_genai.transformation", "BaseGoogleGenAIGenerateContentConfig"), + "BaseVectorStore": ( + "litellm.integrations.vector_store_integrations.base_vector_store", + "BaseVectorStore", + ), + "CredentialAccessor": ( + "litellm.litellm_core_utils.credential_accessor", + "CredentialAccessor", + ), + "exception_type": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "exception_type", + ), + "get_error_message": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "get_error_message", + ), + "_get_response_headers": ( + "litellm.litellm_core_utils.exception_mapping_utils", + "_get_response_headers", + ), + "get_llm_provider": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "get_llm_provider", + ), + "_is_non_openai_azure_model": ( + "litellm.litellm_core_utils.get_llm_provider_logic", + "_is_non_openai_azure_model", + ), + "get_supported_openai_params": ( + "litellm.litellm_core_utils.get_supported_openai_params", + "get_supported_openai_params", + ), + "LiteLLMResponseObjectHandler": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "LiteLLMResponseObjectHandler", + ), + "_handle_invalid_parallel_tool_calls": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "_handle_invalid_parallel_tool_calls", + ), + "convert_to_model_response_object": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_model_response_object", + ), + "convert_to_streaming_response": ( + 
"litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_streaming_response", + ), + "convert_to_streaming_response_async": ( + "litellm.litellm_core_utils.llm_response_utils.convert_dict_to_response", + "convert_to_streaming_response_async", + ), + "get_api_base": ( + "litellm.litellm_core_utils.llm_response_utils.get_api_base", + "get_api_base", + ), + "ResponseMetadata": ( + "litellm.litellm_core_utils.llm_response_utils.response_metadata", + "ResponseMetadata", + ), + "_parse_content_for_reasoning": ( + "litellm.litellm_core_utils.prompt_templates.common_utils", + "_parse_content_for_reasoning", + ), + "LiteLLMLoggingObject": ( + "litellm.litellm_core_utils.redact_messages", + "LiteLLMLoggingObject", + ), + "redact_message_input_output_from_logging": ( + "litellm.litellm_core_utils.redact_messages", + "redact_message_input_output_from_logging", + ), + "CustomStreamWrapper": ( + "litellm.litellm_core_utils.streaming_handler", + "CustomStreamWrapper", + ), + "BaseGoogleGenAIGenerateContentConfig": ( + "litellm.llms.base_llm.google_genai.transformation", + "BaseGoogleGenAIGenerateContentConfig", + ), "BaseOCRConfig": ("litellm.llms.base_llm.ocr.transformation", "BaseOCRConfig"), - "BaseSearchConfig": ("litellm.llms.base_llm.search.transformation", "BaseSearchConfig"), - "BaseTextToSpeechConfig": ("litellm.llms.base_llm.text_to_speech.transformation", "BaseTextToSpeechConfig"), + "BaseSearchConfig": ( + "litellm.llms.base_llm.search.transformation", + "BaseSearchConfig", + ), + "BaseTextToSpeechConfig": ( + "litellm.llms.base_llm.text_to_speech.transformation", + "BaseTextToSpeechConfig", + ), "BedrockModelInfo": ("litellm.llms.bedrock.common_utils", "BedrockModelInfo"), "CohereModelInfo": ("litellm.llms.cohere.common_utils", "CohereModelInfo"), "MistralOCRConfig": ("litellm.llms.mistral.ocr.transformation", "MistralOCRConfig"), "Rules": ("litellm.litellm_core_utils.rules", "Rules"), "AsyncHTTPHandler": 
("litellm.llms.custom_httpx.http_handler", "AsyncHTTPHandler"), "HTTPHandler": ("litellm.llms.custom_httpx.http_handler", "HTTPHandler"), - "get_num_retries_from_retry_policy": ("litellm.router_utils.get_retry_from_policy", "get_num_retries_from_retry_policy"), - "reset_retry_policy": ("litellm.router_utils.get_retry_from_policy", "reset_retry_policy"), + "get_num_retries_from_retry_policy": ( + "litellm.router_utils.get_retry_from_policy", + "get_num_retries_from_retry_policy", + ), + "reset_retry_policy": ( + "litellm.router_utils.get_retry_from_policy", + "reset_retry_policy", + ), "get_secret": ("litellm.secret_managers.main", "get_secret"), - "get_coroutine_checker": ("litellm.litellm_core_utils.cached_imports", "get_coroutine_checker"), - "get_litellm_logging_class": ("litellm.litellm_core_utils.cached_imports", "get_litellm_logging_class"), - "get_set_callbacks": ("litellm.litellm_core_utils.cached_imports", "get_set_callbacks"), - "get_litellm_metadata_from_kwargs": ("litellm.litellm_core_utils.core_helpers", "get_litellm_metadata_from_kwargs"), - "map_finish_reason": ("litellm.litellm_core_utils.core_helpers", "map_finish_reason"), - "process_response_headers": ("litellm.litellm_core_utils.core_helpers", "process_response_headers"), - "delete_nested_value": ("litellm.litellm_core_utils.dot_notation_indexing", "delete_nested_value"), - "is_nested_path": ("litellm.litellm_core_utils.dot_notation_indexing", "is_nested_path"), - "_get_base_model_from_litellm_call_metadata": ("litellm.litellm_core_utils.get_litellm_params", "_get_base_model_from_litellm_call_metadata"), - "get_litellm_params": ("litellm.litellm_core_utils.get_litellm_params", "get_litellm_params"), - "_ensure_extra_body_is_safe": ("litellm.litellm_core_utils.llm_request_utils", "_ensure_extra_body_is_safe"), - "get_formatted_prompt": ("litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", "get_formatted_prompt"), - "get_response_headers": 
("litellm.litellm_core_utils.llm_response_utils.get_headers", "get_response_headers"), - "update_response_metadata": ("litellm.litellm_core_utils.llm_response_utils.response_metadata", "update_response_metadata"), + "get_coroutine_checker": ( + "litellm.litellm_core_utils.cached_imports", + "get_coroutine_checker", + ), + "get_litellm_logging_class": ( + "litellm.litellm_core_utils.cached_imports", + "get_litellm_logging_class", + ), + "get_set_callbacks": ( + "litellm.litellm_core_utils.cached_imports", + "get_set_callbacks", + ), + "get_litellm_metadata_from_kwargs": ( + "litellm.litellm_core_utils.core_helpers", + "get_litellm_metadata_from_kwargs", + ), + "map_finish_reason": ( + "litellm.litellm_core_utils.core_helpers", + "map_finish_reason", + ), + "process_response_headers": ( + "litellm.litellm_core_utils.core_helpers", + "process_response_headers", + ), + "delete_nested_value": ( + "litellm.litellm_core_utils.dot_notation_indexing", + "delete_nested_value", + ), + "is_nested_path": ( + "litellm.litellm_core_utils.dot_notation_indexing", + "is_nested_path", + ), + "_get_base_model_from_litellm_call_metadata": ( + "litellm.litellm_core_utils.get_litellm_params", + "_get_base_model_from_litellm_call_metadata", + ), + "get_litellm_params": ( + "litellm.litellm_core_utils.get_litellm_params", + "get_litellm_params", + ), + "_ensure_extra_body_is_safe": ( + "litellm.litellm_core_utils.llm_request_utils", + "_ensure_extra_body_is_safe", + ), + "get_formatted_prompt": ( + "litellm.litellm_core_utils.llm_response_utils.get_formatted_prompt", + "get_formatted_prompt", + ), + "get_response_headers": ( + "litellm.litellm_core_utils.llm_response_utils.get_headers", + "get_response_headers", + ), + "update_response_metadata": ( + "litellm.litellm_core_utils.llm_response_utils.response_metadata", + "update_response_metadata", + ), "executor": ("litellm.litellm_core_utils.thread_pool_executor", "executor"), - "BaseAnthropicMessagesConfig": 
("litellm.llms.base_llm.anthropic_messages.transformation", "BaseAnthropicMessagesConfig"), - "BaseAudioTranscriptionConfig": ("litellm.llms.base_llm.audio_transcription.transformation", "BaseAudioTranscriptionConfig"), - "BaseBatchesConfig": ("litellm.llms.base_llm.batches.transformation", "BaseBatchesConfig"), - "BaseContainerConfig": ("litellm.llms.base_llm.containers.transformation", "BaseContainerConfig"), - "BaseEmbeddingConfig": ("litellm.llms.base_llm.embedding.transformation", "BaseEmbeddingConfig"), - "BaseImageEditConfig": ("litellm.llms.base_llm.image_edit.transformation", "BaseImageEditConfig"), - "BaseImageGenerationConfig": ("litellm.llms.base_llm.image_generation.transformation", "BaseImageGenerationConfig"), - "BaseImageVariationConfig": ("litellm.llms.base_llm.image_variations.transformation", "BaseImageVariationConfig"), - "BasePassthroughConfig": ("litellm.llms.base_llm.passthrough.transformation", "BasePassthroughConfig"), - "BaseRealtimeConfig": ("litellm.llms.base_llm.realtime.transformation", "BaseRealtimeConfig"), - "BaseRerankConfig": ("litellm.llms.base_llm.rerank.transformation", "BaseRerankConfig"), - "BaseVectorStoreConfig": ("litellm.llms.base_llm.vector_store.transformation", "BaseVectorStoreConfig"), - "BaseVectorStoreFilesConfig": ("litellm.llms.base_llm.vector_store_files.transformation", "BaseVectorStoreFilesConfig"), - "BaseVideoConfig": ("litellm.llms.base_llm.videos.transformation", "BaseVideoConfig"), - "ANTHROPIC_API_ONLY_HEADERS": ("litellm.types.llms.anthropic", "ANTHROPIC_API_ONLY_HEADERS"), - "AnthropicThinkingParam": ("litellm.types.llms.anthropic", "AnthropicThinkingParam"), + "BaseAnthropicMessagesConfig": ( + "litellm.llms.base_llm.anthropic_messages.transformation", + "BaseAnthropicMessagesConfig", + ), + "BaseAudioTranscriptionConfig": ( + "litellm.llms.base_llm.audio_transcription.transformation", + "BaseAudioTranscriptionConfig", + ), + "BaseBatchesConfig": ( + "litellm.llms.base_llm.batches.transformation", + 
"BaseBatchesConfig", + ), + "BaseContainerConfig": ( + "litellm.llms.base_llm.containers.transformation", + "BaseContainerConfig", + ), + "BaseEmbeddingConfig": ( + "litellm.llms.base_llm.embedding.transformation", + "BaseEmbeddingConfig", + ), + "BaseImageEditConfig": ( + "litellm.llms.base_llm.image_edit.transformation", + "BaseImageEditConfig", + ), + "BaseImageGenerationConfig": ( + "litellm.llms.base_llm.image_generation.transformation", + "BaseImageGenerationConfig", + ), + "BaseImageVariationConfig": ( + "litellm.llms.base_llm.image_variations.transformation", + "BaseImageVariationConfig", + ), + "BasePassthroughConfig": ( + "litellm.llms.base_llm.passthrough.transformation", + "BasePassthroughConfig", + ), + "BaseRealtimeConfig": ( + "litellm.llms.base_llm.realtime.transformation", + "BaseRealtimeConfig", + ), + "BaseRerankConfig": ( + "litellm.llms.base_llm.rerank.transformation", + "BaseRerankConfig", + ), + "BaseVectorStoreConfig": ( + "litellm.llms.base_llm.vector_store.transformation", + "BaseVectorStoreConfig", + ), + "BaseVectorStoreFilesConfig": ( + "litellm.llms.base_llm.vector_store_files.transformation", + "BaseVectorStoreFilesConfig", + ), + "BaseVideoConfig": ( + "litellm.llms.base_llm.videos.transformation", + "BaseVideoConfig", + ), + "ANTHROPIC_API_ONLY_HEADERS": ( + "litellm.types.llms.anthropic", + "ANTHROPIC_API_ONLY_HEADERS", + ), + "AnthropicThinkingParam": ( + "litellm.types.llms.anthropic", + "AnthropicThinkingParam", + ), "RerankResponse": ("litellm.types.rerank", "RerankResponse"), - "ChatCompletionDeltaToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionDeltaToolCallChunk"), - "ChatCompletionToolCallChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallChunk"), - "ChatCompletionToolCallFunctionChunk": ("litellm.types.llms.openai", "ChatCompletionToolCallFunctionChunk"), + "ChatCompletionDeltaToolCallChunk": ( + "litellm.types.llms.openai", + "ChatCompletionDeltaToolCallChunk", + ), + "ChatCompletionToolCallChunk": ( 
+ "litellm.types.llms.openai", + "ChatCompletionToolCallChunk", + ), + "ChatCompletionToolCallFunctionChunk": ( + "litellm.types.llms.openai", + "ChatCompletionToolCallFunctionChunk", + ), "LiteLLM_Params": ("litellm.types.router", "LiteLLM_Params"), } diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 03488ad0183..98ee5e4fa86 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -1632,6 +1632,7 @@ def _sanitize_anthropic_tool_use_id(tool_use_id: str) -> str: def convert_to_anthropic_tool_result( message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage], + force_base64: bool = False, ) -> AnthropicMessagesToolResultParam: """ OpenAI message with a tool result looks like: @@ -1694,7 +1695,7 @@ def convert_to_anthropic_tool_result( else None ) _anthropic_image_param = create_anthropic_image_param( - content["image_url"], format=format + content["image_url"], format=format, is_bedrock_invoke=force_base64 ) _anthropic_image_param = add_cache_control_to_content( anthropic_content_element=_anthropic_image_param, @@ -2056,6 +2057,12 @@ def anthropic_messages_pt( # noqa: PLR0915 else: messages.append(DEFAULT_USER_CONTINUE_MESSAGE_TYPED) + # Bedrock invoke models have format: invoke/... 
+ # Vertex AI Anthropic also doesn't support URL sources for images + is_bedrock_invoke = model.lower().startswith("invoke/") + is_vertex_ai = llm_provider.startswith("vertex_ai") if llm_provider else False + force_base64 = is_bedrock_invoke or is_vertex_ai + msg_i = 0 while msg_i < len(messages): user_content: List[AnthropicMessagesUserMessageValues] = [] @@ -2165,7 +2172,9 @@ def anthropic_messages_pt( # noqa: PLR0915 ): # OpenAI's tool message content will always be a string user_content.append( - convert_to_anthropic_tool_result(user_message_types_block) + convert_to_anthropic_tool_result( + user_message_types_block, force_base64=force_base64 + ) ) msg_i += 1 diff --git a/litellm/llms/gemini/files/transformation.py b/litellm/llms/gemini/files/transformation.py index d9ebf69a97a..334dc013226 100644 --- a/litellm/llms/gemini/files/transformation.py +++ b/litellm/llms/gemini/files/transformation.py @@ -180,7 +180,25 @@ def transform_retrieve_file_request( optional_params: dict, litellm_params: dict, ) -> tuple[str, dict]: - raise NotImplementedError("GoogleAIStudioFilesHandler does not support file retrieval") + """ + Get the URL to retrieve a file from Google AI Studio. + + We expect file_id to be the URI (e.g. https://generativelanguage.googleapis.com/v1beta/files/...) + as returned by the upload response. + """ + api_key = litellm_params.get("api_key") + if not api_key: + raise ValueError("api_key is required") + + if file_id.startswith("http"): + url = "{}?key={}".format(file_id, api_key) + else: + # Fallback for just file name (files/...) 
+ api_base = self.get_api_base(litellm_params.get("api_base")) or "https://generativelanguage.googleapis.com" + api_base = api_base.rstrip("/") + url = "{}/v1beta/{}?key={}".format(api_base, file_id, api_key) + + return url, {"Content-Type": "application/json"} def transform_retrieve_file_response( self, @@ -188,7 +206,40 @@ def transform_retrieve_file_response( logging_obj: LiteLLMLoggingObj, litellm_params: dict, ) -> OpenAIFileObject: - raise NotImplementedError("GoogleAIStudioFilesHandler does not support file retrieval") + """ + Transform Gemini's file retrieval response into OpenAI-style FileObject + """ + try: + response_json = raw_response.json() + + # Map Gemini state to OpenAI status + gemini_state = response_json.get("state", "STATE_UNSPECIFIED") + status = "uploaded" # Default + if gemini_state == "ACTIVE": + status = "processed" + elif gemini_state == "FAILED": + status = "error" + + return OpenAIFileObject( + id=response_json.get("uri", ""), + bytes=int(response_json.get("sizeBytes", 0)), + created_at=int( + time.mktime( + time.strptime( + response_json["createTime"].replace("Z", "+00:00"), + "%Y-%m-%dT%H:%M:%S.%f%z", + ) + ) + ), + filename=response_json.get("displayName", ""), + object="file", + purpose="user_data", + status=status, + status_details=str(response_json.get("error", "")) if gemini_state == "FAILED" else None, + ) + except Exception as e: + verbose_logger.exception(f"Error parsing file retrieve response: {str(e)}") + raise ValueError(f"Error parsing file retrieve response: {str(e)}") def transform_delete_file_request( self, diff --git a/litellm/llms/hosted_vllm/embedding/transformation.py b/litellm/llms/hosted_vllm/embedding/transformation.py new file mode 100644 index 00000000000..9c3e8c6c7cc --- /dev/null +++ b/litellm/llms/hosted_vllm/embedding/transformation.py @@ -0,0 +1,180 @@ +""" +Hosted VLLM Embedding API Configuration. + +This module provides the configuration for hosted VLLM's Embedding API. 
+VLLM is OpenAI-compatible and supports embeddings via the /v1/embeddings endpoint. + +Docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html +""" + +from typing import TYPE_CHECKING, Any, List, Optional, Union + +import httpx + +from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig +from litellm.secret_managers.main import get_secret_str +from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues +from litellm.types.utils import EmbeddingResponse +from litellm.utils import convert_to_model_response_object + +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + + +class HostedVLLMEmbeddingError(BaseLLMException): + """Exception class for Hosted VLLM Embedding errors.""" + + pass + + +class HostedVLLMEmbeddingConfig(BaseEmbeddingConfig): + """ + Configuration for Hosted VLLM's Embedding API. + + Reference: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html + """ + + def validate_environment( + self, + headers: dict, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + ) -> dict: + """ + Validate environment and set up headers for Hosted VLLM API. 
+ """ + if api_key is None: + api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key" + + default_headers = { + "Content-Type": "application/json", + } + + # Only add Authorization header if api_key is not "fake-api-key" + if api_key and api_key != "fake-api-key": + default_headers["Authorization"] = f"Bearer {api_key}" + + # Merge with existing headers (user's headers take priority) + return {**default_headers, **headers} + + def get_complete_url( + self, + api_base: Optional[str], + api_key: Optional[str], + model: str, + optional_params: dict, + litellm_params: dict, + stream: Optional[bool] = None, + ) -> str: + """ + Get the complete URL for Hosted VLLM Embedding API endpoint. + """ + if api_base is None: + api_base = get_secret_str("HOSTED_VLLM_API_BASE") + if api_base is None: + raise ValueError("api_base is required for hosted_vllm embeddings") + + # Remove trailing slashes + api_base = api_base.rstrip("/") + + # Ensure the URL ends with /embeddings + if not api_base.endswith("/embeddings"): + api_base = f"{api_base}/embeddings" + + return api_base + + def transform_embedding_request( + self, + model: str, + input: AllEmbeddingInputValues, + optional_params: dict, + headers: dict, + ) -> dict: + """ + Transform embedding request to Hosted VLLM format (OpenAI-compatible). + """ + # Ensure input is a list + if isinstance(input, str): + input = [input] + + # Strip 'hosted_vllm/' prefix if present + if model.startswith("hosted_vllm/"): + model = model.replace("hosted_vllm/", "", 1) + + return { + "model": model, + "input": input, + **optional_params, + } + + def transform_embedding_response( + self, + model: str, + raw_response: httpx.Response, + model_response: EmbeddingResponse, + logging_obj: LiteLLMLoggingObj, + api_key: Optional[str], + request_data: dict, + optional_params: dict, + litellm_params: dict, + ) -> EmbeddingResponse: + """ + Transform embedding response from Hosted VLLM format (OpenAI-compatible). 
+ """ + logging_obj.post_call(original_response=raw_response.text) + + # VLLM returns standard OpenAI-compatible embedding response + response_json = raw_response.json() + + return convert_to_model_response_object( + response_object=response_json, + model_response_object=model_response, + response_type="embedding", + ) + + def get_supported_openai_params(self, model: str) -> list: + """ + Get list of supported OpenAI parameters for Hosted VLLM embeddings. + """ + return [ + "timeout", + "dimensions", + "encoding_format", + "user", + ] + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + """ + Map OpenAI parameters to Hosted VLLM format. + """ + for param, value in non_default_params.items(): + if param in self.get_supported_openai_params(model): + optional_params[param] = value + return optional_params + + def get_error_class( + self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] + ) -> BaseLLMException: + """ + Get the error class for Hosted VLLM errors. 
+ """ + return HostedVLLMEmbeddingError( + message=error_message, + status_code=status_code, + headers=headers, + ) diff --git a/litellm/main.py b/litellm/main.py index 99bf224c5b7..ac368317e6c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -2353,6 +2353,33 @@ def completion( # type: ignore # noqa: PLR0915 or "https://api.minimax.io/v1" ) + response = base_llm_http_handler.completion( + model=model, + messages=messages, + api_base=api_base, + custom_llm_provider=custom_llm_provider, + model_response=model_response, + encoding=_get_encoding(), + logging_obj=logging, + optional_params=optional_params, + timeout=timeout, + litellm_params=litellm_params, + shared_session=shared_session, + acompletion=acompletion, + stream=stream, + api_key=api_key, + headers=headers, + client=client, + provider_config=provider_config, + ) + logging.post_call( + input=messages, api_key=api_key, original_response=response + ) + elif custom_llm_provider == "hosted_vllm": + api_base = ( + api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE") + ) + response = base_llm_http_handler.completion( model=model, messages=messages, @@ -3591,9 +3618,9 @@ def completion( # type: ignore # noqa: PLR0915 "aws_region_name" not in optional_params or optional_params["aws_region_name"] is None ): - optional_params[ - "aws_region_name" - ] = aws_bedrock_client.meta.region_name + optional_params["aws_region_name"] = ( + aws_bedrock_client.meta.region_name + ) bedrock_route = BedrockModelInfo.get_bedrock_route(model) if bedrock_route == "converse": @@ -4753,9 +4780,32 @@ def embedding( # noqa: PLR0915 client=client, aembedding=aembedding, ) + elif custom_llm_provider == "hosted_vllm": + api_base = ( + api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE") + ) + + # set API KEY + if api_key is None: + api_key = litellm.api_key or get_secret_str("HOSTED_VLLM_API_KEY") + + response = base_llm_http_handler.embedding( + model=model, + input=input, + 
custom_llm_provider=custom_llm_provider, + api_base=api_base, + api_key=api_key, + logging_obj=logging, + timeout=timeout, + model_response=EmbeddingResponse(), + optional_params=optional_params, + client=client, + aembedding=aembedding, + litellm_params=litellm_params_dict, + headers=headers or {}, + ) elif ( custom_llm_provider == "openai_like" - or custom_llm_provider == "hosted_vllm" or custom_llm_provider == "llamafile" or custom_llm_provider == "lm_studio" ): @@ -5928,9 +5978,9 @@ def adapter_completion( new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs) # type: ignore - translated_response: Optional[ - Union[BaseModel, AdapterCompletionStreamWrapper] - ] = None + translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = ( + None + ) if isinstance(response, ModelResponse): translated_response = translation_obj.translate_completion_output_params( response=response @@ -6635,9 +6685,9 @@ def speech( # noqa: PLR0915 ElevenLabsTextToSpeechConfig.ELEVENLABS_QUERY_PARAMS_KEY ] = query_params - litellm_params_dict[ - ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY - ] = voice_id + litellm_params_dict[ElevenLabsTextToSpeechConfig.ELEVENLABS_VOICE_ID_KEY] = ( + voice_id + ) if api_base is not None: litellm_params_dict["api_base"] = api_base @@ -7143,9 +7193,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(content_chunks) > 0: - response["choices"][0]["message"][ - "content" - ] = processor.get_combined_content(content_chunks) + response["choices"][0]["message"]["content"] = ( + processor.get_combined_content(content_chunks) + ) thinking_blocks = [ chunk @@ -7156,9 +7206,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(thinking_blocks) > 0: - response["choices"][0]["message"][ - "thinking_blocks" - ] = processor.get_combined_thinking_content(thinking_blocks) + response["choices"][0]["message"]["thinking_blocks"] = ( + 
processor.get_combined_thinking_content(thinking_blocks) + ) reasoning_chunks = [ chunk @@ -7169,9 +7219,9 @@ def stream_chunk_builder( # noqa: PLR0915 ] if len(reasoning_chunks) > 0: - response["choices"][0]["message"][ - "reasoning_content" - ] = processor.get_combined_reasoning_content(reasoning_chunks) + response["choices"][0]["message"]["reasoning_content"] = ( + processor.get_combined_reasoning_content(reasoning_chunks) + ) annotation_chunks = [ chunk @@ -7197,6 +7247,23 @@ def stream_chunk_builder( # noqa: PLR0915 _choice = cast(Choices, response.choices[0]) _choice.message.audio = processor.get_combined_audio_content(audio_chunks) + # Handle image chunks from models like gemini-2.5-flash-image + # See: https://github.com/BerriAI/litellm/issues/19478 + image_chunks = [ + chunk + for chunk in chunks + if len(chunk["choices"]) > 0 + and "images" in chunk["choices"][0]["delta"] + and chunk["choices"][0]["delta"]["images"] is not None + ] + + if len(image_chunks) > 0: + # Images come complete in a single chunk, collect all images from all chunks + all_images = [] + for chunk in image_chunks: + all_images.extend(chunk["choices"][0]["delta"]["images"]) + response["choices"][0]["message"]["images"] = all_images + # Combine provider_specific_fields from streaming chunks (e.g., web_search_results, citations) # See: https://github.com/BerriAI/litellm/issues/17737 provider_specific_chunks = [ diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 51f3e6482a4..61b857dc473 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -237,6 +237,70 @@ async def combined_generator() -> AsyncGenerator[str, None]: ) +def _override_openai_response_model( + *, + response_obj: Any, + requested_model: str, + log_context: str, +) -> None: + """ + Force the OpenAI-compatible `model` field in the response to match what the client requested. 
+ + LiteLLM internally prefixes some provider/deployment model identifiers (e.g. `hosted_vllm/...`). + That internal identifier should not be returned to clients in the OpenAI `model` field. + + Note: This is intentionally verbose. A model mismatch is a useful signal that an internal + model identifier is being stamped/preserved somewhere in the request/response pipeline. + We log mismatches as warnings (and then restamp to the client-requested value) so these + paths stay observable for maintainers/operators without breaking client compatibility. + + Errors are reserved for cases where the proxy cannot read/override the response model field. + """ + if not requested_model: + return + + if isinstance(response_obj, dict): + downstream_model = response_obj.get("model") + if downstream_model != requested_model: + verbose_proxy_logger.warning( + "%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.", + log_context, + requested_model, + downstream_model, + ) + response_obj["model"] = requested_model + return + + if not hasattr(response_obj, "model"): + verbose_proxy_logger.error( + "%s: cannot override response model; missing `model` attribute. response_type=%s", + log_context, + type(response_obj), + ) + return + + downstream_model = getattr(response_obj, "model", None) + if downstream_model != requested_model: + verbose_proxy_logger.warning( + "%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.", + log_context, + requested_model, + downstream_model, + ) + + try: + setattr(response_obj, "model", requested_model) + except Exception as e: + verbose_proxy_logger.error( + "%s: failed to override response.model=%r on response_type=%s. 
error=%s", + log_context, + requested_model, + type(response_obj), + str(e), + exc_info=True, + ) + + def _get_cost_breakdown_from_logging_obj( litellm_logging_obj: Optional[LiteLLMLoggingObj], ) -> Tuple[Optional[float], Optional[float], Optional[float], Optional[float]]: @@ -625,6 +689,9 @@ async def base_process_llm_request( """ Common request processing logic for both chat completions and responses API endpoints """ + requested_model_from_client: Optional[str] = ( + self.data.get("model") if isinstance(self.data.get("model"), str) else None + ) if verbose_proxy_logger.isEnabledFor(logging.DEBUG): verbose_proxy_logger.debug( "Request received by LiteLLM:\n{}".format( @@ -694,13 +761,15 @@ async def base_process_llm_request( model_info = litellm_metadata.get("model_info", {}) or {} model_id = model_info.get("id", "") or "" - cache_key = hidden_params.get("cache_key", None) or "" - api_base = hidden_params.get("api_base", None) or "" - response_cost = hidden_params.get("response_cost", None) or "" - fastest_response_batch_completion = hidden_params.get( - "fastest_response_batch_completion", None + cache_key, api_base, response_cost = ( + hidden_params.get("cache_key", None) or "", + hidden_params.get("api_base", None) or "", + hidden_params.get("response_cost", None) or "", + ) + fastest_response_batch_completion, additional_headers = ( + hidden_params.get("fastest_response_batch_completion", None), + hidden_params.get("additional_headers", {}) or {}, ) - additional_headers: dict = hidden_params.get("additional_headers", {}) or {} # Post Call Processing if llm_router is not None: @@ -730,6 +799,13 @@ async def base_process_llm_request( litellm_logging_obj=logging_obj, **additional_headers, ) + + # Preserve the original client-requested model (pre-alias mapping) for downstream + # streaming generators. 
Pre-call processing can rewrite `self.data["model"]` for + # aliasing/routing, but the OpenAI-compatible response `model` field should reflect + # what the client sent. + if requested_model_from_client: + self.data["_litellm_client_requested_model"] = requested_model_from_client if route_type == "allm_passthrough_route": # Check if response is an async generator if self._is_streaming_response(response): @@ -789,6 +865,15 @@ async def base_process_llm_request( data=self.data, user_api_key_dict=user_api_key_dict, response=response ) + # Always return the client-requested model name (not provider-prefixed internal identifiers) + # for OpenAI-compatible responses. + if requested_model_from_client: + _override_openai_response_model( + response_obj=response, + requested_model=requested_model_from_client, + log_context=f"litellm_call_id={logging_obj.litellm_call_id}", + ) + hidden_params = ( getattr(response, "_hidden_params", {}) or {} ) # get any updated response headers diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py index d27e0036235..eddd64c36c7 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm/proxy/health_endpoints/_health_endpoints.py @@ -118,6 +118,7 @@ def _resolve_os_environ_variables(params: dict) -> dict: "email", "braintrust", "datadog", + "datadog_llm_observability", "generic_api", "arize", "sqs" @@ -190,6 +191,7 @@ async def health_services_endpoint( # noqa: PLR0915 "custom_callback_api", "langsmith", "datadog", + "datadog_llm_observability", "generic_api", "arize", "sqs" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index df6fd173c06..078ce0edf27 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -11,7 +11,8 @@ import time import traceback import warnings -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta +import enum from typing import ( TYPE_CHECKING, 
Any, @@ -28,7 +29,41 @@ get_origin, get_type_hints, ) - +from pydantic import BaseModel, Json + +from litellm.proxy._types import ( + ProxyException, + UserAPIKeyAuth, + LiteLLM_UserTable, + CommonProxyErrors, + LitellmUserRoles, + ConfigList, + ConfigYAML, + ConfigFieldUpdate, + ConfigGeneralSettings, + ConfigFieldInfo, + PassThroughGenericEndpoint, + FieldDetail, + ConfigFieldDelete, + CallbackDelete, + InvitationClaim, + InvitationModel, + InvitationNew, + InvitationUpdate, + InvitationDelete, + CallInfo, + Litellm_EntityType, + TeamDefaultSettings, + RoleBasedPermissions, + SupportedDBObjectType, + ProxyErrorTypes, + EnterpriseLicenseData, + LiteLLM_JWTAuth, + TokenCountRequest, + TransformRequestBody, + LiteLLM_TeamTable, + SpecialModelNames, +) from litellm._uuid import uuid from litellm.constants import ( AIOHTTP_CONNECTOR_LIMIT, @@ -45,6 +80,9 @@ LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS, LITELLM_SETTINGS_SAFE_DB_OVERRIDES, ) +from litellm.litellm_core_utils.litellm_logging import ( + _init_custom_logger_compatible_class, +) from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.proxy.common_utils.callback_utils import ( normalize_callback_names, @@ -2154,6 +2192,12 @@ def parse_search_tools(self, config: dict) -> Optional[List[SearchToolTypedDict] List of validated SearchToolTypedDict or None if not configured """ search_tools_raw = config.get("search_tools", None) + if not search_tools_raw: + # Check in general_settings + general_settings = config.get("general_settings", {}) + if general_settings: + search_tools_raw = general_settings.get("search_tools", None) + if not search_tools_raw: return None @@ -2898,9 +2942,6 @@ def _load_alerting_settings(self, general_settings: dict): """ Initialize alerting settings """ - from litellm.litellm_core_utils.litellm_logging import ( - _init_custom_logger_compatible_class, - ) _alerting_callbacks = general_settings.get("alerting", None) 
verbose_proxy_logger.debug(f"_alerting_callbacks: {general_settings}") @@ -3198,6 +3239,8 @@ async def _update_llm_router( verbose_proxy_logger.debug(f"updated llm_router: {llm_router}") else: verbose_proxy_logger.debug(f"len new_models: {len(models_list)}") + if search_tools is not None and llm_router is not None: + llm_router.search_tools = search_tools ## DELETE MODEL LOGIC await self._delete_deployment(db_models=models_list) @@ -4579,6 +4622,68 @@ async def async_assistants_data_generator( yield f"data: {error_returned}\n\n" +def _get_client_requested_model_for_streaming(request_data: dict) -> str: + """ + Prefer the original client-requested model (pre-alias mapping) when available. + + Pre-call processing can rewrite `request_data["model"]` for aliasing/routing purposes. + The OpenAI-compatible public `model` field should reflect what the client sent. + """ + requested_model = request_data.get("_litellm_client_requested_model") + if isinstance(requested_model, str): + return requested_model + + requested_model = request_data.get("model") + return requested_model if isinstance(requested_model, str) else "" + + +def _restamp_streaming_chunk_model( + *, + chunk: Any, + requested_model_from_client: str, + request_data: dict, + model_mismatch_logged: bool, +) -> Tuple[Any, bool]: + # Always return the client-requested model name (not provider-prefixed internal identifiers) + # on streaming chunks. + # + # Note: This warning is intentionally verbose. A mismatch is a useful signal that an + # internal provider/deployment identifier is leaking into the public API, and helps + # maintainers/operators catch regressions while preserving OpenAI-compatible output. 
+ if not requested_model_from_client or not isinstance(chunk, (BaseModel, dict)): + return chunk, model_mismatch_logged + + downstream_model = ( + chunk.get("model") if isinstance(chunk, dict) else getattr(chunk, "model", None) + ) + if not model_mismatch_logged and downstream_model != requested_model_from_client: + verbose_proxy_logger.warning( + "litellm_call_id=%s: streaming chunk model mismatch - requested=%r downstream=%r. Overriding model to requested.", + request_data.get("litellm_call_id"), + requested_model_from_client, + downstream_model, + ) + model_mismatch_logged = True + + if isinstance(chunk, dict): + chunk["model"] = requested_model_from_client + return chunk, model_mismatch_logged + + try: + setattr(chunk, "model", requested_model_from_client) + except Exception as e: + verbose_proxy_logger.error( + "litellm_call_id=%s: failed to override chunk.model=%r on chunk_type=%s. error=%s", + request_data.get("litellm_call_id"), + requested_model_from_client, + type(chunk), + str(e), + exc_info=True, + ) + + return chunk, model_mismatch_logged + + async def async_data_generator( response, user_api_key_dict: UserAPIKeyAuth, request_data: dict ): @@ -4587,6 +4692,10 @@ async def async_data_generator( # Use a list to accumulate response segments to avoid O(n^2) string concatenation str_so_far_parts: list[str] = [] error_message: Optional[str] = None + requested_model_from_client = _get_client_requested_model_for_streaming( + request_data=request_data + ) + model_mismatch_logged = False async for chunk in proxy_logging_obj.async_post_call_streaming_iterator_hook( user_api_key_dict=user_api_key_dict, response=response, @@ -4608,6 +4717,13 @@ async def async_data_generator( response_str = litellm.get_response_string(response_obj=chunk) str_so_far_parts.append(response_str) + chunk, model_mismatch_logged = _restamp_streaming_chunk_model( + chunk=chunk, + requested_model_from_client=requested_model_from_client, + request_data=request_data, + 
model_mismatch_logged=model_mismatch_logged, + ) + if isinstance(chunk, BaseModel): chunk = chunk.model_dump_json(exclude_none=True, exclude_unset=True) elif isinstance(chunk, str) and chunk.startswith("data: "): diff --git a/litellm/utils.py b/litellm/utils.py index d7fb4855a48..61a446564dc 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -771,13 +771,15 @@ def function_setup( # noqa: PLR0915 function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None ## LAZY LOAD COROUTINE CHECKER ## - get_coroutine_checker_fn = getattr(sys.modules[__name__], "get_coroutine_checker") + get_coroutine_checker_fn = getattr( + sys.modules[__name__], "get_coroutine_checker" + ) coroutine_checker = get_coroutine_checker_fn() ## DYNAMIC CALLBACKS ## - dynamic_callbacks: Optional[ - List[Union[str, Callable, "CustomLogger"]] - ] = kwargs.pop("callbacks", None) + dynamic_callbacks: Optional[List[Union[str, Callable, "CustomLogger"]]] = ( + kwargs.pop("callbacks", None) + ) all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks) if len(all_callbacks) > 0: @@ -1660,9 +1662,9 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -1709,9 +1711,9 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 exception=e, retry_policy=kwargs.get("retry_policy"), ) - kwargs[ - "retry_policy" - ] = reset_retry_policy() # prevent infinite loops + kwargs["retry_policy"] = ( + reset_retry_policy() + ) # prevent infinite loops litellm.num_retries = ( None # set retries to None to prevent infinite loops ) @@ -3640,10 +3642,10 @@ def pre_process_non_default_params( if "response_format" in non_default_params: if provider_config is not None: - non_default_params[ - "response_format" - ] = 
provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] + non_default_params["response_format"] = ( + provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) ) else: non_default_params["response_format"] = type_to_response_format_param( @@ -3772,16 +3774,16 @@ def pre_process_optional_params( True # so that main.py adds the function call to the prompt ) if "tools" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("tools") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("tools") + ) non_default_params.pop( "tool_choice", None ) # causes ollama requests to hang elif "functions" in non_default_params: - optional_params[ - "functions_unsupported_model" - ] = non_default_params.pop("functions") + optional_params["functions_unsupported_model"] = ( + non_default_params.pop("functions") + ) elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -4937,9 +4939,9 @@ def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) return delta if isinstance(delta, str) else "" # Handle standard ModelResponse and ModelResponseStream - _choices: Union[ - List[Union[Choices, StreamingChoices]], List[StreamingChoices] - ] = response_obj.choices + _choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = ( + response_obj.choices + ) # Use list accumulation to avoid O(n^2) string concatenation across choices response_parts: List[str] = [] @@ -7714,25 +7716,29 @@ def validate_chat_completion_tool_choice( f"Invalid tool choice, tool_choice={tool_choice}. Got={type(tool_choice)}. Expecting str, or dict. 
Please ensure tool_choice follows the OpenAI tool_choice spec" ) -def validate_openai_optional_params( - stop: Optional[Union[str, List[str]]] = None, - **kwargs -) -> Optional[Union[str, List[str]]]: - """ - Validates and fixes OpenAI optional parameters. - - Args: - stop: Stop sequences (string or list of strings) - **kwargs: Additional optional parameters - - Returns: - Validated stop parameter (truncated to 4 elements if needed) - """ - if stop is not None and isinstance(stop, list) and not litellm.disable_stop_sequence_limit: + +def validate_openai_optional_params( + stop: Optional[Union[str, List[str]]] = None, **kwargs +) -> Optional[Union[str, List[str]]]: + """ + Validates and fixes OpenAI optional parameters. + + Args: + stop: Stop sequences (string or list of strings) + **kwargs: Additional optional parameters + + Returns: + Validated stop parameter (truncated to 4 elements if needed) + """ + if ( + stop is not None + and isinstance(stop, list) + and not litellm.disable_stop_sequence_limit + ): # Truncate to 4 elements if more are provided as openai only supports up to 4 stop sequences - if len(stop) > 4: - stop = stop[:4] - + if len(stop) > 4: + stop = stop[:4] + return stop @@ -8061,6 +8067,8 @@ def get_provider_embedding_config( return VercelAIGatewayEmbeddingConfig() elif litellm.LlmProviders.GIGACHAT == provider: return litellm.GigaChatEmbeddingConfig() + elif litellm.LlmProviders.HOSTED_VLLM == provider: + return litellm.HostedVLLMEmbeddingConfig() elif litellm.LlmProviders.SAGEMAKER == provider: from litellm.llms.sagemaker.embedding.transformation import ( SagemakerEmbeddingConfig, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 6a605f460d4..8dd02537923 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -23790,6 +23790,20 @@ "output_cost_per_token": 6.5e-07, "supports_tool_choice": true }, + "openrouter/moonshotai/kimi-k2.5": { + 
"cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://openrouter.ai/moonshotai/kimi-k2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, "openrouter/nousresearch/nous-hermes-llama2-13b": { "input_cost_per_token": 2e-07, "litellm_provider": "openrouter", diff --git a/tests/litellm/test_stream_chunk_builder_images.py b/tests/litellm/test_stream_chunk_builder_images.py new file mode 100644 index 00000000000..c51a14ede67 --- /dev/null +++ b/tests/litellm/test_stream_chunk_builder_images.py @@ -0,0 +1,242 @@ +""" +Test that stream_chunk_builder correctly preserves images from streaming chunks. + +This tests the fix for https://github.com/BerriAI/litellm/issues/19478 +where images from models like gemini-2.5-flash-image were lost when +rebuilding the response from streaming chunks. +""" +import pytest +import litellm +from litellm import stream_chunk_builder + + +def test_stream_chunk_builder_preserves_images(): + """ + Test that stream_chunk_builder correctly preserves images from streaming chunks. 
+ """ + # Simulate streaming chunks from an image generation model + init_chunks = [ + { + "id": "chatcmpl-image-test", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-image-test", + "choices": [ + { + "index": 0, + "delta": { + "images": [ + { + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==", + "detail": "auto" + }, + "index": 0, + "type": "image_url" + } + ], + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-image-test", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + + response = stream_chunk_builder(chunks=chunks) + + # Verify that images are preserved in the rebuilt response + assert response.choices[0].message.images is not None, "Images should be preserved in stream_chunk_builder" + assert len(response.choices[0].message.images) == 1, "Should have exactly 1 image" + assert response.choices[0].message.images[0]["type"] == "image_url" + assert "base64" in response.choices[0].message.images[0]["image_url"]["url"] + + +def test_stream_chunk_builder_preserves_multiple_images(): + """ + Test that stream_chunk_builder correctly preserves multiple images from different chunks. 
+ """ + init_chunks = [ + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "Here are your images:", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": { + "images": [ + { + "image_url": {"url": "data:image/png;base64,image1data", "detail": "auto"}, + "index": 0, + "type": "image_url" + } + ], + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": { + "images": [ + { + "image_url": {"url": "data:image/png;base64,image2data", "detail": "auto"}, + "index": 1, + "type": "image_url" + } + ], + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-multi-image-test", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "created": 1737654321, + "model": "gemini/gemini-2.5-flash-image", + "object": "chat.completion.chunk", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + + response = stream_chunk_builder(chunks=chunks) + + # Verify content is preserved + assert response.choices[0].message.content == "Here are your images:" + + # Verify all images are preserved + assert response.choices[0].message.images is not None, "Images should be preserved" + assert len(response.choices[0].message.images) == 2, "Should have exactly 2 images" + assert "image1data" in response.choices[0].message.images[0]["image_url"]["url"] + assert "image2data" in response.choices[0].message.images[1]["image_url"]["url"] + + +def 
test_stream_chunk_builder_no_images(): + """ + Test that stream_chunk_builder works correctly when there are no images (regression test). + """ + init_chunks = [ + { + "id": "chatcmpl-no-image-test", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "Hello, ", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gpt-4", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-no-image-test", + "choices": [ + { + "index": 0, + "delta": { + "content": "world!", + }, + "finish_reason": None, + } + ], + "created": 1737654321, + "model": "gpt-4", + "object": "chat.completion.chunk", + }, + { + "id": "chatcmpl-no-image-test", + "choices": [ + { + "index": 0, + "delta": {}, + "finish_reason": "stop", + } + ], + "created": 1737654321, + "model": "gpt-4", + "object": "chat.completion.chunk", + }, + ] + + chunks = [] + for chunk in init_chunks: + chunks.append(litellm.ModelResponse(**chunk, stream=True)) + + response = stream_chunk_builder(chunks=chunks) + + # Verify content is preserved + assert response.choices[0].message.content == "Hello, world!" 
+ + # Verify images attribute doesn't exist or is None (no images in this stream) + images = getattr(response.choices[0].message, 'images', None) + assert images is None, "Should not have images when none were in the stream" diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py index 9b3b6aeaea1..ddc01db0ec7 100644 --- a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py +++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_chat_transformation.py @@ -1,10 +1,7 @@ import json import os import sys -from unittest.mock import AsyncMock, MagicMock, patch - -import httpx -import pytest +from unittest.mock import MagicMock, patch sys.path.insert( 0, os.path.abspath("../../../../..") @@ -47,15 +44,34 @@ def test_hosted_vllm_chat_transformation_file_url(): def test_hosted_vllm_chat_transformation_with_audio_url(): from litellm import completion - from litellm.llms.custom_httpx.http_handler import HTTPHandler - - client = MagicMock() - with patch.object( - client.chat.completions.with_raw_response, "create", return_value=MagicMock() - ) as mock_post: + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "llama-3.1-70b-instruct", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Test response"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + mock_response.text = json.dumps(mock_response.json.return_value) + mock_client.post.return_value = mock_response + + with patch( + "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client", + return_value=mock_client, + ): try: - response = 
completion( + completion( model="hosted_vllm/llama-3.1-70b-instruct", messages=[ { @@ -68,14 +84,15 @@ def test_hosted_vllm_chat_transformation_with_audio_url(): ], }, ], - client=client, + api_base="https://test-vllm.example.com/v1", ) - except Exception as e: - print(f"Error: {e}") + except Exception: + pass - mock_post.assert_called_once() - print(f"mock_post.call_args.kwargs: {mock_post.call_args.kwargs}") - assert mock_post.call_args.kwargs["messages"] == [ + mock_client.post.assert_called_once() + call_kwargs = mock_client.post.call_args[1] + request_data = json.loads(call_kwargs["data"]) + assert request_data["messages"] == [ { "role": "user", "content": [ diff --git a/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py new file mode 100644 index 00000000000..8f98b3ca8f1 --- /dev/null +++ b/tests/test_litellm/llms/hosted_vllm/chat/test_hosted_vllm_ssl_verify.py @@ -0,0 +1,152 @@ +""" +Test SSL verification for hosted_vllm provider. + +This test ensures that the ssl_verify parameter is properly passed through +to the HTTP client when using the hosted_vllm provider. + +Issue: ssl_verify parameter was being ignored because hosted_vllm fell through +to the OpenAI catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client. 
+""" + +import os +import sys +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +import litellm + + +class TestHostedVLLMSSLVerify: + """Test suite for SSL verification in hosted_vllm provider.""" + + @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client") + def test_hosted_vllm_ssl_verify_false_sync(self, mock_get_httpx_client): + """Test that ssl_verify=False is passed to the HTTP client for sync calls.""" + # Setup mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test response", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + } + mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}' + mock_client.post.return_value = mock_response + mock_get_httpx_client.return_value = mock_client + + try: + litellm.completion( + model="hosted_vllm/test-model", + messages=[{"role": "user", "content": "Hello"}], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify _get_httpx_client was called with ssl_verify=False + mock_get_httpx_client.assert_called() + call_args = 
mock_get_httpx_client.call_args + + # Check that params contains ssl_verify=False + if call_args[0]: + # Positional argument + params = call_args[0][0] + else: + # Keyword argument + params = call_args[1].get("params", {}) + + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client") + @pytest.mark.asyncio + async def test_hosted_vllm_ssl_verify_false_async( + self, mock_get_async_httpx_client + ): + """Test that ssl_verify=False is passed to the HTTP client for async calls.""" + # Setup mock async client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1234567890, + "model": "test-model", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Test response", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + } + mock_response.text = '{"id": "chatcmpl-test", "object": "chat.completion", "created": 1234567890, "model": "test-model", "choices": [{"index": 0, "message": {"role": "assistant", "content": "Test response"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}' + + async def mock_post(*args, **kwargs): + return mock_response + + mock_client.post = mock_post + mock_get_async_httpx_client.return_value = mock_client + + try: + await litellm.acompletion( + model="hosted_vllm/test-model", + messages=[{"role": "user", "content": "Hello"}], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # 
Verify get_async_httpx_client was called with ssl_verify=False + mock_get_async_httpx_client.assert_called() + call_kwargs = mock_get_async_httpx_client.call_args[1] + + # Check that params contains ssl_verify=False + params = call_kwargs.get("params", {}) + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"]) diff --git a/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py new file mode 100644 index 00000000000..bb911814c23 --- /dev/null +++ b/tests/test_litellm/llms/hosted_vllm/embedding/test_hosted_vllm_embedding_ssl_verify.py @@ -0,0 +1,140 @@ +""" +Test SSL verification for hosted_vllm provider embeddings. + +This test ensures that the ssl_verify parameter is properly passed through +to the HTTP client when using the hosted_vllm provider for embeddings. + +Issue: ssl_verify parameter was being ignored because hosted_vllm fell through +to the openai_like catch-all path in main.py, which doesn't pass ssl_verify to the HTTP client. 
+""" + +import os +import sys +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert( + 0, os.path.abspath("../../../../..") +) # Adds the parent directory to the system path + +import litellm + + +class TestHostedVLLMEmbeddingSSLVerify: + """Test suite for SSL verification in hosted_vllm provider embeddings.""" + + @patch("litellm.llms.custom_httpx.llm_http_handler._get_httpx_client") + def test_hosted_vllm_embedding_ssl_verify_false_sync(self, mock_get_httpx_client): + """Test that ssl_verify=False is passed to the HTTP client for sync embedding calls.""" + # Setup mock client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.1, 0.2, 0.3, 0.4, 0.5], + } + ], + "model": "text-embedding-model", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5, + }, + } + mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}' + mock_client.post.return_value = mock_response + mock_get_httpx_client.return_value = mock_client + + try: + litellm.embedding( + model="hosted_vllm/text-embedding-model", + input=["hello world"], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify _get_httpx_client was called with ssl_verify=False + mock_get_httpx_client.assert_called() + call_args = mock_get_httpx_client.call_args + + # Check that params contains ssl_verify=False + if call_args[0]: + # Positional argument + params = call_args[0][0] + else: + # Keyword argument + params = 
call_args[1].get("params", {}) + + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + @patch("litellm.llms.custom_httpx.llm_http_handler.get_async_httpx_client") + @pytest.mark.asyncio + async def test_hosted_vllm_embedding_ssl_verify_false_async( + self, mock_get_async_httpx_client + ): + """Test that ssl_verify=False is passed to the HTTP client for async embedding calls.""" + # Setup mock async client + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = { + "object": "list", + "data": [ + { + "object": "embedding", + "index": 0, + "embedding": [0.1, 0.2, 0.3, 0.4, 0.5], + } + ], + "model": "text-embedding-model", + "usage": { + "prompt_tokens": 5, + "total_tokens": 5, + }, + } + mock_response.text = '{"object": "list", "data": [{"object": "embedding", "index": 0, "embedding": [0.1, 0.2, 0.3, 0.4, 0.5]}], "model": "text-embedding-model", "usage": {"prompt_tokens": 5, "total_tokens": 5}}' + + async def mock_post(*args, **kwargs): + return mock_response + + mock_client.post = mock_post + mock_get_async_httpx_client.return_value = mock_client + + try: + await litellm.aembedding( + model="hosted_vllm/text-embedding-model", + input=["hello world"], + api_base="https://test-vllm.example.com/v1", + ssl_verify=False, + ) + except Exception: + # Even if the response parsing fails, we just need to verify + # that the mock was called with the correct ssl_verify parameter + pass + + # Verify get_async_httpx_client was called with ssl_verify=False + mock_get_async_httpx_client.assert_called() + call_kwargs = mock_get_async_httpx_client.call_args[1] + + # Check that params contains ssl_verify=False + params = call_kwargs.get("params", {}) + assert ( + params.get("ssl_verify") is False + ), f"Expected ssl_verify=False in params, got {params}" + + +if __name__ == "__main__": + 
def _image_tool_message(call_id: str, url: str) -> dict:
    """OpenAI-style tool message whose content is a single image_url part."""
    return {
        "role": "tool",
        "tool_call_id": call_id,
        "content": [{"type": "image_url", "image_url": {"url": url}}],
    }


def _tool_call_conversation(call_id: str, url: str, user_text: str) -> list:
    """Full user -> assistant(tool_call) -> tool(image result) conversation."""
    return [
        {"role": "user", "content": user_text},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": call_id,
                    "type": "function",
                    "function": {"name": "get_image", "arguments": "{}"},
                }
            ],
        },
        _image_tool_message(call_id, url),
    ]


def _first_tool_result_image(converted_messages):
    """Return the first image block inside any tool_result, or None if absent."""
    for msg in converted_messages:
        if msg.get("role") != "user":
            continue
        for part in msg.get("content", []):
            if isinstance(part, dict) and part.get("type") == "tool_result":
                for item in part.get("content", []):
                    if isinstance(item, dict) and item.get("type") == "image":
                        return item
    return None


class TestToolMessageImageURLHandling:
    """
    Tool messages carrying image_url content must be converted to base64 for
    Vertex AI (which cannot fetch remote URLs), while the regular Anthropic
    API keeps the URL form.

    Issue: https://github.com/BerriAI/litellm/issues/19891
    """

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_convert_to_anthropic_tool_result_with_force_base64(
        self, mock_convert_url: MagicMock
    ):
        """force_base64=True converts the tool-result image URL to base64."""
        mock_convert_url.return_value = "data:image/jpeg;base64,/9j/4AAQSkZJRg=="

        result = convert_to_anthropic_tool_result(
            _image_tool_message("call_123", "https://example.com/tool_result.jpg"),
            force_base64=True,
        )

        mock_convert_url.assert_called_once_with(url="https://example.com/tool_result.jpg")
        assert result["type"] == "tool_result"
        assert result["tool_use_id"] == "call_123"

        content = result["content"]
        assert len(content) == 1
        assert content[0]["type"] == "image"
        assert content[0]["source"]["type"] == "base64"

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_convert_to_anthropic_tool_result_without_force_base64(
        self, mock_convert_url: MagicMock
    ):
        """force_base64=False keeps the URL form and never fetches the image."""
        result = convert_to_anthropic_tool_result(
            _image_tool_message("call_456", "https://example.com/image.jpg"),
            force_base64=False,
        )

        mock_convert_url.assert_not_called()
        assert result["type"] == "tool_result"

        content = result["content"]
        assert len(content) == 1
        assert content[0]["type"] == "image"
        assert content[0]["source"]["type"] == "url"

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_vertex_ai_tool_message_converts_image_to_base64(
        self, mock_convert_url: MagicMock
    ):
        """Full conversation via Vertex AI: tool-result image URL becomes base64."""
        mock_convert_url.return_value = "data:image/jpeg;base64,/9j/4AAQSkZJRg=="

        result = anthropic_messages_pt(
            messages=_tool_call_conversation(
                "call_789",
                "https://example.com/result.jpg",
                "Get me an image and describe it",
            ),
            model="claude-sonnet-4",
            llm_provider="vertex_ai",
        )

        mock_convert_url.assert_called_once_with(url="https://example.com/result.jpg")

        image = _first_tool_result_image(result)
        if image is None:
            pytest.fail("Could not find image in tool result")
        assert image["source"]["type"] == "base64"

    @patch("litellm.litellm_core_utils.prompt_templates.factory.convert_url_to_base64")
    def test_regular_anthropic_tool_message_uses_url(
        self, mock_convert_url: MagicMock
    ):
        """Full conversation via plain Anthropic: tool-result image stays a URL."""
        result = anthropic_messages_pt(
            messages=_tool_call_conversation(
                "call_abc", "https://example.com/image.jpg", "Get me an image"
            ),
            model="claude-sonnet-4",
            llm_provider="anthropic",
        )

        mock_convert_url.assert_not_called()

        image = _first_tool_result_image(result)
        if image is None:
            pytest.fail("Could not find image in tool result")
        assert image["source"]["type"] == "url"
+ """ + from litellm.proxy.health_endpoints._health_endpoints import ( + health_services_endpoint, + ) + + # Mock datadog_llm_observability to be in success_callback so the generic branch handles it + with patch("litellm.success_callback", ["datadog_llm_observability"]): + result = await health_services_endpoint( + service="datadog_llm_observability" + ) + + # Should not raise HTTPException(400) and should return success + assert result["status"] == "success" + assert "datadog_llm_observability" in result["message"] + + +@pytest.mark.asyncio +async def test_health_services_endpoint_rejects_unknown_service(): + """ + Verify that an unknown service name is rejected with a 400 error. + """ + from litellm.proxy._types import ProxyException + + with pytest.raises(ProxyException): + await health_services_endpoint( + service="totally_unknown_service_xyz" + ) + + @pytest.fixture(scope="function") def proxy_client(monkeypatch): """ diff --git a/tests/test_litellm/proxy/test_response_model_sanitization.py b/tests/test_litellm/proxy/test_response_model_sanitization.py new file mode 100644 index 00000000000..b1bb8d0ed39 --- /dev/null +++ b/tests/test_litellm/proxy/test_response_model_sanitization.py @@ -0,0 +1,217 @@ +import asyncio +import json +import os +import sys +from typing import AsyncGenerator +from unittest.mock import AsyncMock, MagicMock + +import pytest +import yaml +from fastapi.testclient import TestClient + +sys.path.insert(0, os.path.abspath("../../..")) + +import litellm + +pytestmark = pytest.mark.flaky(condition=False) + + +def _initialize_proxy_with_config(config: dict, tmp_path) -> TestClient: + """ + Initialize the proxy server with a temporary config file and return a TestClient. + + IMPORTANT: proxy_server.initialize() mutates module-level globals. We must call + cleanup_router_config_variables() before initializing to prevent cross-test bleed. 
+ """ + from litellm.proxy.proxy_server import app, cleanup_router_config_variables, initialize + + cleanup_router_config_variables() + + config_fp = tmp_path / "proxy_config.yaml" + config_fp.write_text(yaml.safe_dump(config)) + + asyncio.run(initialize(config=str(config_fp), debug=True)) + return TestClient(app) + + +def _make_minimal_chat_completion_response(model: str) -> litellm.ModelResponse: + response = litellm.ModelResponse() + response.model = model + response.choices[0].message.content = "hello" # type: ignore[union-attr] + response.choices[0].finish_reason = "stop" # type: ignore[union-attr] + return response + + +def _make_model_response_stream_chunk(model: str) -> litellm.ModelResponseStream: + """ + Create a minimal OpenAI-compatible chat.completion.chunk object. + """ + chunk_dict = { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": model, + "choices": [ + { + "index": 0, + "delta": {"role": "assistant", "content": "hi"}, + "finish_reason": None, + } + ], + } + return litellm.ModelResponseStream(**chunk_dict) + + +def test_proxy_chat_completion_does_not_return_provider_prefixed_model(tmp_path, monkeypatch): + """ + Regression test: + + - Client asks for `model="vllm-model"` (no provider prefix) + - Internal provider path uses `hosted_vllm/...` + - Proxy should not leak `hosted_vllm/` in the client-facing `model` field. + """ + client_model = "vllm-model" + internal_model = f"hosted_vllm/{client_model}" + + client = _initialize_proxy_with_config( + config={ + "general_settings": {"master_key": "sk-1234"}, + "model_list": [ + { + "model_name": client_model, + "litellm_params": {"model": internal_model}, + } + ], + }, + tmp_path=tmp_path, + ) + + # Patch router call to avoid making any real network request. 
+ from litellm.proxy import proxy_server + + monkeypatch.setattr( + proxy_server.llm_router, # type: ignore[arg-type] + "acompletion", + AsyncMock(return_value=_make_minimal_chat_completion_response(model=internal_model)), + ) + + # Also no-op proxy logging hooks to keep this test focused and deterministic. + monkeypatch.setattr(proxy_server.proxy_logging_obj, "during_call_hook", AsyncMock(return_value=None)) + monkeypatch.setattr(proxy_server.proxy_logging_obj, "update_request_status", AsyncMock(return_value=None)) + monkeypatch.setattr(proxy_server.proxy_logging_obj, "post_call_success_hook", AsyncMock(side_effect=lambda **kwargs: kwargs["response"])) + + resp = client.post( + "/v1/chat/completions", + headers={"Authorization": "Bearer sk-1234"}, + json={"model": client_model, "messages": [{"role": "user", "content": "hi"}]}, + ) + + assert resp.status_code == 200, resp.text + body = resp.json() + assert body["model"] == client_model + assert not body["model"].startswith("hosted_vllm/") + + +@pytest.mark.asyncio +async def test_proxy_streaming_chunks_do_not_return_provider_prefixed_model(monkeypatch): + """ + Regression test for streaming: + + Even if a streaming chunk contains `model="hosted_vllm/<...>"`, the proxy SSE layer + should not leak the provider prefix to the client. + """ + client_model = "vllm-model" + internal_model = f"hosted_vllm/{client_model}" + + from litellm.proxy._types import UserAPIKeyAuth + from litellm.proxy import proxy_server + + # Patch proxy_logging_obj hooks so async_data_generator yields exactly our chunk. 
+ async def _iterator_hook( + user_api_key_dict: UserAPIKeyAuth, + response: AsyncGenerator, + request_data: dict, + ): + yield _make_model_response_stream_chunk(model=internal_model) + + monkeypatch.setattr(proxy_server.proxy_logging_obj, "async_post_call_streaming_iterator_hook", _iterator_hook) + monkeypatch.setattr( + proxy_server.proxy_logging_obj, + "async_post_call_streaming_hook", + AsyncMock(side_effect=lambda **kwargs: kwargs["response"]), + ) + + user_api_key_dict = UserAPIKeyAuth(api_key="sk-1234") + + gen = proxy_server.async_data_generator( + response=MagicMock(), + user_api_key_dict=user_api_key_dict, + request_data={"model": client_model}, + ) + + chunks = [] + async for item in gen: + chunks.append(item) + + # First chunk is expected to be JSON, last chunk is [DONE] + assert len(chunks) >= 2 + first = chunks[0] + assert first.startswith("data: ") + + payload = json.loads(first[len("data: ") :].strip()) + assert payload["model"] == client_model + assert not payload["model"].startswith("hosted_vllm/") + + +@pytest.mark.asyncio +async def test_proxy_streaming_chunks_use_client_requested_model_before_alias_mapping(monkeypatch): + """ + Regression test for alias mapping on streaming: + + - `common_processing_pre_call_logic` can rewrite `request_data["model"]` via model_alias_map / key-specific aliases. + - Non-streaming responses are restamped using the original client-requested model (captured before the rewrite). + - Streaming chunks must do the same to avoid mismatched `model` values between streaming and non-streaming. 
+ """ + client_model_alias = "alias-model" + canonical_model = "vllm-model" + internal_model = f"hosted_vllm/{canonical_model}" + + from litellm.proxy._types import UserAPIKeyAuth + from litellm.proxy import proxy_server + + async def _iterator_hook( + user_api_key_dict: UserAPIKeyAuth, + response: AsyncGenerator, + request_data: dict, + ): + yield _make_model_response_stream_chunk(model=internal_model) + + monkeypatch.setattr(proxy_server.proxy_logging_obj, "async_post_call_streaming_iterator_hook", _iterator_hook) + monkeypatch.setattr( + proxy_server.proxy_logging_obj, + "async_post_call_streaming_hook", + AsyncMock(side_effect=lambda **kwargs: kwargs["response"]), + ) + + user_api_key_dict = UserAPIKeyAuth(api_key="sk-1234") + + gen = proxy_server.async_data_generator( + response=MagicMock(), + user_api_key_dict=user_api_key_dict, + request_data={ + "model": canonical_model, + "_litellm_client_requested_model": client_model_alias, + }, + ) + + chunks = [] + async for item in gen: + chunks.append(item) + + assert len(chunks) >= 2 + first = chunks[0] + assert first.startswith("data: ") + + payload = json.loads(first[len("data: ") :].strip()) + assert payload["model"] == client_model_alias + assert not payload["model"].startswith("hosted_vllm/") diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index f6c24d19df5..6a79fd0823b 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -2543,6 +2543,48 @@ def test_model_info_for_vertex_ai_deepseek_model(): print("vertex deepseek model info", model_info) +def test_model_info_for_openrouter_kimi_k2_5(): + """ + Test that openrouter/moonshotai/kimi-k2.5 model info is correctly configured + in model_prices_and_context_window.json. 
+ + Model properties from OpenRouter API: + - context_length: 262144 + - pricing: prompt=$0.0000006, completion=$0.000003, input_cache_read=$0.0000001 + - modality: text+image->text (supports vision) + - supports: tool_choice, tools (function calling) + """ + import json + from pathlib import Path + + # Load directly from the local JSON file + json_path = Path(__file__).parents[2] / "model_prices_and_context_window.json" + with open(json_path) as f: + model_cost = json.load(f) + + model_info = model_cost.get("openrouter/moonshotai/kimi-k2.5") + assert model_info is not None, "Model not found in model_prices_and_context_window.json" + assert model_info["litellm_provider"] == "openrouter" + assert model_info["mode"] == "chat" + + # Verify context window + assert model_info["max_input_tokens"] == 262144 + assert model_info["max_output_tokens"] == 262144 + assert model_info["max_tokens"] == 262144 + + # Verify pricing + assert model_info["input_cost_per_token"] == 6e-07 + assert model_info["output_cost_per_token"] == 3e-06 + assert model_info["cache_read_input_token_cost"] == 1e-07 + + # Verify capabilities + assert model_info["supports_vision"] is True + assert model_info["supports_function_calling"] is True + assert model_info["supports_tool_choice"] is True + + print("openrouter kimi-k2.5 model info", model_info) + + class TestGetValidModelsWithCLI: """Test get_valid_models function as used in CLI token usage""" diff --git a/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts b/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts index aa8532b80c7..523470e34d0 100644 --- a/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts +++ b/ui/litellm-dashboard/src/components/prompts/prompt_editor_view/utils.ts @@ -74,6 +74,144 @@ export const convertToDotPrompt = (prompt: PromptType): string => { return result.trim(); }; +type ParsedFrontmatter = { + model?: string; + config: { + temperature?: number; + max_tokens?: 
number; + top_p?: number; + }; + tools: Tool[]; +}; + +const parseNumber = (raw: string): number | undefined => { + const value = Number(raw); + return Number.isFinite(value) ? value : undefined; +}; + +const parseToolsFromFrontmatter = (lines: string[]): Tool[] => { + const tools: Tool[] = []; + let inToolsBlock = false; + + for (const line of lines) { + const trimmed = line.trim(); + + if (!inToolsBlock) { + if (trimmed === "tools:" || trimmed.startsWith("tools:")) { + inToolsBlock = true; + } + continue; + } + + // New top-level key ends the tools block + if (line.length > 0 && !/^\s/.test(line) && trimmed !== "-" && !trimmed.startsWith("-")) { + break; + } + + const match = trimmed.match(/^-+\s*(.+)$/); + if (!match) continue; + + const rawJson = match[1].trim(); + if (!rawJson) continue; + + try { + const toolObj = JSON.parse(rawJson); + tools.push({ + name: toolObj?.function?.name || "Unnamed Tool", + description: toolObj?.function?.description || "", + json: JSON.stringify(toolObj, null, 2), + }); + } catch { + } + } + + return tools; +}; + +const parseDotpromptFrontmatter = (frontmatter: string): ParsedFrontmatter => { + const result: ParsedFrontmatter = { config: {}, tools: [] }; + const lines = frontmatter.split("\n"); + + result.tools = parseToolsFromFrontmatter(lines); + + for (const line of lines) { + const trimmedLine = line.trim(); + if (!trimmedLine) continue; + + // Skip known nested yaml sections and list items. 
+ if ( + trimmedLine.startsWith("input:") || + trimmedLine.startsWith("output:") || + trimmedLine.startsWith("schema:") || + trimmedLine.startsWith("format:") || + trimmedLine.startsWith("tools:") || + trimmedLine.startsWith("-") + ) { + continue; + } + + const colonIndex = trimmedLine.indexOf(":"); + if (colonIndex <= 0) continue; + + const key = trimmedLine.substring(0, colonIndex).trim(); + const value = trimmedLine.substring(colonIndex + 1).trim(); + + if (key === "model") { + result.model = value; + continue; + } + + if (key === "temperature") result.config.temperature = parseNumber(value); + if (key === "max_tokens") result.config.max_tokens = parseNumber(value); + if (key === "top_p") result.config.top_p = parseNumber(value); + } + + return result; +}; + +type ParsedBody = { developerMessage: string; messages: Message[] }; + +const parseDotpromptBody = (body: string): ParsedBody => { + const roleHeader = /^(System|Developer|User|Assistant):(?:\s(.*)|\s*)$/; + const messages: Message[] = []; + let developerMessage = ""; + + let currentRole: string | null = null; + let buffer: string[] = []; + + const commit = () => { + if (!currentRole) return; + + const content = buffer.join("\n").trim(); + if (currentRole === "developer") { + if (content) { + developerMessage = developerMessage ? `${developerMessage}\n\n${content}` : content; + } + } else if (content) { + messages.push({ role: currentRole, content }); + } else { + messages.push({ role: currentRole, content: "" }); + } + }; + + for (const line of body.split("\n")) { + const match = line.match(roleHeader); + if (match) { + commit(); + currentRole = match[1].toLowerCase(); + buffer = [match[2] ?? 
""]; + continue; + } + + if (!currentRole) continue; + buffer.push(line); + } + + commit(); + + return { developerMessage, messages }; +}; + export const parseExistingPrompt = (apiResponse: any): PromptType => { // Extract dotprompt_content from litellm_params const dotpromptContent = apiResponse?.prompt_spec?.litellm_params?.dotprompt_content || ""; @@ -88,63 +226,11 @@ export const parseExistingPrompt = (apiResponse: any): PromptType => { throw new Error("Invalid dotprompt format"); } - // Parse YAML frontmatter (parts[1]) const frontmatter = parts[1]; const content = parts.slice(2).join("---").trim(); - // Extract metadata from frontmatter - const metadata: any = {}; - frontmatter.split("\n").forEach((line: string) => { - const trimmedLine = line.trim(); - if (trimmedLine && !trimmedLine.startsWith("input:") && !trimmedLine.startsWith("output:") && !trimmedLine.startsWith("schema:") && !trimmedLine.startsWith("format:")) { - const colonIndex = trimmedLine.indexOf(":"); - if (colonIndex > 0) { - const key = trimmedLine.substring(0, colonIndex).trim(); - const value = trimmedLine.substring(colonIndex + 1).trim(); - if (key === "temperature" || key === "max_tokens" || key === "top_p") { - metadata[key] = parseFloat(value); - } else if (key === "model") { - metadata[key] = value; - } - } - } - }); - - // Parse content to extract developer message and user messages - let developerMessage = ""; - const messages: Message[] = []; - const lines = content.split("\n"); - let currentRole: "user" | "assistant" | null = null; - let currentContent = ""; - - for (const line of lines) { - if (line.startsWith("Developer:")) { - developerMessage = line.substring("Developer:".length).trim(); - } else if (line.startsWith("User:")) { - if (currentRole && currentContent) { - messages.push({ role: currentRole, content: currentContent.trim() }); - } - currentRole = "user"; - currentContent = line.substring("User:".length).trim(); - } else if (line.startsWith("Assistant:")) { - if 
(currentRole && currentContent) { - messages.push({ role: currentRole, content: currentContent.trim() }); - } - currentRole = "assistant"; - currentContent = line.substring("Assistant:".length).trim(); - } else if (line.trim() && currentRole) { - currentContent += "\n" + line.trim(); - } - } - - // Add the last message - if (currentRole && currentContent) { - messages.push({ role: currentRole, content: currentContent.trim() }); - } - - // Parse tools from frontmatter if present - const tools: Tool[] = []; - // TODO: Add tool parsing if needed + const parsedFrontmatter = parseDotpromptFrontmatter(frontmatter); + const parsedBody = parseDotpromptBody(content); // Strip version suffix from prompt name for display const promptId = apiResponse?.prompt_spec?.prompt_id || "Unnamed Prompt"; @@ -152,15 +238,14 @@ export const parseExistingPrompt = (apiResponse: any): PromptType => { return { name: baseName, - model: metadata.model || "gpt-4o", - config: { - temperature: metadata.temperature, - max_tokens: metadata.max_tokens, - top_p: metadata.top_p, - }, - tools: tools, - developerMessage: developerMessage, - messages: messages.length > 0 ? messages : [{ role: "user", content: "Enter task specifics. Use {{template_variables}} for dynamic inputs" }], + model: parsedFrontmatter.model || "gpt-4o", + config: parsedFrontmatter.config, + tools: parsedFrontmatter.tools, + developerMessage: parsedBody.developerMessage, + messages: + parsedBody.messages.length > 0 + ? parsedBody.messages + : [{ role: "user", content: "Enter task specifics. Use {{template_variables}} for dynamic inputs" }], }; };