Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ FROM $LITELLM_RUNTIME_IMAGE AS runtime
# Ensure runtime stage runs as root
USER root

# Install runtime dependencies
RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip
# Install runtime dependencies (libsndfile needed for audio processing on ARM64)
RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile

WORKDIR /app
# Copy the current directory contents into the container at /app
Expand Down
1 change: 1 addition & 0 deletions litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1469,6 +1469,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
from .llms.azure.chat.gpt_5_transformation import AzureOpenAIGPT5Config as AzureOpenAIGPT5Config
from .llms.azure.completion.transformation import AzureOpenAITextConfig as AzureOpenAITextConfig
from .llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig as HostedVLLMChatConfig
from .llms.hosted_vllm.embedding.transformation import HostedVLLMEmbeddingConfig as HostedVLLMEmbeddingConfig
from .llms.github_copilot.chat.transformation import GithubCopilotConfig as GithubCopilotConfig
from .llms.github_copilot.responses.transformation import GithubCopilotResponsesAPIConfig as GithubCopilotResponsesAPIConfig
from .llms.github_copilot.embedding.transformation import GithubCopilotEmbeddingConfig as GithubCopilotEmbeddingConfig
Expand Down
1,044 changes: 830 additions & 214 deletions litellm/_lazy_imports_registry.py

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions litellm/litellm_core_utils/prompt_templates/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -1632,6 +1632,7 @@ def _sanitize_anthropic_tool_use_id(tool_use_id: str) -> str:

def convert_to_anthropic_tool_result(
message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage],
force_base64: bool = False,
) -> AnthropicMessagesToolResultParam:
"""
OpenAI message with a tool result looks like:
Expand Down Expand Up @@ -1694,7 +1695,7 @@ def convert_to_anthropic_tool_result(
else None
)
_anthropic_image_param = create_anthropic_image_param(
content["image_url"], format=format
content["image_url"], format=format, is_bedrock_invoke=force_base64
)
_anthropic_image_param = add_cache_control_to_content(
anthropic_content_element=_anthropic_image_param,
Expand Down Expand Up @@ -2056,6 +2057,12 @@ def anthropic_messages_pt( # noqa: PLR0915
else:
messages.append(DEFAULT_USER_CONTINUE_MESSAGE_TYPED)

# Bedrock invoke models have format: invoke/...
# Vertex AI Anthropic also doesn't support URL sources for images
is_bedrock_invoke = model.lower().startswith("invoke/")
is_vertex_ai = llm_provider.startswith("vertex_ai") if llm_provider else False
force_base64 = is_bedrock_invoke or is_vertex_ai

msg_i = 0
while msg_i < len(messages):
user_content: List[AnthropicMessagesUserMessageValues] = []
Expand Down Expand Up @@ -2165,7 +2172,9 @@ def anthropic_messages_pt( # noqa: PLR0915
):
# OpenAI's tool message content will always be a string
user_content.append(
convert_to_anthropic_tool_result(user_message_types_block)
convert_to_anthropic_tool_result(
user_message_types_block, force_base64=force_base64
)
)

msg_i += 1
Expand Down
55 changes: 53 additions & 2 deletions litellm/llms/gemini/files/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,15 +180,66 @@ def transform_retrieve_file_request(
optional_params: dict,
litellm_params: dict,
) -> tuple[str, dict]:
raise NotImplementedError("GoogleAIStudioFilesHandler does not support file retrieval")
"""
Get the URL to retrieve a file from Google AI Studio.

We expect file_id to be the URI (e.g. https://generativelanguage.googleapis.com/v1beta/files/...)
as returned by the upload response.
"""
api_key = litellm_params.get("api_key")
if not api_key:
raise ValueError("api_key is required")

if file_id.startswith("http"):
url = "{}?key={}".format(file_id, api_key)
else:
# Fallback for just file name (files/...)
api_base = self.get_api_base(litellm_params.get("api_base")) or "https://generativelanguage.googleapis.com"
api_base = api_base.rstrip("/")
url = "{}/v1beta/{}?key={}".format(api_base, file_id, api_key)

return url, {"Content-Type": "application/json"}

def transform_retrieve_file_response(
    self,
    raw_response: httpx.Response,
    logging_obj: LiteLLMLoggingObj,
    litellm_params: dict,
) -> OpenAIFileObject:
    """
    Transform Gemini's file retrieval response into an OpenAI-style FileObject.

    Maps the Gemini file `state` to an OpenAI `status`
    (ACTIVE -> processed, FAILED -> error, otherwise uploaded) and converts
    the RFC 3339 `createTime` into a Unix epoch second.

    Raises:
        ValueError: if the response body cannot be parsed into a FileObject.
    """
    # Local imports keep this method self-contained within the handler module.
    import re
    from datetime import datetime

    try:
        response_json = raw_response.json()

        # Map Gemini state to OpenAI status
        gemini_state = response_json.get("state", "STATE_UNSPECIFIED")
        status = "uploaded"  # Default
        if gemini_state == "ACTIVE":
            status = "processed"
        elif gemini_state == "FAILED":
            status = "error"

        # Parse createTime (RFC 3339, e.g. "2024-01-01T00:00:00.123456789Z") as UTC.
        # BUG FIX: the previous time.mktime(time.strptime(..., "%z")) pipeline
        # interpreted the parsed struct_time in the *local* timezone (mktime
        # ignores the parsed UTC offset), and "%f" rejected timestamps without
        # fractional seconds. fromisoformat + .timestamp() honors the offset;
        # protobuf timestamps may carry up to 9 fractional digits while
        # datetime accepts at most 6, so truncate the excess first.
        raw_ts = response_json["createTime"].replace("Z", "+00:00")
        raw_ts = re.sub(r"\.(\d{6})\d+", r".\1", raw_ts)
        created_at = int(datetime.fromisoformat(raw_ts).timestamp())

        return OpenAIFileObject(
            id=response_json.get("uri", ""),
            bytes=int(response_json.get("sizeBytes", 0)),
            created_at=created_at,
            filename=response_json.get("displayName", ""),
            object="file",
            purpose="user_data",
            status=status,
            status_details=str(response_json.get("error", "")) if gemini_state == "FAILED" else None,
        )
    except Exception as e:
        verbose_logger.exception(f"Error parsing file retrieve response: {str(e)}")
        raise ValueError(f"Error parsing file retrieve response: {str(e)}")

def transform_delete_file_request(
self,
Expand Down
180 changes: 180 additions & 0 deletions litellm/llms/hosted_vllm/embedding/transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
"""
Hosted VLLM Embedding API Configuration.

This module provides the configuration for hosted VLLM's Embedding API.
VLLM is OpenAI-compatible and supports embeddings via the /v1/embeddings endpoint.

Docs: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
"""

from typing import TYPE_CHECKING, Any, List, Optional, Union

import httpx

from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllEmbeddingInputValues, AllMessageValues
from litellm.types.utils import EmbeddingResponse
from litellm.utils import convert_to_model_response_object

if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any


class HostedVLLMEmbeddingError(BaseLLMException):
    """Raised when a Hosted VLLM embedding request fails."""


class HostedVLLMEmbeddingConfig(BaseEmbeddingConfig):
    """
    Configuration for Hosted VLLM's Embedding API.

    VLLM serves an OpenAI-compatible ``/v1/embeddings`` endpoint, so both the
    request payload and the response body follow the OpenAI embedding schema.

    Reference: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
    """

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """
        Build request headers for the Hosted VLLM API.

        Resolution order for the key: explicit ``api_key`` argument, the
        ``HOSTED_VLLM_API_KEY`` secret, then the sentinel ``"fake-api-key"``
        (VLLM servers frequently run without authentication).

        Returns:
            Merged header dict; caller-supplied ``headers`` win on conflict.
        """
        if api_key is None:
            api_key = get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"

        default_headers = {
            "Content-Type": "application/json",
        }

        # Skip the Authorization header for the sentinel key so that
        # unauthenticated VLLM deployments never receive a bogus bearer token.
        if api_key and api_key != "fake-api-key":
            default_headers["Authorization"] = f"Bearer {api_key}"

        # Merge with existing headers (user's headers take priority)
        return {**default_headers, **headers}

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        Resolve the full Hosted VLLM embedding endpoint URL.

        Falls back to the ``HOSTED_VLLM_API_BASE`` secret when ``api_base``
        is not supplied, then normalizes it so the URL ends in
        ``/embeddings``.

        Raises:
            ValueError: if no api_base can be resolved.
        """
        if api_base is None:
            api_base = get_secret_str("HOSTED_VLLM_API_BASE")
        if api_base is None:
            raise ValueError("api_base is required for hosted_vllm embeddings")

        # Remove trailing slashes
        api_base = api_base.rstrip("/")

        # Ensure the URL ends with /embeddings
        if not api_base.endswith("/embeddings"):
            api_base = f"{api_base}/embeddings"

        return api_base

    def transform_embedding_request(
        self,
        model: str,
        input: AllEmbeddingInputValues,
        optional_params: dict,
        headers: dict,
    ) -> dict:
        """
        Transform an embedding request into Hosted VLLM (OpenAI-compatible) form.

        Wraps a bare string input in a list and strips litellm's
        ``hosted_vllm/`` routing prefix from the model name before sending it
        to the server.
        """
        # Ensure input is a list
        if isinstance(input, str):
            input = [input]

        # Strip litellm's provider routing prefix (first occurrence only);
        # removeprefix is exactly equivalent to the startswith/replace pair.
        model = model.removeprefix("hosted_vllm/")

        return {
            "model": model,
            "input": input,
            **optional_params,
        }

    def transform_embedding_response(
        self,
        model: str,
        raw_response: httpx.Response,
        model_response: EmbeddingResponse,
        logging_obj: LiteLLMLoggingObj,
        api_key: Optional[str],
        request_data: dict,
        optional_params: dict,
        litellm_params: dict,
    ) -> EmbeddingResponse:
        """
        Transform the Hosted VLLM embedding response into litellm's
        ``EmbeddingResponse``.

        VLLM returns a standard OpenAI-compatible embedding payload, so this
        delegates straight to ``convert_to_model_response_object``.
        """
        logging_obj.post_call(original_response=raw_response.text)

        response_json = raw_response.json()

        return convert_to_model_response_object(
            response_object=response_json,
            model_response_object=model_response,
            response_type="embedding",
        )

    def get_supported_openai_params(self, model: str) -> list:
        """
        Return the OpenAI embedding parameters Hosted VLLM accepts.
        """
        return [
            "timeout",
            "dimensions",
            "encoding_format",
            "user",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Copy supported OpenAI parameters into ``optional_params``.

        Unsupported parameters are silently dropped regardless of
        ``drop_params`` (matching the original behavior).
        """
        # Hoist the supported-param lookup out of the loop and use a set for
        # O(1) membership instead of rebuilding the list per parameter.
        supported = set(self.get_supported_openai_params(model))
        for param, value in non_default_params.items():
            if param in supported:
                optional_params[param] = value
        return optional_params

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        """
        Wrap an error response in a :class:`HostedVLLMEmbeddingError`.
        """
        return HostedVLLMEmbeddingError(
            message=error_message,
            status_code=status_code,
            headers=headers,
        )
Loading
Loading