Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion litellm/batches/batch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,27 @@ async def _get_batch_output_file_content_as_dictionary(
Get the batch output file content as a list of dictionaries
"""
from litellm.files.main import afile_content
from litellm.proxy.openai_files_endpoints.common_utils import (
_is_base64_encoded_unified_file_id,
)

if custom_llm_provider == "vertex_ai":
raise ValueError("Vertex AI does not support file content retrieval")

if batch.output_file_id is None:
raise ValueError("Output file id is None cannot retrieve file content")

file_id = batch.output_file_id
is_base64_unified_file_id = _is_base64_encoded_unified_file_id(file_id)
if is_base64_unified_file_id:
try:
file_id = is_base64_unified_file_id.split("llm_output_file_id,")[1].split(";")[0]
verbose_logger.debug(f"Extracted LLM output file ID from unified file ID: {file_id}")
except (IndexError, AttributeError) as e:
verbose_logger.error(f"Failed to extract LLM output file ID from unified file ID: {batch.output_file_id}, error: {e}")

_file_content = await afile_content(
file_id=batch.output_file_id,
file_id=file_id,
custom_llm_provider=custom_llm_provider,
)
return _get_file_content_as_dictionary(_file_content.content)
Expand Down
16 changes: 13 additions & 3 deletions litellm/litellm_core_utils/litellm_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -2336,18 +2336,28 @@ async def async_success_handler( # noqa: PLR0915
batch_cost = kwargs.get("batch_cost", None)
batch_usage = kwargs.get("batch_usage", None)
batch_models = kwargs.get("batch_models", None)
if all([batch_cost, batch_usage, batch_models]) is not None:
has_explicit_batch_data = all(
x is not None for x in (batch_cost, batch_usage, batch_models)
)

should_compute_batch_data = (
not is_base64_unified_file_id
or not has_explicit_batch_data
and result.status == "completed"
)
if has_explicit_batch_data:
result._hidden_params["response_cost"] = batch_cost
result._hidden_params["batch_models"] = batch_models
result.usage = batch_usage

elif not is_base64_unified_file_id: # only run for non-unified file ids
elif should_compute_batch_data:
(
response_cost,
batch_usage,
batch_models,
) = await _handle_completed_batch(
batch=result, custom_llm_provider=self.custom_llm_provider
batch=result,
custom_llm_provider=self.custom_llm_provider,
)

result._hidden_params["response_cost"] = response_cost
Expand Down
97 changes: 97 additions & 0 deletions litellm/proxy/common_utils/http_parsing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,103 @@ async def get_request_body(request: Request) -> Dict[str, Any]:
return {}


def extract_nested_form_metadata(
    form_data: Dict[str, Any],
    prefix: str = "litellm_metadata["
) -> Dict[str, Any]:
    """
    Extract nested metadata from form data with bracket notation.

    Handles form data that uses bracket notation to represent nested dictionaries,
    such as litellm_metadata[spend_logs_metadata][owner] = "value".

    This is commonly encountered when SDKs or clients send form data with nested
    structures using bracket notation instead of JSON.

    Args:
        form_data: Dictionary containing form data (from request.form())
        prefix: The prefix to look for in form keys (default: "litellm_metadata[")

    Returns:
        Dictionary with nested structure reconstructed from bracket notation.
        Keys that are malformed, non-string, or carry UploadFile values are
        skipped with a warning rather than raising.

    Example:
        Input form_data:
        {
            "litellm_metadata[spend_logs_metadata][owner]": "john",
            "litellm_metadata[spend_logs_metadata][team]": "engineering",
            "litellm_metadata[tags]": "production",
            "other_field": "value"
        }

        Output:
        {
            "spend_logs_metadata": {
                "owner": "john",
                "team": "engineering"
            },
            "tags": "production"
        }
    """
    if not form_data:
        return {}

    metadata: Dict[str, Any] = {}

    for key, value in form_data.items():
        # Only process string keys that carry the expected prefix.
        if not isinstance(key, str) or not key.startswith(prefix):
            continue

        # Skip UploadFile objects - file payloads should not land in metadata.
        if isinstance(value, UploadFile):
            verbose_proxy_logger.warning(
                f"Skipping UploadFile in metadata extraction for key: {key}"
            )
            continue

        # Extract the nested path from bracket notation
        # Example: "litellm_metadata[spend_logs_metadata][owner]" -> ["spend_logs_metadata", "owner"]
        try:
            # Strip only the LEADING prefix. (str.replace(prefix, "") would
            # remove EVERY occurrence of the prefix text, corrupting any key
            # whose nested path happens to contain it again.) Then drop the
            # trailing ']'.
            path_string = key[len(prefix):].rstrip("]")

            # Split by "][" to get individual path parts
            parts = path_string.split("][")

            if not parts or not parts[0]:
                verbose_proxy_logger.warning(
                    f"Invalid metadata key format (empty path): {key}"
                )
                continue

            # Navigate/create nested dictionary structure
            current = metadata
            for part in parts[:-1]:
                if not isinstance(current, dict):
                    # An earlier flat key already claimed this path with a
                    # non-dict value; cannot descend further.
                    verbose_proxy_logger.warning(
                        f"Cannot create nested path - intermediate value is not a dict at: {part}"
                    )
                    break
                current = current.setdefault(part, {})
            else:
                # Set the final value (only if we didn't break out of the loop)
                if isinstance(current, dict):
                    current[parts[-1]] = value
                else:
                    verbose_proxy_logger.warning(
                        f"Cannot set value - parent is not a dict for key: {key}"
                    )

        except Exception as e:
            # Best-effort extraction: a single malformed key must not fail
            # the whole request, so log and move on.
            verbose_proxy_logger.error(
                f"Error parsing metadata key '{key}': {str(e)}"
            )
            continue

    return metadata


def get_tags_from_request_body(request_body: dict) -> List[str]:
"""
Extract tags from request body metadata.
Expand Down
17 changes: 12 additions & 5 deletions litellm/proxy/openai_files_endpoints/files_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
from litellm.proxy._types import *
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.http_parsing_utils import (
_read_request_body,
extract_nested_form_metadata,
)
from litellm.proxy.common_utils.openai_endpoint_utils import (
get_custom_llm_provider_from_request_body,
get_custom_llm_provider_from_request_headers,
Expand Down Expand Up @@ -354,16 +357,20 @@ async def create_file( # noqa: PLR0915

data = {}

# Add litellm_metadata to data if provided (from form field)
if litellm_metadata is not None:
data["litellm_metadata"] = litellm_metadata

# Parse expires_after if provided
expires_after = None
form_data = await request.form()
litellm_metadata = extract_nested_form_metadata(
form_data=form_data,
prefix="litellm_metadata["
)
expires_after_anchor = form_data.get("expires_after[anchor]")
expires_after_seconds_str = form_data.get("expires_after[seconds]")

# Add litellm_metadata to data if provided (from form field)
if litellm_metadata is not None:
data["litellm_metadata"] = litellm_metadata

if expires_after_anchor is not None or expires_after_seconds_str is not None:
if expires_after_anchor is None or expires_after_seconds_str is None:
raise HTTPException(
Expand Down
Loading
Loading