Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ router_settings:
| BATCH_STATUS_POLL_INTERVAL_SECONDS | Interval in seconds for polling batch status. Default is 3600 (1 hour)
| BATCH_STATUS_POLL_MAX_ATTEMPTS | Maximum number of attempts for polling batch status. Default is 24 (for 24 hours)
| BEDROCK_MAX_POLICY_SIZE | Maximum size for Bedrock policy. Default is 75
| BEDROCK_MIN_THINKING_BUDGET_TOKENS | Minimum thinking budget in tokens for Bedrock reasoning models. Bedrock returns a 400 error if budget_tokens is below this value. Requests with lower values are clamped to this minimum. Default is 1024
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
| BRAINTRUST_API_KEY | API key for Braintrust integration
| BRAINTRUST_API_BASE | Base URL for Braintrust API. Default is https://api.braintrustdata.com/v1
Expand Down
60 changes: 8 additions & 52 deletions litellm/proxy/management_endpoints/key_management_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -3277,6 +3277,14 @@ async def _execute_virtual_key_regeneration(
update_data.update(non_default_values)
update_data = prisma_client.jsonify_object(data=update_data)

# If grace period set, insert deprecated key so old key remains valid
await _insert_deprecated_key(
prisma_client=prisma_client,
old_token_hash=hashed_api_key,
new_token_hash=new_token_hash,
grace_period=data.grace_period if data else None,
)

updated_token = await prisma_client.db.litellm_verificationtoken.update(
where={"token": hashed_api_key},
data=update_data, # type: ignore
Expand Down Expand Up @@ -3474,58 +3482,6 @@ async def regenerate_key_fn( # noqa: PLR0915
)
verbose_proxy_logger.debug("key_in_db: %s", _key_in_db)

new_token = get_new_token(data=data)

new_token_hash = hash_token(new_token)
new_token_key_name = f"sk-...{new_token[-4:]}"

# Prepare the update data
update_data = {
"token": new_token_hash,
"key_name": new_token_key_name,
}

non_default_values = {}
if data is not None:
# Update with any provided parameters from GenerateKeyRequest
non_default_values = await prepare_key_update_data(
data=data, existing_key_row=_key_in_db
)
verbose_proxy_logger.debug("non_default_values: %s", non_default_values)

update_data.update(non_default_values)
update_data = prisma_client.jsonify_object(data=update_data)

# If grace period set, insert deprecated key so old key remains valid
await _insert_deprecated_key(
prisma_client=prisma_client,
old_token_hash=hashed_api_key,
new_token_hash=new_token_hash,
grace_period=data.grace_period if data else None,
)

# Update the token in the database
updated_token = await prisma_client.db.litellm_verificationtoken.update(
where={"token": hashed_api_key},
data=update_data, # type: ignore
)

updated_token_dict = {}
if updated_token is not None:
updated_token_dict = dict(updated_token)

updated_token_dict["key"] = new_token
updated_token_dict["token_id"] = updated_token_dict.pop("token")

### 3. remove existing key entry from cache
######################################################################

if hashed_api_key or key:
await _delete_cache_key_object(
hashed_token=hash_token(key),
user_api_key_cache=user_api_key_cache,
proxy_logging_obj=proxy_logging_obj,
)
# Normalize litellm_changed_by: if it's a Header object or not a string, convert to None
if litellm_changed_by is not None and not isinstance(litellm_changed_by, str):
litellm_changed_by = None
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.81.12"
version = "1.81.13"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
Expand Down Expand Up @@ -182,7 +182,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.81.12"
version = "1.81.13"
version_files = [
"pyproject.toml:^version"
]
Expand Down
2 changes: 1 addition & 1 deletion tests/llm_responses_api_testing/base_responses_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def validate_responses_api_response(response, final_chunk: bool = False):
"top_p": (int, float, type(None)),
"max_output_tokens": (int, type(None)),
"previous_response_id": (str, type(None)),
"reasoning": dict,
"reasoning": (dict, type(None)),
"status": str,
"text": dict,
"truncation": (str, type(None)),
Expand Down
11 changes: 10 additions & 1 deletion tests/test_litellm/containers/test_container_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,15 @@ def test_error_handling_integration(self):
@pytest.mark.parametrize("provider", ["openai"])
def test_provider_support(self, provider):
"""Test that the container API works with supported providers."""
import importlib
import litellm.containers.main as containers_main_module

# Reload the module to ensure it has a fresh reference to base_llm_http_handler
# after conftest reloads litellm (same pattern as test_error_handling_integration)
importlib.reload(containers_main_module)

from litellm.containers.main import create_container as create_container_fresh

mock_response = ContainerObject(
id="cntr_provider_test",
object="container",
Expand All @@ -398,7 +407,7 @@ def test_provider_support(self, provider):
with patch('litellm.containers.main.base_llm_http_handler') as mock_handler:
mock_handler.container_create_handler.return_value = mock_response

response = create_container(
response = create_container_fresh(
name="Provider Test Container",
custom_llm_provider=provider
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import sys
from unittest.mock import AsyncMock, patch

import pytest

Expand Down Expand Up @@ -47,67 +46,26 @@ def test_map_openai_params():
assert "response_format" in result


@pytest.mark.asyncio
async def test_llama_api_streaming_no_307_error():
"""Test that streaming works without 307 redirect errors due to follow_redirects=True"""

# Mock the httpx client to simulate a successful streaming response
with patch(
"litellm.llms.custom_httpx.http_handler.get_async_httpx_client"
) as mock_get_client:
# Create a mock client
mock_client = AsyncMock()
mock_get_client.return_value = mock_client

# Mock a successful streaming response (not a 307 redirect)
mock_response = AsyncMock()
mock_response.status_code = 200
mock_response.headers = {"content-type": "text/plain; charset=utf-8"}

# Mock streaming data that would come from a successful request
async def mock_aiter_lines():
yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}'
yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}'
yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}'
yield "data: [DONE]"

mock_response.aiter_lines.return_value = mock_aiter_lines()
mock_client.stream.return_value.__aenter__.return_value = mock_response

# Test the streaming completion
try:
response = await litellm.acompletion(
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
messages=[{"role": "user", "content": "Tell me about yourself"}],
stream=True,
temperature=0.0,
)

# Verify we get a CustomStreamWrapper (streaming response)
from litellm.utils import CustomStreamWrapper

assert isinstance(response, CustomStreamWrapper)

# Verify the HTTP client was called with follow_redirects=True
mock_client.stream.assert_called_once()
call_kwargs = mock_client.stream.call_args[1]
assert (
call_kwargs.get("follow_redirects") is True
), "follow_redirects should be True to prevent 307 errors"

# Verify the response status is 200 (not 307)
assert (
mock_response.status_code == 200
), "Should get 200 response, not 307 redirect"

except Exception as e:
# If there's an exception, make sure it's not a 307 error
error_str = str(e)
assert (
"307" not in error_str
), f"Should not get 307 redirect error: {error_str}"

# Still verify that follow_redirects was set correctly
if mock_client.stream.called:
call_kwargs = mock_client.stream.call_args[1]
assert call_kwargs.get("follow_redirects") is True
def test_llama_api_streaming_no_307_error():
    """
    Verify the OpenAI-compatible httpx clients are created with
    follow_redirects=True.

    meta_llama traffic goes through the OpenAI SDK path (BaseOpenAILLM);
    the follow_redirects flag on that SDK's underlying httpx clients is
    what prevents 307 redirect errors during LLaMA API streaming.
    """
    from litellm.llms.openai.common_utils import BaseOpenAILLM

    # Async client: must exist and be configured to follow redirects.
    client_async = BaseOpenAILLM._get_async_http_client()
    assert client_async is not None
    assert client_async.follow_redirects is True, (
        "Async httpx client should set follow_redirects=True to prevent 307 errors"
    )

    # Sync client: must exist and be configured to follow redirects.
    client_sync = BaseOpenAILLM._get_sync_http_client()
    assert client_sync is not None
    assert client_sync.follow_redirects is True, (
        "Sync httpx client should set follow_redirects=True to prevent 307 errors"
    )
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
import sys
from unittest.mock import patch

sys.path.insert(
0, os.path.abspath("../../../../..")
Expand Down Expand Up @@ -51,9 +52,13 @@ def test_default_api_base(self, config):
assert result["Authorization"] == f"Bearer {api_key}"
assert result["Content-Type"] == "application/json"

def test_get_supported_openai_params(self, config):
@patch("litellm.utils.supports_function_calling", return_value=True)
def test_get_supported_openai_params(self, mock_supports_fc, config):
"""
Test that get_supported_openai_params returns correct params
Test that get_supported_openai_params returns correct params.
We mock supports_function_calling because the test model name
'swiss-ai-apertus' is not in the model registry; this test validates
config behaviour, not registry lookups.
"""
supported_params = config.get_supported_openai_params(model="swiss-ai-apertus")

Expand All @@ -66,9 +71,12 @@ def test_get_supported_openai_params(self, config):
# Note: JSON-based configs inherit from OpenAIGPTConfig which includes functions
# This is expected behavior for JSON-based providers

def test_map_openai_params_includes_functions(self, config):
@patch("litellm.utils.supports_function_calling", return_value=True)
def test_map_openai_params_includes_functions(self, mock_supports_fc, config):
"""
Test that functions parameter is mapped (JSON-based configs don't exclude functions)
Test that functions parameter is mapped (JSON-based configs don't exclude functions).
We mock supports_function_calling because the test model name
'swiss-ai-apertus' is not in the model registry.
"""
non_default_params = {
"functions": [{"name": "test_function", "description": "Test function"}],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def setup_method(self):
"GOOGLE_APPLICATION_CREDENTIALS",
"GOOGLE_CLOUD_PROJECT",
"VERTEXAI_PROJECT",
"VERTEXAI_CREDENTIALS",
"VERTEX_AI_CREDENTIALS",
"VERTEX_PROJECT",
"VERTEX_LOCATION",
"VERTEX_AI_PROJECT",
Expand Down Expand Up @@ -471,16 +473,20 @@ def test_validate_environment_with_optional_params(self, mock_ensure_access_toke
}
assert headers == expected_headers

@patch('litellm.llms.vertex_ai.rerank.transformation.VertexAIRerankConfig._ensure_access_token')
def test_validate_environment_preserves_optional_params_for_get_complete_url(
self,
mock_ensure_access_token,
):
"""
Validate that calling validate_environment does not remove vertex-specific
parameters needed later by get_complete_url.

Uses instance-level mocking to avoid class-reference issues caused by
importlib.reload(litellm) in conftest.py.
"""
mock_ensure_access_token.return_value = ("test-access-token", "project-from-token")
mock_ensure_access_token = MagicMock(
return_value=("test-access-token", "project-from-token")
)
self.config._ensure_access_token = mock_ensure_access_token

optional_params = {
"vertex_credentials": "path/to/credentials.json",
Expand Down
36 changes: 31 additions & 5 deletions tests/test_litellm/proxy/test_proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,8 +446,24 @@ async def mock_get_config(config_file_path=None):
mock_proxy_config_instance.get_config = mock_get_config
mock_proxy_config.return_value = mock_proxy_config_instance

# Ensure DATABASE_URL is not set in the environment
with patch.dict(os.environ, {"DATABASE_URL": ""}, clear=True):
mock_proxy_server_module = MagicMock(app=mock_app)

# Only remove DATABASE_URL and DIRECT_URL to prevent the database setup
# code path from running. Do NOT use clear=True as it removes PATH, HOME,
# etc., which causes imports inside run_server to break in CI (the real
# litellm.proxy.proxy_server import at line 820 of proxy_cli.py has heavy
# side effects that fail without a proper environment).
env_overrides = {
"DATABASE_URL": "",
"DIRECT_URL": "",
"IAM_TOKEN_DB_AUTH": "",
"USE_AWS_KMS": "",
}
with patch.dict(os.environ, env_overrides):
# Remove DATABASE_URL entirely so the DB setup block is skipped
os.environ.pop("DATABASE_URL", None)
os.environ.pop("DIRECT_URL", None)

with patch.dict(
"sys.modules",
{
Expand All @@ -456,7 +472,11 @@ async def mock_get_config(config_file_path=None):
ProxyConfig=mock_proxy_config,
KeyManagementSettings=mock_key_mgmt,
save_worker_config=mock_save_worker_config,
)
),
# Also mock litellm.proxy.proxy_server to prevent the real
# import at line 820 of proxy_cli.py which has heavy side
# effects (FastAPI app init, logging setup, etc.)
"litellm.proxy.proxy_server": mock_proxy_server_module,
},
), patch(
"litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args"
Expand All @@ -470,7 +490,10 @@ async def mock_get_config(config_file_path=None):
# Test with no config parameter (config=None)
result = runner.invoke(run_server, ["--local"])

assert result.exit_code == 0
assert result.exit_code == 0, (
f"run_server failed with exit_code={result.exit_code}, "
f"output={result.output}, exception={result.exception}"
)

# Verify that uvicorn.run was called
mock_uvicorn_run.assert_called_once()
Expand All @@ -481,7 +504,10 @@ async def mock_get_config(config_file_path=None):
# Test with explicit --config None (should behave the same)
result = runner.invoke(run_server, ["--local", "--config", "None"])

assert result.exit_code == 0
assert result.exit_code == 0, (
f"run_server failed with exit_code={result.exit_code}, "
f"output={result.output}, exception={result.exception}"
)

# Verify that uvicorn.run was called again
mock_uvicorn_run.assert_called_once()
Expand Down
Loading