diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 5e3f56c4206..775cdf6876a 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -450,6 +450,7 @@ router_settings: | BATCH_STATUS_POLL_INTERVAL_SECONDS | Interval in seconds for polling batch status. Default is 3600 (1 hour) | BATCH_STATUS_POLL_MAX_ATTEMPTS | Maximum number of attempts for polling batch status. Default is 24 (for 24 hours) | BEDROCK_MAX_POLICY_SIZE | Maximum size for Bedrock policy. Default is 75 +| BEDROCK_MIN_THINKING_BUDGET_TOKENS | Minimum thinking budget in tokens for Bedrock reasoning models. Bedrock returns a 400 error if budget_tokens is below this value. Requests with lower values are clamped to this minimum. Default is 1024 | BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service | BRAINTRUST_API_KEY | API key for Braintrust integration | BRAINTRUST_API_BASE | Base URL for Braintrust API. Default is https://api.braintrustdata.com/v1 diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 9dcc25e7a87..21459a1b802 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -3277,6 +3277,14 @@ async def _execute_virtual_key_regeneration( update_data.update(non_default_values) update_data = prisma_client.jsonify_object(data=update_data) + # If grace period set, insert deprecated key so old key remains valid + await _insert_deprecated_key( + prisma_client=prisma_client, + old_token_hash=hashed_api_key, + new_token_hash=new_token_hash, + grace_period=data.grace_period if data else None, + ) + updated_token = await prisma_client.db.litellm_verificationtoken.update( where={"token": hashed_api_key}, data=update_data, # type: ignore @@ -3474,58 +3482,6 @@ async def regenerate_key_fn( # noqa: PLR0915 ) verbose_proxy_logger.debug("key_in_db: %s", _key_in_db) - new_token = get_new_token(data=data) - - new_token_hash = hash_token(new_token) - new_token_key_name = f"sk-...{new_token[-4:]}" - - # Prepare the update data - update_data = { - "token": new_token_hash, - "key_name": new_token_key_name, - } - - non_default_values = {} - if data is not None: - # Update with any provided parameters from GenerateKeyRequest - non_default_values = await prepare_key_update_data( - data=data, existing_key_row=_key_in_db - ) - verbose_proxy_logger.debug("non_default_values: %s", non_default_values) - - update_data.update(non_default_values) - update_data = prisma_client.jsonify_object(data=update_data) - - # If grace period set, insert deprecated key so old key remains valid - await _insert_deprecated_key( - prisma_client=prisma_client, - old_token_hash=hashed_api_key, - new_token_hash=new_token_hash, - grace_period=data.grace_period if data else None, - ) - - # Update the token in the database - updated_token = await prisma_client.db.litellm_verificationtoken.update( - where={"token": hashed_api_key}, - data=update_data, # type: ignore - ) - - updated_token_dict = {} - if updated_token is not None: - updated_token_dict = dict(updated_token) - - updated_token_dict["key"] = new_token - updated_token_dict["token_id"] = updated_token_dict.pop("token") - - ### 3. remove existing key entry from cache - ###################################################################### - - if hashed_api_key or key: - await _delete_cache_key_object( - hashed_token=hash_token(key), - user_api_key_cache=user_api_key_cache, - proxy_logging_obj=proxy_logging_obj, - ) # Normalize litellm_changed_by: if it's a Header object or not a string, convert to None if litellm_changed_by is not None and not isinstance(litellm_changed_by, str): litellm_changed_by = None diff --git a/pyproject.toml b/pyproject.toml index 68b38fb5ff8..4deb61836b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.81.12" +version = "1.81.13" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -182,7 +182,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.81.12" +version = "1.81.13" version_files = [ "pyproject.toml:^version" ] diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py index 0850f742231..f38ce67cede 100644 --- a/tests/llm_responses_api_testing/base_responses_api.py +++ b/tests/llm_responses_api_testing/base_responses_api.py @@ -74,7 +74,7 @@ def validate_responses_api_response(response, final_chunk: bool = False): "top_p": (int, float, type(None)), "max_output_tokens": (int, type(None)), "previous_response_id": (str, type(None)), - "reasoning": dict, + "reasoning": (dict, type(None)), "status": str, "text": dict, "truncation": (str, type(None)), diff --git a/tests/test_litellm/containers/test_container_integration.py b/tests/test_litellm/containers/test_container_integration.py index b2f52fcea97..177996abd99 100644 --- a/tests/test_litellm/containers/test_container_integration.py +++ b/tests/test_litellm/containers/test_container_integration.py @@ -385,6 +385,15 @@ def test_error_handling_integration(self): @pytest.mark.parametrize("provider", ["openai"]) def test_provider_support(self, provider): """Test that the container API works with supported providers.""" + import importlib + import litellm.containers.main as containers_main_module + + # Reload the module to ensure it has a fresh reference to base_llm_http_handler + # after conftest reloads litellm (same pattern as test_error_handling_integration) + importlib.reload(containers_main_module) + + from litellm.containers.main import create_container as create_container_fresh + mock_response = ContainerObject( id="cntr_provider_test", object="container", @@ -398,7 +407,7 @@ def test_provider_support(self, provider): with patch('litellm.containers.main.base_llm_http_handler') as mock_handler: mock_handler.container_create_handler.return_value = mock_response - response = create_container( + response = create_container_fresh( name="Provider Test Container", custom_llm_provider=provider ) diff --git a/tests/test_litellm/llms/meta_llama/test_meta_llama_chat_transformation.py b/tests/test_litellm/llms/meta_llama/test_meta_llama_chat_transformation.py index fa605154bb0..7b974aba35c 100644 --- a/tests/test_litellm/llms/meta_llama/test_meta_llama_chat_transformation.py +++ b/tests/test_litellm/llms/meta_llama/test_meta_llama_chat_transformation.py @@ -1,6 +1,5 @@ import os import sys -from unittest.mock import AsyncMock, patch import pytest @@ -47,67 +46,26 @@ def test_map_openai_params(): assert "response_format" in result -@pytest.mark.asyncio -async def test_llama_api_streaming_no_307_error(): - """Test that streaming works without 307 redirect errors due to follow_redirects=True""" - - # Mock the httpx client to simulate a successful streaming response - with patch( - "litellm.llms.custom_httpx.http_handler.get_async_httpx_client" - ) as mock_get_client: - # Create a mock client - mock_client = AsyncMock() - mock_get_client.return_value = mock_client - - # Mock a successful streaming response (not a 307 redirect) - mock_response = AsyncMock() - mock_response.status_code = 200 - mock_response.headers = {"content-type": "text/plain; charset=utf-8"} - - # Mock streaming data that would come from a successful request - async def mock_aiter_lines(): - yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}' - yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}' - yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}' - yield "data: [DONE]" - - mock_response.aiter_lines.return_value = mock_aiter_lines() - mock_client.stream.return_value.__aenter__.return_value = mock_response - - # Test the streaming completion - try: - response = await litellm.acompletion( - model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - messages=[{"role": "user", "content": "Tell me about yourself"}], - stream=True, - temperature=0.0, - ) - - # Verify we get a CustomStreamWrapper (streaming response) - from litellm.utils import CustomStreamWrapper - - assert isinstance(response, CustomStreamWrapper) - - # Verify the HTTP client was called with follow_redirects=True - mock_client.stream.assert_called_once() - call_kwargs = mock_client.stream.call_args[1] - assert ( - call_kwargs.get("follow_redirects") is True - ), "follow_redirects should be True to prevent 307 errors" - - # Verify the response status is 200 (not 307) - assert ( - mock_response.status_code == 200 - ), "Should get 200 response, not 307 redirect" - - except Exception as e: - # If there's an exception, make sure it's not a 307 error - error_str = str(e) - assert ( - "307" not in error_str - ), f"Should not get 307 redirect error: {error_str}" - - # Still verify that follow_redirects was set correctly - if mock_client.stream.called: - call_kwargs = mock_client.stream.call_args[1] - assert call_kwargs.get("follow_redirects") is True +def test_llama_api_streaming_no_307_error(): + """ + Test that the OpenAI-compatible httpx clients use follow_redirects=True. + + meta_llama routes through the OpenAI SDK path (BaseOpenAILLM), so the + follow_redirects setting on that SDK's underlying httpx client is what + actually prevents 307 redirect errors for LLaMA API streaming. + """ + from litellm.llms.openai.common_utils import BaseOpenAILLM + + # Verify the async httpx client has follow_redirects enabled + async_client = BaseOpenAILLM._get_async_http_client() + assert async_client is not None + assert ( + async_client.follow_redirects is True + ), "Async httpx client should set follow_redirects=True to prevent 307 errors" + + # Verify the sync httpx client has follow_redirects enabled + sync_client = BaseOpenAILLM._get_sync_http_client() + assert sync_client is not None + assert ( + sync_client.follow_redirects is True + ), "Sync httpx client should set follow_redirects=True to prevent 307 errors" diff --git a/tests/test_litellm/llms/publicai/test_publicai_chat_transformation.py b/tests/test_litellm/llms/publicai/test_publicai_chat_transformation.py index f6e5e05fe51..cd530cd3b40 100644 --- a/tests/test_litellm/llms/publicai/test_publicai_chat_transformation.py +++ b/tests/test_litellm/llms/publicai/test_publicai_chat_transformation.py @@ -7,6 +7,7 @@ import os import sys +from unittest.mock import patch sys.path.insert( 0, os.path.abspath("../../../../..") @@ -51,9 +52,13 @@ def test_default_api_base(self, config): assert result["Authorization"] == f"Bearer {api_key}" assert result["Content-Type"] == "application/json" - def test_get_supported_openai_params(self, config): + @patch("litellm.utils.supports_function_calling", return_value=True) + def test_get_supported_openai_params(self, mock_supports_fc, config): """ - Test that get_supported_openai_params returns correct params + Test that get_supported_openai_params returns correct params. + We mock supports_function_calling because the test model name + 'swiss-ai-apertus' is not in the model registry; this test validates + config behaviour, not registry lookups. """ supported_params = config.get_supported_openai_params(model="swiss-ai-apertus") @@ -66,9 +71,12 @@ def test_get_supported_openai_params(self, config): # Note: JSON-based configs inherit from OpenAIGPTConfig which includes functions # This is expected behavior for JSON-based providers - def test_map_openai_params_includes_functions(self, config): + @patch("litellm.utils.supports_function_calling", return_value=True) + def test_map_openai_params_includes_functions(self, mock_supports_fc, config): """ - Test that functions parameter is mapped (JSON-based configs don't exclude functions) + Test that functions parameter is mapped (JSON-based configs don't exclude functions). + We mock supports_function_calling because the test model name + 'swiss-ai-apertus' is not in the model registry. """ non_default_params = { "functions": [{"name": "test_function", "description": "Test function"}], diff --git a/tests/test_litellm/llms/vertex_ai/rerank/test_vertex_ai_rerank_transformation.py b/tests/test_litellm/llms/vertex_ai/rerank/test_vertex_ai_rerank_transformation.py index fbf5239797f..5e29f927b67 100644 --- a/tests/test_litellm/llms/vertex_ai/rerank/test_vertex_ai_rerank_transformation.py +++ b/tests/test_litellm/llms/vertex_ai/rerank/test_vertex_ai_rerank_transformation.py @@ -22,6 +22,8 @@ def setup_method(self): "GOOGLE_APPLICATION_CREDENTIALS", "GOOGLE_CLOUD_PROJECT", "VERTEXAI_PROJECT", + "VERTEXAI_CREDENTIALS", + "VERTEX_AI_CREDENTIALS", "VERTEX_PROJECT", "VERTEX_LOCATION", "VERTEX_AI_PROJECT", @@ -471,16 +473,20 @@ def test_validate_environment_with_optional_params(self, mock_ensure_access_toke } assert headers == expected_headers - @patch('litellm.llms.vertex_ai.rerank.transformation.VertexAIRerankConfig._ensure_access_token') def test_validate_environment_preserves_optional_params_for_get_complete_url( self, - mock_ensure_access_token, ): """ Validate that calling validate_environment does not remove vertex-specific parameters needed later by get_complete_url. + + Uses instance-level mocking to avoid class-reference issues caused by + importlib.reload(litellm) in conftest.py. """ - mock_ensure_access_token.return_value = ("test-access-token", "project-from-token") + mock_ensure_access_token = MagicMock( + return_value=("test-access-token", "project-from-token") + ) + self.config._ensure_access_token = mock_ensure_access_token optional_params = { "vertex_credentials": "path/to/credentials.json", diff --git a/tests/test_litellm/proxy/test_proxy_cli.py b/tests/test_litellm/proxy/test_proxy_cli.py index be91800732b..a18c2dba032 100644 --- a/tests/test_litellm/proxy/test_proxy_cli.py +++ b/tests/test_litellm/proxy/test_proxy_cli.py @@ -446,8 +446,24 @@ async def mock_get_config(config_file_path=None): mock_proxy_config_instance.get_config = mock_get_config mock_proxy_config.return_value = mock_proxy_config_instance - # Ensure DATABASE_URL is not set in the environment - with patch.dict(os.environ, {"DATABASE_URL": ""}, clear=True): + mock_proxy_server_module = MagicMock(app=mock_app) + + # Only remove DATABASE_URL and DIRECT_URL to prevent the database setup + # code path from running. Do NOT use clear=True as it removes PATH, HOME, + # etc., which causes imports inside run_server to break in CI (the real + # litellm.proxy.proxy_server import at line 820 of proxy_cli.py has heavy + # side effects that fail without a proper environment). + env_overrides = { + "DATABASE_URL": "", + "DIRECT_URL": "", + "IAM_TOKEN_DB_AUTH": "", + "USE_AWS_KMS": "", + } + with patch.dict(os.environ, env_overrides): + # Remove DATABASE_URL entirely so the DB setup block is skipped + os.environ.pop("DATABASE_URL", None) + os.environ.pop("DIRECT_URL", None) + with patch.dict( "sys.modules", { @@ -456,7 +472,11 @@ async def mock_get_config(config_file_path=None): ProxyConfig=mock_proxy_config, KeyManagementSettings=mock_key_mgmt, save_worker_config=mock_save_worker_config, - ) + ), + # Also mock litellm.proxy.proxy_server to prevent the real + # import at line 820 of proxy_cli.py which has heavy side + # effects (FastAPI app init, logging setup, etc.) + "litellm.proxy.proxy_server": mock_proxy_server_module, }, ), patch( "litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args" @@ -470,7 +490,10 @@ async def mock_get_config(config_file_path=None): # Test with no config parameter (config=None) result = runner.invoke(run_server, ["--local"]) - assert result.exit_code == 0 + assert result.exit_code == 0, ( + f"run_server failed with exit_code={result.exit_code}, " + f"output={result.output}, exception={result.exception}" + ) # Verify that uvicorn.run was called mock_uvicorn_run.assert_called_once() @@ -481,7 +504,10 @@ async def mock_get_config(config_file_path=None): # Test with explicit --config None (should behave the same) result = runner.invoke(run_server, ["--local", "--config", "None"]) - assert result.exit_code == 0 + assert result.exit_code == 0, ( + f"run_server failed with exit_code={result.exit_code}, " + f"output={result.output}, exception={result.exception}" + ) # Verify that uvicorn.run was called again mock_uvicorn_run.assert_called_once()