Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ router_settings:
| BATCH_STATUS_POLL_INTERVAL_SECONDS | Interval in seconds for polling batch status. Default is 3600 (1 hour)
| BATCH_STATUS_POLL_MAX_ATTEMPTS | Maximum number of attempts for polling batch status. Default is 24 (for 24 hours)
| BEDROCK_MAX_POLICY_SIZE | Maximum size for Bedrock policy. Default is 75
| BEDROCK_MIN_THINKING_BUDGET_TOKENS | Minimum thinking budget in tokens for Bedrock reasoning models. Bedrock returns a 400 error if budget_tokens is below this value. Requests with lower values are clamped to this minimum. Default is 1024
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
| BRAINTRUST_API_KEY | API key for Braintrust integration
| BRAINTRUST_API_BASE | Base URL for Braintrust API. Default is https://api.braintrustdata.com/v1
Expand Down
60 changes: 8 additions & 52 deletions litellm/proxy/management_endpoints/key_management_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -3277,6 +3277,14 @@ async def _execute_virtual_key_regeneration(
update_data.update(non_default_values)
update_data = prisma_client.jsonify_object(data=update_data)

# If grace period set, insert deprecated key so old key remains valid
await _insert_deprecated_key(
prisma_client=prisma_client,
old_token_hash=hashed_api_key,
new_token_hash=new_token_hash,
grace_period=data.grace_period if data else None,
)

updated_token = await prisma_client.db.litellm_verificationtoken.update(
where={"token": hashed_api_key},
data=update_data, # type: ignore
Expand Down Expand Up @@ -3474,58 +3482,6 @@ async def regenerate_key_fn( # noqa: PLR0915
)
verbose_proxy_logger.debug("key_in_db: %s", _key_in_db)

new_token = get_new_token(data=data)

new_token_hash = hash_token(new_token)
new_token_key_name = f"sk-...{new_token[-4:]}"

# Prepare the update data
update_data = {
"token": new_token_hash,
"key_name": new_token_key_name,
}

non_default_values = {}
if data is not None:
# Update with any provided parameters from GenerateKeyRequest
non_default_values = await prepare_key_update_data(
data=data, existing_key_row=_key_in_db
)
verbose_proxy_logger.debug("non_default_values: %s", non_default_values)

update_data.update(non_default_values)
update_data = prisma_client.jsonify_object(data=update_data)

# If grace period set, insert deprecated key so old key remains valid
await _insert_deprecated_key(
prisma_client=prisma_client,
old_token_hash=hashed_api_key,
new_token_hash=new_token_hash,
grace_period=data.grace_period if data else None,
)

# Update the token in the database
updated_token = await prisma_client.db.litellm_verificationtoken.update(
where={"token": hashed_api_key},
data=update_data, # type: ignore
)

updated_token_dict = {}
if updated_token is not None:
updated_token_dict = dict(updated_token)

updated_token_dict["key"] = new_token
updated_token_dict["token_id"] = updated_token_dict.pop("token")

### 3. remove existing key entry from cache
######################################################################

if hashed_api_key or key:
await _delete_cache_key_object(
hashed_token=hash_token(key),
user_api_key_cache=user_api_key_cache,
proxy_logging_obj=proxy_logging_obj,
)
# Normalize litellm_changed_by: if it's a Header object or not a string, convert to None
if litellm_changed_by is not None and not isinstance(litellm_changed_by, str):
litellm_changed_by = None
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.81.12"
version = "1.81.13"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
Expand Down Expand Up @@ -182,7 +182,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.81.12"
version = "1.81.13"
version_files = [
"pyproject.toml:^version"
]
Expand Down
2 changes: 1 addition & 1 deletion tests/llm_responses_api_testing/base_responses_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def validate_responses_api_response(response, final_chunk: bool = False):
"top_p": (int, float, type(None)),
"max_output_tokens": (int, type(None)),
"previous_response_id": (str, type(None)),
"reasoning": dict,
"reasoning": (dict, type(None)),
"status": str,
"text": dict,
"truncation": (str, type(None)),
Expand Down
11 changes: 10 additions & 1 deletion tests/test_litellm/containers/test_container_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,15 @@ def test_error_handling_integration(self):
@pytest.mark.parametrize("provider", ["openai"])
def test_provider_support(self, provider):
"""Test that the container API works with supported providers."""
import importlib
import litellm.containers.main as containers_main_module

# Reload the module to ensure it has a fresh reference to base_llm_http_handler
# after conftest reloads litellm (same pattern as test_error_handling_integration)
importlib.reload(containers_main_module)

from litellm.containers.main import create_container as create_container_fresh

mock_response = ContainerObject(
id="cntr_provider_test",
object="container",
Expand All @@ -398,7 +407,7 @@ def test_provider_support(self, provider):
with patch('litellm.containers.main.base_llm_http_handler') as mock_handler:
mock_handler.container_create_handler.return_value = mock_response

response = create_container(
response = create_container_fresh(
name="Provider Test Container",
custom_llm_provider=provider
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import sys
from unittest.mock import AsyncMock, patch

import pytest

Expand Down Expand Up @@ -47,67 +46,26 @@ def test_map_openai_params():
assert "response_format" in result


@pytest.mark.asyncio
async def test_llama_api_streaming_no_307_error():
"""Test that streaming works without 307 redirect errors due to follow_redirects=True"""

# Mock the httpx client to simulate a successful streaming response
with patch(
"litellm.llms.custom_httpx.http_handler.get_async_httpx_client"
) as mock_get_client:
# Create a mock client
mock_client = AsyncMock()
mock_get_client.return_value = mock_client

# Mock a successful streaming response (not a 307 redirect)
mock_response = AsyncMock()
mock_response.status_code = 200
mock_response.headers = {"content-type": "text/plain; charset=utf-8"}

# Mock streaming data that would come from a successful request
async def mock_aiter_lines():
yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}'
yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{"content":" there"},"finish_reason":null}]}'
yield 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}'
yield "data: [DONE]"

mock_response.aiter_lines.return_value = mock_aiter_lines()
mock_client.stream.return_value.__aenter__.return_value = mock_response

# Test the streaming completion
try:
response = await litellm.acompletion(
model="meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
messages=[{"role": "user", "content": "Tell me about yourself"}],
stream=True,
temperature=0.0,
)

# Verify we get a CustomStreamWrapper (streaming response)
from litellm.utils import CustomStreamWrapper

assert isinstance(response, CustomStreamWrapper)

# Verify the HTTP client was called with follow_redirects=True
mock_client.stream.assert_called_once()
call_kwargs = mock_client.stream.call_args[1]
assert (
call_kwargs.get("follow_redirects") is True
), "follow_redirects should be True to prevent 307 errors"

# Verify the response status is 200 (not 307)
assert (
mock_response.status_code == 200
), "Should get 200 response, not 307 redirect"

except Exception as e:
# If there's an exception, make sure it's not a 307 error
error_str = str(e)
assert (
"307" not in error_str
), f"Should not get 307 redirect error: {error_str}"

# Still verify that follow_redirects was set correctly
if mock_client.stream.called:
call_kwargs = mock_client.stream.call_args[1]
assert call_kwargs.get("follow_redirects") is True
def test_llama_api_streaming_no_307_error():
    """
    Verify the OpenAI-compatible httpx clients are created with
    follow_redirects=True.

    meta_llama traffic goes through the OpenAI SDK path (BaseOpenAILLM);
    the follow_redirects flag on that SDK's underlying httpx clients is
    what prevents 307 redirect errors during LLaMA API streaming.
    """
    from litellm.llms.openai.common_utils import BaseOpenAILLM

    # Async client: must exist and be configured to follow redirects.
    client_async = BaseOpenAILLM._get_async_http_client()
    assert client_async is not None
    assert client_async.follow_redirects is True, (
        "Async httpx client should set follow_redirects=True to prevent 307 errors"
    )

    # Sync client: must exist and be configured to follow redirects.
    client_sync = BaseOpenAILLM._get_sync_http_client()
    assert client_sync is not None
    assert client_sync.follow_redirects is True, (
        "Sync httpx client should set follow_redirects=True to prevent 307 errors"
    )
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import os
import sys
from unittest.mock import patch

sys.path.insert(
0, os.path.abspath("../../../../..")
Expand Down Expand Up @@ -51,9 +52,13 @@ def test_default_api_base(self, config):
assert result["Authorization"] == f"Bearer {api_key}"
assert result["Content-Type"] == "application/json"

def test_get_supported_openai_params(self, config):
@patch("litellm.utils.supports_function_calling", return_value=True)
def test_get_supported_openai_params(self, mock_supports_fc, config):
"""
Test that get_supported_openai_params returns correct params
Test that get_supported_openai_params returns correct params.
We mock supports_function_calling because the test model name
'swiss-ai-apertus' is not in the model registry; this test validates
config behaviour, not registry lookups.
"""
supported_params = config.get_supported_openai_params(model="swiss-ai-apertus")

Expand All @@ -66,9 +71,12 @@ def test_get_supported_openai_params(self, config):
# Note: JSON-based configs inherit from OpenAIGPTConfig which includes functions
# This is expected behavior for JSON-based providers

def test_map_openai_params_includes_functions(self, config):
@patch("litellm.utils.supports_function_calling", return_value=True)
def test_map_openai_params_includes_functions(self, mock_supports_fc, config):
"""
Test that functions parameter is mapped (JSON-based configs don't exclude functions)
Test that functions parameter is mapped (JSON-based configs don't exclude functions).
We mock supports_function_calling because the test model name
'swiss-ai-apertus' is not in the model registry.
"""
non_default_params = {
"functions": [{"name": "test_function", "description": "Test function"}],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def setup_method(self):
"GOOGLE_APPLICATION_CREDENTIALS",
"GOOGLE_CLOUD_PROJECT",
"VERTEXAI_PROJECT",
"VERTEXAI_CREDENTIALS",
"VERTEX_AI_CREDENTIALS",
"VERTEX_PROJECT",
"VERTEX_LOCATION",
"VERTEX_AI_PROJECT",
Expand Down Expand Up @@ -471,16 +473,20 @@ def test_validate_environment_with_optional_params(self, mock_ensure_access_toke
}
assert headers == expected_headers

@patch('litellm.llms.vertex_ai.rerank.transformation.VertexAIRerankConfig._ensure_access_token')
def test_validate_environment_preserves_optional_params_for_get_complete_url(
self,
mock_ensure_access_token,
):
"""
Validate that calling validate_environment does not remove vertex-specific
parameters needed later by get_complete_url.

Uses instance-level mocking to avoid class-reference issues caused by
importlib.reload(litellm) in conftest.py.
"""
mock_ensure_access_token.return_value = ("test-access-token", "project-from-token")
mock_ensure_access_token = MagicMock(
return_value=("test-access-token", "project-from-token")
)
self.config._ensure_access_token = mock_ensure_access_token

optional_params = {
"vertex_credentials": "path/to/credentials.json",
Expand Down
36 changes: 31 additions & 5 deletions tests/test_litellm/proxy/test_proxy_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,8 +446,24 @@ async def mock_get_config(config_file_path=None):
mock_proxy_config_instance.get_config = mock_get_config
mock_proxy_config.return_value = mock_proxy_config_instance

# Ensure DATABASE_URL is not set in the environment
with patch.dict(os.environ, {"DATABASE_URL": ""}, clear=True):
mock_proxy_server_module = MagicMock(app=mock_app)

# Only remove DATABASE_URL and DIRECT_URL to prevent the database setup
# code path from running. Do NOT use clear=True as it removes PATH, HOME,
# etc., which causes imports inside run_server to break in CI (the real
# litellm.proxy.proxy_server import at line 820 of proxy_cli.py has heavy
# side effects that fail without a proper environment).
env_overrides = {
"DATABASE_URL": "",
"DIRECT_URL": "",
"IAM_TOKEN_DB_AUTH": "",
"USE_AWS_KMS": "",
}
with patch.dict(os.environ, env_overrides):
# Remove DATABASE_URL entirely so the DB setup block is skipped
os.environ.pop("DATABASE_URL", None)
os.environ.pop("DIRECT_URL", None)

with patch.dict(
"sys.modules",
{
Expand All @@ -456,7 +472,11 @@ async def mock_get_config(config_file_path=None):
ProxyConfig=mock_proxy_config,
KeyManagementSettings=mock_key_mgmt,
save_worker_config=mock_save_worker_config,
)
),
# Also mock litellm.proxy.proxy_server to prevent the real
# import at line 820 of proxy_cli.py which has heavy side
# effects (FastAPI app init, logging setup, etc.)
"litellm.proxy.proxy_server": mock_proxy_server_module,
},
), patch(
"litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args"
Expand All @@ -470,7 +490,10 @@ async def mock_get_config(config_file_path=None):
# Test with no config parameter (config=None)
result = runner.invoke(run_server, ["--local"])

assert result.exit_code == 0
assert result.exit_code == 0, (
f"run_server failed with exit_code={result.exit_code}, "
f"output={result.output}, exception={result.exception}"
)

# Verify that uvicorn.run was called
mock_uvicorn_run.assert_called_once()
Expand All @@ -481,7 +504,10 @@ async def mock_get_config(config_file_path=None):
# Test with explicit --config None (should behave the same)
result = runner.invoke(run_server, ["--local", "--config", "None"])

assert result.exit_code == 0
assert result.exit_code == 0, (
f"run_server failed with exit_code={result.exit_code}, "
f"output={result.output}, exception={result.exception}"
)

# Verify that uvicorn.run was called again
mock_uvicorn_run.assert_called_once()
Expand Down
Loading