BerriAI · ishaan-jaff · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md
@@ -209,6 +209,106 @@ Expect to see the following response header when this works
 x-litellm-model-id: default-model
 ```
 
+## Regex-based tag routing (`tag_regex`)
+
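+Use `tag_regex` to route requests based on regex patterns matched against request headers, without requiring clients to pass a tag explicitly. This is useful when clients already send a recognisable header, such as `User-Agent`. Each pattern is tested against the header rendered as `User-Agent: <value>` (for example `User-Agent: claude-code/1.2.3`), which is why the pattern in the config below starts with `^User-Agent: `.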
+
+**Use case: route all Claude Code traffic to dedicated AWS accounts**
+
+Claude Code always sends `User-Agent: claude-code/<version>`. With `tag_regex` you can route that traffic to a dedicated deployment automatically — no per-developer configuration needed.
+
+### 1. Config
+
+```yaml
+model_list:
+  # Claude Code traffic → dedicated deployment, matched by User-Agent
+  - model_name: claude-sonnet
+    litellm_params:
+      model: bedrock/converse/anthropic-claude-sonnet-4-6
+      aws_region_name: us-east-1
+      aws_role_name: arn:aws:iam::111122223333:role/LiteLLMClaudeCode
+      tag_regex:
+        - "^User-Agent: claude-code\\/"   # matches claude-code/1.x, 2.x, etc.
+    model_info:
+      id: claude-code-deployment
+
+  # All other traffic falls back to the default deployment
+  - model_name: claude-sonnet
+    litellm_params:
+      model: bedrock/converse/anthropic-claude-sonnet-4-6
+      aws_region_name: us-east-1
+      aws_role_name: arn:aws:iam::444455556666:role/LiteLLMDefault
+      tags:
+        - default
+    model_info:
+      id: regular-deployment
+
+router_settings:
+  enable_tag_filtering: true
+  tag_filtering_match_any: true
+
+general_settings:
+  master_key: sk-1234
+```
+
+### 2. Verify routing
+
+Claude Code sets `User-Agent: claude-code/<version>` automatically — no client config needed:
+
+```shell
+# Claude Code request (User-Agent set automatically by Claude Code)
+curl http://localhost:4000/v1/chat/completions \
+  -H "Authorization: Bearer sk-1234" \
+  -H "User-Agent: claude-code/1.2.3" \
+  -d '{"model": "claude-sonnet", "messages": [{"role": "user", "content": "hi"}]}'
+# → x-litellm-model-id: claude-code-deployment
+
+# Any other client (no matching User-Agent) → default deployment
+curl http://localhost:4000/v1/chat/completions \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{"model": "claude-sonnet", "messages": [{"role": "user", "content": "hi"}]}'
+# → x-litellm-model-id: regular-deployment
+```
+
+### How matching works
+
+| Priority | Condition | Result |
+|----------|-----------|--------|
+| 1 | Request has `tags` AND deployment has `tags` | Exact tag match (respects the `tag_filtering_match_any` setting) |
+| 2 | Deployment has `tag_regex` AND request has a `User-Agent` | Regex match (always OR logic — any pattern match suffices) |
+| 3 | Deployment has `tags: [default]` | Default fallback |
+| 4 | No default set | All healthy deployments returned |
+
+`tag_regex` always uses OR semantics — `tag_filtering_match_any=False` applies only to exact tag matching, not to regex patterns.
+
+### Observability
+
+When a regex matches, `tag_routing` is written into request metadata and flows to SpendLogs:
+
+```json
+{
+  "tag_routing": {
+    "matched_via": "tag_regex",
+    "matched_value": "^User-Agent: claude-code\\/",
+    "user_agent": "claude-code/1.2.3",
+    "request_tags": []
+  }
+}
+```
+
+### Security note
+
+:::caution
+
+**`User-Agent` is a client-supplied header and can be set to any value.** Any API consumer can send `User-Agent: claude-code/1.0` regardless of whether they are actually using Claude Code.
+
+Do not rely on `tag_regex` routing to enforce access controls or spend limits — use [team/key-based routing](./users) for that. `tag_regex` is a **traffic classification hint** (useful for billing visibility, capacity planning, and routing convenience), not a security boundary.
+
+:::
+
+
+---
+
 ## ✨ Team based tag routing (Enterprise)
 
 LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams)

diff --git a/litellm/completion_extras/litellm_responses_transformation/transformation.py b/litellm/completion_extras/litellm_responses_transformation/transformation.py
@@ -398,6 +398,7 @@ def _convert_response_output_to_choices(
             ResponseOutputMessage,
             ResponseReasoningItem,
         )
+
         try:
             from openai.types.responses.response_output_item import (
                 ResponseApplyPatchToolCall,
@@ -460,7 +461,9 @@ def _convert_response_output_to_choices(
                 accumulated_tool_calls.append(tool_call_dict)
                 tool_call_index += 1
 
-            elif ResponseApplyPatchToolCall is not None and isinstance(item, ResponseApplyPatchToolCall):
+            elif ResponseApplyPatchToolCall is not None and isinstance(
+                item, ResponseApplyPatchToolCall
+            ):
                 from litellm.responses.litellm_completion_transformation.transformation import (
                     LiteLLMCompletionResponsesConfig,
                 )

diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py
@@ -2680,7 +2680,9 @@ def anthropic_messages_pt(  # noqa: PLR0915
                 _content_is_list = "content" in assistant_content_block and isinstance(
                     assistant_content_block["content"], list
                 )
-                _content_list = assistant_content_block.get("content") if _content_is_list else None
+                _content_list = (
+                    assistant_content_block.get("content") if _content_is_list else None
+                )
                 _list_has_thinking = False
                 if _content_is_list and _content_list is not None:
                     for _item in _content_list:

diff --git a/litellm/llms/anthropic/files/transformation.py b/litellm/llms/anthropic/files/transformation.py
@@ -79,7 +79,9 @@ def get_error_class(
         return AnthropicError(
             status_code=status_code,
             message=error_message,
-            headers=cast(httpx.Headers, headers) if isinstance(headers, dict) else headers,
+            headers=cast(httpx.Headers, headers)
+            if isinstance(headers, dict)
+            else headers,
         )
 
     def validate_environment(

diff --git a/litellm/llms/base_llm/base_model_iterator.py b/litellm/llms/base_llm/base_model_iterator.py
@@ -144,9 +144,7 @@ def __next__(self):
                 # Skip empty lines (common in SSE streams between events).
                 # Only apply to str chunks — non-string objects (e.g. Pydantic
                 # BaseModel events from the Responses API) must pass through.
-                if isinstance(str_line, str) and (
-                    not str_line or not str_line.strip()
-                ):
+                if isinstance(str_line, str) and (not str_line or not str_line.strip()):
                     continue
 
                 # chunk is a str at this point
@@ -184,9 +182,7 @@ async def __anext__(self):
                 # Skip empty lines (common in SSE streams between events).
                 # Only apply to str chunks — non-string objects (e.g. Pydantic
                 # BaseModel events from the Responses API) must pass through.
-                if isinstance(str_line, str) and (
-                    not str_line or not str_line.strip()
-                ):
+                if isinstance(str_line, str) and (not str_line or not str_line.strip()):
                     continue
 
                 # chunk is a str at this point

diff --git a/litellm/llms/bedrock/base_aws_llm.py b/litellm/llms/bedrock/base_aws_llm.py
@@ -1268,7 +1268,8 @@ def get_request_headers(
 
             # Add back all original headers (including forwarded ones) after signature calculation
             for header_name, header_value in headers.items():
-                request.headers[header_name] = header_value
+                if header_value is not None:
+                    request.headers[header_name] = header_value
 
             if (
                 extra_headers is not None and "Authorization" in extra_headers
@@ -1298,6 +1299,8 @@ def _filter_headers_for_aws_signature(self, headers: dict) -> dict:
         }
 
         for header_name, header_value in headers.items():
+            if header_value is None:
+                continue
             header_lower = header_name.lower()
             if (
                 header_lower in aws_headers
@@ -1393,7 +1396,8 @@ def _sign_request(
         # Add back original headers after signing. Only headers in SignedHeaders
         # are integrity-protected; forwarded headers (x-forwarded-*) must remain unsigned.
         for header_name, header_value in headers.items():
-            request_headers_dict[header_name] = header_value
+            if header_value is not None:
+                request_headers_dict[header_name] = header_value
         if (
             headers is not None and "Authorization" in headers
         ):  # prevent sigv4 from overwriting the auth header

diff --git a/litellm/llms/bedrock/files/transformation.py b/litellm/llms/bedrock/files/transformation.py
@@ -173,7 +173,12 @@ def get_complete_file_url(
                 "S3 bucket_name is required. Set 's3_bucket_name' in litellm_params or AWS_S3_BUCKET_NAME env var"
             )
 
-        aws_region_name = self._get_aws_region_name(optional_params, model)
+        s3_region_name = litellm_params.get("s3_region_name") or optional_params.get(
+            "s3_region_name"
+        )
+        aws_region_name = s3_region_name or self._get_aws_region_name(
+            optional_params, model
+        )
 
         file_data = data.get("file")
         purpose = data.get("purpose")
@@ -398,6 +403,15 @@ def transform_create_file_request(
             data=create_file_data,
         )
 
+        # When set, s3_region_name wins for S3 operations (same priority as in
+        # get_complete_file_url above). Override any existing aws_region_name so the
+        # SigV4 region matches the URL region, avoiding SignatureDoesNotMatch.
+        s3_region_name = litellm_params.get("s3_region_name") or optional_params.get(
+            "s3_region_name"
+        )
+        if s3_region_name:
+            optional_params = {**optional_params, "aws_region_name": s3_region_name}
+
         # Sign the request and return a pre-signed request object
         signed_headers, signed_body = self._sign_s3_request(
             content=file_content,

diff --git a/litellm/llms/black_forest_labs/image_edit/transformation.py b/litellm/llms/black_forest_labs/image_edit/transformation.py
@@ -201,7 +201,9 @@ def _read_image_bytes(
             return image
         elif isinstance(image, list):
             # If it's a list, take the first image
-            return self._read_image_bytes(image[0], depth=depth + 1, max_depth=max_depth)
+            return self._read_image_bytes(
+                image[0], depth=depth + 1, max_depth=max_depth
+            )
         elif isinstance(image, str):
             if image.startswith(("http://", "https://")):
                 # Download image from URL

diff --git a/litellm/llms/perplexity/responses/transformation.py b/litellm/llms/perplexity/responses/transformation.py
@@ -71,7 +71,9 @@ def _ensure_message_type(
             result: List[Any] = []
             for item in input:
                 if isinstance(item, dict) and "type" not in item:
-                    new_item = dict(item)  # convert to plain dict to avoid TypedDict checking
+                    new_item = dict(
+                        item
+                    )  # convert to plain dict to avoid TypedDict checking
                     new_item["type"] = "message"
                     result.append(new_item)
                 else:

diff --git a/litellm/proxy/_experimental/mcp_server/rest_endpoints.py b/litellm/proxy/_experimental/mcp_server/rest_endpoints.py
@@ -378,9 +378,7 @@ async def _list_tools_for_single_server(
         # Resolve a server name to its UUID if needed
         _name_resolved = None
         if server_id not in allowed_server_ids:
-            _name_resolved = global_mcp_server_manager.get_mcp_server_by_name(
-                server_id
-            )
+            _name_resolved = global_mcp_server_manager.get_mcp_server_by_name(server_id)
             if _name_resolved is not None and _name_resolved.server_id in set(
                 allowed_server_ids
             ):
@@ -442,9 +440,7 @@ async def _list_tools_for_single_server(
                 extra_headers=user_oauth_extra_headers,
             )
         except Exception as e:
-            verbose_logger.exception(
-                f"Error getting tools from {server.name}: {e}"
-            )
+            verbose_logger.exception(f"Error getting tools from {server.name}: {e}")
             return {
                 "tools": [],
                 "error": "server_error",
@@ -473,7 +469,9 @@ async def _list_tools_for_single_server(
         _name_resolved = None
         if server_id not in allowed_server_ids:
             _name_resolved = global_mcp_server_manager.get_mcp_server_by_name(server_id)
-            if _name_resolved is not None and _name_resolved.server_id in set(allowed_server_ids):
+            if _name_resolved is not None and _name_resolved.server_id in set(
+                allowed_server_ids
+            ):
                 server_id = _name_resolved.server_id
 
         if server_id not in allowed_server_ids:
@@ -518,7 +516,9 @@ async def _list_tools_for_single_server(
         server_auth_header = _get_server_auth_header(
             server, mcp_server_auth_headers, mcp_auth_header
         )
-        user_oauth_extra_headers = await _get_user_oauth_extra_headers(server, user_api_key_dict)
+        user_oauth_extra_headers = await _get_user_oauth_extra_headers(
+            server, user_api_key_dict
+        )
 
         try:
             list_tools_result = await _get_tools_for_single_server(
@@ -529,9 +529,7 @@ async def _list_tools_for_single_server(
                 extra_headers=user_oauth_extra_headers,
             )
         except Exception as e:
-            verbose_logger.exception(
-                f"Error getting tools from {server.name}: {e}"
-            )
+            verbose_logger.exception(f"Error getting tools from {server.name}: {e}")
             return {
                 "tools": [],
                 "error": "server_error",
@@ -905,7 +903,9 @@ async def _execute_with_mcp_client(
         try:
             client_id, client_secret, scopes = _extract_credentials(request)
 
-            _oauth2_flow: Optional[Literal["client_credentials", "authorization_code"]] = (
+            _oauth2_flow: Optional[
+                Literal["client_credentials", "authorization_code"]
+            ] = (
                 "client_credentials"
                 if client_id and client_secret and request.token_url
                 else None

diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
@@ -64,7 +64,11 @@
     populate_request_with_path_params,
 )
 from litellm.proxy.common_utils.realtime_utils import _realtime_request_body
-from litellm.proxy.utils import PrismaClient, ProxyLogging, normalize_route_for_root_path
+from litellm.proxy.utils import (
+    PrismaClient,
+    ProxyLogging,
+    normalize_route_for_root_path,
+)
 from litellm.secret_managers.main import get_secret_bool
 from litellm.types.services import ServiceTypes
 

diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
@@ -106,9 +106,13 @@ def __init__(
         if (self.output_parse_pii or self.apply_to_output) and not logging_only:
             current_hook = self.event_hook
             if isinstance(current_hook, str) and current_hook != "post_call":
-                self.event_hook = cast(List[GuardrailEventHooks], [current_hook, "post_call"])
+                self.event_hook = cast(
+                    List[GuardrailEventHooks], [current_hook, "post_call"]
+                )
             elif isinstance(current_hook, list) and "post_call" not in current_hook:
-                self.event_hook = cast(List[GuardrailEventHooks], current_hook + ["post_call"])
+                self.event_hook = cast(
+                    List[GuardrailEventHooks], current_hook + ["post_call"]
+                )
         self.pii_entities_config: Dict[Union[PiiEntityType, str], PiiAction] = (
             pii_entities_config or {}
         )

diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -1838,9 +1838,7 @@ async def _validate_update_key_data(
 
     # Check team limits if key has a team_id (from request or existing key)
     team_obj: Optional[LiteLLM_TeamTableCachedObj] = None
-    _team_id_to_check = data.team_id or getattr(
-        existing_key_row, "team_id", None
-    )
+    _team_id_to_check = data.team_id or getattr(existing_key_row, "team_id", None)
     if _team_id_to_check is not None:
         team_obj = await get_team_object(
             team_id=_team_id_to_check,
@@ -1910,9 +1908,7 @@ async def _validate_update_key_data(
         if team_obj is None:
             raise HTTPException(
                 status_code=500,
-                detail={
-                    "error": "Team object not found for team change validation"
-                },
+                detail={"error": "Team object not found for team change validation"},
             )
         await validate_key_team_change(
             key=existing_key_row,