BerriAI · ishaan-jaff · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026
diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md
@@ -461,3 +461,48 @@ generateContent();
 
 </TabItem>
 </Tabs>
+
+### Using Anthropic Beta Features on Vertex AI
+
+When using Anthropic models via Vertex AI passthrough (e.g., Claude on Vertex), you can enable Anthropic beta features like extended context windows.
+
+LiteLLM forwards the `anthropic-beta` request header to Vertex AI automatically, so you only need to set it on your request:
+
+```bash
+curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "anthropic-beta: context-1m-2025-08-07" \
+  -d '{
+    "anthropic_version": "vertex-2023-10-16",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "max_tokens": 500
+  }'
+```
+
+### Forwarding Custom Headers with `x-pass-` Prefix
+
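+You can forward any custom header to the provider by prefixing its name with `x-pass-`. LiteLLM strips the prefix (matched case-insensitively) and sends the rest of the header name, with its value unchanged, to the provider — even when general header forwarding (`forward_headers`) is disabled.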
+
+For example:
+- `x-pass-anthropic-beta: value` becomes `anthropic-beta: value`
+- `x-pass-custom-header: value` becomes `custom-header: value`
+
+This is useful when you need to send provider-specific headers that LiteLLM does not forward by default (for Vertex AI, the default allowlist contains only `anthropic-beta` and `content-type`).
+
+```bash
+curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "x-pass-anthropic-beta: context-1m-2025-08-07" \
+  -H "x-pass-custom-feature: enabled" \
+  -d '{
+    "anthropic_version": "vertex-2023-10-16",
+    "messages": [{"role": "user", "content": "Hello"}],
+    "max_tokens": 500
+  }'
+```
+
+:::info
+The `x-pass-` prefix works for all LLM pass-through endpoints, not just Vertex AI.
+:::
diff --git a/litellm/constants.py b/litellm/constants.py
@@ -1122,6 +1122,20 @@
     "generateQuery/",
     "optimize-prompt/",
 ]
+
+
+# Incoming request headers that are safe to forward on Vertex AI pass-through calls.
+# Deliberately an allowlist: any header not named here (e.g. the caller's LiteLLM auth token) is dropped.
+ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS = {
+    "anthropic-beta",  # Required for Anthropic features like extended context windows
+    "content-type",  # Required for request body parsing
+}
+
+# Prefix for headers that should be forwarded to the provider with the prefix stripped
+# e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
+# Works for all LLM pass-through endpoints (Vertex AI, Anthropic, Bedrock, etc.)
+PASS_THROUGH_HEADER_PREFIX = "x-pass-"
+
 BASE_MCP_ROUTE = "/mcp"
 
 BATCH_STATUS_POLL_INTERVAL_SECONDS = int(

diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py
@@ -3,6 +3,8 @@
 
 import httpx
 
+from litellm.constants import PASS_THROUGH_HEADER_PREFIX
+
 
 class BasePassthroughUtils:
     @staticmethod
@@ -27,7 +29,11 @@ def forward_headers_from_request(
         forward_headers: Optional[bool] = False,
     ):
         """
-        Helper to forward headers from original request
+        Helper to forward headers from original request.
+
+        Also handles 'x-pass-' prefixed headers which are always forwarded
+        with the prefix stripped, regardless of forward_headers setting.
+        e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
         """
         if forward_headers is True:
             # Header We Should NOT forward
@@ -36,6 +42,14 @@ def forward_headers_from_request(
 
             # Combine request headers with custom headers
             headers = {**request_headers, **headers}
+
+        # Always process x-pass- prefixed headers (strip prefix and forward)
+        for header_name, header_value in request_headers.items():
+            if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
+                # Strip the 'x-pass-' prefix to get the actual header name
+                actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :]
+                headers[actual_header_name] = header_value
+
         return headers
 
 class CommonUtils:

diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
@@ -17,7 +17,10 @@
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES
+from litellm.constants import (
+    ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS,
+    BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES,
+)
 from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
 from litellm.proxy._types import *
 from litellm.proxy.auth.route_checks import RouteChecks
@@ -1369,24 +1372,24 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str:
     return f"https://{vertex_location}-aiplatform.googleapis.com/"
 
 
-def add_incoming_headers(request: Request, auth_header: str) -> dict:
+def get_vertex_ai_allowed_incoming_headers(request: Request) -> dict:
     """
-    Build headers from incoming request, preserving headers like anthropic-beta,
-    while removing headers that should not be forwarded and adding authorization.
+    Extract only the allowed headers from incoming request for Vertex AI pass-through.
+
+    Uses an allowlist approach for security - only forwards headers we explicitly trust.
+    This prevents accidentally forwarding sensitive headers like the LiteLLM auth token.
 
     Args:
         request: The FastAPI request object
-        auth_header: The authorization token to add
 
     Returns:
-        dict: Headers dictionary with authorization added
+        dict: Headers dictionary with only allowed headers
     """
-    headers = dict(request.headers) or {}
-    # Remove headers that should not be forwarded
-    headers.pop("content-length", None)
-    headers.pop("host", None)
-    # Add/override the Authorization header
-    headers["Authorization"] = f"Bearer {auth_header}"
+    incoming_headers = dict(request.headers) or {}
+    headers = {}
+    for header_name in ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS:
+        if header_name in incoming_headers:
+            headers[header_name] = incoming_headers[header_name]
     return headers
 
 
@@ -1533,12 +1536,9 @@ async def _prepare_vertex_auth_headers(
             api_base="",
         )
 
-        # Start with incoming request headers to preserve headers like anthropic-beta
-        headers = dict(request.headers) or {}
-        # Remove headers that should not be forwarded
-        headers.pop("content-length", None)
-        headers.pop("host", None)
-        # Add/override the Authorization header
+        # Use allowlist approach - only forward specific safe headers
+        headers = get_vertex_ai_allowed_incoming_headers(request)
+        # Add the Authorization header with vendor credentials
         headers["Authorization"] = f"Bearer {auth_header}"
 
         if base_target_url is not None:

diff --git a/proxy_config.yaml b/proxy_config.yaml
@@ -0,0 +1,7 @@
+model_list:
+  - model_name: "*"
+    litellm_params:
+      model: "*"
+
+general_settings:
+  master_key: sk-1234
diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py
@@ -294,23 +294,156 @@ async def test_vertex_passthrough_forwards_anthropic_beta_header():
             get_vertex_pass_through_handler=mock_handler,
         )
 
-        # Verify that the anthropic-beta header is preserved
+        # Verify that allowlisted headers are preserved
         assert "anthropic-beta" in headers
         assert headers["anthropic-beta"] == "context-1m-2025-08-07"
-
-        # Verify that other headers are preserved
         assert "content-type" in headers
         assert headers["content-type"] == "application/json"
-        assert "user-agent" in headers
 
-        # Verify that the Authorization header was updated
-        assert "authorization" in headers
-        assert headers["authorization"] == "Bearer new-access-token"
+        # Verify that the Authorization header is set with vendor credentials
+        assert "Authorization" in headers
+        assert headers["Authorization"] == "Bearer new-access-token"
 
-        # Verify that content-length and host headers were removed
-        assert "content-length" not in headers
-        assert "host" not in headers
+        # Verify that non-allowlisted headers are NOT forwarded (security)
+        # Only anthropic-beta, content-type, and Authorization should be present
+        assert "authorization" not in headers  # lowercase auth token not forwarded
+        assert "user-agent" not in headers     # not in allowlist
+        assert "content-length" not in headers  # not in allowlist
+        assert "host" not in headers            # not in allowlist
 
         # Verify that headers_passed_through is False (since we have credentials)
         assert headers_passed_through is False
 
+
+@pytest.mark.asyncio
+async def test_vertex_passthrough_does_not_forward_litellm_auth_token():
+    """
+    Test that the LiteLLM authorization header is NOT forwarded to Vertex AI.
+
+    This test validates the fix for the issue where both the LiteLLM auth token
+    (lowercase 'authorization') and the Vertex AI token (uppercase 'Authorization')
+    were being sent, causing 401 errors on the vendor side.
+
+    The incoming request has:
+      - authorization: Bearer <litellm_token>  (should NOT be forwarded)
+
+    The outgoing request should only have:
+      - Authorization: Bearer <vertex_token>  (vendor credentials)
+    """
+    from starlette.datastructures import Headers
+
+    from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+        _prepare_vertex_auth_headers,
+    )
+
+    # Create a mock request with ONLY the litellm auth token (no other headers)
+    mock_request = MagicMock()
+    mock_request.headers = Headers({
+        "authorization": "Bearer sk-litellm-secret-key",  # LiteLLM token - should NOT be forwarded
+        "Authorization": "Bearer sk-litellm-secret-key-uppercase",  # Also try uppercase
+    })
+
+    # Create mock vertex credentials
+    mock_vertex_credentials = MagicMock()
+    mock_vertex_credentials.vertex_project = "test-project"
+    mock_vertex_credentials.vertex_location = "us-central1"
+    mock_vertex_credentials.vertex_credentials = "test-credentials"
+
+    # Create mock handler
+    mock_handler = MagicMock()
+    mock_handler.update_base_target_url_with_credential_location.return_value = (
+        "https://us-central1-aiplatform.googleapis.com"
+    )
+
+    with patch.object(
+        VertexBase,
+        "_ensure_access_token_async",
+        new_callable=AsyncMock,
+        return_value=("test-auth-header", "test-project"),
+    ), patch.object(
+        VertexBase,
+        "_get_token_and_url",
+        return_value=("vertex-access-token", None),
+    ):
+
+        (
+            headers,
+            _base_target_url,
+            _headers_passed_through,
+            _vertex_project,
+            _vertex_location,
+        ) = await _prepare_vertex_auth_headers(
+            request=mock_request,
+            vertex_credentials=mock_vertex_credentials,
+            router_credentials=None,
+            vertex_project="test-project",
+            vertex_location="us-central1",
+            base_target_url="https://us-central1-aiplatform.googleapis.com",
+            get_vertex_pass_through_handler=mock_handler,
+        )
+
+        # The ONLY Authorization header should be the Vertex token
+        assert headers["Authorization"] == "Bearer vertex-access-token"
+
+        # The LiteLLM token should NOT be present (neither lowercase nor as a duplicate)
+        assert "authorization" not in headers
+        assert headers.get("Authorization") != "Bearer sk-litellm-secret-key"
+        assert headers.get("Authorization") != "Bearer sk-litellm-secret-key-uppercase"
+
+        # Verify we only have the expected headers (Authorization + any allowlisted ones present)
+        # Since the request only had auth headers, only Authorization should be in output
+        assert set(headers.keys()) == {"Authorization"}
+
+
+def test_forward_headers_from_request_x_pass_prefix():
+    """
+    Test that headers with 'x-pass-' prefix are forwarded with the prefix stripped.
+
+    This allows users to force-forward arbitrary headers to the vendor API:
+    - 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
+    - 'x-pass-custom-header: value' becomes 'custom-header: value'
+
+    This is tested on BasePassthroughUtils.forward_headers_from_request which is used
+    by all pass-through endpoints (not just Vertex AI).
+    """
+    from litellm.passthrough.utils import BasePassthroughUtils
+
+    # Simulate incoming request headers
+    request_headers = {
+        "x-pass-anthropic-beta": "context-1m-2025-08-07",
+        "x-pass-custom-header": "custom-value",
+        "x-pass-another-header": "another-value",
+        "authorization": "Bearer sk-litellm-key",
+        "x-litellm-api-key": "sk-1234",
+        "content-type": "application/json",
+    }
+
+    # Start with empty headers dict (simulating custom headers from endpoint config)
+    headers = {}
+
+    # Call the method with forward_headers=False (default behavior)
+    # x-pass- headers should still be forwarded
+    result = BasePassthroughUtils.forward_headers_from_request(
+        request_headers=request_headers,
+        headers=headers,
+        forward_headers=False,
+    )
+
+    # Verify x-pass- prefixed headers are forwarded with prefix stripped
+    assert "anthropic-beta" in result
+    assert result["anthropic-beta"] == "context-1m-2025-08-07"
+    assert "custom-header" in result
+    assert result["custom-header"] == "custom-value"
+    assert "another-header" in result
+    assert result["another-header"] == "another-value"
+
+    # Verify other headers are NOT forwarded (since forward_headers=False)
+    assert "authorization" not in result
+    assert "x-litellm-api-key" not in result
+    assert "content-type" not in result
+
+    # Verify original x-pass- prefixed headers are NOT in output (only stripped versions)
+    assert "x-pass-anthropic-beta" not in result
+    assert "x-pass-custom-header" not in result
+