From 998aa1bff4806c733d87a9d28db024d5f5d5dfcd Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 17:51:57 -0800 Subject: [PATCH 1/7] fix ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS --- litellm/constants.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/litellm/constants.py b/litellm/constants.py index e142c7d6304..13a26c6d0ea 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -1122,6 +1122,15 @@ "generateQuery/", "optimize-prompt/", ] + + +# Headers that are safe to forward from incoming requests to Vertex AI +# Using an allowlist approach for security - only forward headers we explicitly trust +ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS = { + "anthropic-beta", # Required for Anthropic features like extended context windows + "content-type", # Required for request body parsing +} + BASE_MCP_ROUTE = "/mcp" BATCH_STATUS_POLL_INTERVAL_SECONDS = int( From 91c500a48b3660bfe808f1454f8ec79811abc52e Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 17:54:05 -0800 Subject: [PATCH 2/7] test_vertex_passthrough_forwards_anthropic_beta_header --- .../test_vertex_passthrough_load_balancing.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py index a6701451f20..cab77ce94b2 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py @@ -294,22 +294,22 @@ async def test_vertex_passthrough_forwards_anthropic_beta_header(): get_vertex_pass_through_handler=mock_handler, ) - # Verify that the anthropic-beta header is preserved + # Verify that allowlisted headers are preserved assert "anthropic-beta" in headers assert headers["anthropic-beta"] == "context-1m-2025-08-07" - - # Verify 
that other headers are preserved assert "content-type" in headers assert headers["content-type"] == "application/json" - assert "user-agent" in headers - # Verify that the Authorization header was updated - assert "authorization" in headers - assert headers["authorization"] == "Bearer new-access-token" + # Verify that the Authorization header is set with vendor credentials + assert "Authorization" in headers + assert headers["Authorization"] == "Bearer new-access-token" - # Verify that content-length and host headers were removed - assert "content-length" not in headers - assert "host" not in headers + # Verify that non-allowlisted headers are NOT forwarded (security) + # Only anthropic-beta, content-type, and Authorization should be present + assert "authorization" not in headers # lowercase auth token not forwarded + assert "user-agent" not in headers # not in allowlist + assert "content-length" not in headers # not in allowlist + assert "host" not in headers # not in allowlist # Verify that headers_passed_through is False (since we have credentials) assert headers_passed_through is False From 82bfea9d66252e06ab75da4d496bf233a3089c6b Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 17:54:30 -0800 Subject: [PATCH 3/7] fix test_vertex_passthrough_forwards_anthropic_beta_header --- .../llm_passthrough_endpoints.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 0a94fc95342..b079e161519 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -17,7 +17,10 @@ import litellm from litellm._logging import verbose_proxy_logger -from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES +from litellm.constants import ( + ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS, + 
BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES, +) from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.proxy._types import * from litellm.proxy.auth.route_checks import RouteChecks @@ -1369,24 +1372,24 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str: return f"https://{vertex_location}-aiplatform.googleapis.com/" -def add_incoming_headers(request: Request, auth_header: str) -> dict: +def get_vertex_ai_allowed_incoming_headers(request: Request) -> dict: """ - Build headers from incoming request, preserving headers like anthropic-beta, - while removing headers that should not be forwarded and adding authorization. + Extract only the allowed headers from incoming request for Vertex AI pass-through. + + Uses an allowlist approach for security - only forwards headers we explicitly trust. + This prevents accidentally forwarding sensitive headers like the LiteLLM auth token. Args: request: The FastAPI request object - auth_header: The authorization token to add Returns: - dict: Headers dictionary with authorization added + dict: Headers dictionary with only allowed headers """ - headers = dict(request.headers) or {} - # Remove headers that should not be forwarded - headers.pop("content-length", None) - headers.pop("host", None) - # Add/override the Authorization header - headers["Authorization"] = f"Bearer {auth_header}" + incoming_headers = dict(request.headers) or {} + headers = {} + for header_name in ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS: + if header_name in incoming_headers: + headers[header_name] = incoming_headers[header_name] return headers @@ -1533,12 +1536,9 @@ async def _prepare_vertex_auth_headers( api_base="", ) - # Start with incoming request headers to preserve headers like anthropic-beta - headers = dict(request.headers) or {} - # Remove headers that should not be forwarded - headers.pop("content-length", None) - headers.pop("host", None) - # Add/override the Authorization header + # Use allowlist approach - only forward 
specific safe headers + headers = get_vertex_ai_allowed_incoming_headers(request) + # Add the Authorization header with vendor credentials headers["Authorization"] = f"Bearer {auth_header}" if base_target_url is not None: From b125f944eb9ba1047288a03da34ff9fc64fe5856 Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 18:00:16 -0800 Subject: [PATCH 4/7] test_vertex_passthrough_does_not_forward_litellm_auth_token --- .../test_vertex_passthrough_load_balancing.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py index cab77ce94b2..ac010a890e3 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py @@ -314,3 +314,84 @@ async def test_vertex_passthrough_forwards_anthropic_beta_header(): # Verify that headers_passed_through is False (since we have credentials) assert headers_passed_through is False + +@pytest.mark.asyncio +async def test_vertex_passthrough_does_not_forward_litellm_auth_token(): + """ + Test that the LiteLLM authorization header is NOT forwarded to Vertex AI. + + This test validates the fix for the issue where both the LiteLLM auth token + (lowercase 'authorization') and the Vertex AI token (uppercase 'Authorization') + were being sent, causing 401 errors on the vendor side. 
+ + The incoming request has: + - authorization: Bearer <litellm-api-key> (should NOT be forwarded) + + The outgoing request should only have: + - Authorization: Bearer <vertex-access-token> (vendor credentials) + """ + from starlette.datastructures import Headers + + from litellm.llms.vertex_ai.vertex_llm_base import VertexBase + from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + _prepare_vertex_auth_headers, + ) + + # Create a mock request with ONLY the litellm auth token (no other headers) + mock_request = MagicMock() + mock_request.headers = Headers({ + "authorization": "Bearer sk-litellm-secret-key", # LiteLLM token - should NOT be forwarded + "Authorization": "Bearer sk-litellm-secret-key-uppercase", # Also try uppercase + }) + + # Create mock vertex credentials + mock_vertex_credentials = MagicMock() + mock_vertex_credentials.vertex_project = "test-project" + mock_vertex_credentials.vertex_location = "us-central1" + mock_vertex_credentials.vertex_credentials = "test-credentials" + + # Create mock handler + mock_handler = MagicMock() + mock_handler.update_base_target_url_with_credential_location.return_value = ( + "https://us-central1-aiplatform.googleapis.com" + ) + + with patch.object( + VertexBase, + "_ensure_access_token_async", + new_callable=AsyncMock, + return_value=("test-auth-header", "test-project"), + ), patch.object( + VertexBase, + "_get_token_and_url", + return_value=("vertex-access-token", None), + ): + + ( + headers, + _base_target_url, + _headers_passed_through, + _vertex_project, + _vertex_location, + ) = await _prepare_vertex_auth_headers( + request=mock_request, + vertex_credentials=mock_vertex_credentials, + router_credentials=None, + vertex_project="test-project", + vertex_location="us-central1", + base_target_url="https://us-central1-aiplatform.googleapis.com", + get_vertex_pass_through_handler=mock_handler, + ) + + # The ONLY Authorization header should be the Vertex token + assert headers["Authorization"] == "Bearer vertex-access-token" + + # The 
LiteLLM token should NOT be present (neither lowercase nor as a duplicate) + assert "authorization" not in headers + assert headers.get("Authorization") != "Bearer sk-litellm-secret-key" + assert headers.get("Authorization") != "Bearer sk-litellm-secret-key-uppercase" + + # Verify we only have the expected headers (Authorization + any allowlisted ones present) + # Since the request only had auth headers, only Authorization should be in output + assert set(headers.keys()) == {"Authorization"} + From 4bafb26c12f090e2190c9246e4f18a108caa277a Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 19:03:26 -0800 Subject: [PATCH 5/7] fix utils --- litellm/constants.py | 5 +++++ litellm/passthrough/utils.py | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/litellm/constants.py b/litellm/constants.py index 13a26c6d0ea..5da0cf1489f 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -1131,6 +1131,11 @@ "content-type", # Required for request body parsing } +# Prefix for headers that should be forwarded to the provider with the prefix stripped +# e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value' +# Works for all LLM pass-through endpoints (Vertex AI, Anthropic, Bedrock, etc.) +PASS_THROUGH_HEADER_PREFIX = "x-pass-" + BASE_MCP_ROUTE = "/mcp" BATCH_STATUS_POLL_INTERVAL_SECONDS = int( diff --git a/litellm/passthrough/utils.py b/litellm/passthrough/utils.py index 4bf66d49881..fbbf9cd2581 100644 --- a/litellm/passthrough/utils.py +++ b/litellm/passthrough/utils.py @@ -3,6 +3,8 @@ import httpx +from litellm.constants import PASS_THROUGH_HEADER_PREFIX + class BasePassthroughUtils: @staticmethod @@ -27,7 +29,11 @@ def forward_headers_from_request( forward_headers: Optional[bool] = False, ): """ - Helper to forward headers from original request + Helper to forward headers from original request. 
+ + Also handles 'x-pass-' prefixed headers which are always forwarded + with the prefix stripped, regardless of forward_headers setting. + e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value' """ if forward_headers is True: # Header We Should NOT forward @@ -36,6 +42,14 @@ def forward_headers_from_request( # Combine request headers with custom headers headers = {**request_headers, **headers} + + # Always process x-pass- prefixed headers (strip prefix and forward) + for header_name, header_value in request_headers.items(): + if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX): + # Strip the 'x-pass-' prefix to get the actual header name + actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :] + headers[actual_header_name] = header_value + return headers class CommonUtils: From 3248f267acbadf1b45045032c66fccb18adbc1c0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 19:04:36 -0800 Subject: [PATCH 6/7] Using Anthropic Beta Features on Vertex AI --- .../my-website/docs/pass_through/vertex_ai.md | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/docs/my-website/docs/pass_through/vertex_ai.md b/docs/my-website/docs/pass_through/vertex_ai.md index adbd06187d5..00df6def704 100644 --- a/docs/my-website/docs/pass_through/vertex_ai.md +++ b/docs/my-website/docs/pass_through/vertex_ai.md @@ -461,3 +461,48 @@ generateContent(); + +### Using Anthropic Beta Features on Vertex AI + +When using Anthropic models via Vertex AI passthrough (e.g., Claude on Vertex), you can enable Anthropic beta features like extended context windows. + +The `anthropic-beta` header is automatically forwarded to Vertex AI when calling Anthropic models. 
+ +```bash +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -H "anthropic-beta: context-1m-2025-08-07" \ + -d '{ + "anthropic_version": "vertex-2023-10-16", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 500 + }' +``` + +### Forwarding Custom Headers with `x-pass-` Prefix + +You can forward any custom header to the provider by prefixing it with `x-pass-`. The prefix is stripped before the header is sent to the provider. + +For example: +- `x-pass-anthropic-beta: value` becomes `anthropic-beta: value` +- `x-pass-custom-header: value` becomes `custom-header: value` + +This is useful when you need to send provider-specific headers that aren't in the default allowlist. + +```bash +curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -H "x-pass-anthropic-beta: context-1m-2025-08-07" \ + -H "x-pass-custom-feature: enabled" \ + -d '{ + "anthropic_version": "vertex-2023-10-16", + "messages": [{"role": "user", "content": "Hello"}], + "max_tokens": 500 + }' +``` + +:::info +The `x-pass-` prefix works for all LLM pass-through endpoints, not just Vertex AI. 
+::: From 319e1b8645032606ca8b6255b73123224f6c828b Mon Sep 17 00:00:00 2001 From: Ishaan Jaffer Date: Wed, 21 Jan 2026 19:05:17 -0800 Subject: [PATCH 7/7] test_forward_headers_from_request_x_pass_prefix --- proxy_config.yaml | 7 +++ .../test_vertex_passthrough_load_balancing.py | 52 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 proxy_config.yaml diff --git a/proxy_config.yaml b/proxy_config.yaml new file mode 100644 index 00000000000..57397181cda --- /dev/null +++ b/proxy_config.yaml @@ -0,0 +1,7 @@ +model_list: + - model_name: "*" + litellm_params: + model: "*" + +general_settings: + master_key: sk-1234 diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py index ac010a890e3..28b3ba0a179 100644 --- a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py @@ -395,3 +395,55 @@ async def test_vertex_passthrough_does_not_forward_litellm_auth_token(): # Since the request only had auth headers, only Authorization should be in output assert set(headers.keys()) == {"Authorization"} + +def test_forward_headers_from_request_x_pass_prefix(): + """ + Test that headers with 'x-pass-' prefix are forwarded with the prefix stripped. + + This allows users to force-forward arbitrary headers to the vendor API: + - 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value' + - 'x-pass-custom-header: value' becomes 'custom-header: value' + + This is tested on BasePassthroughUtils.forward_headers_from_request which is used + by all pass-through endpoints (not just Vertex AI). 
+ """ + from litellm.passthrough.utils import BasePassthroughUtils + + # Simulate incoming request headers + request_headers = { + "x-pass-anthropic-beta": "context-1m-2025-08-07", + "x-pass-custom-header": "custom-value", + "x-pass-another-header": "another-value", + "authorization": "Bearer sk-litellm-key", + "x-litellm-api-key": "sk-1234", + "content-type": "application/json", + } + + # Start with empty headers dict (simulating custom headers from endpoint config) + headers = {} + + # Call the method with forward_headers=False (default behavior) + # x-pass- headers should still be forwarded + result = BasePassthroughUtils.forward_headers_from_request( + request_headers=request_headers, + headers=headers, + forward_headers=False, + ) + + # Verify x-pass- prefixed headers are forwarded with prefix stripped + assert "anthropic-beta" in result + assert result["anthropic-beta"] == "context-1m-2025-08-07" + assert "custom-header" in result + assert result["custom-header"] == "custom-value" + assert "another-header" in result + assert result["another-header"] == "another-value" + + # Verify other headers are NOT forwarded (since forward_headers=False) + assert "authorization" not in result + assert "x-litellm-api-key" not in result + assert "content-type" not in result + + # Verify original x-pass- prefixed headers are NOT in output (only stripped versions) + assert "x-pass-anthropic-beta" not in result + assert "x-pass-custom-header" not in result +