Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions docs/my-website/docs/pass_through/vertex_ai.md
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,48 @@ generateContent();

</TabItem>
</Tabs>

### Using Anthropic Beta Features on Vertex AI

When using Anthropic models via Vertex AI passthrough (e.g., Claude on Vertex), you can enable Anthropic beta features like extended context windows.

The `anthropic-beta` header is automatically forwarded to Vertex AI when calling Anthropic models.

```bash
curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "anthropic-beta: context-1m-2025-08-07" \
-d '{
"anthropic_version": "vertex-2023-10-16",
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 500
}'
```

### Forwarding Custom Headers with `x-pass-` Prefix

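You can forward any custom header to the provider by prefixing it with `x-pass-`. The prefix is matched case-insensitively and stripped before the header is sent to the provider. Prefixed headers are always forwarded, regardless of the endpoint's `forward_headers` setting.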

For example:
- `x-pass-anthropic-beta: value` becomes `anthropic-beta: value`
- `x-pass-custom-header: value` becomes `custom-header: value`

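This is useful when you need to send provider-specific headers that aren't in the default allowlist. For Vertex AI pass-through, only `anthropic-beta` and `content-type` are forwarded from the incoming request by default.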

```bash
curl http://localhost:4000/vertex_ai/v1/projects/${PROJECT_ID}/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet:rawPredict \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-1234" \
-H "x-pass-anthropic-beta: context-1m-2025-08-07" \
-H "x-pass-custom-feature: enabled" \
-d '{
"anthropic_version": "vertex-2023-10-16",
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 500
}'
```

:::info
The `x-pass-` prefix works for all LLM pass-through endpoints, not just Vertex AI.
:::
14 changes: 14 additions & 0 deletions litellm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,20 @@
"generateQuery/",
"optimize-prompt/",
]


# Headers that are safe to forward from incoming requests to Vertex AI
# Using an allowlist approach for security - only forward headers we explicitly trust
# NOTE(review): the Vertex AI pass-through helper looks these names up by exact key
# in dict(request.headers), so every entry should stay lowercase (Starlette is
# expected to lowercase incoming header names - confirm before adding mixed-case entries).
ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS = {
"anthropic-beta", # Required for Anthropic features like extended context windows
"content-type", # Required for request body parsing
}

# Prefix for headers that should be forwarded to the provider with the prefix stripped
# e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
# Works for all LLM pass-through endpoints (Vertex AI, Anthropic, Bedrock, etc.)
# The incoming header name is lowercased before the startswith() comparison, so this
# value must itself be lowercase or no header will ever match.
PASS_THROUGH_HEADER_PREFIX = "x-pass-"

BASE_MCP_ROUTE = "/mcp"

BATCH_STATUS_POLL_INTERVAL_SECONDS = int(
Expand Down
16 changes: 15 additions & 1 deletion litellm/passthrough/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import httpx

from litellm.constants import PASS_THROUGH_HEADER_PREFIX


class BasePassthroughUtils:
@staticmethod
Expand All @@ -27,7 +29,11 @@ def forward_headers_from_request(
forward_headers: Optional[bool] = False,
):
"""
Helper to forward headers from original request
Helper to forward headers from original request.

Also handles 'x-pass-' prefixed headers which are always forwarded
with the prefix stripped, regardless of forward_headers setting.
e.g., 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
"""
if forward_headers is True:
# Header We Should NOT forward
Expand All @@ -36,6 +42,14 @@ def forward_headers_from_request(

# Combine request headers with custom headers
headers = {**request_headers, **headers}

# Always process x-pass- prefixed headers (strip prefix and forward)
for header_name, header_value in request_headers.items():
if header_name.lower().startswith(PASS_THROUGH_HEADER_PREFIX):
# Strip the 'x-pass-' prefix to get the actual header name
actual_header_name = header_name[len(PASS_THROUGH_HEADER_PREFIX) :]
headers[actual_header_name] = header_value

return headers

class CommonUtils:
Expand Down
36 changes: 18 additions & 18 deletions litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES
from litellm.constants import (
ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS,
BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES,
)
from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
from litellm.proxy._types import *
from litellm.proxy.auth.route_checks import RouteChecks
Expand Down Expand Up @@ -1369,24 +1372,24 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str:
return f"https://{vertex_location}-aiplatform.googleapis.com/"


def add_incoming_headers(request: Request, auth_header: str) -> dict:
def get_vertex_ai_allowed_incoming_headers(request: Request) -> dict:
"""
Build headers from incoming request, preserving headers like anthropic-beta,
while removing headers that should not be forwarded and adding authorization.
Extract only the allowed headers from incoming request for Vertex AI pass-through.

Uses an allowlist approach for security - only forwards headers we explicitly trust.
This prevents accidentally forwarding sensitive headers like the LiteLLM auth token.

Args:
request: The FastAPI request object
auth_header: The authorization token to add

Returns:
dict: Headers dictionary with authorization added
dict: Headers dictionary with only allowed headers
"""
headers = dict(request.headers) or {}
# Remove headers that should not be forwarded
headers.pop("content-length", None)
headers.pop("host", None)
# Add/override the Authorization header
headers["Authorization"] = f"Bearer {auth_header}"
incoming_headers = dict(request.headers) or {}
headers = {}
for header_name in ALLOWED_VERTEX_AI_PASSTHROUGH_HEADERS:
if header_name in incoming_headers:
headers[header_name] = incoming_headers[header_name]
return headers


Expand Down Expand Up @@ -1533,12 +1536,9 @@ async def _prepare_vertex_auth_headers(
api_base="",
)

# Start with incoming request headers to preserve headers like anthropic-beta
headers = dict(request.headers) or {}
# Remove headers that should not be forwarded
headers.pop("content-length", None)
headers.pop("host", None)
# Add/override the Authorization header
# Use allowlist approach - only forward specific safe headers
headers = get_vertex_ai_allowed_incoming_headers(request)
# Add the Authorization header with vendor credentials
headers["Authorization"] = f"Bearer {auth_header}"

if base_target_url is not None:
Expand Down
7 changes: 7 additions & 0 deletions proxy_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Minimal LiteLLM proxy config: a single wildcard entry in model_list.
model_list:
  - model_name: "*"
    litellm_params:
      model: "*"

general_settings:
  # NOTE(review): "sk-1234" looks like a local-testing placeholder (it matches the
  # bearer token used in the docs' curl examples) - confirm this file is meant to
  # be committed and never reused as a real deployment key.
  master_key: sk-1234
Original file line number Diff line number Diff line change
Expand Up @@ -294,23 +294,156 @@ async def test_vertex_passthrough_forwards_anthropic_beta_header():
get_vertex_pass_through_handler=mock_handler,
)

# Verify that the anthropic-beta header is preserved
# Verify that allowlisted headers are preserved
assert "anthropic-beta" in headers
assert headers["anthropic-beta"] == "context-1m-2025-08-07"

# Verify that other headers are preserved
assert "content-type" in headers
assert headers["content-type"] == "application/json"
assert "user-agent" in headers

# Verify that the Authorization header was updated
assert "authorization" in headers
assert headers["authorization"] == "Bearer new-access-token"
# Verify that the Authorization header is set with vendor credentials
assert "Authorization" in headers
assert headers["Authorization"] == "Bearer new-access-token"

# Verify that content-length and host headers were removed
assert "content-length" not in headers
assert "host" not in headers
# Verify that non-allowlisted headers are NOT forwarded (security)
# Only anthropic-beta, content-type, and Authorization should be present
assert "authorization" not in headers # lowercase auth token not forwarded
assert "user-agent" not in headers # not in allowlist
assert "content-length" not in headers # not in allowlist
assert "host" not in headers # not in allowlist

# Verify that headers_passed_through is False (since we have credentials)
assert headers_passed_through is False


@pytest.mark.asyncio
async def test_vertex_passthrough_does_not_forward_litellm_auth_token():
    """
    The caller's LiteLLM credential must never reach Vertex AI.

    Regression test for the case where both the LiteLLM auth token
    (lowercase 'authorization') and the Vertex AI token (uppercase
    'Authorization') were sent upstream, causing 401 errors on the vendor side.

    Incoming request:
    - authorization: Bearer <litellm_token>  (must NOT be forwarded)

    Outgoing request:
    - Authorization: Bearer <vertex_token>   (vendor credentials only)
    """
    from starlette.datastructures import Headers

    from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
        _prepare_vertex_auth_headers,
    )

    target_url = "https://us-central1-aiplatform.googleapis.com"

    # Incoming request carries nothing but the LiteLLM token, in both casings.
    incoming = MagicMock()
    incoming.headers = Headers(
        {
            "authorization": "Bearer sk-litellm-secret-key",
            "Authorization": "Bearer sk-litellm-secret-key-uppercase",
        }
    )

    # Stand-in for the configured Vertex credentials.
    credentials = MagicMock(
        vertex_project="test-project",
        vertex_location="us-central1",
        vertex_credentials="test-credentials",
    )

    # Stand-in for the pass-through handler lookup.
    handler = MagicMock()
    handler.update_base_target_url_with_credential_location.return_value = target_url

    access_token_patch = patch.object(
        VertexBase,
        "_ensure_access_token_async",
        new_callable=AsyncMock,
        return_value=("test-auth-header", "test-project"),
    )
    token_and_url_patch = patch.object(
        VertexBase,
        "_get_token_and_url",
        return_value=("vertex-access-token", None),
    )

    with access_token_patch, token_and_url_patch:
        (
            outbound,
            _target,
            _passed_through,
            _project,
            _location,
        ) = await _prepare_vertex_auth_headers(
            request=incoming,
            vertex_credentials=credentials,
            router_credentials=None,
            vertex_project="test-project",
            vertex_location="us-central1",
            base_target_url=target_url,
            get_vertex_pass_through_handler=handler,
        )

    # The only Authorization value is the Vertex token.
    assert outbound["Authorization"] == "Bearer vertex-access-token"

    # Neither casing of the LiteLLM token survives, under any key.
    assert "authorization" not in outbound
    for litellm_token in (
        "Bearer sk-litellm-secret-key",
        "Bearer sk-litellm-secret-key-uppercase",
    ):
        assert outbound.get("Authorization") != litellm_token

    # The request held only auth headers, so nothing else may be forwarded.
    assert set(outbound.keys()) == {"Authorization"}


def test_forward_headers_from_request_x_pass_prefix():
    """
    Headers prefixed with 'x-pass-' are forwarded with the prefix stripped.

    This lets callers force-forward arbitrary headers to the vendor API:
    - 'x-pass-anthropic-beta: value' becomes 'anthropic-beta: value'
    - 'x-pass-custom-header: value' becomes 'custom-header: value'

    Exercised on BasePassthroughUtils.forward_headers_from_request, which is
    shared by all pass-through endpoints (not just Vertex AI).
    """
    from litellm.passthrough.utils import BasePassthroughUtils

    prefix = "x-pass-"

    # Headers the caller explicitly asks to have forwarded.
    prefixed_headers = {
        "x-pass-anthropic-beta": "context-1m-2025-08-07",
        "x-pass-custom-header": "custom-value",
        "x-pass-another-header": "another-value",
    }
    # Ordinary headers that must stay behind when forward_headers=False.
    unforwarded_headers = {
        "authorization": "Bearer sk-litellm-key",
        "x-litellm-api-key": "sk-1234",
        "content-type": "application/json",
    }
    request_headers = {**prefixed_headers, **unforwarded_headers}

    # Empty dict simulates an endpoint with no custom headers configured;
    # forward_headers=False is the default, and x-pass- must work regardless.
    result = BasePassthroughUtils.forward_headers_from_request(
        request_headers=request_headers,
        headers={},
        forward_headers=False,
    )

    # Every prefixed header arrives under its stripped name, and the prefixed
    # original is absent. All three pairs are checked (the third was
    # previously unverified).
    for prefixed_name, expected_value in prefixed_headers.items():
        stripped_name = prefixed_name[len(prefix):]
        assert stripped_name in result
        assert result[stripped_name] == expected_value
        assert prefixed_name not in result

    # Non-prefixed headers are NOT forwarded (since forward_headers=False).
    for header_name in unforwarded_headers:
        assert header_name not in result

Loading