Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1369,6 +1369,27 @@ def get_vertex_base_url(vertex_location: Optional[str]) -> str:
return f"https://{vertex_location}-aiplatform.googleapis.com/"


def add_incoming_headers(request: Request, auth_header: str) -> dict:
"""
Build headers from incoming request, preserving headers like anthropic-beta,
while removing headers that should not be forwarded and adding authorization.

Args:
request: The FastAPI request object
auth_header: The authorization token to add

Returns:
dict: Headers dictionary with authorization added
"""
headers = dict(request.headers) or {}
# Remove headers that should not be forwarded
headers.pop("content-length", None)
headers.pop("host", None)
# Add/override the Authorization header
headers["Authorization"] = f"Bearer {auth_header}"
return headers


def get_vertex_pass_through_handler(
call_type: Literal["discovery", "aiplatform"],
) -> BaseVertexAIPassThroughHandler:
Expand Down Expand Up @@ -1512,9 +1533,13 @@ async def _prepare_vertex_auth_headers(
api_base="",
)

headers = {
"Authorization": f"Bearer {auth_header}",
}
# Start with incoming request headers to preserve headers like anthropic-beta
headers = dict(request.headers) or {}
# Remove headers that should not be forwarded
headers.pop("content-length", None)
headers.pop("host", None)
# Add/override the Authorization header
headers["Authorization"] = f"Bearer {auth_header}"

if base_target_url is not None:
base_target_url = get_vertex_pass_through_handler.update_base_target_url_with_credential_location(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@

from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from unittest.mock import MagicMock, AsyncMock, patch
from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import _base_vertex_proxy_route

from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
_base_vertex_proxy_route,
)
from litellm.types.router import DeploymentTypedDict


@pytest.mark.asyncio
async def test_vertex_passthrough_load_balancing():
"""
Expand Down Expand Up @@ -220,3 +225,92 @@ async def test_async_get_available_deployment_for_pass_through():
assert deployment is not None
assert deployment["litellm_params"]["use_in_pass_through"] is True


@pytest.mark.asyncio
async def test_vertex_passthrough_forwards_anthropic_beta_header():
"""
Test that _prepare_vertex_auth_headers forwards the anthropic-beta header
(and other important headers) from the incoming request when credentials are available.

This test validates the fix for the issue where the 1M context window header
(anthropic-beta: context-1m-2025-08-07) was being dropped when forwarding
requests to Vertex AI.
"""
from starlette.datastructures import Headers

from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
_prepare_vertex_auth_headers,
)

# Create a mock request with anthropic-beta header
mock_request = MagicMock()
mock_request.headers = Headers({
"authorization": "Bearer old-token",
"anthropic-beta": "context-1m-2025-08-07",
"content-type": "application/json",
"user-agent": "test-client",
"content-length": "1234", # Should be removed
"host": "localhost:4000", # Should be removed
})

# Create mock vertex credentials
mock_vertex_credentials = MagicMock()
mock_vertex_credentials.vertex_project = "test-project"
mock_vertex_credentials.vertex_location = "us-central1"
mock_vertex_credentials.vertex_credentials = "test-credentials"

# Create mock handler
mock_handler = MagicMock()
mock_handler.update_base_target_url_with_credential_location.return_value = (
"https://us-central1-aiplatform.googleapis.com"
)

with patch.object(
VertexBase,
"_ensure_access_token_async",
new_callable=AsyncMock,
return_value=("test-auth-header", "test-project"),
) as mock_ensure_token, patch.object(
VertexBase,
"_get_token_and_url",
return_value=("new-access-token", None),
) as mock_get_token:

# Call the function
(
headers,
base_target_url,
headers_passed_through,
vertex_project,
vertex_location,
) = await _prepare_vertex_auth_headers(
request=mock_request,
vertex_credentials=mock_vertex_credentials,
router_credentials=None,
vertex_project="test-project",
vertex_location="us-central1",
base_target_url="https://us-central1-aiplatform.googleapis.com",
get_vertex_pass_through_handler=mock_handler,
)

# Verify that the anthropic-beta header is preserved
assert "anthropic-beta" in headers
assert headers["anthropic-beta"] == "context-1m-2025-08-07"

# Verify that other headers are preserved
assert "content-type" in headers
assert headers["content-type"] == "application/json"
assert "user-agent" in headers

# Verify that the Authorization header was updated
assert "authorization" in headers
assert headers["authorization"] == "Bearer new-access-token"

# Verify that content-length and host headers were removed
assert "content-length" not in headers
assert "host" not in headers

# Verify that headers_passed_through is False (since we have credentials)
assert headers_passed_through is False

Loading