diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py
index c4d0d2f8f1c..7a71af1da5c 100644
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm/proxy/auth/auth_utils.py
@@ -616,6 +616,14 @@ def get_model_from_request(
         if match:
             model = match.group(1)
 
+    # If still not found, extract from Vertex AI passthrough route
+    # Pattern: /vertex_ai/.../models/{model_id}:*
+    # Example: /vertex_ai/v1/.../models/gemini-1.5-pro:generateContent
+    if model is None and "/vertex" in route.lower():
+        vertex_match = re.search(r"/models/([^/:]+)", route)
+        if vertex_match:
+            model = vertex_match.group(1)
+
     return model
 
 
diff --git a/tests/local_testing/test_auth_utils.py b/tests/local_testing/test_auth_utils.py
index 11261592c32..72f799a6cf0 100644
--- a/tests/local_testing/test_auth_utils.py
+++ b/tests/local_testing/test_auth_utils.py
@@ -311,3 +311,56 @@ def test_get_internal_user_header_from_mapping_no_internal_returns_none():
     single_mapping = {"header_name": "X-Only-Customer", "litellm_user_role": "customer"}
     result = LiteLLMProxyRequestSetup.get_internal_user_header_from_mapping(single_mapping)
     assert result is None
+
+
+@pytest.mark.parametrize(
+    "request_data, route, expected_model",
+    [
+        # Vertex AI passthrough URL patterns
+        (
+            {},
+            "/vertex_ai/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent",
+            "gemini-1.5-pro"
+        ),
+        (
+            {},
+            "/vertex_ai/v1beta1/projects/my-project/locations/us-central1/publishers/google/models/gemini-1.0-pro:streamGenerateContent",
+            "gemini-1.0-pro"
+        ),
+        (
+            {},
+            "/vertex_ai/v1/projects/my-project/locations/asia-southeast1/publishers/google/models/gemini-2.0-flash:generateContent",
+            "gemini-2.0-flash"
+        ),
+        # Model without method suffix (no colon) - should still extract
+        (
+            {},
+            "/vertex_ai/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-pro",
+            "gemini-pro"  # Should match even without colon
+        ),
+        # Request body model takes precedence over URL
+        (
+            {"model": "gpt-4o"},
+            "/vertex_ai/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent",
+            "gpt-4o"
+        ),
+        # Non-vertex route should not extract from vertex pattern
+        (
+            {},
+            "/openai/v1/chat/completions",
+            None
+        ),
+        # Azure deployment pattern should still work
+        (
+            {},
+            "/openai/deployments/my-deployment/chat/completions",
+            "my-deployment"
+        ),
+    ],
+)
+def test_get_model_from_request_vertex_ai_passthrough(request_data, route, expected_model):
+    """Test that get_model_from_request correctly extracts Vertex AI model from URL"""
+    from litellm.proxy.auth.auth_utils import get_model_from_request
+
+    model = get_model_from_request(request_data, route)
+    assert model == expected_model
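For reference, a minimal standalone sketch (not part of the patch) of the extraction behavior the new branch relies on; the helper name and sample routes below are illustrative assumptions, not code from the repository:

    # Illustrative sketch of the Vertex AI model extraction added above.
    import re

    def extract_vertex_model(route: str):
        # Only attempt extraction on Vertex AI passthrough routes
        if "/vertex" not in route.lower():
            return None
        # Capture the path segment after "/models/", stopping at "/" or ":"
        match = re.search(r"/models/([^/:]+)", route)
        return match.group(1) if match else None

    extract_vertex_model(
        "/vertex_ai/v1/projects/p/locations/us-central1/publishers/google/models/gemini-1.5-pro:generateContent"
    )  # -> "gemini-1.5-pro"
    extract_vertex_model("/openai/v1/chat/completions")  # -> None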