diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py index ce3031b5141..78806733329 100644 --- a/tests/otel_tests/test_prometheus.py +++ b/tests/otel_tests/test_prometheus.py @@ -106,15 +106,19 @@ async def test_proxy_failure_metrics(): print("/metrics", metrics) # Check if the failure metric is present and correct - use pattern matching for robustness - expected_metric_pattern = 'litellm_proxy_failed_requests_metric_total{api_key_alias="None",end_user="None",exception_class="Openai.RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",route="/chat/completions",team="None",team_alias="None",user="default_user_id",user_email="None"}' + # Labels are ordered alphabetically by Prometheus: api_key_alias, client_ip, end_user, exception_class, + # exception_status, hashed_api_key, model_id, requested_model, route, team, team_alias, user, user_agent, user_email + expected_metric_pattern = 'litellm_proxy_failed_requests_metric_total{api_key_alias="None",client_ip="None",end_user="None",exception_class="Openai.RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",model_id="None",requested_model="fake-azure-endpoint",route="/chat/completions",team="None",team_alias="None",user="default_user_id",user_agent="None",user_email="None"}' - # Check if the pattern is in metrics (this metric doesn't include user_email field) + # Check if the pattern is in metrics assert any( expected_metric_pattern in line for line in metrics.split("\n") ), f"Expected failure metric pattern not found in /metrics. Pattern: {expected_metric_pattern}" - # Check total requests metric which includes user_email - total_requests_pattern = 'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",route="/chat/completions",status_code="429",team="None",team_alias="None",user="default_user_id",user_email="None"}' + # Check total requests metric + # Labels are ordered alphabetically: api_key_alias, client_ip, end_user, hashed_api_key, model_id, + # requested_model, route, status_code, team, team_alias, user, user_agent, user_email + total_requests_pattern = 'litellm_proxy_total_requests_metric_total{api_key_alias="None",client_ip="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",model_id="None",requested_model="fake-azure-endpoint",route="/chat/completions",status_code="429",team="None",team_alias="None",user="default_user_id",user_agent="None",user_email="None"}' assert any( total_requests_pattern in line for line in metrics.split("\n") diff --git a/tests/test_litellm/enterprise/enterprise_callbacks/send_emails/test_resend_email.py b/tests/test_litellm/enterprise/enterprise_callbacks/send_emails/test_resend_email.py index 1065a8ed514..87f62cf4468 100644 --- a/tests/test_litellm/enterprise/enterprise_callbacks/send_emails/test_resend_email.py +++ b/tests/test_litellm/enterprise/enterprise_callbacks/send_emails/test_resend_email.py @@ -98,7 +98,7 @@ async def test_send_email_success(mock_env_vars, mock_httpx_client): @pytest.mark.asyncio @respx.mock -async def test_send_email_missing_api_key(mock_httpx_client): +async def test_send_email_missing_api_key(): # Block all HTTP requests at network level to prevent real API calls respx.post("https://api.resend.com/emails").mock( return_value=httpx.Response(200, json={"id": "test_email_id"}) @@ -117,13 +117,18 @@ async def test_send_email_missing_api_key(mock_httpx_client): subject = "Test Subject" html_body = "

Test email body

" - # Mock the response to avoid making real HTTP requests + # Create mock HTTP client and inject it directly into the logger + # This ensures the mock is used regardless of any caching issues mock_response = mock.Mock(spec=Response) mock_response.raise_for_status.return_value = None - mock_response.status_code = 200 mock_response.json.return_value = {"id": "test_email_id"} - mock_httpx_client.post.return_value = mock_response + + mock_async_client = mock.AsyncMock() + mock_async_client.post.return_value = mock_response + + # Directly inject the mock client to bypass any caching + logger.async_httpx_client = mock_async_client # Send email await logger.send_email( @@ -131,8 +136,8 @@ async def test_send_email_missing_api_key(mock_httpx_client): ) # Verify the HTTP client was called with None as the API key - mock_httpx_client.post.assert_called_once() - call_args = mock_httpx_client.post.call_args + mock_async_client.post.assert_called_once() + call_args = mock_async_client.post.call_args assert call_args[1]["headers"] == {"Authorization": "Bearer None"} finally: # Restore the original key if it existed diff --git a/tests/test_litellm/google_genai/test_google_genai_handler.py b/tests/test_litellm/google_genai/test_google_genai_handler.py index fc120280511..17a6cba2d63 100644 --- a/tests/test_litellm/google_genai/test_google_genai_handler.py +++ b/tests/test_litellm/google_genai/test_google_genai_handler.py @@ -183,10 +183,8 @@ def test_stream_transformation_error_sync(): "translate_completion_output_params_streaming", return_value=None ): - # Mock litellm.completion at the module level where it's imported - # We need to patch it in the handler module, not in litellm itself - with patch("litellm.google_genai.adapters.handler.litellm") as mock_litellm: - mock_litellm.completion.return_value = mock_stream + # Patch litellm.completion directly to prevent real API calls + with patch("litellm.completion", return_value=mock_stream): # Call the handler with stream=True and expect a ValueError with pytest.raises(ValueError, match="Failed to transform streaming response"): GenerateContentToCompletionHandler.generate_content_handler( diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index d11fe8d921f..14ba94f47d7 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -2282,8 +2282,19 @@ def test_register_model_with_scientific_notation(): """ Test that the register_model function can handle scientific notation in the model name. """ + # Use a unique model name to avoid conflicts with other tests + test_model_name = "test-scientific-notation-model-unique-12345" + + # Clean up any pre-existing entry and clear caches + if test_model_name in litellm.model_cost: + del litellm.model_cost[test_model_name] + + # Clear LRU caches that might have stale data + from litellm.utils import get_model_info, _cached_get_model_info_helper, _invalidate_model_cost_lowercase_map + _invalidate_model_cost_lowercase_map() + model_cost_dict = { - "my-custom-model": { + test_model_name: { "max_tokens": 8192, "input_cost_per_token": "3e-07", "output_cost_per_token": "6e-07", @@ -2294,12 +2305,17 @@ def test_register_model_with_scientific_notation(): litellm.register_model(model_cost_dict) - registered_model = litellm.model_cost["my-custom-model"] + registered_model = litellm.model_cost[test_model_name] print(registered_model) assert registered_model["input_cost_per_token"] == 3e-07 assert registered_model["output_cost_per_token"] == 6e-07 assert registered_model["litellm_provider"] == "openai" assert registered_model["mode"] == "chat" + + # Clean up after test + if test_model_name in litellm.model_cost: + del litellm.model_cost[test_model_name] + _invalidate_model_cost_lowercase_map() def test_reasoning_content_preserved_in_text_completion_wrapper(): diff --git a/tests/test_litellm/vector_stores/test_vector_store_registry.py b/tests/test_litellm/vector_stores/test_vector_store_registry.py index 9fbef21c294..50177584245 100644 --- a/tests/test_litellm/vector_stores/test_vector_store_registry.py +++ b/tests/test_litellm/vector_stores/test_vector_store_registry.py @@ -136,6 +136,9 @@ def test_add_vector_store_to_registry(): @respx.mock def test_search_uses_registry_credentials(): """search() should pull credentials from vector_store_registry when available""" + # Import the actual instance to patch it correctly + from litellm.vector_stores.main import base_llm_http_handler + # Block all HTTP requests at the network level to prevent real API calls respx.route().mock(return_value=httpx.Response(200, json={"object": "list", "data": []})) @@ -168,8 +171,9 @@ def test_search_uses_registry_credentials(): ) as mock_get_creds, patch( "litellm.vector_stores.main.ProviderConfigManager.get_provider_vector_stores_config", return_value=MagicMock(), - ), patch( - "litellm.vector_stores.main.base_llm_http_handler.vector_store_search_handler", + ), patch.object( + base_llm_http_handler, + "vector_store_search_handler", return_value=mock_search_response, ) as mock_handler: search(vector_store_id="vs1", query="test", litellm_logging_obj=logger)