diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 00c38eac188..0a61dab0680 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -1,6 +1,7 @@ # used for /metrics endpoint on LiteLLM Proxy #### What this does #### # On success, log events to Prometheus +import asyncio import os import sys from datetime import datetime, timedelta @@ -1188,28 +1189,34 @@ async def _increment_remaining_budget_metrics( _user_spend = _metadata.get("user_api_key_user_spend", None) _user_max_budget = _metadata.get("user_api_key_user_max_budget", None) - await self._set_api_key_budget_metrics_after_api_request( - user_api_key=user_api_key, - user_api_key_alias=user_api_key_alias, - response_cost=response_cost, - key_max_budget=_api_key_max_budget, - key_spend=_api_key_spend, - ) - - await self._set_team_budget_metrics_after_api_request( - user_api_team=user_api_team, - user_api_team_alias=user_api_team_alias, - team_spend=_team_spend, - team_max_budget=_team_max_budget, - response_cost=response_cost, - ) - - await self._set_user_budget_metrics_after_api_request( - user_id=user_id, - user_spend=_user_spend, - user_max_budget=_user_max_budget, - response_cost=response_cost, + results = await asyncio.gather( + self._set_api_key_budget_metrics_after_api_request( + user_api_key=user_api_key, + user_api_key_alias=user_api_key_alias, + response_cost=response_cost, + key_max_budget=_api_key_max_budget, + key_spend=_api_key_spend, + ), + self._set_team_budget_metrics_after_api_request( + user_api_team=user_api_team, + user_api_team_alias=user_api_team_alias, + team_spend=_team_spend, + team_max_budget=_team_max_budget, + response_cost=response_cost, + ), + self._set_user_budget_metrics_after_api_request( + user_id=user_id, + user_spend=_user_spend, + user_max_budget=_user_max_budget, + response_cost=response_cost, + ), + return_exceptions=True, ) + for i, r in enumerate(results): + if isinstance(r, Exception): + verbose_logger.debug( + f"[Non-Blocking] Prometheus: Budget metric lookup {['key', 'team', 'user'][i]} failed: {r}" + ) def _increment_top_level_request_and_spend_metrics( self, @@ -2898,12 +2905,14 @@ async def _assemble_user_object( max_budget=max_budget, ) try: + # Note: Setting check_db_only=True bypasses cache and hits DB on every request, + # causing huge latency increase and CPU spikes. Keep check_db_only=False. user_info = await get_user_object( user_id=user_id, prisma_client=prisma_client, user_api_key_cache=user_api_key_cache, user_id_upsert=False, - check_db_only=True, + check_db_only=False, ) except Exception as e: verbose_logger.debug( diff --git a/poetry.lock b/poetry.lock index 5e926509d54..b37fd863431 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8531,8 +8531,4 @@ utils = ["numpydoc"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<4.0" -<<<<<<< litellm_oss_staging_02_04_2026 -content-hash = "797603dcfef0a79781c7d3cba5dfe18f6aea4aa792220f47487ebc7bd04ae2e3" -======= content-hash = "e5447e14dd37e324ac07a8fc6286d27e9a0d355ed93ebb24fc11e3f5df12fd3e" ->>>>>>> main diff --git a/tests/test_litellm/llms/azure_ai/test_cost_calculator.py b/tests/test_litellm/llms/azure_ai/test_azure_ai_cost_calculator.py similarity index 100% rename from tests/test_litellm/llms/azure_ai/test_cost_calculator.py rename to tests/test_litellm/llms/azure_ai/test_azure_ai_cost_calculator.py