diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index cd2b3b68f37..d5a3466a84c 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -121,8 +121,8 @@ Use this to track overall LiteLLM Proxy usage.
 
 | Metric Name | Description |
 |----------------------|--------------------------------------|
-| `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "exception_status", "exception_class", "route"` |
-| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "status_code", "user_email", "route"` |
+| `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "user_email", "exception_status", "exception_class", "route", "model_id"` |
+| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "status_code", "user_email", "route", "model_id"` |
 
 ### Callback Logging Metrics
 
@@ -191,10 +191,10 @@ Use this for LLM API Error monitoring and tracking remaining rate limits and tok
 
 | Metric Name | Description |
 |----------------------|--------------------------------------|
-| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels "end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "model" |
+| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels "end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "model", "model_id" |
 | `litellm_overhead_latency_metric` | Latency overhead (seconds) added by LiteLLM processing - tracked for labels "model_group", "api_provider", "api_base", "litellm_model_name", "hashed_api_key", "api_key_alias" |
 | `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels "model", "hashed_api_key", "api_key_alias", "team", "team_alias", "requested_model", "end_user", "user" |
-| `litellm_llm_api_time_to_first_token_metric` | Time to first token for LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` [Note: only emitted for streaming requests] |
+| `litellm_llm_api_time_to_first_token_metric` | Time to first token for LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias`, `requested_model`, `end_user`, `user`, `model_id` [Note: only emitted for streaming requests] |
 
 ## Tracking `end_user` on Prometheus
 
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 4ce818f0cef..9a9f2e945c4 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -1126,12 +1126,14 @@ def _set_latency_metrics(
             time_to_first_token_seconds is not None
             and kwargs.get("stream", False) is True  # only emit for streaming requests
         ):
+            _ttft_labels = prometheus_label_factory(
+                supported_enum_labels=self.get_labels_for_metric(
+                    metric_name="litellm_llm_api_time_to_first_token_metric"
+                ),
+                enum_values=enum_values,
+            )
             self.litellm_llm_api_time_to_first_token_metric.labels(
-                model,
-                user_api_key,
-                user_api_key_alias,
-                user_api_team,
-                user_api_team_alias,
+                **_ttft_labels
             ).observe(time_to_first_token_seconds)
         else:
             verbose_logger.debug(
diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py
index 6a254fc8252..7d8f29761c8 100644
--- a/litellm/types/integrations/prometheus.py
+++ b/litellm/types/integrations/prometheus.py
@@ -206,6 +206,10 @@ class PrometheusMetricLabels:
         UserAPIKeyLabelNames.API_KEY_ALIAS.value,
         UserAPIKeyLabelNames.TEAM.value,
         UserAPIKeyLabelNames.TEAM_ALIAS.value,
+        UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+        UserAPIKeyLabelNames.END_USER.value,
+        UserAPIKeyLabelNames.USER.value,
+        UserAPIKeyLabelNames.MODEL_ID.value,
     ]
 
     litellm_request_total_latency_metric = [
@@ -217,6 +221,7 @@ class PrometheusMetricLabels:
         UserAPIKeyLabelNames.TEAM_ALIAS.value,
         UserAPIKeyLabelNames.USER.value,
         UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
+        UserAPIKeyLabelNames.MODEL_ID.value,
     ]
 
     litellm_proxy_total_requests_metric = [
@@ -230,6 +235,7 @@ class PrometheusMetricLabels:
         UserAPIKeyLabelNames.STATUS_CODE.value,
         UserAPIKeyLabelNames.USER_EMAIL.value,
         UserAPIKeyLabelNames.ROUTE.value,
+        UserAPIKeyLabelNames.MODEL_ID.value,
     ]
 
     litellm_proxy_failed_requests_metric = [
@@ -244,6 +250,7 @@ class PrometheusMetricLabels:
         UserAPIKeyLabelNames.EXCEPTION_STATUS.value,
         UserAPIKeyLabelNames.EXCEPTION_CLASS.value,
         UserAPIKeyLabelNames.ROUTE.value,
+        UserAPIKeyLabelNames.MODEL_ID.value,
     ]
 
     litellm_deployment_latency_per_output_token = [
diff --git a/tests/enterprise/litellm_enterprise/enterprise_callbacks/test_prometheus_logging_callbacks.py b/tests/enterprise/litellm_enterprise/enterprise_callbacks/test_prometheus_logging_callbacks.py
index e8fe4dd3393..af1cd467e2b 100644
--- a/tests/enterprise/litellm_enterprise/enterprise_callbacks/test_prometheus_logging_callbacks.py
+++ b/tests/enterprise/litellm_enterprise/enterprise_callbacks/test_prometheus_logging_callbacks.py
@@ -411,7 +411,15 @@ def test_set_latency_metrics(prometheus_logger):
     # completion_start_time - api_call_start_time
     prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called_once_with(
-        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
+        end_user=None,
+        user="test_user",
+        hashed_api_key="test_hash",
+        api_key_alias="test_alias",
+        team="test_team",
+        team_alias="test_team_alias",
+        requested_model="openai-gpt",
+        model="gpt-3.5-turbo",
+        model_id="model-123",
     )
     prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels().observe.assert_called_once_with(
         0.5
     )
@@ -442,6 +450,7 @@ def test_set_latency_metrics(prometheus_logger):
         team_alias="test_team_alias",
         requested_model="openai-gpt",
         model="gpt-3.5-turbo",
+        model_id="model-123",
     )
     prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
         2.0
     )
@@ -737,6 +746,7 @@ async def test_async_post_call_failure_hook(prometheus_logger):
         exception_status="429",
         exception_class="Openai.RateLimitError",
         route=user_api_key_dict.request_route,
+        model_id=None,
     )
     prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()
 
@@ -752,6 +762,7 @@ async def test_async_post_call_failure_hook(prometheus_logger):
         status_code="429",
         user_email=None,
         route=user_api_key_dict.request_route,
+        model_id=None,
     )
     prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
 
@@ -798,6 +809,7 @@ async def test_async_post_call_success_hook(prometheus_logger):
         status_code="200",
         user_email=None,
         route=user_api_key_dict.request_route,
+        model_id=None,
     )
     prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()
 
diff --git a/tests/test_litellm/integrations/test_prometheus_labels.py b/tests/test_litellm/integrations/test_prometheus_labels.py
index 8a4295e98fc..b29d7044009 100644
--- a/tests/test_litellm/integrations/test_prometheus_labels.py
+++ b/tests/test_litellm/integrations/test_prometheus_labels.py
@@ -69,8 +69,39 @@ def test_prometheus_metric_labels_structure():
     print(f"✅ {metric_name} has proper label structure with user_email")
 
 
+def test_model_id_in_required_metrics():
+    """
+    Test that model_id label is present in all the metrics that should have it:
+    - litellm_proxy_total_requests_metric
+    - litellm_proxy_failed_requests_metric
+    - litellm_request_total_latency_metric
+    - litellm_llm_api_time_to_first_token_metric
+    """
+    model_id_label = UserAPIKeyLabelNames.MODEL_ID.value
+
+    # Metrics that should have model_id
+    metrics_with_model_id = [
+        "litellm_proxy_total_requests_metric",
+        "litellm_proxy_failed_requests_metric",
+        "litellm_request_total_latency_metric",
+        "litellm_llm_api_time_to_first_token_metric"
+    ]
+
+    for metric_name in metrics_with_model_id:
+        labels = PrometheusMetricLabels.get_labels(metric_name)
+        assert model_id_label in labels, f"Metric {metric_name} should contain model_id label"
+        print(f"✅ {metric_name} contains model_id label")
+
+
+def test_model_id_label_exists():
+    """Test that the MODEL_ID label is properly defined"""
+    assert UserAPIKeyLabelNames.MODEL_ID.value == "model_id"
+
+
 if __name__ == "__main__":
     test_user_email_in_required_metrics()
     test_user_email_label_exists()
     test_prometheus_metric_labels_structure()
+    test_model_id_in_required_metrics()
+    test_model_id_label_exists()
     print("All prometheus label tests passed!")
\ No newline at end of file