diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index fdd68c953f..fdc86e1968 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -570,6 +570,7 @@ router_settings: | PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service | PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES | Refresh interval in minutes for Prometheus budget metrics. Default is 5 | PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS | Fallback time in hours for sending stats to Prometheus. Default is 9 +| PROMETHEUS_MULTIPROC_DIR | Temp directory for Prometheus multiprocess collector | PROMETHEUS_URL | URL for Prometheus service | PROMPTLAYER_API_KEY | API key for PromptLayer integration | PROXY_ADMIN_ID | Admin identifier for proxy server diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 0ce94ab962..b5f6a964de 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -261,7 +261,12 @@ Use these metrics to monitor the health of the DB Transaction Queue. Eg. Monitor | `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory | | `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis | +## Multiple Workers +When using multiple workers, a special temporary directory is used to store metrics across workers. +By default, a temporary directory is created and deleted when litellm closes. +You can manually specify the directory by setting the environment variable `PROMETHEUS_MULTIPROC_DIR`. +Do note that it needs to be cleared manually before litellm starts. 
## **🔥 LiteLLM Maintained Grafana Dashboards ** diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index a66b1e755f..92ca3898d1 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -2,6 +2,7 @@ #### What this does #### # On success, log events to Prometheus import sys +import os from datetime import datetime, timedelta from typing import ( TYPE_CHECKING, @@ -1805,20 +1806,30 @@ def _mount_metrics_endpoint(premium_user: bool): Defaults to False. """ from prometheus_client import make_asgi_app - + from fastapi import Response, FastAPI from litellm._logging import verbose_proxy_logger from litellm.proxy._types import CommonProxyErrors from litellm.proxy.proxy_server import app + from prometheus_client import CollectorRegistry, multiprocess, generate_latest, CONTENT_TYPE_LATEST if premium_user is not True: verbose_proxy_logger.warning( f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}" ) - # Create metrics ASGI app - metrics_app = make_asgi_app() - - # Mount the metrics app to the app + # If we're running in a multi-process environment, use the multiprocess collector + # See https://prometheus.github.io/client_python/multiprocess/ + if os.environ.get("PROMETHEUS_MULTIPROC_DIR") is None: + metrics_app = make_asgi_app() + else: + metrics_app = FastAPI() + def metrics_endpoint(): + """Multiprocess-aware metrics endpoint""" + registry = CollectorRegistry() + multiprocess.MultiProcessCollector(registry) + data = generate_latest(registry) + return Response(content=data, media_type=CONTENT_TYPE_LATEST, status_code=200) + metrics_app.add_api_route("/", metrics_endpoint, methods=["GET"]) app.mount("/metrics", metrics_app) verbose_proxy_logger.debug( "Starting Prometheus Metrics on /metrics (no authentication)" diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 4c022991f1..30338dcc6d 100644 --- a/litellm/proxy/proxy_cli.py +++ 
b/litellm/proxy/proxy_cli.py @@ -1,10 +1,13 @@ # ruff: noqa: T201 +import atexit import importlib import json import os import random +import shutil import subprocess import sys +import tempfile import urllib.parse as urlparse from typing import TYPE_CHECKING, Any, Optional, Union @@ -787,6 +790,24 @@ def run_server( # noqa: PLR0915 if loop_type: uvicorn_args["loop"] = loop_type + # If we're using prometheus and will be using multiple workers we need to set the PROMETHEUS_MULTIPROC_DIR + # See https://prometheus.github.io/client_python/multiprocess/ + if num_workers > 1 and "prometheus" in _config.get("litellm_settings", {}).get("callbacks", []): + if 'PROMETHEUS_MULTIPROC_DIR' in os.environ: + print(f"LITELLM: Using PROMETHEUS_MULTIPROC_DIR dir: {os.environ['PROMETHEUS_MULTIPROC_DIR']}") + else: + # Setup the temp dir for prometheus + shared_dir = tempfile.mkdtemp(prefix="litellm_prometheus_") + os.environ['PROMETHEUS_MULTIPROC_DIR'] = shared_dir + + print(f"LITELLM: Using PROMETHEUS_MULTIPROC_DIR dir: {os.environ['PROMETHEUS_MULTIPROC_DIR']}") + + def cleanup(): + if os.path.exists(shared_dir): + shutil.rmtree(shared_dir) + print(f"Cleaned up: {shared_dir}") + + atexit.register(cleanup) uvicorn.run( **uvicorn_args, workers=num_workers,