Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ router_settings:
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service
| PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES | Refresh interval in minutes for Prometheus budget metrics. Default is 5
| PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS | Fallback time in hours for sending stats to Prometheus. Default is 9
| PROMETHEUS_MULTIPROC_DIR | Temp directory for Prometheus multiprocess collector
| PROMETHEUS_URL | URL for Prometheus service
| PROMPTLAYER_API_KEY | API key for PromptLayer integration
| PROXY_ADMIN_ID | Admin identifier for proxy server
Expand Down
5 changes: 5 additions & 0 deletions docs/my-website/docs/proxy/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,12 @@ Use these metrics to monitor the health of the DB Transaction Queue. Eg. Monitor
| `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory |
| `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis |

## Multiple Workers
When using multiple workers, a special temporary directory is used to store metrics across workers.
By default, a temporary directory is created automatically when LiteLLM starts and deleted when LiteLLM shuts down.

You can manually specify the directory by setting the environment variable `PROMETHEUS_MULTIPROC_DIR`.
Note that if you set this variable yourself, the directory must be emptied manually before LiteLLM starts, otherwise stale metric files from previous runs will be included.

## **🔥 LiteLLM Maintained Grafana Dashboards**

Expand Down
21 changes: 16 additions & 5 deletions litellm/integrations/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#### What this does ####
# On success, log events to Prometheus
import sys
import os
from datetime import datetime, timedelta
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -1805,20 +1806,30 @@ def _mount_metrics_endpoint(premium_user: bool):
Defaults to False.
"""
from prometheus_client import make_asgi_app

from fastapi import Response, FastAPI
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import CommonProxyErrors
from litellm.proxy.proxy_server import app
from prometheus_client import CollectorRegistry, multiprocess, generate_latest, CONTENT_TYPE_LATEST

if premium_user is not True:
verbose_proxy_logger.warning(
f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
)

# Create metrics ASGI app
metrics_app = make_asgi_app()

# Mount the metrics app to the app
# If we're running in a multi-process environment, use the multiprocess collector
# See https://prometheus.github.io/client_python/multiprocess/
if os.environ.get("PROMETHEUS_MULTIPROC_DIR") is None:
metrics_app = make_asgi_app()
else:
metrics_app = FastAPI()
def metrics_endpoint():
    """Serve Prometheus metrics aggregated across all worker processes.

    A fresh ``CollectorRegistry`` is built per request and the
    multiprocess collector is attached to it, so samples written by
    every worker (via the ``PROMETHEUS_MULTIPROC_DIR`` files) are
    merged into a single exposition payload.
    """
    # New registry on every call — the multiprocess collector reads
    # the on-disk metric files afresh each time it is attached.
    worker_registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(worker_registry)
    payload = generate_latest(worker_registry)
    return Response(
        content=payload,
        media_type=CONTENT_TYPE_LATEST,
        status_code=200,
    )
metrics_app.add_api_route("/", metrics_endpoint, methods=["GET"])
app.mount("/metrics", metrics_app)
verbose_proxy_logger.debug(
"Starting Prometheus Metrics on /metrics (no authentication)"
Expand Down
21 changes: 21 additions & 0 deletions litellm/proxy/proxy_cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# ruff: noqa: T201
import atexit
import importlib
import json
import os
import random
import shutil
import subprocess
import sys
import tempfile
import urllib.parse as urlparse
from typing import TYPE_CHECKING, Any, Optional, Union

Expand Down Expand Up @@ -787,6 +790,24 @@ def run_server( # noqa: PLR0915
if loop_type:
uvicorn_args["loop"] = loop_type

# If we're using prometheus and will be using multiple workers we need to set the PROMETHEUS_MULTIPROC_DIR
# See https://prometheus.github.io/client_python/multiprocess/
if num_workers > 1 and "prometheus" in _config.get("litellm_settings", {}).get("callbacks", []):
if 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
print(f"LITELLM: Using PROMETHEUS_MULTIPROC_DIR dir: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")
else:
# Setup the temp dir for prometheus
shared_dir = tempfile.mkdtemp(prefix="litellm_prometheus_")
os.environ['PROMETHEUS_MULTIPROC_DIR'] = shared_dir

print(f"LITELLM: Using PROMETHEUS_MULTIPROC_DIR dir: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")

def cleanup():
    # atexit hook: remove the auto-created Prometheus multiprocess
    # temp dir so metric files do not leak between runs.
    # NOTE(review): this hook is only registered on the branch where
    # litellm created the dir itself; a user-supplied
    # PROMETHEUS_MULTIPROC_DIR is never deleted.
    # NOTE(review): diff rendering stripped indentation — assuming the
    # print belongs inside the if-branch (log only when something was
    # actually removed); confirm against the original file.
    if os.path.exists(shared_dir):
        shutil.rmtree(shared_dir)
        print(f"Cleaned up: {shared_dir}")

atexit.register(cleanup)
uvicorn.run(
**uvicorn_args,
workers=num_workers,
Expand Down
Loading