Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ router_settings:
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service
| PROMETHEUS_BUDGET_METRICS_REFRESH_INTERVAL_MINUTES | Refresh interval in minutes for Prometheus budget metrics. Default is 5
| PROMETHEUS_FALLBACK_STATS_SEND_TIME_HOURS | Fallback time in hours for sending stats to Prometheus. Default is 9
| PROMETHEUS_MULTIPROC_DIR | Temp directory for Prometheus multiprocess collector
| PROMETHEUS_URL | URL for Prometheus service
| PROMPTLAYER_API_KEY | API key for PromptLayer integration
| PROXY_ADMIN_ID | Admin identifier for proxy server
Expand Down
5 changes: 5 additions & 0 deletions docs/my-website/docs/proxy/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,12 @@ Use these metrics to monitor the health of the DB Transaction Queue. Eg. Monitor
| `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory |
| `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis |

## Multiple Workers
When using multiple workers, a special temporary directory is used to store metrics across workers.
By default, a temporary directory is created automatically when LiteLLM starts and deleted when LiteLLM shuts down.

You can manually specify the directory by setting the environment variable `PROMETHEUS_MULTIPROC_DIR`.
Note that if you set this variable yourself, the directory must be emptied manually before LiteLLM starts, otherwise stale metric files from previous runs will be included.

## **🔥 LiteLLM Maintained Grafana Dashboards**

Expand Down
21 changes: 16 additions & 5 deletions litellm/integrations/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#### What this does ####
# On success, log events to Prometheus
import sys
import os
from datetime import datetime, timedelta
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -1805,20 +1806,30 @@ def _mount_metrics_endpoint(premium_user: bool):
Defaults to False.
"""
from prometheus_client import make_asgi_app

from fastapi import Response, FastAPI
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import CommonProxyErrors
from litellm.proxy.proxy_server import app
from prometheus_client import CollectorRegistry, multiprocess, generate_latest, CONTENT_TYPE_LATEST

if premium_user is not True:
verbose_proxy_logger.warning(
f"Prometheus metrics are only available for premium users. {CommonProxyErrors.not_premium_user.value}"
)

# Create metrics ASGI app
metrics_app = make_asgi_app()

# Mount the metrics app to the app
# If we're running in a multi-process environment, use the multiprocess collector
# See https://prometheus.github.io/client_python/multiprocess/
if os.environ.get("PROMETHEUS_MULTIPROC_DIR") is None:
metrics_app = make_asgi_app()
else:
metrics_app = FastAPI()
def metrics_endpoint():
    """Serve Prometheus metrics aggregated across all worker processes.

    A fresh ``CollectorRegistry`` is built per request and the
    multiprocess collector is attached to it, so samples written by
    every worker (via the ``PROMETHEUS_MULTIPROC_DIR`` files) are
    merged into a single exposition payload.
    """
    # New registry on every call — the multiprocess collector reads
    # the on-disk metric files afresh each time it is attached.
    worker_registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(worker_registry)
    payload = generate_latest(worker_registry)
    return Response(
        content=payload,
        media_type=CONTENT_TYPE_LATEST,
        status_code=200,
    )
metrics_app.add_api_route("/", metrics_endpoint, methods=["GET"])
app.mount("/metrics", metrics_app)
verbose_proxy_logger.debug(
"Starting Prometheus Metrics on /metrics (no authentication)"
Expand Down
21 changes: 21 additions & 0 deletions litellm/proxy/proxy_cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# ruff: noqa: T201
import atexit
import importlib
import json
import os
import random
import shutil
import subprocess
import sys
import tempfile
import urllib.parse as urlparse
from typing import TYPE_CHECKING, Any, Optional, Union

Expand Down Expand Up @@ -787,6 +790,24 @@ def run_server( # noqa: PLR0915
if loop_type:
uvicorn_args["loop"] = loop_type

# If we're using prometheus and will be using multiple workers we need to set the PROMETHEUS_MULTIPROC_DIR
# See https://prometheus.github.io/client_python/multiprocess/
if num_workers > 1 and "prometheus" in _config.get("litellm_settings", {}).get("callbacks", []):
if 'PROMETHEUS_MULTIPROC_DIR' in os.environ:
print(f"LITELLM: Using PROMETHEUS_MULTIPROC_DIR dir: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")
else:
# Setup the temp dir for prometheus
shared_dir = tempfile.mkdtemp(prefix="litellm_prometheus_")
os.environ['PROMETHEUS_MULTIPROC_DIR'] = shared_dir

print(f"LITELLM: Using PROMETHEUS_MULTIPROC_DIR dir: {os.environ['PROMETHEUS_MULTIPROC_DIR']}")

def cleanup():
    # atexit hook: remove the auto-created Prometheus multiprocess
    # temp dir so metric files do not leak between runs.
    # NOTE(review): this hook is only registered on the branch where
    # litellm created the dir itself; a user-supplied
    # PROMETHEUS_MULTIPROC_DIR is never deleted.
    # NOTE(review): diff rendering stripped indentation — assuming the
    # print belongs inside the if-branch (log only when something was
    # actually removed); confirm against the original file.
    if os.path.exists(shared_dir):
        shutil.rmtree(shared_dir)
        print(f"Cleaned up: {shared_dir}")

atexit.register(cleanup)
uvicorn.run(
**uvicorn_args,
workers=num_workers,
Expand Down
Loading