diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 2bc1c8f8e98..7a9dd659da9 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -1,10 +1,13 @@ # ruff: noqa: T201 +import atexit import importlib import json import os import random +import shutil import subprocess import sys +import tempfile import urllib.parse as urlparse from typing import TYPE_CHECKING, Any, Optional, Union @@ -681,6 +684,50 @@ def run_server( # noqa: PLR0915 litellm.json_logs = True litellm._turn_on_json() + + ### PROMETHEUS MULTIPROCESS SETUP ### + # When running with multiple workers, Prometheus needs a shared + # directory so metrics are aggregated across all worker processes. + # See https://prometheus.github.io/client_python/multiprocess/ + _callbacks = ( + litellm_settings.get("callbacks", []) + if litellm_settings + else [] + ) + _success_callbacks = ( + litellm_settings.get("success_callback", []) + if litellm_settings + else [] + ) + _has_prometheus = ( + "prometheus" in _callbacks + or "prometheus" in _success_callbacks + ) + if ( + num_workers > 1 + and _has_prometheus + and "PROMETHEUS_MULTIPROC_DIR" not in os.environ + ): + _prom_dir = tempfile.mkdtemp(prefix="litellm_prometheus_") + os.environ["PROMETHEUS_MULTIPROC_DIR"] = _prom_dir + print( # noqa + f"\033[1;32mLiteLLM: Auto-configured PROMETHEUS_MULTIPROC_DIR={_prom_dir} for {num_workers} workers\033[0m" # noqa + ) + + def _cleanup_prometheus_dir(): + if os.path.exists(_prom_dir): + shutil.rmtree(_prom_dir, ignore_errors=True) + + atexit.register(_cleanup_prometheus_dir) + elif ( + num_workers > 1 + and _has_prometheus + and "PROMETHEUS_MULTIPROC_DIR" in os.environ + ): + print( # noqa + f"\033[1;32mLiteLLM: Using existing PROMETHEUS_MULTIPROC_DIR={os.environ['PROMETHEUS_MULTIPROC_DIR']}\033[0m" # noqa + ) + ### GENERAL SETTINGS ### general_settings = _config.get("general_settings", {}) if general_settings is None: diff --git a/tests/test_litellm/proxy/test_proxy_cli.py 
class TestPrometheusMultiprocessSetup:
    """Test cases for auto-configuring PROMETHEUS_MULTIPROC_DIR with multiple workers"""

    # Name of the environment variable these tests observe.
    _ENV_VAR = "PROMETHEUS_MULTIPROC_DIR"

    @staticmethod
    def _write_config(tmp_path, callbacks=None, success_callback=None):
        """Write a minimal config yaml and return the path."""
        import yaml

        config = {
            "general_settings": {},
            "litellm_settings": {},
        }
        if callbacks is not None:
            config["litellm_settings"]["callbacks"] = callbacks
        if success_callback is not None:
            config["litellm_settings"]["success_callback"] = success_callback
        config_path = os.path.join(str(tmp_path), "config.yaml")
        with open(config_path, "w") as f:
            yaml.dump(config, f)
        return config_path

    def _invoke_run_server(
        self, tmp_path, extra_args=(), preset_dir=None, **config_kwargs
    ):
        """Invoke ``run_server`` with server startup skipped and report the env outcome.

        The invocation runs inside ``patch.dict(os.environ)``, so ``os.environ``
        is restored to its prior contents when this helper returns, whether the
        invocation succeeded or raised. A directory that was placed in
        PROMETHEUS_MULTIPROC_DIR during the invocation is removed before
        returning; a directory supplied through ``preset_dir`` is never removed.

        Args:
            tmp_path: Directory in which the temporary config file is written.
            extra_args: CLI arguments inserted between ``--config <path>`` and
                ``--skip_server_startup``.
            preset_dir: Value assigned to PROMETHEUS_MULTIPROC_DIR before the
                invocation. ``None`` leaves the variable unset.
            **config_kwargs: Forwarded to ``_write_config`` (``callbacks`` /
                ``success_callback``).

        Returns:
            A ``(result, env_value, dir_existed)`` tuple: the click invocation
            result, the value of PROMETHEUS_MULTIPROC_DIR right after the
            invocation (``None`` when unset), and whether that value named an
            existing directory at that moment.
        """
        import shutil

        from click.testing import CliRunner

        from litellm.proxy.proxy_cli import run_server

        config_path = self._write_config(tmp_path, **config_kwargs)
        cli_args = ["--config", config_path, *extra_args, "--skip_server_startup"]

        # patch.dict snapshots os.environ and restores it on exit, so neither a
        # failing assertion nor an exception can leak state into other tests.
        with patch.dict(os.environ):
            os.environ.pop(self._ENV_VAR, None)
            if preset_dir is not None:
                os.environ[self._ENV_VAR] = preset_dir
            try:
                with patch(
                    "litellm.proxy.proxy_cli.ProxyInitializationHelpers._get_default_unvicorn_init_args"
                ) as mock_get_args:
                    mock_get_args.return_value = {
                        "app": "litellm.proxy.proxy_server:app",
                        "host": "localhost",
                        "port": 8000,
                    }
                    result = CliRunner().invoke(run_server, cli_args)

                env_value = os.environ.get(self._ENV_VAR)
                dir_existed = env_value is not None and os.path.isdir(env_value)
            finally:
                # Only delete a directory that appeared during the invocation;
                # a caller-provided preset directory is left untouched.
                leftover = os.environ.get(self._ENV_VAR)
                if leftover and leftover != preset_dir:
                    shutil.rmtree(leftover, ignore_errors=True)

        return result, env_value, dir_existed

    @patch("uvicorn.run")
    def test_prometheus_multiproc_dir_auto_created(self, mock_uvicorn_run, tmp_path):
        """When num_workers > 1 and prometheus is in callbacks, PROMETHEUS_MULTIPROC_DIR should be auto-set"""
        result, prom_dir, dir_existed = self._invoke_run_server(
            tmp_path, ["--num_workers", "4"], callbacks=["prometheus"]
        )

        assert result.exit_code == 0, result.output
        assert prom_dir is not None
        assert "litellm_prometheus_" in prom_dir
        assert dir_existed

    @patch("uvicorn.run")
    def test_prometheus_multiproc_dir_not_set_for_single_worker(
        self, mock_uvicorn_run, tmp_path
    ):
        """With num_workers=1, PROMETHEUS_MULTIPROC_DIR should NOT be auto-set"""
        result, prom_dir, _ = self._invoke_run_server(
            tmp_path, callbacks=["prometheus"]
        )

        assert result.exit_code == 0, result.output
        assert prom_dir is None

    @patch("uvicorn.run")
    def test_prometheus_multiproc_dir_respects_existing_env(
        self, mock_uvicorn_run, tmp_path
    ):
        """When PROMETHEUS_MULTIPROC_DIR is already set, it should not be overwritten"""
        result, prom_dir, _ = self._invoke_run_server(
            tmp_path,
            ["--num_workers", "4"],
            preset_dir="/custom/prom/dir",
            callbacks=["prometheus"],
        )

        assert result.exit_code == 0, result.output
        assert prom_dir == "/custom/prom/dir"

    @patch("uvicorn.run")
    def test_prometheus_multiproc_dir_auto_created_via_success_callback(
        self, mock_uvicorn_run, tmp_path
    ):
        """When num_workers > 1 and prometheus is in success_callback, PROMETHEUS_MULTIPROC_DIR should be auto-set"""
        result, prom_dir, dir_existed = self._invoke_run_server(
            tmp_path, ["--num_workers", "4"], success_callback=["prometheus"]
        )

        assert result.exit_code == 0, result.output
        assert prom_dir is not None
        assert "litellm_prometheus_" in prom_dir
        assert dir_existed