diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 847f8623e72..d563adcaa74 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -775,6 +775,10 @@ router_settings: | LITELLM_METER_NAME | Name for OTEL Meter | LITELLM_OTEL_INTEGRATION_ENABLE_EVENTS | Optionally enable semantic logs for OTEL | LITELLM_OTEL_INTEGRATION_ENABLE_METRICS | Optionally enable emantic metrics for OTEL +| LITELLM_ENABLE_PYROSCOPE | If true, enables Pyroscope CPU profiling. Profiles are sent to PYROSCOPE_SERVER_ADDRESS. Off by default. See [Pyroscope profiling](/proxy/pyroscope_profiling). +| PYROSCOPE_APP_NAME | Application name reported to Pyroscope. Required when LITELLM_ENABLE_PYROSCOPE is true. No default. +| PYROSCOPE_SERVER_ADDRESS | Pyroscope server URL to send profiles to. Required when LITELLM_ENABLE_PYROSCOPE is true. No default. +| PYROSCOPE_SAMPLE_RATE | Optional. Sample rate for Pyroscope profiling (integer). No default; when unset, the pyroscope-io library default is used. | LITELLM_MASTER_KEY | Master key for proxy authentication | LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development) | LITELLM_NON_ROOT | Flag to run LiteLLM in non-root mode for enhanced security in Docker containers diff --git a/docs/my-website/docs/proxy/pyroscope_profiling.md b/docs/my-website/docs/proxy/pyroscope_profiling.md new file mode 100644 index 00000000000..fa3db3a8782 --- /dev/null +++ b/docs/my-website/docs/proxy/pyroscope_profiling.md @@ -0,0 +1,43 @@ +# Grafana Pyroscope CPU profiling + +LiteLLM proxy can send continuous CPU profiles to [Grafana Pyroscope](https://grafana.com/docs/pyroscope/latest/) when enabled via environment variables. This is optional and off by default. + +## Quick start + +1. 
**Install the optional dependency** (required only when enabling Pyroscope): + + ```bash + pip install pyroscope-io + ``` + + Or install the proxy extra: + + ```bash + pip install "litellm[proxy]" + ``` + +2. **Set environment variables** before starting the proxy: + + | Variable | Required | Description | + |----------|----------|-------------| + | `LITELLM_ENABLE_PYROSCOPE` | Yes (to enable) | Set to `true` to enable Pyroscope profiling. | + | `PYROSCOPE_APP_NAME` | Yes (when enabled) | Application name shown in the Pyroscope UI. | + | `PYROSCOPE_SERVER_ADDRESS` | Yes (when enabled) | Pyroscope server URL (e.g. `http://localhost:4040`). | + | `PYROSCOPE_SAMPLE_RATE` | No | Sample rate (integer). If unset, the pyroscope-io library default is used. | + +3. **Start the proxy**; profiling will begin automatically when the proxy starts. + + ```bash + export LITELLM_ENABLE_PYROSCOPE=true + export PYROSCOPE_APP_NAME=litellm-proxy + export PYROSCOPE_SERVER_ADDRESS=http://localhost:4040 + litellm --config config.yaml + ``` + +4. **View profiles** in the Pyroscope (or Grafana) UI and select your `PYROSCOPE_APP_NAME`. + +## Notes + +- **Optional dependency**: `pyroscope-io` is an optional dependency. If it is not installed and `LITELLM_ENABLE_PYROSCOPE=true`, the proxy will log a warning and continue without profiling. +- **Platform support**: The `pyroscope-io` package uses a native extension and is not available on all platforms (e.g. Windows is excluded by the package). +- **Other settings**: See [Configuration settings](/proxy/config_settings) for all proxy environment variables. 
@classmethod
def _init_pyroscope(cls):
    """
    Optionally start continuous CPU profiling via Grafana Pyroscope.

    Off by default. Enable with LITELLM_ENABLE_PYROSCOPE=true.
    Requires: pip install pyroscope-io (optional dependency).

    Environment variables read when enabled:
        PYROSCOPE_APP_NAME: required, no default.
        PYROSCOPE_SERVER_ADDRESS: required, no default.
        PYROSCOPE_SAMPLE_RATE: optional; parsed as a number and truncated
            to an integer. When unset, no sample_rate is passed to
            pyroscope.configure().
        OTEL_ENVIRONMENT_NAME / LITELLM_DEPLOYMENT_ENVIRONMENT: optional;
            the first non-empty value is sent as the "environment" tag.

    Behavior:
        - Disabled: logs a debug message and returns without touching
          pyroscope.
        - Enabled but pyroscope-io is not importable: logs a warning and
          returns (the proxy keeps starting).

    Raises:
        ValueError: if enabled and PYROSCOPE_APP_NAME or
            PYROSCOPE_SERVER_ADDRESS is missing/empty, or if
            PYROSCOPE_SAMPLE_RATE is set but is not a finite number.
    """
    if not get_secret_bool("LITELLM_ENABLE_PYROSCOPE", False):
        verbose_proxy_logger.debug(
            "LiteLLM: Pyroscope profiling is disabled (set LITELLM_ENABLE_PYROSCOPE=true to enable)."
        )
        # Early exit is essential: without it the code below would still
        # import and configure pyroscope (or raise on missing env vars)
        # even though profiling was never requested.
        return

    # Keep the try body to the import alone so that only a genuinely missing
    # package is reported as "not installed".
    try:
        import pyroscope
    except ImportError:
        verbose_proxy_logger.warning(
            "LiteLLM: LITELLM_ENABLE_PYROSCOPE is set but the 'pyroscope-io' package is not installed. "
            "Pyroscope profiling will not run. Install with: pip install pyroscope-io"
        )
        return

    app_name = os.getenv("PYROSCOPE_APP_NAME")
    if not app_name:
        raise ValueError(
            "LITELLM_ENABLE_PYROSCOPE is true but PYROSCOPE_APP_NAME is not set. "
            "Set PYROSCOPE_APP_NAME when enabling Pyroscope."
        )

    server_address = os.getenv("PYROSCOPE_SERVER_ADDRESS")
    if not server_address:
        raise ValueError(
            "LITELLM_ENABLE_PYROSCOPE is true but PYROSCOPE_SERVER_ADDRESS is not set. "
            "Set PYROSCOPE_SERVER_ADDRESS when enabling Pyroscope."
        )

    # Tag profiles with the deployment environment when one is configured.
    tags = {}
    env_name = os.getenv("OTEL_ENVIRONMENT_NAME") or os.getenv(
        "LITELLM_DEPLOYMENT_ENVIRONMENT",
    )
    if env_name:
        tags["environment"] = env_name

    configure_kwargs = {
        "app_name": app_name,
        "server_address": server_address,
        "tags": tags if tags else None,
    }

    sample_rate_env = os.getenv("PYROSCOPE_SAMPLE_RATE")
    if sample_rate_env is not None:
        try:
            # pyroscope-io expects sample_rate as an integer; going through
            # float() lets values such as "100.7" be accepted and truncated.
            configure_kwargs["sample_rate"] = int(float(sample_rate_env))
        except (ValueError, TypeError, OverflowError) as err:
            # OverflowError covers "inf"/"-inf", which float() accepts but
            # int() cannot convert.
            raise ValueError(
                "PYROSCOPE_SAMPLE_RATE must be a number, got: "
                f"{sample_rate_env!r}"
            ) from err

    pyroscope.configure(**configure_kwargs)

    msg = (
        f"LiteLLM: Pyroscope profiling started (app_name={app_name}, server_address={server_address}). "
        f"View CPU profiles at the Pyroscope UI and select application '{app_name}'."
    )
    if "sample_rate" in configure_kwargs:
        msg += f" sample_rate={configure_kwargs['sample_rate']}"
    verbose_proxy_logger.info(msg)
[[package]] name = "a2a-sdk" @@ -5659,6 +5659,24 @@ files = [ [package.extras] dev = ["build", "flake8", "mypy", "pytest", "twine"] +[[package]] +name = "pyroscope-io" +version = "0.8.16" +description = "Pyroscope Python integration" +optional = false +python-versions = "*" +groups = ["main"] +markers = "extra == \"proxy\" and sys_platform != \"win32\"" +files = [ + {file = "pyroscope_io-0.8.16-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:e07edcfd59f5bdce42948b92c9b118c824edbd551730305f095a6b9af401a9e8"}, + {file = "pyroscope_io-0.8.16-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:dc98355e27c0b7b61f27066500fe1045b70e9459bb8b9a3082bc4755cb6392b6"}, + {file = "pyroscope_io-0.8.16-py2.py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:86f0f047554ff62bd92c3e5a26bc2809ccd467d11fbacb9fef898ba299dbda59"}, + {file = "pyroscope_io-0.8.16-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6b91ce5b240f8de756c16a17022ca8e25ef8a4eed461c7d074b8a0841cf7b445"}, +] + +[package.dependencies] +cffi = ">=1.6.0" + [[package]] name = "pytest" version = "7.4.4" @@ -8516,7 +8534,7 @@ extra-proxy = ["a2a-sdk", "azure-identity", "azure-keyvault-secrets", "google-cl google = ["google-cloud-aiplatform"] grpc = ["grpcio", "grpcio"] mlflow = ["mlflow"] -proxy = ["PyJWT", "apscheduler", "azure-identity", "azure-storage-blob", "backoff", "boto3", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "litellm-enterprise", "litellm-proxy-extras", "mcp", "orjson", "polars", "pynacl", "python-multipart", "pyyaml", "rich", "rq", "soundfile", "uvicorn", "uvloop", "websockets"] +proxy = ["PyJWT", "apscheduler", "azure-identity", "azure-storage-blob", "backoff", "boto3", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "litellm-enterprise", "litellm-proxy-extras", "mcp", "orjson", "polars", "pynacl", "pyroscope-io", "python-multipart", "pyyaml", "rich", "rq", "soundfile", "uvicorn", "uvloop", "websockets"] semantic-router = 
"""Unit tests for ProxyStartupEvent._init_pyroscope (Grafana Pyroscope profiling).

Every test injects a mock ``pyroscope`` module through ``sys.modules`` and
patches ``get_secret_bool`` so the outcome does not depend on whether the
real ``pyroscope-io`` package is installed or on the ambient environment.
"""

import os
import sys
from unittest.mock import MagicMock, patch

import pytest

from litellm.proxy.proxy_server import ProxyStartupEvent


def _mock_pyroscope_module():
    """Return a mock module so 'import pyroscope' succeeds in _init_pyroscope."""
    m = MagicMock()
    m.configure = MagicMock()
    return m


def test_init_pyroscope_returns_cleanly_when_disabled():
    """When LITELLM_ENABLE_PYROSCOPE is false, nothing is configured and nothing raises."""
    mock_pyroscope = _mock_pyroscope_module()
    # A fully valid configuration is supplied on purpose: the only reason
    # configure() may be skipped here is the disabled flag itself.
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=False,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
        },
        clear=False,
    ):
        ProxyStartupEvent._init_pyroscope()
    mock_pyroscope.configure.assert_not_called()


def test_init_pyroscope_does_not_raise_when_package_missing():
    """When enabled but pyroscope-io cannot be imported, startup continues without raising."""
    # A None entry in sys.modules makes 'import pyroscope' raise ImportError.
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": None},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
        },
        clear=False,
    ):
        ProxyStartupEvent._init_pyroscope()


def test_init_pyroscope_raises_when_enabled_but_missing_app_name():
    """When LITELLM_ENABLE_PYROSCOPE is true but PYROSCOPE_APP_NAME is not set, raises ValueError."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
        },
        clear=False,
    ):
        with pytest.raises(ValueError, match="PYROSCOPE_APP_NAME"):
            ProxyStartupEvent._init_pyroscope()
    mock_pyroscope.configure.assert_not_called()


def test_init_pyroscope_raises_when_enabled_but_missing_server_address():
    """When LITELLM_ENABLE_PYROSCOPE is true but PYROSCOPE_SERVER_ADDRESS is not set, raises ValueError."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "",
        },
        clear=False,
    ):
        with pytest.raises(ValueError, match="PYROSCOPE_SERVER_ADDRESS"):
            ProxyStartupEvent._init_pyroscope()
    mock_pyroscope.configure.assert_not_called()


def test_init_pyroscope_raises_when_sample_rate_invalid():
    """When PYROSCOPE_SAMPLE_RATE is not a number, raises ValueError."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
            "PYROSCOPE_SAMPLE_RATE": "not-a-number",
        },
        clear=False,
    ):
        with pytest.raises(ValueError, match="PYROSCOPE_SAMPLE_RATE"):
            ProxyStartupEvent._init_pyroscope()
    mock_pyroscope.configure.assert_not_called()


def test_init_pyroscope_accepts_integer_sample_rate():
    """When enabled with valid config and integer sample rate, configures pyroscope."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
            "PYROSCOPE_SAMPLE_RATE": "100",
        },
        clear=False,
    ):
        ProxyStartupEvent._init_pyroscope()
    mock_pyroscope.configure.assert_called_once()
    call_kw = mock_pyroscope.configure.call_args[1]
    assert call_kw["app_name"] == "myapp"
    assert call_kw["server_address"] == "http://localhost:4040"
    assert call_kw["sample_rate"] == 100


def test_init_pyroscope_accepts_float_sample_rate_parsed_as_int():
    """PYROSCOPE_SAMPLE_RATE can be a float string; it is parsed as integer."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
            "PYROSCOPE_SAMPLE_RATE": "100.7",
        },
        clear=False,
    ):
        ProxyStartupEvent._init_pyroscope()
    call_kw = mock_pyroscope.configure.call_args[1]
    assert call_kw["sample_rate"] == 100


def test_init_pyroscope_omits_sample_rate_when_unset():
    """Without PYROSCOPE_SAMPLE_RATE, no sample_rate is passed so the library default applies."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
        },
        clear=False,
    ):
        # patch.dict restores the original mapping on exit, so removing an
        # ambient value here does not leak outside this test.
        os.environ.pop("PYROSCOPE_SAMPLE_RATE", None)
        ProxyStartupEvent._init_pyroscope()
    mock_pyroscope.configure.assert_called_once()
    call_kw = mock_pyroscope.configure.call_args[1]
    assert "sample_rate" not in call_kw


def test_init_pyroscope_sets_environment_tag():
    """OTEL_ENVIRONMENT_NAME is forwarded to pyroscope as the 'environment' tag."""
    mock_pyroscope = _mock_pyroscope_module()
    with patch(
        "litellm.proxy.proxy_server.get_secret_bool",
        return_value=True,
    ), patch.dict(
        sys.modules,
        {"pyroscope": mock_pyroscope},
    ), patch.dict(
        os.environ,
        {
            "PYROSCOPE_APP_NAME": "myapp",
            "PYROSCOPE_SERVER_ADDRESS": "http://localhost:4040",
            "OTEL_ENVIRONMENT_NAME": "staging",
        },
        clear=False,
    ):
        os.environ.pop("PYROSCOPE_SAMPLE_RATE", None)
        ProxyStartupEvent._init_pyroscope()
    call_kw = mock_pyroscope.configure.call_args[1]
    assert call_kw["tags"] == {"environment": "staging"}