Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions docs/my-website/docs/proxy/alerting.md
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,59 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \

- `event_message` *str*: A human-readable description of the event.

### Digest Mode (Reducing Alert Noise)

By default, LiteLLM sends a separate Slack message for **every** alert event. For high-frequency alert types like `llm_requests_hanging` or `llm_too_slow`, this can produce hundreds of duplicate messages per day.

**Digest mode** aggregates duplicate alerts within a configurable time window and emits a single summary message with the total count and time range.

#### Configuration

Use `alert_type_config` in `general_settings` to enable digest mode per alert type:

```yaml
general_settings:
alerting: ["slack"]
alert_type_config:
llm_requests_hanging:
digest: true
digest_interval: 86400 # 24 hours (default)
llm_too_slow:
digest: true
digest_interval: 3600 # 1 hour
llm_exceptions:
digest: true
# uses default interval (86400 seconds / 24 hours)
```

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `digest` | bool | `false` | Enable digest mode for this alert type |
| `digest_interval` | int | `86400` (24h) | Time window in seconds. Alerts are aggregated within this interval. |

#### How It Works

1. When an alert fires for a digest-enabled type, it is **grouped** by `(alert_type, request_model, api_base)` instead of being sent immediately
2. A counter tracks how many times the alert fires within the interval
3. Once the interval has expired, a **single summary message** is emitted at the next periodic flush:

```
Alert type: `llm_requests_hanging` (Digest)
Level: `Medium`
Start: `2026-02-19 03:27:39`
End: `2026-02-20 03:27:39`
Count: `847`
Message: `Requests are hanging - 600s+ request time`
Request Model: `gemini-2.5-flash`
API Base: `None`
```

#### Limitations

- **Per-instance**: Digest state is held in memory per proxy instance. If you run multiple instances (e.g., Cloud Run with autoscaling), each instance maintains its own digest and emits its own summary.
- **Not durable**: If an instance is terminated before the digest interval expires, the aggregated alerts for that instance are lost.

## Region-outage alerting (✨ Enterprise feature)

:::info
Expand Down
2 changes: 2 additions & 0 deletions litellm/integrations/SlackAlerting/hanging_request_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,6 @@ async def send_hanging_request_alert(
level="Medium",
alert_type=AlertType.llm_requests_hanging,
alerting_metadata=hanging_request_data.alerting_metadata or {},
request_model=hanging_request_data.model,
api_base=hanging_request_data.api_base,
)
121 changes: 121 additions & 0 deletions litellm/integrations/SlackAlerting/slack_alerting.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __init__(
] = None, # if user wants to separate alerts to diff channels
alerting_args={},
default_webhook_url: Optional[str] = None,
alert_type_config: Optional[Dict[str, dict]] = None,
**kwargs,
):
if alerting_threshold is None:
Expand All @@ -92,6 +93,12 @@ def __init__(
self.hanging_request_check = AlertingHangingRequestCheck(
slack_alerting_object=self,
)
self.alert_type_config: Dict[str, AlertTypeConfig] = {}
if alert_type_config:
for key, val in alert_type_config.items():
self.alert_type_config[key] = AlertTypeConfig(**val) if isinstance(val, dict) else val
self.digest_buckets: Dict[str, DigestEntry] = {}
self.digest_lock = asyncio.Lock()
super().__init__(**kwargs, flush_lock=self.flush_lock)

def update_values(
Expand All @@ -102,6 +109,7 @@ def update_values(
alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None,
alerting_args: Optional[Dict] = None,
llm_router: Optional[Router] = None,
alert_type_config: Optional[Dict[str, dict]] = None,
):
if alerting is not None:
self.alerting = alerting
Expand All @@ -116,6 +124,9 @@ def update_values(
if not self.periodic_started:
asyncio.create_task(self.periodic_flush())
self.periodic_started = True
if alert_type_config is not None:
for key, val in alert_type_config.items():
self.alert_type_config[key] = AlertTypeConfig(**val) if isinstance(val, dict) else val

if alert_to_webhook_url is not None:
# update the dict
Expand Down Expand Up @@ -284,6 +295,8 @@ async def response_taking_too_long_callback(
level="Low",
alert_type=AlertType.llm_too_slow,
alerting_metadata=alerting_metadata,
request_model=model,
api_base=api_base,
)

async def async_update_daily_reports(
Expand Down Expand Up @@ -1360,6 +1373,8 @@ async def send_alert(
alert_type: AlertType,
alerting_metadata: dict,
user_info: Optional[WebhookEvent] = None,
request_model: Optional[str] = None,
api_base: Optional[str] = None,
**kwargs,
):
"""
Expand All @@ -1375,6 +1390,8 @@ async def send_alert(
Parameters:
level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
message: str - what is the alert about
request_model: Optional[str] - model name for digest grouping
api_base: Optional[str] - api base for digest grouping
"""
if self.alerting is None:
return
Expand Down Expand Up @@ -1408,6 +1425,44 @@ async def send_alert(

from datetime import datetime

# Check if digest mode is enabled for this alert type
alert_type_name_str = getattr(alert_type, "value", str(alert_type))
_atc = self.alert_type_config.get(alert_type_name_str)
if _atc is not None and _atc.digest:
# Resolve webhook URL for this alert type (needed for digest entry)
if (
self.alert_to_webhook_url is not None
and alert_type in self.alert_to_webhook_url
):
_digest_webhook: Optional[Union[str, List[str]]] = self.alert_to_webhook_url[alert_type]
elif self.default_webhook_url is not None:
_digest_webhook = self.default_webhook_url
else:
_digest_webhook = os.getenv("SLACK_WEBHOOK_URL", None)
if _digest_webhook is None:
raise ValueError("Missing SLACK_WEBHOOK_URL from environment")

digest_key = f"{alert_type_name_str}:{request_model or ''}:{api_base or ''}"

async with self.digest_lock:
now = datetime.now()
if digest_key in self.digest_buckets:
self.digest_buckets[digest_key]["count"] += 1
self.digest_buckets[digest_key]["last_time"] = now
else:
self.digest_buckets[digest_key] = DigestEntry(
alert_type=alert_type_name_str,
request_model=request_model or "",
api_base=api_base or "",
first_message=message,
level=level,
count=1,
start_time=now,
last_time=now,
webhook_url=_digest_webhook,
)
return # Suppress immediate alert; will be emitted by _flush_digest_buckets

# Get the current timestamp
current_time = datetime.now().strftime("%H:%M:%S")
_proxy_base_url = os.getenv("PROXY_BASE_URL", None)
Expand Down Expand Up @@ -1483,6 +1538,72 @@ async def async_send_batch(self):
await asyncio.gather(*tasks)
self.log_queue.clear()

async def _flush_digest_buckets(self):
"""Flush any digest buckets whose interval has expired.

For each expired bucket, formats a digest summary message and
appends it to the log_queue for delivery via the normal batching path.
"""
from datetime import datetime

now = datetime.now()
flushed_keys: List[str] = []

async with self.digest_lock:
for key, entry in self.digest_buckets.items():
alert_type_name = entry["alert_type"]
_atc = self.alert_type_config.get(alert_type_name)
if _atc is None:
continue
elapsed = (now - entry["start_time"]).total_seconds()
if elapsed < _atc.digest_interval:
continue

# Build digest summary message
start_ts = entry["start_time"].strftime("%H:%M:%S")
end_ts = entry["last_time"].strftime("%H:%M:%S")
start_date = entry["start_time"].strftime("%Y-%m-%d")
end_date = entry["last_time"].strftime("%Y-%m-%d")
formatted_message = (
f"Alert type: `{alert_type_name}` (Digest)\n"
f"Level: `{entry['level']}`\n"
f"Start: `{start_date} {start_ts}`\n"
f"End: `{end_date} {end_ts}`\n"
f"Count: `{entry['count']}`\n\n"
f"Message: {entry['first_message']}"
)
_proxy_base_url = os.getenv("PROXY_BASE_URL", None)
if _proxy_base_url is not None:
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"

payload = {"text": formatted_message}
headers = {"Content-type": "application/json"}
webhook_url = entry["webhook_url"]

if isinstance(webhook_url, list):
for url in webhook_url:
self.log_queue.append(
{"url": url, "headers": headers, "payload": payload, "alert_type": alert_type_name}
)
else:
self.log_queue.append(
{"url": webhook_url, "headers": headers, "payload": payload, "alert_type": alert_type_name}
)
flushed_keys.append(key)

for key in flushed_keys:
del self.digest_buckets[key]

async def periodic_flush(self):
    """Override base periodic_flush to also flush digest buckets.

    Runs forever, sleeping `self.flush_interval` seconds between passes.
    Each step is guarded independently so that one failure (e.g. a bad
    digest entry or a transient error in flush_queue) cannot kill the
    background loop — the original only guarded the digest step, so an
    exception from flush_queue would terminate the task permanently.
    """
    while True:
        await asyncio.sleep(self.flush_interval)
        try:
            await self._flush_digest_buckets()
        except Exception as e:
            verbose_proxy_logger.debug(f"Error flushing digest buckets: {str(e)}")
        try:
            await self.flush_queue()
        except Exception as e:
            verbose_proxy_logger.debug(f"Error flushing alert queue: {str(e)}")

async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
"""Log deployment latency"""
try:
Expand Down
1 change: 1 addition & 0 deletions litellm/proxy/proxy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2982,6 +2982,7 @@ def _load_alerting_settings(self, general_settings: dict):
alert_types=general_settings.get("alert_types", None),
alert_to_webhook_url=general_settings.get("alert_to_webhook_url", None),
alerting_args=general_settings.get("alerting_args", None),
alert_type_config=general_settings.get("alert_type_config", None),
redis_cache=redis_usage_cache,
)

Expand Down
4 changes: 4 additions & 0 deletions litellm/proxy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ def update_values(
alert_types: Optional[List[AlertType]] = None,
alerting_args: Optional[dict] = None,
alert_to_webhook_url: Optional[dict] = None,
alert_type_config: Optional[dict] = None,
):
updated_slack_alerting: bool = False
if alerting is not None:
Expand All @@ -392,6 +393,8 @@ def update_values(
if alert_to_webhook_url is not None:
self.alert_to_webhook_url = alert_to_webhook_url
updated_slack_alerting = True
if alert_type_config is not None:
updated_slack_alerting = True

if updated_slack_alerting is True:
self.slack_alerting_instance.update_values(
Expand All @@ -400,6 +403,7 @@ def update_values(
alert_types=self.alert_types,
alerting_args=alerting_args,
alert_to_webhook_url=self.alert_to_webhook_url,
alert_type_config=alert_type_config,
)

if self.alerting is not None and "slack" in self.alerting:
Expand Down
31 changes: 30 additions & 1 deletion litellm/types/integrations/slack_alerting.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import os
from datetime import datetime as dt
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Set
from typing import Any, Dict, List, Literal, Optional, Set, Union

from pydantic import BaseModel, Field
from typing_extensions import TypedDict

from litellm.types.utils import LiteLLMPydanticObjectBase

DEFAULT_DIGEST_INTERVAL = 86400 # 24 hours in seconds

SLACK_ALERTING_THRESHOLD_5_PERCENT = 0.05
SLACK_ALERTING_THRESHOLD_15_PERCENT = 0.15
MAX_OLDEST_HANGING_REQUESTS_TO_CHECK = 20
Expand Down Expand Up @@ -199,3 +201,30 @@ class HangingRequestData(BaseModel):
key_alias: Optional[str] = None
team_alias: Optional[str] = None
alerting_metadata: Optional[dict] = None


class AlertTypeConfig(LiteLLMPydanticObjectBase):
    """Settings applied to a single alert type (keyed by alert type name).

    Currently carries the digest-mode options: whether duplicate alerts
    should be aggregated, and the length of the aggregation window.
    """

    # Off by default — alerts are delivered individually unless opted in.
    digest: bool = Field(
        description="Enable digest mode for this alert type. When enabled, duplicate alerts are aggregated into a single summary message.",
        default=False,
    )
    # Window length in seconds; DEFAULT_DIGEST_INTERVAL is 24 hours.
    digest_interval: int = Field(
        description="Digest window in seconds. Alerts are aggregated within this interval. Default 24 hours.",
        default=DEFAULT_DIGEST_INTERVAL,
    )


class DigestEntry(TypedDict):
    """State held for one in-flight digest bucket.

    A bucket aggregates duplicate alerts for a unique
    (alert_type, request_model, api_base) combination; these fields carry
    everything needed to render and deliver the summary message when the
    bucket is flushed.
    """

    # Components of the grouping key
    alert_type: str
    request_model: str
    api_base: str
    # Aggregation state
    count: int
    start_time: dt
    last_time: dt
    # Content rendered into the summary message
    first_message: str
    level: str
    # Destination(s), resolved when the bucket was created
    webhook_url: Union[str, List[str]]
Loading
Loading