-
-
Notifications
You must be signed in to change notification settings - Fork 6.9k
Litellm update blog posts rss #23791
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,8 +1,8 @@ | ||||||
| """ | ||||||
| Pulls the latest LiteLLM blog posts from GitHub. | ||||||
| Pulls the latest LiteLLM blog posts from the docs RSS feed. | ||||||
|
|
||||||
| Falls back to the bundled local backup on any failure. | ||||||
| GitHub JSON URL is configured via litellm.blog_posts_url (or LITELLM_BLOG_POSTS_URL env var). | ||||||
| RSS URL is configured via litellm.blog_posts_url (or LITELLM_BLOG_POSTS_URL env var). | ||||||
|
|
||||||
| Disable remote fetching entirely: | ||||||
| export LITELLM_LOCAL_BLOG_POSTS=True | ||||||
|
|
@@ -11,8 +11,10 @@ | |||||
| import json | ||||||
| import os | ||||||
| import time | ||||||
| import xml.etree.ElementTree as ET | ||||||
| from email.utils import parsedate_to_datetime | ||||||
| from importlib.resources import files | ||||||
| from typing import Any, Dict, List, Optional | ||||||
| from typing import Dict, List, Optional | ||||||
|
|
||||||
| import httpx | ||||||
| from pydantic import BaseModel | ||||||
|
|
@@ -37,9 +39,8 @@ class GetBlogPosts: | |||||
| """ | ||||||
| Fetches, validates, and caches LiteLLM blog posts. | ||||||
|
|
||||||
| Mirrors the structure of GetModelCostMap: | ||||||
| - Fetches from GitHub with a 5-second timeout | ||||||
| - Validates the response has a non-empty ``posts`` list | ||||||
| - Fetches RSS feed from docs site with a 5-second timeout | ||||||
| - Parses the XML and extracts the latest blog post | ||||||
| - Caches the result in-process for BLOG_POSTS_TTL_SECONDS (1 hour) | ||||||
| - Falls back to the bundled local backup on any failure | ||||||
| """ | ||||||
|
|
@@ -56,30 +57,67 @@ def load_local_blog_posts() -> List[Dict[str, str]]: | |||||
| return content.get("posts", []) | ||||||
|
|
||||||
| @staticmethod | ||||||
| def fetch_remote_blog_posts(url: str, timeout: int = 5) -> dict: | ||||||
| def fetch_rss_feed(url: str, timeout: int = 5) -> str: | ||||||
| """ | ||||||
| Fetch blog posts JSON from a remote URL. | ||||||
| Fetch RSS XML from a remote URL. | ||||||
|
|
||||||
| Returns the parsed response. Raises on network/parse errors. | ||||||
| Returns the raw XML text. Raises on network errors. | ||||||
| """ | ||||||
| response = httpx.get(url, timeout=timeout) | ||||||
| response.raise_for_status() | ||||||
| return response.json() | ||||||
| return response.text | ||||||
|
|
||||||
| @staticmethod | ||||||
| def validate_blog_posts(data: Any) -> bool: | ||||||
| """Return True if data is a dict with a non-empty ``posts`` list.""" | ||||||
| if not isinstance(data, dict): | ||||||
| verbose_logger.warning( | ||||||
| "LiteLLM: Blog posts response is not a dict (type=%s). " | ||||||
| "Falling back to local backup.", | ||||||
| type(data).__name__, | ||||||
| def parse_rss_to_posts(xml_text: str, max_posts: int = 1) -> List[Dict[str, str]]: | ||||||
| """ | ||||||
| Parse RSS XML and return a list of blog post dicts. | ||||||
|
|
||||||
| Extracts title, description, date (YYYY-MM-DD), and url from each <item>. | ||||||
| """ | ||||||
| root = ET.fromstring(xml_text) | ||||||
| channel = root.find("channel") | ||||||
| if channel is None: | ||||||
| raise ValueError("RSS feed missing <channel> element") | ||||||
|
|
||||||
| posts: List[Dict[str, str]] = [] | ||||||
| for item in channel.findall("item"): | ||||||
| if len(posts) >= max_posts: | ||||||
| break | ||||||
|
|
||||||
| title_el = item.find("title") | ||||||
| link_el = item.find("link") | ||||||
| desc_el = item.find("description") | ||||||
| pub_date_el = item.find("pubDate") | ||||||
|
|
||||||
| if title_el is None or link_el is None: | ||||||
| continue | ||||||
|
|
||||||
| # Parse RFC 2822 date to YYYY-MM-DD | ||||||
| date_str = "" | ||||||
| if pub_date_el is not None and pub_date_el.text: | ||||||
| try: | ||||||
| dt = parsedate_to_datetime(pub_date_el.text) | ||||||
| date_str = dt.strftime("%Y-%m-%d") | ||||||
| except Exception: | ||||||
| date_str = pub_date_el.text | ||||||
|
|
||||||
| posts.append( | ||||||
| { | ||||||
| "title": title_el.text or "", | ||||||
| "description": desc_el.text or "" if desc_el is not None else "", | ||||||
| "date": date_str, | ||||||
| "url": link_el.text or "", | ||||||
| } | ||||||
| ) | ||||||
| return False | ||||||
| posts = data.get("posts") | ||||||
|
|
||||||
| return posts | ||||||
|
|
||||||
| @staticmethod | ||||||
| def validate_blog_posts(posts: List[Dict[str, str]]) -> bool: | ||||||
| """Return True if posts is a non-empty list.""" | ||||||
| if not isinstance(posts, list) or len(posts) == 0: | ||||||
| verbose_logger.warning( | ||||||
| "LiteLLM: Blog posts response has no valid 'posts' list. " | ||||||
| "LiteLLM: Parsed RSS feed has no valid posts. " | ||||||
| "Falling back to local backup.", | ||||||
| ) | ||||||
| return False | ||||||
|
|
@@ -102,7 +140,8 @@ def get_blog_posts(cls, url: str) -> List[Dict[str, str]]: | |||||
| return cached | ||||||
|
|
||||||
| try: | ||||||
| data = cls.fetch_remote_blog_posts(url) | ||||||
| xml_text = cls.fetch_rss_feed(url) | ||||||
| posts = cls.parse_rss_to_posts(xml_text) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The default of
Suggested change
(Adjust the value to whatever the UI is designed to display.) |
||||||
| except Exception as e: | ||||||
| verbose_logger.warning( | ||||||
| "LiteLLM: Failed to fetch blog posts from %s: %s. " | ||||||
|
|
@@ -112,10 +151,9 @@ def get_blog_posts(cls, url: str) -> List[Dict[str, str]]: | |||||
| ) | ||||||
| return cls.load_local_blog_posts() | ||||||
|
|
||||||
| if not cls.validate_blog_posts(data): | ||||||
| if not cls.validate_blog_posts(posts): | ||||||
| return cls.load_local_blog_posts() | ||||||
|
|
||||||
| posts = data["posts"] | ||||||
| cls._cached_posts = posts | ||||||
| cls._last_fetch_time = now | ||||||
| return posts | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`xml.etree.ElementTree` is vulnerable to XML entity expansion attacks

Python's own documentation explicitly states that `xml.etree.ElementTree` is not secure against maliciously constructed data and is vulnerable to "Billion Laughs" and "Quadratic Blowup" entity-expansion DoS attacks.

While the default URL (`https://docs.litellm.ai/blog/rss.xml`) is trusted, the URL is user-configurable via `LITELLM_BLOG_POSTS_URL`. If an operator points this to an attacker-controlled endpoint, the server can return a deeply nested entity-expansion payload that exhausts memory/CPU before the response is even fully processed.

The `defusedxml` library is the recommended drop-in replacement (for example, `import defusedxml.ElementTree as ET` in place of `import xml.etree.ElementTree as ET`).

This single swap neutralises billion-laughs, quadratic-blowup, and external-entity attacks without any other code changes. Alternatively, you can validate the response size before parsing (e.g., reject payloads over ~1 MB).