# Location of the RSS feed that lists LiteLLM blog posts. Setting the
# LITELLM_BLOG_POSTS_URL environment variable overrides the built-in default.
blog_posts_url: str = os.environ.get(
    "LITELLM_BLOG_POSTS_URL",
    "https://docs.litellm.ai/blog/rss.xml",
)
@staticmethod
def fetch_rss_feed(url: str, timeout: int = 5) -> str:
    """
    Fetch RSS XML from a remote URL.

    Args:
        url: Address of the RSS feed.
        timeout: Request timeout in seconds, forwarded to ``httpx.get``.

    Returns:
        The response body as text (raw, unparsed XML).

    Raises:
        Whatever ``httpx.get`` raises on network failure, and whatever
        ``raise_for_status`` raises for an error status code.
    """
    response = httpx.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


@staticmethod
def parse_rss_to_posts(xml_text: str, max_posts: int = 1) -> List[Dict[str, str]]:
    """
    Parse RSS XML and return a list of blog post dicts.

    Extracts title, description, date (YYYY-MM-DD), and url from each
    <item> under <channel>, in document order, and stops once
    ``max_posts`` posts have been collected.

    Args:
        xml_text: The RSS document as text.
        max_posts: Maximum number of posts to return. Zero or a negative
            value yields an empty list.

    Returns:
        A list of dicts with the keys ``title``, ``description``, ``date``
        and ``url``. All values are whitespace-trimmed strings. Items
        without a non-empty title and link are skipped. ``description`` is
        ``""`` when absent. ``date`` is ``""`` when <pubDate> is absent, or
        the raw <pubDate> text when it cannot be parsed as an RFC 2822 date.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_text`` is not
            well-formed XML.
        ValueError: If the document has no <channel> element.
    """
    # NOTE(security): the feed comes from a remote, configurable URL and the
    # standard-library XML parsers are not hardened against maliciously
    # constructed documents. Only point the feed URL at trusted hosts.
    #
    # Leading whitespace before an XML declaration is a hard parse error, so
    # trim it rather than rejecting an otherwise valid feed.
    root = ET.fromstring(xml_text.lstrip())
    channel = root.find("channel")
    if channel is None:
        raise ValueError("RSS feed missing <channel> element")

    def _child_text(parent, tag: str) -> str:
        """Return the stripped text of ``parent``'s child ``tag``, or ""."""
        child = parent.find(tag)
        if child is None or child.text is None:
            return ""
        return child.text.strip()

    posts: List[Dict[str, str]] = []
    for item in channel.findall("item"):
        if len(posts) >= max_posts:
            break

        title = _child_text(item, "title")
        url = _child_text(item, "link")
        # A post needs both a title and a link to be usable.
        if not title or not url:
            continue

        # Convert the RFC 2822 pubDate to YYYY-MM-DD; keep the raw text if it
        # does not parse so the caller still has something to show.
        date_str = _child_text(item, "pubDate")
        if date_str:
            try:
                date_str = parsedate_to_datetime(date_str).date().isoformat()
            except (TypeError, ValueError, IndexError, OverflowError):
                pass

        posts.append(
            {
                "title": title,
                "description": _child_text(item, "description"),
                "date": date_str,
                "url": url,
            }
        )

    return posts
# NOTE(review): the XML markup below was reconstructed from the field values
# and the assertions in the tests that follow; confirm the exact tags and
# declaration against the repository copy.
SAMPLE_RSS = """\
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
  <title>LiteLLM Blog</title>
  <item>
    <title>Test Post</title>
    <link>https://docs.litellm.ai/blog/test</link>
    <description>A test post.</description>
    <pubDate>Wed, 01 Jan 2026 10:00:00 GMT</pubDate>
  </item>
  <item>
    <title>Second Post</title>
    <link>https://docs.litellm.ai/blog/second</link>
    <description>Another post.</description>
    <pubDate>Tue, 31 Dec 2025 10:00:00 GMT</pubDate>
  </item>
</channel>
</rss>
"""


def test_parse_rss_to_posts():
    """Only the first item is returned when max_posts=1, with all four fields."""
    posts = GetBlogPosts.parse_rss_to_posts(SAMPLE_RSS, max_posts=1)
    assert len(posts) == 1
    assert posts[0]["title"] == "Test Post"
    assert posts[0]["url"] == "https://docs.litellm.ai/blog/test"
    assert posts[0]["description"] == "A test post."
    assert posts[0]["date"] == "2026-01-01"


def test_parse_rss_to_posts_multiple():
    """A max_posts larger than the feed returns every item, in feed order."""
    posts = GetBlogPosts.parse_rss_to_posts(SAMPLE_RSS, max_posts=5)
    assert len(posts) == 2
    assert posts[1]["title"] == "Second Post"


def test_parse_rss_to_posts_invalid_xml():
    """Malformed XML propagates as an exception."""
    with pytest.raises(Exception):
        GetBlogPosts.parse_rss_to_posts("not xml")


def test_parse_rss_to_posts_missing_channel():
    """Well-formed XML without a <channel> element raises ValueError."""
    with pytest.raises(ValueError, match="missing <channel>"):
        GetBlogPosts.parse_rss_to_posts("<rss></rss>")


def test_validate_blog_posts_valid():
    """A non-empty list of posts is accepted."""
    posts = [{"title": "T", "description": "D", "date": "2026-01-01", "url": "https://x.com"}]
    assert GetBlogPosts.validate_blog_posts(posts) is True


def test_validate_blog_posts_empty_list():
    """An empty list is rejected."""
    assert GetBlogPosts.validate_blog_posts([]) is False


def test_validate_blog_posts_not_list():
    """A non-list value is rejected."""
    assert GetBlogPosts.validate_blog_posts("not a list") is False
mock_response.raise_for_status = MagicMock() with patch("litellm.litellm_core_utils.get_blog_posts.httpx.get", return_value=mock_response): @@ -101,7 +132,8 @@ def test_get_blog_posts_invalid_json_falls_back_to_local(): def test_get_blog_posts_ttl_cache_not_refetched(): """Within TTL window, does not re-fetch.""" - GetBlogPosts._cached_posts = SAMPLE_RESPONSE["posts"] + cached = [{"title": "Cached", "description": "D", "date": "2026-01-01", "url": "https://x.com"}] + GetBlogPosts._cached_posts = cached GetBlogPosts._last_fetch_time = time.time() # just now call_count = 0 @@ -110,7 +142,7 @@ def mock_get(*args, **kwargs): nonlocal call_count call_count += 1 m = MagicMock() - m.json.return_value = SAMPLE_RESPONSE + m.text = SAMPLE_RSS m.raise_for_status = MagicMock() return m @@ -123,11 +155,12 @@ def mock_get(*args, **kwargs): def test_get_blog_posts_ttl_expired_refetches(): """After TTL window, re-fetches from remote.""" - GetBlogPosts._cached_posts = SAMPLE_RESPONSE["posts"] + cached = [{"title": "Cached", "description": "D", "date": "2026-01-01", "url": "https://x.com"}] + GetBlogPosts._cached_posts = cached GetBlogPosts._last_fetch_time = time.time() - 7200 # 2 hours ago mock_response = MagicMock() - mock_response.json.return_value = SAMPLE_RESPONSE + mock_response.text = SAMPLE_RSS mock_response.raise_for_status = MagicMock() with patch(