-
Notifications
You must be signed in to change notification settings - Fork 5.4k
RSS feed error handling #15476
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RSS feed error handling #15476
Changes from all commits
13c6f02
02da378
0a7e55b
8f40319
7fcca62
9a1d824
360c439
7142a0a
6e9692e
0a5739f
29e3f12
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,12 +1,7 @@ | ||
| import { parseString } from "xml2js" | ||
|
|
||
| import type { | ||
| AtomElement, | ||
| AtomResult, | ||
| RSSChannel, | ||
| RSSItem, | ||
| RSSResult, | ||
| } from "../types" | ||
| import { RSS_DISPLAY_COUNT } from "../constants" | ||
| import type { AtomElement, AtomResult, RSSItem, RSSResult } from "../types" | ||
| import { isValidDate } from "../utils/date" | ||
|
|
||
| /** | ||
|
|
@@ -18,108 +13,125 @@ export const fetchRSS = async (xmlUrl: string | string[]) => { | |
| const urls = Array.isArray(xmlUrl) ? xmlUrl : [xmlUrl] | ||
| const allItems: RSSItem[][] = [] | ||
| for (const url of urls) { | ||
| const response = (await fetchXml(url)) as RSSResult | AtomResult | ||
| try { | ||
| const response = (await fetchXml(url)) as RSSResult | AtomResult | ||
|
|
||
| if ("rss" in response) { | ||
| const [mainChannel] = response.rss.channel as RSSChannel[] | ||
| const [source] = mainChannel.title | ||
| const [sourceUrl] = mainChannel.link | ||
| const channelImage = mainChannel.image ? mainChannel.image[0].url[0] : "" | ||
| if ("rss" in response) { | ||
| const [mainChannel] = response.rss.channel | ||
| const [source] = mainChannel.title | ||
| const [sourceUrl] = mainChannel.link | ||
| const channelImage = mainChannel.image | ||
| ? mainChannel.image[0].url[0] | ||
| : "" | ||
|
|
||
| const parsedRssItems = mainChannel.item | ||
| // Filter out items with invalid dates | ||
| .filter((item) => { | ||
| if (!item.pubDate) return false | ||
| const [pubDate] = item.pubDate | ||
| return isValidDate(pubDate) | ||
| }) | ||
| // Sort by pubDate (most recent is first in array | ||
| .sort((a, b) => { | ||
| const dateA = new Date(a.pubDate[0]) | ||
| const dateB = new Date(b.pubDate[0]) | ||
| return dateB.getTime() - dateA.getTime() | ||
| }) | ||
| // Map to RSSItem object | ||
| .map((item) => { | ||
| const getImgSrc = () => { | ||
| if (item["content:encoded"]) | ||
| return item["content:encoded"][0].match( | ||
| /https?:\/\/[^"]*?\.(jpe?g|png|webp)/g | ||
| )?.[0] | ||
| if (item.enclosure) return item.enclosure[0].$.url | ||
| if (item["media:content"]) return item["media:content"][0].$.url | ||
| return channelImage | ||
| } | ||
| return { | ||
| pubDate: item.pubDate[0], | ||
| title: item.title[0], | ||
| link: item.link[0], | ||
| imgSrc: getImgSrc(), | ||
| source, | ||
| sourceUrl, | ||
| sourceFeedUrl: url, | ||
| } as RSSItem | ||
| }) | ||
| const parsedRssItems = mainChannel.item | ||
| // Filter out items with invalid dates | ||
| .filter((item) => { | ||
| if (!item.pubDate) return false | ||
| const [pubDate] = item.pubDate | ||
| return isValidDate(pubDate) | ||
| }) | ||
| // Sort by pubDate (most recent is first in array | ||
| .sort((a, b) => { | ||
| const dateA = new Date(a.pubDate[0]) | ||
| const dateB = new Date(b.pubDate[0]) | ||
| return dateB.getTime() - dateA.getTime() | ||
| }) | ||
| // Map to RSSItem object | ||
| .map((item) => { | ||
| const getImgSrc = () => { | ||
| if (item["content:encoded"]) | ||
| return item["content:encoded"][0].match( | ||
| /https?:\/\/[^"]*?\.(jpe?g|png|webp)/g | ||
| )?.[0] | ||
| if (item.enclosure) return item.enclosure[0].$.url | ||
| if (item["media:content"]) return item["media:content"][0].$.url | ||
| return channelImage | ||
| } | ||
| return { | ||
| pubDate: item.pubDate[0], | ||
| title: item.title[0], | ||
| link: item.link[0], | ||
| imgSrc: getImgSrc(), | ||
| source, | ||
| sourceUrl, | ||
| sourceFeedUrl: url, | ||
| } | ||
| }) | ||
|
|
||
| allItems.push(parsedRssItems) | ||
| } else if ("feed" in response) { | ||
| const [source] = response.feed.title | ||
| const [sourceUrl] = response.feed.id | ||
| const feedImage = response.feed.icon?.[0] | ||
| allItems.push(parsedRssItems) | ||
| } else if ("feed" in response) { | ||
| const [source] = response.feed.title | ||
| const [sourceUrl] = response.feed.id | ||
| const feedImage = response.feed.icon?.[0] | ||
|
|
||
| const parsedAtomItems = response.feed.entry | ||
| // Filter out items with invalid dates | ||
| .filter((entry) => { | ||
| if (!entry.updated) return false | ||
| const [published] = entry.updated | ||
| return isValidDate(published) | ||
| }) | ||
| // Sort by published (most recent is first in array | ||
| .sort((a, b) => { | ||
| const dateA = new Date(a.updated[0]) | ||
| const dateB = new Date(b.updated[0]) | ||
| return dateB.getTime() - dateA.getTime() | ||
| }) | ||
| // Map to RSSItem object | ||
| .map((entry) => { | ||
| const getString = (el?: AtomElement[]): string => { | ||
| if (!el) return "" | ||
| const [firstEl] = el | ||
| if (typeof firstEl === "string") return firstEl | ||
| return firstEl._ || "" | ||
| } | ||
| const getHref = (): string => { | ||
| if (!entry.link) { | ||
| console.warn(`No link found for RSS url: ${url}`) | ||
| return "" | ||
| const parsedAtomItems = response.feed.entry | ||
| // Filter out items with invalid dates | ||
| .filter((entry) => { | ||
| if (!entry.updated) return false | ||
| const [published] = entry.updated | ||
| return isValidDate(published) | ||
| }) | ||
| // Sort by published (most recent is first in array | ||
| .sort((a, b) => { | ||
| const dateA = new Date(a.updated[0]) | ||
| const dateB = new Date(b.updated[0]) | ||
| return dateB.getTime() - dateA.getTime() | ||
| }) | ||
| // Map to RSSItem object | ||
| .map((entry) => { | ||
| const getString = (el?: AtomElement[]): string => { | ||
| if (!el) return "" | ||
| const [firstEl] = el | ||
| if (typeof firstEl === "string") return firstEl | ||
| return firstEl._ || "" | ||
| } | ||
| const getHref = (): string => { | ||
| if (!entry.link) { | ||
| console.warn(`No link found for RSS url: ${url}`) | ||
| return "" | ||
| } | ||
| const link = entry.link[0] | ||
| if (typeof link === "string") return link | ||
| return link.$.href || "" | ||
| } | ||
| const getImgSrc = (): string => { | ||
| const imgRegEx = /https?:\/\/[^"]*?\.(jpe?g|png|webp)/g | ||
| const contentMatch = getString(entry.content).match(imgRegEx) | ||
| if (contentMatch) return contentMatch[0] | ||
| const summaryMatch = getString(entry.summary).match(imgRegEx) | ||
| if (summaryMatch) return summaryMatch[0] | ||
| return feedImage || "" | ||
| } | ||
| return { | ||
| pubDate: entry.updated[0], | ||
| title: getString(entry.title), | ||
| link: getHref(), | ||
| imgSrc: getImgSrc(), | ||
| source, | ||
| sourceUrl, | ||
| sourceFeedUrl: url, | ||
| } | ||
| const link = entry.link[0] | ||
| if (typeof link === "string") return link | ||
| return link.$.href || "" | ||
| } | ||
| const getImgSrc = (): string => { | ||
| const imgRegEx = /https?:\/\/[^"]*?\.(jpe?g|png|webp)/g | ||
| const contentMatch = getString(entry.content).match(imgRegEx) | ||
| if (contentMatch) return contentMatch[0] | ||
| const summaryMatch = getString(entry.summary).match(imgRegEx) | ||
| if (summaryMatch) return summaryMatch[0] | ||
| return feedImage || "" | ||
| } | ||
| return { | ||
| pubDate: entry.updated[0], | ||
| title: getString(entry.title), | ||
| link: getHref(), | ||
| imgSrc: getImgSrc(), | ||
| source, | ||
| sourceUrl, | ||
| sourceFeedUrl: url, | ||
| } as RSSItem | ||
| }) | ||
| }) | ||
|
|
||
| allItems.push(parsedAtomItems) | ||
| allItems.push(parsedAtomItems) | ||
| } else { | ||
| throw new Error( | ||
| `Error parsing XML, invalid RSSResult or AtomResult type: ${url}` | ||
| ) | ||
| } | ||
| } catch (error) { | ||
| console.error(error instanceof Error ? error.message : error) | ||
| // Do not break build for single fetch failure | ||
| continue | ||
| } | ||
| } | ||
| return allItems as RSSItem[][] | ||
|
|
||
| // Only break build if insufficient number of items fetched | ||
| if (allItems.length < RSS_DISPLAY_COUNT) | ||
| throw new Error("Insufficient number of RSS items fetched") | ||
|
Comment on lines
+131
to
+132
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't actually breaking the build as-is, since it is contained inside an API function call... 🤔 cc: @pettinarip
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, with the addition of |
||
|
|
||
| return allItems | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this function fetch will never throw, is that the intention? if we want to break the build, we should
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't want a single feed to break the build (e.g., the recent example where one blog changed their feed link)... these cases should just skip over that feed. BUT, in the event something happens where we don't get ANY feed results, then that should break the build... which was the intent of throwing an error at the end if there aren't enough results. |
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -135,16 +147,12 @@ export const fetchXml = async (url: string) => { | |
| credentials: "omit", // Don't send or receive cookies | ||
| }) | ||
| const xml = await response.text() | ||
| let returnObject: Record<string, unknown> = {} | ||
| parseString(xml, (err, result) => { | ||
| if (err) { | ||
| throw err // Throw the error to be caught by the outer try-catch | ||
| } | ||
| returnObject = result | ||
| return await new Promise<Record<string, unknown>>((resolve, reject) => { | ||
| parseString(xml, (err, result) => { | ||
| err ? reject(err) : resolve(result) | ||
| }) | ||
| }) | ||
| return returnObject | ||
| } catch (error) { | ||
| console.error("Error fetching or parsing XML:", url, error) | ||
| throw error | ||
| throw new Error(`Error fetching or parsing XML: ${url}`) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forces the homepage to build its paths at build time, which allows
`throw Error` to break the build