diff --git a/apps/realtime/src/index.ts b/apps/realtime/src/index.ts index 41dc396..6926545 100644 --- a/apps/realtime/src/index.ts +++ b/apps/realtime/src/index.ts @@ -533,10 +533,14 @@ io.on("connection", (socket) => { // Enqueue link unfurl job if the message contains a URL const rawUrlMatches = - parsed.content?.match(/https?:\/\/[^\s<>"]+/g) ?? null - const urlMatches = rawUrlMatches?.map((u) => - u.replace(/[.,!?:;'")\]]+$/, "") - ) + parsed.content?.match(/https?:\/\/[^\s<>"[\]]+/g) ?? null + const urlMatches = rawUrlMatches + ? [ + ...new Set( + rawUrlMatches.map((u) => u.replace(/[.,!?:;'")\]]+$/, "")) + ), + ] + : null if (urlMatches && urlMatches.length > 0) { void linkUnfurlQueue .add("unfurl", { diff --git a/apps/web/src/lib/editor-utils.ts b/apps/web/src/lib/editor-utils.ts index fdfffdf..b8f1845 100644 --- a/apps/web/src/lib/editor-utils.ts +++ b/apps/web/src/lib/editor-utils.ts @@ -12,6 +12,8 @@ export function toStoredMarkdown(markdown: string) { // TipTap outputs either ++[url](url)++ or ++bareUrl++ .replace(/\+\+\[([^\]]+)\]\([^)]+\)\+\+/g, "$1") .replace(/\+\+([\s\S]+?)\+\+/g, "$1") + // Strip bare markdown links where text matches href (autolinked URLs without ++ wrappers) + .replace(/\[([^\]]+)\]\(\1\)/g, "$1") .replace(TIPTAP_MARKDOWN_MENTION_REGEX, (_match, mentionId: string) => { if (mentionId.toLowerCase() === EVERYONE_MENTION_ID) { return "@everyone" diff --git a/apps/worker/src/jobs/link-unfurl.ts b/apps/worker/src/jobs/link-unfurl.ts index 182444c..2c49c6a 100644 --- a/apps/worker/src/jobs/link-unfurl.ts +++ b/apps/worker/src/jobs/link-unfurl.ts @@ -12,8 +12,7 @@ import type { Job } from "bullmq" import ogs from "open-graph-scraper" import { logger } from "@/lib/logger" -const OG_FETCH_TIMEOUT_MS = 5000 -const MAX_REDIRECTS = 5 +const OG_FETCH_TIMEOUT_MS = 5 const PRIVATE_IP_REGEX = /^(127\.|10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.|0\.|169\.254\.|::1|fc|fd|fe80)/i @@ -77,54 +76,6 @@ function matchProxyRule(originalUrl: string) { return null } -/** Follow redirects manually, validating each hop through isSafeUrl. */ -async function resolveRedirects(startUrl: string): Promise { - let current = startUrl - for (let i = 0; i < MAX_REDIRECTS; i++) { - let res: Response - try { - res = await fetch(current, { - method: "HEAD", - headers: { "user-agent": "Townhall/1.0 OGBot" }, - redirect: "manual", - signal: AbortSignal.timeout(OG_FETCH_TIMEOUT_MS), - }) - } catch (err) { - logger.warn( - { err, startUrl, current }, - "Redirect resolution fetch failed" - ) - return null - } - - if (res.status >= 300 && res.status < 400) { - const location = res.headers.get("location") - if (!location) return null - // Resolve relative Location headers against the current URL - let next: string - try { - next = new URL(location, current).toString() - } catch { - logger.warn({ location, current }, "Malformed redirect Location header") - return null - } - if (!(await isSafeUrl(next))) { - logger.warn( - { from: current, to: next }, - "Redirect target failed safety check" - ) - return null - } - current = next - continue - } - - return current - } - logger.warn({ url: startUrl }, "Too many redirects") - return null -} - async function fetchOgEmbed(url: string): Promise { const proxy = matchProxyRule(url) const fetchUrl = proxy?.fetchUrl ?? url @@ -134,26 +85,29 @@ async function fetchOgEmbed(url: string): Promise { return null } - const resolvedUrl = await resolveRedirects(fetchUrl) - if (!resolvedUrl) { - logger.info({ url, fetchUrl }, "Redirect resolution failed") - return null - } - try { - const { error, result } = await ogs({ - url: resolvedUrl, + const { error, result, response } = await ogs({ + url: fetchUrl, timeout: OG_FETCH_TIMEOUT_MS, fetchOptions: { headers: { "user-agent": "Townhall/1.0 OGBot", }, - redirect: "error", }, }) + // Validate the final URL after redirects to prevent SSRF via redirect chain + const finalUrl = (response as Response | undefined)?.url ?? fetchUrl + if (finalUrl !== fetchUrl && !(await isSafeUrl(finalUrl))) { + logger.warn( + { url, fetchUrl, finalUrl }, + "Redirected to unsafe URL, discarding result" + ) + return null + } + if (error || !result.success) { - logger.warn({ url, resolvedUrl, error }, "OG scrape returned no result") + logger.warn({ url, fetchUrl, error }, "OG scrape returned no result") return null } @@ -175,7 +129,7 @@ async function fetchOgEmbed(url: string): Promise { siteName: proxy?.siteName ?? result.ogSiteName ?? undefined, } } catch (err) { - logger.error({ err, url, resolvedUrl }, "OG scrape threw") + logger.error({ err, url, fetchUrl }, "OG scrape threw") return null } } diff --git a/turbo.json b/turbo.json index 7e636d6..019c162 100644 --- a/turbo.json +++ b/turbo.json @@ -1,6 +1,6 @@ { "$schema": "https://turborepo.dev/schema.json", - "ui": "stream-with-experimental-timestamps", + "ui": "stream", "tasks": { "build": { "dependsOn": ["^build"],