diff --git a/SEO_AUDIT_PLAN.md b/SEO_AUDIT_PLAN.md new file mode 100644 index 00000000000..40bd58b9338 --- /dev/null +++ b/SEO_AUDIT_PLAN.md @@ -0,0 +1,892 @@ +# Superset SEO Audit & Improvement Plan + +**Audit Date:** January 26, 2026 +**Sites Audited:** superset.sh (marketing + blog), docs.superset.sh + +--- + +## Executive Summary + +The Superset websites have a solid foundation with Next.js 16 and proper content structure, but are missing several critical SEO elements that significantly impact search visibility. The most urgent issues are: + +1. **No robots.txt** on either site - search engines have no crawl guidance +2. **No sitemap.xml** on either site - pages aren't efficiently discovered +3. **Docs OG/Twitter images resolve to localhost** - broken social previews and share cards +4. **LLM/MDX endpoints are indexable** - duplicate content + wasted crawl budget +5. **Missing canonical URLs** - risk of duplicate content issues +6. **No structured data (JSON-LD)** - missing rich snippets in search results +7. **Incomplete Open Graph/Twitter Cards** - poor social sharing experience (blog index + non-home pages) + +--- + +## Critical Issues (Fix Immediately) + +### 1. Missing robots.txt (Both Sites) + +**Impact:** Search engines have no guidance on crawl behavior, crawl budget may be wasted. 
+ +**Files to create:** + +#### Marketing Site +``` +apps/marketing/src/app/robots.ts +``` +```typescript +import type { MetadataRoute } from "next"; + +export default function robots(): MetadataRoute.Robots { + return { + rules: [ + { + userAgent: "*", + allow: "/", + disallow: ["/api/", "/_next/"], + }, + ], + sitemap: "https://superset.sh/sitemap.xml", + }; +} +``` + +#### Docs Site +``` +apps/docs/src/app/robots.ts +``` +```typescript +import type { MetadataRoute } from "next"; + +export default function robots(): MetadataRoute.Robots { + return { + rules: [ + { + userAgent: "*", + allow: "/", + disallow: ["/api/", "/_next/", "/llms.mdx/", "/*.mdx"], + }, + ], + sitemap: "https://docs.superset.sh/sitemap.xml", + }; +} +``` + +--- + +### 2. Missing sitemap.xml (Both Sites) + +**Impact:** Search engines discover new pages slowly, blog posts may not be indexed promptly. + +#### Marketing Site +``` +apps/marketing/src/app/sitemap.ts +``` +```typescript +import type { MetadataRoute } from "next"; +import { getBlogPosts } from "@/lib/blog"; + +export default async function sitemap(): Promise<MetadataRoute.Sitemap> { + const baseUrl = "https://superset.sh"; + + // Static pages + const staticPages: MetadataRoute.Sitemap = [ + { + url: baseUrl, + lastModified: new Date(), + changeFrequency: "weekly", + priority: 1.0, + }, + { + url: `${baseUrl}/blog`, + lastModified: new Date(), + changeFrequency: "daily", + priority: 0.9, + }, + { + url: `${baseUrl}/privacy`, + lastModified: new Date("2025-01-15"), + changeFrequency: "yearly", + priority: 0.3, + }, + { + url: `${baseUrl}/terms`, + lastModified: new Date("2025-01-15"), + changeFrequency: "yearly", + priority: 0.3, + }, + { + url: `${baseUrl}/ports`, + lastModified: new Date(), + changeFrequency: "monthly", + priority: 0.5, + }, + ]; + + // Dynamic blog posts + const posts = await getBlogPosts(); + const blogPages: MetadataRoute.Sitemap = posts.map((post) => ({ + url: `${baseUrl}/blog/${post.slug}`, + lastModified: new Date(post.date), +
changeFrequency: "monthly" as const, + priority: 0.8, + })); + + return [...staticPages, ...blogPages]; +} +``` + +#### Docs Site +``` +apps/docs/src/app/sitemap.ts +``` +```typescript +import type { MetadataRoute } from "next"; +import { source } from "@/lib/source"; + +export default function sitemap(): MetadataRoute.Sitemap { + const baseUrl = "https://docs.superset.sh"; + + const pages = source.getPages(); + + return pages.map((page) => ({ + url: `${baseUrl}${page.url}`, + lastModified: new Date(), + changeFrequency: "weekly" as const, + priority: page.url === "/quick-start" ? 1.0 : 0.8, + })); +} +``` + +--- + +### 3. Missing Canonical URLs + +**Impact:** Potential duplicate content issues, PageRank dilution. + +#### Marketing Site - Update layout.tsx +``` +apps/marketing/src/app/layout.tsx +``` +Add `metadataBase` and the homepage canonical. Metadata defined in the root layout is inherited by every route that does not set its own `alternates`, so each non-home page must override `alternates.canonical` with its own path; otherwise it will declare the homepage as its canonical URL: +```typescript +export const metadata: Metadata = { + metadataBase: new URL("https://superset.sh"), + // ... existing metadata + alternates: { + canonical: "/", + }, +}; +``` + +#### Docs Site - Update layout.tsx +``` +apps/docs/src/app/layout.tsx +``` +Add `metadataBase`: +```typescript +export const metadata: Metadata = { + metadataBase: new URL("https://docs.superset.sh"), + // ...
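existing metadata +}; +``` + +--- + +### 4. Docs OG/Twitter Images Resolve to localhost + +**Impact:** Broken social previews and incorrect OG image URLs in production (currently resolving to `http://localhost:3000/...`). + +**Root cause:** `metadataBase` is not set for docs, and OG image URLs are generated as relative paths. + +**Fix:** Set `metadataBase` and ensure OG/Twitter images use relative URLs that resolve against it. + +``` +apps/docs/src/app/layout.tsx +``` +```typescript +export const metadata: Metadata = { + metadataBase: new URL("https://docs.superset.sh"), + // ... existing metadata +}; +``` + +``` +apps/docs/src/app/(docs)/[[...slug]]/page.tsx +``` +```typescript +export async function generateMetadata( + props: PageProps<"/[[...slug]]">, +): Promise<Metadata> { + const params = await props.params; + const page = source.getPage(params.slug); + if (!page) notFound(); + + const pageImage = getPageImage(page).url; + + return { + title: page.data.title, + description: page.data.description, + openGraph: { + images: [pageImage], + }, + twitter: { + card: "summary_large_image", + images: [pageImage], + }, + }; +} +``` + +--- + +### 5. LLM/MDX Endpoints Are Indexable (Duplicate Content) + +**Impact:** Duplicate content (`/quick-start.mdx`, `/llms.mdx/...`) can be indexed alongside the canonical docs URLs. + +**Fix:** Add `X-Robots-Tag: noindex, nofollow` and disallow these paths in `robots.txt`. Note that crawlers honoring the `robots.txt` disallow never fetch these URLs and therefore never see the header, so if any of them are already indexed, ship the header first and add the disallow once they have dropped out of the index. + +``` +apps/docs/src/app/llms.mdx/[[...slug]]/route.ts +``` +```typescript +return new Response(await getLLMText(page), { + headers: { + "Content-Type": "text/markdown", + "X-Robots-Tag": "noindex, nofollow", + }, +}); +``` + +``` +apps/docs/src/app/robots.ts +``` +```typescript +export default function robots(): MetadataRoute.Robots { + return { + rules: [ + { + userAgent: "*", + allow: "/", + disallow: ["/api/", "/_next/", "/llms.mdx/", "/*.mdx"], + }, + ], + sitemap: "https://docs.superset.sh/sitemap.xml", + }; +} +``` + +--- + +### 6. Missing Structured Data (JSON-LD) + +**Impact:** No rich snippets in search results, missing knowledge graph signals.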
+ +#### Homepage - Organization Schema +``` +apps/marketing/src/app/components/JsonLd/JsonLd.tsx +``` +```typescript +export function OrganizationJsonLd() { + const schema = { + "@context": "https://schema.org", + "@type": "Organization", + name: "Superset", + url: "https://superset.sh", + logo: "https://superset.sh/logo.png", + description: "Run 10+ parallel coding agents on your machine", + sameAs: [ + "https://github.com/AviSupersetSH/superset", + "https://twitter.com/AviSupersetSH", + "https://discord.gg/superset", + ], + foundingDate: "2024", + founders: [ + { + "@type": "Person", + name: "Avi Peltz", + }, + ], + }; + + return ( +