From 8316c549fb8f4e15c1e435371af6e6ac6dd58421 Mon Sep 17 00:00:00 2001 From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com> Date: Wed, 22 Apr 2026 09:36:04 -0300 Subject: [PATCH 01/10] doc: create Cursor rule for docs website --- .cursor/rules/website/main.mdc | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .cursor/rules/website/main.mdc diff --git a/.cursor/rules/website/main.mdc b/.cursor/rules/website/main.mdc new file mode 100644 index 0000000000..5a32f49a08 --- /dev/null +++ b/.cursor/rules/website/main.mdc @@ -0,0 +1,40 @@ +--- +description: documentation website - source code and content +globs: docs/website/** +alwaysApply: false +--- + +# Documentation Website + +## Language + +All generated text MUST be written in English. + +## Project Context + +QVAC is an OSS ecosystem for developers building local AI applications and systems. + +`docs/website` is the developer portal for the QVAC project. + +The goal of this context (chat) is to improve this website. This includes: the website source code AND all the resources we provide for our developer-users to be capable of using QVAC — references, tutorials, learning materials, code examples, templates, starter kits, etc. + +## Repository Layout + +`docs/website/` contains website source code and WITHIN it, `docs/website/content/` contains all content (resources) in MDX files. + +## Tech Stack + +- **Framework:** Next.js + Fumadocs +- **Output:** 100% static site. At build time, all HTML pages are generated into `docs/website/out/` and then served from a CDN. There is no server-side runtime. + +## Scope of Work + +The goal of this context is one or both of the following: + +1. Improve website source code: changes to components, configuration, styling, or build pipeline under `docs/website/` +2.
Generate new content: create or update pages under `docs/website/content/` + +## Environments + +- Production: https://docs.qvac.tether.io +- Staging: https://docs.qvac.tether.su \ No newline at end of file From d3af171954e8069d3ae257c12bb0809fd34deec5 Mon Sep 17 00:00:00 2001 From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:18:41 -0300 Subject: [PATCH 02/10] docs: add robots.txt to website --- docs/website/src/app/robots.ts | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 docs/website/src/app/robots.ts diff --git a/docs/website/src/app/robots.ts b/docs/website/src/app/robots.ts new file mode 100644 index 0000000000..b55a7a5f50 --- /dev/null +++ b/docs/website/src/app/robots.ts @@ -0,0 +1,33 @@ +import type { MetadataRoute } from 'next'; +import { allowDocsIndexingAtBuildTime } from '@/lib/docs-indexing'; +import { DOCS_SITE_ORIGIN } from '@/lib/docs-open-graph'; + +// Required for `output: 'export'` — resolves `robots()` at build time so the +// result is written to `out/robots.txt` as a static file. +export const dynamic = 'force-static'; + +/** + * Generates `/robots.txt` at build time. + * + * Indexing policy (allow all) — complements `docsRootMetadataRobots()` in `layout.tsx`: + * - Production (`DOCS_ALLOW_INDEXING=true`): permissive for all crawlers, including AI + * training bots. Declares the sitemap so crawlers can discover the page inventory. + * - Preview / local / PR builds (default): disallow everything so non-canonical + * deploys stay out of search indexes. + * + * Per-user-agent rules are intentionally omitted while the policy is "allow all" — + * the wildcard `User-agent: *` already covers every crawler. Add explicit rules + * only if the policy needs to diverge per crawler in the future.
+ */ +export default function robots(): MetadataRoute.Robots { + if (!allowDocsIndexingAtBuildTime()) { + return { + rules: [{ userAgent: '*', disallow: '/' }], + }; + } + + return { + rules: [{ userAgent: '*', allow: '/' }], + sitemap: `${DOCS_SITE_ORIGIN}/sitemap.xml`, + }; +} From 168c685e7901a5af756c1eae8b183abe44121940 Mon Sep 17 00:00:00 2001 From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com> Date: Wed, 22 Apr 2026 15:32:37 -0300 Subject: [PATCH 03/10] doc: website source - refactor - standardize env vars to standard used in JSON and infra envs like GH Actions --- docs/website/.env.example | 5 +--- docs/website/src/lib/docs-indexing.ts | 12 ++++------ docs/website/tests/docs-indexing.test.ts | 29 +++++++++++++++++------- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/docs/website/.env.example b/docs/website/.env.example index 1460e411e1..6a93028d48 100644 --- a/docs/website/.env.example +++ b/docs/website/.env.example @@ -1,9 +1,6 @@ # Production docs (Sevalla): set at build time on the app that serves https://docs.qvac.tether.io # so static HTML declares index,follow. Omit on preview/staging/PR builds. -# DOCS_ALLOW_INDEXING=1 - -# Force noindex in static HTML even when DOCS_ALLOW_INDEXING is set (emergency / wrong environment). -# DOCS_FORCE_NOINDEX=1 +# DOCS_ALLOW_INDEXING=true # Inkeep API Key for search and chat functionality NEXT_PUBLIC_INKEEP_API_KEY=your_inkeep_api_key_here diff --git a/docs/website/src/lib/docs-indexing.ts b/docs/website/src/lib/docs-indexing.ts index 707b821145..60aeaf302e 100644 --- a/docs/website/src/lib/docs-indexing.ts +++ b/docs/website/src/lib/docs-indexing.ts @@ -6,16 +6,12 @@ import type { Metadata } from 'next'; * preview, PR, and local builds. * * Enable indexing only for the deploy that serves `https://docs.qvac.tether.io`. - * On Sevalla, set **`DOCS_ALLOW_INDEXING=1`** for that application at **build time** - * (Next reads this when generating static metadata). 
- * - * Optional: `DOCS_FORCE_NOINDEX=1` forces noindex even when `DOCS_ALLOW_INDEXING` is set. + * On Sevalla, set **`DOCS_ALLOW_INDEXING=true`** for that application at **build + * time** (Next reads this when generating static metadata). Case-insensitive; + * any other value (including `1`) is treated as false. */ export function allowDocsIndexingAtBuildTime() { - if (process.env.DOCS_FORCE_NOINDEX === '1' || process.env.DOCS_FORCE_NOINDEX === 'true') { - return false; - } - return process.env.DOCS_ALLOW_INDEXING === '1' || process.env.DOCS_ALLOW_INDEXING === 'true'; + return process.env.DOCS_ALLOW_INDEXING?.toLowerCase() === 'true'; } export function docsRootMetadataRobots(): Metadata['robots'] { diff --git a/docs/website/tests/docs-indexing.test.ts b/docs/website/tests/docs-indexing.test.ts index 9a700a3193..ec15b2fcf5 100644 --- a/docs/website/tests/docs-indexing.test.ts +++ b/docs/website/tests/docs-indexing.test.ts @@ -8,30 +8,43 @@ afterEach(() => { describe('allowDocsIndexingAtBuildTime', () => { it('is false when no relevant env is set', () => { vi.stubEnv('DOCS_ALLOW_INDEXING', ''); - vi.stubEnv('DOCS_FORCE_NOINDEX', ''); expect(allowDocsIndexingAtBuildTime()).toBe(false); }); - it('is true when DOCS_ALLOW_INDEXING=1', () => { - vi.stubEnv('DOCS_ALLOW_INDEXING', '1'); + it('is true when DOCS_ALLOW_INDEXING=true', () => { + vi.stubEnv('DOCS_ALLOW_INDEXING', 'true'); expect(allowDocsIndexingAtBuildTime()).toBe(true); }); - it('is true when DOCS_ALLOW_INDEXING=true', () => { - vi.stubEnv('DOCS_ALLOW_INDEXING', 'true'); + it('is true when DOCS_ALLOW_INDEXING=TRUE (case-insensitive)', () => { + vi.stubEnv('DOCS_ALLOW_INDEXING', 'TRUE'); + expect(allowDocsIndexingAtBuildTime()).toBe(true); + }); + + it('is true when DOCS_ALLOW_INDEXING=True (case-insensitive)', () => { + vi.stubEnv('DOCS_ALLOW_INDEXING', 'True'); expect(allowDocsIndexingAtBuildTime()).toBe(true); }); - it('is false when DOCS_FORCE_NOINDEX=1 even if allow indexing', () => { + it('is false when 
DOCS_ALLOW_INDEXING=1 (rejects non-true values)', () => { vi.stubEnv('DOCS_ALLOW_INDEXING', '1'); - vi.stubEnv('DOCS_FORCE_NOINDEX', '1'); + expect(allowDocsIndexingAtBuildTime()).toBe(false); + }); + + it('is false when DOCS_ALLOW_INDEXING=yes (rejects non-true values)', () => { + vi.stubEnv('DOCS_ALLOW_INDEXING', 'yes'); + expect(allowDocsIndexingAtBuildTime()).toBe(false); + }); + + it('is false when DOCS_ALLOW_INDEXING=false', () => { + vi.stubEnv('DOCS_ALLOW_INDEXING', 'false'); expect(allowDocsIndexingAtBuildTime()).toBe(false); }); }); describe('docsRootMetadataRobots', () => { it('matches allow flag', () => { - vi.stubEnv('DOCS_ALLOW_INDEXING', '1'); + vi.stubEnv('DOCS_ALLOW_INDEXING', 'true'); expect(docsRootMetadataRobots()).toEqual({ index: true, follow: true }); }); From 95fadfebf955e2e5dc774317c818dc7a9e7efed2 Mon Sep 17 00:00:00 2001 From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com> Date: Wed, 22 Apr 2026 17:20:37 -0300 Subject: [PATCH 04/10] doc: website source - add autogen sitemap.xml --- docs/website/source.config.ts | 4 ++ .../src/app/(docs)/[[...slug]]/page.tsx | 8 ++++ docs/website/src/app/llms-full.txt/route.ts | 8 +++- docs/website/src/app/llms.txt/route.ts | 7 +++- docs/website/src/app/sitemap.ts | 41 +++++++++++++++++++ docs/website/src/lib/docs-open-graph.ts | 12 ++++++ 6 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 docs/website/src/app/sitemap.ts diff --git a/docs/website/source.config.ts b/docs/website/source.config.ts index 3779843bf6..37304327a5 100644 --- a/docs/website/source.config.ts +++ b/docs/website/source.config.ts @@ -4,6 +4,7 @@ import { frontmatterSchema, metaSchema, } from 'fumadocs-mdx/config'; +import lastModified from 'fumadocs-mdx/plugins/last-modified'; import { remarkMdxMermaid } from 'fumadocs-core/mdx-plugins'; import { z } from "zod"; import { resolve } from 'path'; @@ -32,6 +33,9 @@ export const docs = defineDocs({ }); export default defineConfig({ + // Injects 
`page.data.lastModified: Date` from `git log -1` per MDX file at build time. + // Consumed by `app/sitemap.ts` to emit `<lastmod>` entries. + plugins: [lastModified()], mdxOptions: { remarkPlugins: [ remarkMath, diff --git a/docs/website/src/app/(docs)/[[...slug]]/page.tsx b/docs/website/src/app/(docs)/[[...slug]]/page.tsx index 628e857272..42391915f5 100644 --- a/docs/website/src/app/(docs)/[[...slug]]/page.tsx +++ b/docs/website/src/app/(docs)/[[...slug]]/page.tsx @@ -15,6 +15,7 @@ import { LLMCopyButton, ViewOptions, VersionSelector } from '@/components/page-a import { buildCanonicalDocsUrl, inferDiataxisOpenGraph, + isArchivedVersionSlug, } from '@/lib/docs-open-graph'; import { QVAC_DOC_OG_HEIGHT, QVAC_DOC_OG_WIDTH } from '@/lib/qvac-doc-og'; @@ -110,10 +111,17 @@ export async function generateMetadata( const canonicalUrl = buildCanonicalDocsUrl(params.slug); const { section, tags } = inferDiataxisOpenGraph(page.path); const ogImage = getPageImage(page); + // Non-canonical bundles (dev + vX.Y.Z) are hidden from search engines and + // LLM training channels via per-page noindex. Canonical/OG/Twitter stay + // intact so shared links still render a rich social card; `noindex` makes + // the canonical pointer inert for Google even when its target was removed + // in latest (e.g., `ping` existed in v0.7.0 but not in v0.8.0+). + const isArchived = isArchivedVersionSlug(params.slug); return { title: isHomePage ?
{ absolute: title } : title, description, + ...(isArchived && { robots: { index: false, follow: true } }), alternates: { canonical: canonicalUrl, }, diff --git a/docs/website/src/app/llms-full.txt/route.ts b/docs/website/src/app/llms-full.txt/route.ts index bba97f01aa..370c6958e6 100644 --- a/docs/website/src/app/llms-full.txt/route.ts +++ b/docs/website/src/app/llms-full.txt/route.ts @@ -1,11 +1,17 @@ import { source } from '@/lib/source'; import { getLLMText } from '@/lib/get-llm-text'; +import { isArchivedVersionSlug } from '@/lib/docs-open-graph'; // cached forever export const revalidate = false; export async function GET() { - const scan = source.getPages().map(getLLMText); + // Non-canonical bundles (dev + vX.Y.Z) are excluded so the full LLM dump + // only contains the latest canonical documentation. + const scan = source + .getPages() + .filter((page) => !isArchivedVersionSlug(page.slugs)) + .map(getLLMText); const scanned = await Promise.all(scan); return new Response(scanned.join('\n\n')); diff --git a/docs/website/src/app/llms.txt/route.ts b/docs/website/src/app/llms.txt/route.ts index b1c6e25c1b..d58a9b27c0 100644 --- a/docs/website/src/app/llms.txt/route.ts +++ b/docs/website/src/app/llms.txt/route.ts @@ -1,10 +1,15 @@ import { source } from '@/lib/source'; import { LATEST_VERSION } from '@/lib/versions'; +import { isArchivedVersionSlug } from '@/lib/docs-open-graph'; export const revalidate = false; export function GET() { - const pages = source.getPages(); + // Non-canonical bundles (dev + vX.Y.Z) are excluded so the LLM index + // only advertises the latest canonical documentation. 
+ const pages = source + .getPages() + .filter((page) => !isArchivedVersionSlug(page.slugs)); const index = [ '# QVAC Documentation (llms.txt)', '', diff --git a/docs/website/src/app/sitemap.ts b/docs/website/src/app/sitemap.ts new file mode 100644 index 0000000000..7988abf567 --- /dev/null +++ b/docs/website/src/app/sitemap.ts @@ -0,0 +1,41 @@ +import type { MetadataRoute } from 'next'; +import { source } from '@/lib/source'; +import { allowDocsIndexingAtBuildTime } from '@/lib/docs-indexing'; +import { + buildCanonicalDocsUrl, + isArchivedVersionSlug, +} from '@/lib/docs-open-graph'; + +// Required for `output: 'export'` — resolves `sitemap()` at build time so the +// result is written to `out/sitemap.xml` as a static file. +export const dynamic = 'force-static'; + +/** + * Generates `/sitemap.xml` at build time. + * + * Indexing policy — mirrors `robots.ts`: + * - Production (`DOCS_ALLOW_INDEXING=true`): emit one entry per latest page. + * - Preview / local / PR builds (default): emit an empty sitemap so non-canonical + * deploys don't advertise any URLs even if the file is fetched directly. + * + * Non-canonical bundles (`dev` preview + `vX.Y.Z` back-versions) are excluded + * entirely. Those pages still render so the in-page version selector keeps + * working, but each one is marked `noindex` by `generateMetadata`, and we do + * not advertise them here. Single source of truth for external crawlers and + * AI training channels: the latest bundle. + * + * Fields per entry are intentionally minimal (`url` + `lastModified`). Google + * and Bing have publicly stated that `changeFrequency` and `priority` are + * ignored, so they would only add noise. 
+ */ +export default function sitemap(): MetadataRoute.Sitemap { + if (!allowDocsIndexingAtBuildTime()) return []; + + return source + .getPages() + .filter((page) => !isArchivedVersionSlug(page.slugs)) + .map((page) => ({ + url: buildCanonicalDocsUrl(page.slugs), + lastModified: (page.data as { lastModified?: Date }).lastModified, + })); +} diff --git a/docs/website/src/lib/docs-open-graph.ts b/docs/website/src/lib/docs-open-graph.ts index 8224c72b8b..254aa849ed 100644 --- a/docs/website/src/lib/docs-open-graph.ts +++ b/docs/website/src/lib/docs-open-graph.ts @@ -8,6 +8,18 @@ export const DOCS_SITE_ORIGIN = 'https://docs.qvac.tether.io'; const VERSION_SLUG_RE = /^v\d+\.\d+\.\d+$/; +/** + * True for pages served from a non-canonical bundle (`dev` preview or a + * released `vX.Y.Z` back-version). Used by sitemap, llms.txt/llms-full.txt, + * and per-page metadata to mark the page `noindex` so crawlers and LLM + * training channels only see the latest canonical documentation. + */ +export function isArchivedVersionSlug(slugs: string[] | undefined): boolean { + if (!slugs?.length) return false; + const first = slugs[0]; + return first === 'dev' || VERSION_SLUG_RE.test(first); +} + /** * Strip leading version segment from URL slugs (latest docs have no prefix; dev / vX.Y.Z do). 
*/ From 7429dbe985b00df3ae1b314e7ab0d2159ba16af0 Mon Sep 17 00:00:00 2001 From: Bruno Campana <7632562+BrunoCampana@users.noreply.github.com> Date: Wed, 22 Apr 2026 19:30:09 -0300 Subject: [PATCH 05/10] doc: website source - add JSON-LD --- docs/website/source.config.ts | 2 + .../src/app/(docs)/[[...slug]]/page.tsx | 23 ++- docs/website/src/lib/docs-json-ld.ts | 169 ++++++++++++++++++ docs/website/src/lib/docs-open-graph.ts | 93 +--------- 4 files changed, 188 insertions(+), 99 deletions(-) create mode 100644 docs/website/src/lib/docs-json-ld.ts diff --git a/docs/website/source.config.ts b/docs/website/source.config.ts index 37304327a5..72ad2b6fbd 100644 --- a/docs/website/source.config.ts +++ b/docs/website/source.config.ts @@ -11,6 +11,7 @@ import { resolve } from 'path'; import rehypeKatex from 'rehype-katex'; import remarkMath from 'remark-math'; import codeImport from 'remark-code-import'; +import { SCHEMA_TYPES } from './src/lib/docs-json-ld'; const monorepoRoot = resolve(process.cwd(), '../..'); @@ -22,6 +23,7 @@ export const docs = defineDocs({ titleStyle: z.enum(["code", "text"]).optional(), version: z.string().optional(), ogImage: z.string().optional(), + schemaType: z.enum(SCHEMA_TYPES).optional(), }), postprocess: { includeProcessedMarkdown: true, diff --git a/docs/website/src/app/(docs)/[[...slug]]/page.tsx b/docs/website/src/app/(docs)/[[...slug]]/page.tsx index 42391915f5..f9012c063e 100644 --- a/docs/website/src/app/(docs)/[[...slug]]/page.tsx +++ b/docs/website/src/app/(docs)/[[...slug]]/page.tsx @@ -14,9 +14,9 @@ import { cloneElement, isValidElement } from "react"; import { LLMCopyButton, ViewOptions, VersionSelector } from '@/components/page-actions'; import { buildCanonicalDocsUrl, - inferDiataxisOpenGraph, isArchivedVersionSlug, } from '@/lib/docs-open-graph'; +import { buildDocsJsonLd } from '@/lib/docs-json-ld'; import { QVAC_DOC_OG_HEIGHT, QVAC_DOC_OG_WIDTH } from '@/lib/qvac-doc-og'; function TitleText({ @@ -58,9 +58,20 @@ export default 
async function Page(props: PageProps<'/[[...slug]]'>) { // Filter ToC to include H2 through H5 (depth 2, 3, 4, and 5) const filteredToc = page.data.toc?.filter(item => item.depth >= 2 && item.depth <= 5) || []; - + + const isHomePage = !params.slug || params.slug.length === 0; + const jsonLdBlocks = buildDocsJsonLd(page, params.slug ?? [], isHomePage); + return ( - + <> + {jsonLdBlocks?.map((block, i) => ( +