diff --git a/assistant/src/memory/v3/__tests__/validate.test.ts b/assistant/src/memory/v3/__tests__/validate.test.ts new file mode 100644 index 00000000000..693247e8946 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/validate.test.ts @@ -0,0 +1,245 @@ +/** + * Tests for `assistant/src/memory/v3/validate.ts`. + * + * Coverage matrix — one fixture per defect category plus a clean-tree control: + * - clean tree → every list empty, every count 0. + * - danglingChildRefs → a `node:` ref and a `page:` ref to absent targets. + * - orphanPages → a concept page on disk not wired into the tree; synthetic + * page-index entries (none here) and reachable pages excluded. + * - cycles → A → B → A back-edge detected during the full descent. + * - staleIndex → a parent node whose mtime predates a `node:` child's mtime. + * - unknownEdgeTargets → a page `edges:` entry pointing at a missing slug. + * + * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`. + * mtimes are pinned with `utimes` so the freshness check is deterministic and + * independent of write ordering / filesystem timestamp granularity. + */ + +import { mkdtempSync, rmSync } from "node:fs"; +import { utimes } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import { invalidateEdgeIndex } from "../../v2/edge-index.js"; +import { invalidatePageIndex } from "../../v2/page-index.js"; +import { writePage } from "../../v2/page-store.js"; +import type { ConceptPage } from "../../v2/types.js"; +import { invalidateTreeIndex } from "../tree-index.js"; +import { getTreeDir, ROOT_NODE_ID, writeNode } from "../tree-store.js"; +import type { TreeNode } from "../types.js"; +import { validateTree } from "../validate.js"; + +let workspaceDir: string; + +beforeEach(() => { + workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-validate-test-")); +}); + +afterEach(() => { + invalidateTreeIndex(); + invalidatePageIndex(); + invalidateEdgeIndex(); + rmSync(workspaceDir, { recursive: true, force: true }); +}); + +function node(id: string, children: string[], body = `body ${id}`): TreeNode { + return { id, frontmatter: { children }, body }; +} + +function page(slug: string, edges: string[] = []): ConceptPage { + return { + slug, + frontmatter: { edges, ref_files: [], ref_urls: [] }, + body: `body ${slug}`, + }; +} + +/** Pin a node file's mtime (and atime) to an explicit epoch-ms value. */ +async function setNodeMtime(id: string, mtimeMs: number): Promise { + const path = join(getTreeDir(workspaceDir), `${id}.md`); + const t = new Date(mtimeMs); + await utimes(path, t, t); +} + +/** + * Invalidate every cached index after seeding so the first `validateTree` of a + * test body sees the on-disk fixture rather than a stale cache. + */ +function resetCaches(): void { + invalidateTreeIndex(); + invalidatePageIndex(); + invalidateEdgeIndex(); +} + +describe("validateTree — clean tree", () => { + test("returns an empty report for a well-formed tree", async () => { + // _root → node:people → page:alice ; all refs resolve, alice reachable. + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:people"])); + await writeNode(workspaceDir, node("people", ["page:alice"])); + await writePage(workspaceDir, page("alice")); + // Parent newest so the freshness check never fires on a clean tree. + await setNodeMtime("people", 1_000); + await setNodeMtime(ROOT_NODE_ID, 2_000); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.danglingChildRefs).toEqual([]); + expect(report.danglingChildRefCount).toBe(0); + expect(report.orphanPages).toEqual([]); + expect(report.orphanPageCount).toBe(0); + expect(report.cycles).toEqual([]); + expect(report.cycleCount).toBe(0); + expect(report.staleIndex).toEqual([]); + expect(report.staleIndexCount).toBe(0); + expect(report.unknownEdgeTargets).toEqual([]); + expect(report.unknownEdgeTargetCount).toBe(0); + }); +}); + +describe("validateTree — danglingChildRefs", () => { + test("flags node: and page: refs whose targets are missing", async () => { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["node:ghost", "page:missing-page"]), + ); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.danglingChildRefs).toEqual([ + { node: ROOT_NODE_ID, ref: "ghost", kind: "node" }, + { node: ROOT_NODE_ID, ref: "missing-page", kind: "page" }, + ]); + expect(report.danglingChildRefCount).toBe(2); + }); + + test("does not flag refs whose targets exist", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"])); + await writeNode(workspaceDir, node("child", ["page:alice"])); + await writePage(workspaceDir, page("alice")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.danglingChildRefs).toEqual([]); + }); +}); + +describe("validateTree — orphanPages", () => { + test("flags concept pages not reachable from the root", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["page:reached"])); + await writePage(workspaceDir, page("reached")); + await writePage(workspaceDir, page("orphan")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.orphanPages).toEqual(["orphan"]); + expect(report.orphanPageCount).toBe(1); + }); + + test("a page hanging off an unreachable node is still an orphan", async () => { + // `floating` is not referenced by _root, so its page child is unreachable. + await writeNode(workspaceDir, node(ROOT_NODE_ID, [])); + await writeNode(workspaceDir, node("floating", ["page:detached"])); + await writePage(workspaceDir, page("detached")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.orphanPages).toEqual(["detached"]); + }); +}); + +describe("validateTree — cycles", () => { + test("detects an A → B → A node cycle as a back-edge", async () => { + // _root → node:a → node:b → node:a (cycle closes on the b → a edge). + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:a"])); + await writeNode(workspaceDir, node("a", ["node:b"])); + await writeNode(workspaceDir, node("b", ["node:a"])); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.cycles).toEqual([{ from: "b", to: "a" }]); + expect(report.cycleCount).toBe(1); + }); + + test("a shared DAG sub-node (two parents, no cycle) is not a cycle", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:p1", "node:p2"])); + await writeNode(workspaceDir, node("p1", ["node:shared"])); + await writeNode(workspaceDir, node("p2", ["node:shared"])); + await writeNode(workspaceDir, node("shared", [])); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.cycles).toEqual([]); + }); +}); + +describe("validateTree — staleIndex", () => { + test("flags a node whose mtime predates a node: child's mtime", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"])); + await writeNode(workspaceDir, node("child", [])); + // Parent older than child → stale. + await setNodeMtime(ROOT_NODE_ID, 1_000); + await setNodeMtime("child", 5_000); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.staleIndex).toEqual([ + { + node: ROOT_NODE_ID, + child: "child", + nodeMtimeMs: 1_000, + childMtimeMs: 5_000, + }, + ]); + expect(report.staleIndexCount).toBe(1); + }); + + test("a parent newer than its child is not stale", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"])); + await writeNode(workspaceDir, node("child", [])); + await setNodeMtime("child", 1_000); + await setNodeMtime(ROOT_NODE_ID, 5_000); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.staleIndex).toEqual([]); + }); +}); + +describe("validateTree — unknownEdgeTargets", () => { + test("flags a page edge pointing at a missing slug", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["page:alice"])); + await writePage(workspaceDir, page("alice", ["nonexistent"])); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.unknownEdgeTargets).toEqual([ + { from: "alice", to: "nonexistent" }, + ]); + expect(report.unknownEdgeTargetCount).toBe(1); + }); + + test("an edge to an existing page is not flagged", async () => { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["page:alice", "page:bob"]), + ); + await writePage(workspaceDir, page("alice", ["bob"])); + await writePage(workspaceDir, page("bob")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.unknownEdgeTargets).toEqual([]); + }); +}); diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts index 55dc023f2fd..86933c100ac 100644 --- a/assistant/src/memory/v3/tree-store.ts +++ b/assistant/src/memory/v3/tree-store.ts @@ -33,6 +33,7 @@ import { readFile, rename, rm, + stat, writeFile, } from "node:fs/promises"; import { dirname, join, relative, sep } from "node:path"; @@ -270,6 +271,26 @@ export async function readNode( return { id, frontmatter, body }; } +/** + * File mtime for a tree node, in epoch ms. Returns 0 when the file is missing + * or unreadable — callers treat 0 as "no mtime" (e.g. the validator's stale- + * index check reads a missing node as the oldest possible mtime so it never + * spuriously flags a parent against an absent child). Mirrors v2's + * `getPageMtimeMs`. + */ +export async function getNodeMtimeMs( + workspaceDir: string, + id: string, +): Promise { + validateNodeId(id); + try { + const s = await stat(getNodePath(workspaceDir, id)); + return s.mtimeMs; + } catch { + return 0; + } +} + /** * Write a tree node atomically (temp file + rename). A crash between the temp * write and the rename leaves the prior file intact; a crash after the rename diff --git a/assistant/src/memory/v3/validate.ts b/assistant/src/memory/v3/validate.ts new file mode 100644 index 00000000000..4a0acb2efc3 --- /dev/null +++ b/assistant/src/memory/v3/validate.ts @@ -0,0 +1,300 @@ +/** + * Memory v3 — Tree structure validator. + * + * The v3 tree is hand-authored by a data-migration during the v2 → v3 rollout + * (nodes reference pages and sub-nodes by `page:`/`node:` refs). Because the + * structure is authored, not derived, it can drift: a ref can dangle, a page + * can be left unwired, two nodes can reference each other into a cycle, a + * parent node's compositional summary can fall behind a freshly-edited child, + * or a page `edges:` entry can point at a slug with no page. + * + * `validateTree` is the read-only report the migration (and any later + * structure-health probe) runs to surface those defects. It is deliberately + * **non-throwing**: the migration is in progress, so an incomplete tree is + * expected — the report is informational, and the caller decides what (if + * anything) is fatal. It builds the three indices it needs (tree, page, edge), + * walks the DAG, and returns counts plus the offending ids for each category. + * + * Categories: + * - `danglingChildRefs` — a node `children` entry (`node:`/`page:`) whose + * target node/page does not exist on disk. + * - `orphanPages` — concept pages present in the page index but not reachable + * from the tree root by descending every `node:` child. Informational while + * the migration is mid-flight (not every page is wired in yet). Synthetic + * page-index entries (skills, CLI commands) are excluded — they are never + * tree members. + * - `cycles` — back-edges found during a full DFS over `node:` adjacency + * (A → B → A). A cycle would make a naive descent loop forever. + * - `staleIndex` — a node whose own file mtime predates one of its `node:` + * children's mtime, hinting its compositional index/summary may be out of + * date relative to the child it composes. + * - `unknownEdgeTargets` — page `edges:` targets with no corresponding page + * index slug, reusing v2's `validateEdgeTargets`. + */ + +import { CLI_COMMAND_SLUG_PREFIX } from "../v2/cli-command-store.js"; +import { getEdgeIndex, validateEdgeTargets } from "../v2/edge-index.js"; +import { getPageIndex } from "../v2/page-index.js"; +import { SKILL_SLUG_PREFIX } from "../v2/skill-store.js"; +import { getTreeIndex, type TreeIndex } from "./tree-index.js"; +import { getNodeMtimeMs } from "./tree-store.js"; + +/** + * A `node:` child whose mtime is newer than the parent node that composes it. + * `node` is the parent, `child` the fresher child, and the two `*MtimeMs` + * fields are their epoch-ms mtimes (parent < child triggers the report). + */ +export interface StaleIndexEntry { + node: string; + child: string; + nodeMtimeMs: number; + childMtimeMs: number; +} + +/** + * Read-only health report over the v3 tree + its referenced pages/edges. + * Every list is sorted for deterministic output; `*Count` fields mirror the + * corresponding list length so callers can summarize without re-counting. + */ +export interface TreeValidationReport { + /** `node:`/`page:` children whose target does not exist. */ + danglingChildRefs: Array<{ + node: string; + ref: string; + kind: "node" | "page"; + }>; + danglingChildRefCount: number; + /** Concept pages not reachable from the root by descending all node children. */ + orphanPages: string[]; + orphanPageCount: number; + /** Back-edges (`from → to`) closing a cycle during the full DFS descent. */ + cycles: Array<{ from: string; to: string }>; + cycleCount: number; + /** Nodes whose mtime predates a child node's mtime. */ + staleIndex: StaleIndexEntry[]; + staleIndexCount: number; + /** Page `edges:` targets with no corresponding page-index slug. */ + unknownEdgeTargets: Array<{ from: string; to: string }>; + unknownEdgeTargetCount: number; +} + +/** True when a page-index slug is a synthetic (non-concept-page) entry. */ +function isSyntheticSlug(slug: string): boolean { + return ( + slug.startsWith(SKILL_SLUG_PREFIX) || + slug.startsWith(CLI_COMMAND_SLUG_PREFIX) + ); +} + +/** + * Collect dangling `node:`/`page:` child refs: every node child whose target + * node id is absent from `tree.nodes`, and every page child whose slug is + * absent from `knownPageSlugs`. Sorted by `(node, kind, ref)`. + */ +function collectDanglingChildRefs( + tree: TreeIndex, + knownPageSlugs: ReadonlySet, +): Array<{ node: string; ref: string; kind: "node" | "page" }> { + const dangling: Array<{ node: string; ref: string; kind: "node" | "page" }> = + []; + for (const [nodeId, children] of tree.childrenByNode) { + for (const child of children) { + const exists = + child.kind === "node" + ? tree.nodes.has(child.ref) + : knownPageSlugs.has(child.ref); + if (!exists) { + dangling.push({ node: nodeId, ref: child.ref, kind: child.kind }); + } + } + } + dangling.sort( + (a, b) => + a.node.localeCompare(b.node) || + a.kind.localeCompare(b.kind) || + a.ref.localeCompare(b.ref), + ); + return dangling; +} + +/** + * Resolve the existing `node:` children of `nodeId`, in `children` order. Refs + * to absent nodes are skipped (those are reported separately as dangling) so + * the descent never recurses into a node that isn't on disk. + */ +function nodeChildrenOf(tree: TreeIndex, nodeId: string): string[] { + const children = tree.childrenByNode.get(nodeId) ?? []; + const out: string[] = []; + for (const child of children) { + if (child.kind === "node" && tree.nodes.has(child.ref)) { + out.push(child.ref); + } + } + return out; +} + +/** + * Full DFS over `node:` adjacency from `tree.root`. Returns the set of + * reachable node ids (for orphan-page reachability) and the back-edges that + * close a cycle. A back-edge is an edge into a node still on the active + * recursion stack (classic gray-node cycle detection); `visited` (black) + * prevents re-walking shared DAG sub-nodes. + */ +function descend(tree: TreeIndex): { + reachableNodes: Set; + cycles: Array<{ from: string; to: string }>; +} { + const reachableNodes = new Set(); + const onStack = new Set(); + const cycles: Array<{ from: string; to: string }> = []; + + // Iterative DFS with an explicit stack so deep trees don't blow the call + // stack. Each frame tracks its child cursor; we push a child frame, and on + // exhaustion pop the parent off the recursion stack (`onStack`). + type Frame = { node: string; children: string[]; cursor: number }; + const stack: Frame[] = []; + + function enter(nodeId: string): void { + reachableNodes.add(nodeId); + onStack.add(nodeId); + stack.push({ + node: nodeId, + children: nodeChildrenOf(tree, nodeId), + cursor: 0, + }); + } + + if (tree.nodes.has(tree.root)) { + enter(tree.root); + } + + while (stack.length > 0) { + const frame = stack[stack.length - 1]; + if (frame.cursor >= frame.children.length) { + onStack.delete(frame.node); + stack.pop(); + continue; + } + const child = frame.children[frame.cursor++]; + if (onStack.has(child)) { + // Edge into an ancestor still on the stack → cycle-closing back-edge. + cycles.push({ from: frame.node, to: child }); + continue; + } + if (reachableNodes.has(child)) { + // Already fully explored via another parent (shared DAG sub-node). + continue; + } + enter(child); + } + + cycles.sort( + (a, b) => a.from.localeCompare(b.from) || a.to.localeCompare(b.to), + ); + return { reachableNodes, cycles }; +} + +/** + * Concept pages reachable from the tree: every `page:` child of a reachable + * node. Pages hanging off unreachable nodes are *not* counted reachable — they + * only become reachable once their parent chain links back to the root. + */ +function reachablePages( + tree: TreeIndex, + reachableNodes: ReadonlySet, +): Set { + const pages = new Set(); + for (const nodeId of reachableNodes) { + for (const child of tree.childrenByNode.get(nodeId) ?? []) { + if (child.kind === "page") pages.add(child.ref); + } + } + return pages; +} + +/** + * Nodes whose own mtime predates one of their `node:` children's mtime. A + * missing node file reads as mtime 0 (oldest), so the check never flags a + * parent against an absent child. Sorted by `(node, child)`. + */ +async function collectStaleIndex( + workspaceDir: string, + tree: TreeIndex, +): Promise { + const ids = [...tree.nodes.keys()]; + const mtimes = new Map(); + await Promise.all( + ids.map(async (id) => { + mtimes.set(id, await getNodeMtimeMs(workspaceDir, id)); + }), + ); + + const stale: StaleIndexEntry[] = []; + for (const node of ids) { + const nodeMtimeMs = mtimes.get(node) ?? 0; + for (const child of nodeChildrenOf(tree, node)) { + const childMtimeMs = mtimes.get(child) ?? 0; + if (nodeMtimeMs < childMtimeMs) { + stale.push({ node, child, nodeMtimeMs, childMtimeMs }); + } + } + } + stale.sort( + (a, b) => a.node.localeCompare(b.node) || a.child.localeCompare(b.child), + ); + return stale; +} + +/** + * Validate the hand-authored v3 tree structure for `workspaceDir` and return a + * {@link TreeValidationReport}. Builds the tree, page, and edge indices, walks + * the DAG from the root, and reports the five defect categories. Never throws — + * it is a report, not an assertion. + */ +export async function validateTree( + workspaceDir: string, +): Promise { + const [tree, pageIndex, edgeIndex] = await Promise.all([ + getTreeIndex(workspaceDir), + getPageIndex(workspaceDir), + getEdgeIndex(workspaceDir), + ]); + + const knownPageSlugs = new Set(pageIndex.bySlug.keys()); + + // Kick off the stale-index mtime stats up front — it only depends on the + // tree, not on the DAG walk below — so its filesystem reads overlap the + // (synchronous) descent rather than running strictly after it. + const staleIndexPromise = collectStaleIndex(workspaceDir, tree); + + const danglingChildRefs = collectDanglingChildRefs(tree, knownPageSlugs); + + const { reachableNodes, cycles } = descend(tree); + + const reached = reachablePages(tree, reachableNodes); + const orphanPages = [...knownPageSlugs] + .filter((slug) => !isSyntheticSlug(slug) && !reached.has(slug)) + .sort(); + + const staleIndex = await staleIndexPromise; + + // Edge graph is page-only; knownSlugs is the full page-index slug set so an + // edge pointing at a skill/CLI entry is not spuriously flagged unknown. + const unknownEdgeTargets = validateEdgeTargets( + edgeIndex, + knownPageSlugs, + ).missing; + + return { + danglingChildRefs, + danglingChildRefCount: danglingChildRefs.length, + orphanPages, + orphanPageCount: orphanPages.length, + cycles, + cycleCount: cycles.length, + staleIndex, + staleIndexCount: staleIndex.length, + unknownEdgeTargets, + unknownEdgeTargetCount: unknownEdgeTargets.length, + }; +}