// diff --git a/assistant/src/memory/v3/__tests__/tree-store.test.ts b/assistant/src/memory/v3/__tests__/tree-store.test.ts
// new file mode 100644
// index 00000000000..3d582cd10d7
// --- /dev/null
// +++ b/assistant/src/memory/v3/__tests__/tree-store.test.ts
// @@ -0,0 +1,529 @@
/**
 * Tests for `assistant/src/memory/v3/tree-store.ts`.
 *
 * Coverage matrix:
 *   - slugify: lowercase / kebab-case / ascii / 80-char cap / empty fallback.
 *   - validateNodeId: accept set, reject set (path-traversal, malformed shapes),
 *     reserved `_root` accepted.
 *   - readNode / writeNode round-trip: frontmatter survives, body preserved.
 *   - children refs parse for both `page:<slug>` and `node:<id>` forms.
 *   - malformed YAML / unknown frontmatter keys throw.
 *   - readNode on missing file: returns null.
 *   - writeNode atomicity: no orphan tmp on success, parent dirs created.
 *   - listNodes: walks subdirectories, returns nested ids in `/`-form, excludes
 *     hidden dirs / non-.md / temp files, missing dir → [].
 *   - deleteNode: nested-id round-trip, idempotent on missing.
 *   - renderNodeContent: frontmatter + body shape.
 *   - No change to memory/concepts/ (v3 lives under memory/v3/tree/).
 *
 * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`.
 */

import {
  existsSync,
  mkdirSync,
  mkdtempSync,
  readdirSync,
  readFileSync,
  rmSync,
  writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, expect, test } from "bun:test";

import {
  deleteNode,
  getTreeDir,
  listNodes,
  readNode,
  renderNodeContent,
  ROOT_NODE_ID,
  slugify,
  validateNodeId,
  writeNode,
} from "../tree-store.js";
import type { TreeNode } from "../types.js";

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Fresh temp workspace for each test; created in beforeEach, removed in afterEach. */
let workspaceDir: string;

beforeEach(() => {
  workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-store-test-"));
  // Mirror the workspace migration so readNode / writeNode have a target dir.
  mkdirSync(getTreeDir(workspaceDir), { recursive: true });
});

afterEach(() => {
  if (existsSync(workspaceDir)) {
    rmSync(workspaceDir, { recursive: true, force: true });
  }
});

/**
 * Build a populated `people` node. Any top-level field in `overrides` replaces
 * the default wholesale (the spread is shallow, so overriding `frontmatter`
 * replaces the entire frontmatter object).
 */
function makeNode(overrides: Partial<TreeNode> = {}): TreeNode {
  return {
    id: "people",
    frontmatter: {
      children: ["page:people/alice", "node:people/colleagues"],
      routing_hints: "for work relationships see people/colleagues",
      summary: "People I know.",
    },
    body: "The people branch of the memory tree.\n",
    ...overrides,
  };
}

// ---------------------------------------------------------------------------
// slugify
// ---------------------------------------------------------------------------

describe("slugify", () => {
  test("lowercases ASCII letters", () => {
    expect(slugify("AliceBob")).toBe("alicebob");
  });

  test("converts spaces and punctuation to single hyphens", () => {
    expect(slugify("Alice's Preferred IDE!")).toBe("alice-s-preferred-ide");
  });

  test("collapses runs of separators to one hyphen", () => {
    expect(slugify("foo ___ bar")).toBe("foo-bar");
  });

  test("trims leading and trailing hyphens", () => {
    expect(slugify("---hello world---")).toBe("hello-world");
  });

  test("collapses '/' to hyphen — slugify produces a single segment", () => {
    expect(slugify("People/Colleagues")).toBe("people-colleagues");
  });

  test("caps slug length at 80 chars and re-trims trailing hyphen", () => {
    const long = "a".repeat(120);
    const slug = slugify(long);
    expect(slug.length).toBe(80);
    expect(slug.endsWith("-")).toBe(false);
  });

  test("falls back to a unique placeholder for empty inputs", () => {
    const a = slugify("");
    const b = slugify("!!!");
    expect(a).toMatch(/^node-[a-f0-9]{8}$/);
    expect(b).toMatch(/^node-[a-f0-9]{8}$/);
    expect(a).not.toBe(b);
  });
});

// ---------------------------------------------------------------------------
// validateNodeId
// ---------------------------------------------------------------------------

describe("validateNodeId", () => {
  test.each([
    ["people"],
    ["a"],
    ["people-colleagues"],
    ["people/alice"],
    ["people/colleagues/alice"],
    ["a/b/c/d/e"],
    [ROOT_NODE_ID],
  ])("accepts %p", (id) => {
    expect(() => validateNodeId(id)).not.toThrow();
  });

  test.each([
    ["empty string", ""],
    ["leading slash", "/people"],
    ["trailing slash", "people/"],
    ["double slash", "people//alice"],
    ["dot-dot segment", "people/../alice"],
    ["pure dot-dot", ".."],
    ["leading dot segment", ".hidden/alice"],
    ["backslash", "people\\alice"],
    ["null byte", "people\0evil"],
    ["whitespace", "people alice"],
    ["uppercase", "People"],
    ["non-ascii", "café"],
    ["leading hyphen", "-people"],
    ["non-alphanumeric", "people!"],
    ["leading underscore (only _root reserved)", "_other"],
  ])("rejects %s (%p)", (_label, id) => {
    expect(() => validateNodeId(id)).toThrow(/Invalid tree-node id/);
  });

  test("rejects ids longer than 200 chars", () => {
    expect(() => validateNodeId("a".repeat(201))).toThrow(
      /Invalid tree-node id/,
    );
  });

  test("rejects segments longer than 80 chars even if total is under 200", () => {
    expect(() => validateNodeId("a".repeat(81))).toThrow(
      /Invalid tree-node id/,
    );
  });
});

// ---------------------------------------------------------------------------
// readNode / writeNode round-trip
// ---------------------------------------------------------------------------

describe("writeNode + readNode round-trip", () => {
  test("round-trips frontmatter and body verbatim", async () => {
    const node = makeNode();
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, node.id);
    expect(read).not.toBeNull();
    expect(read!.id).toBe(node.id);
    expect(read!.frontmatter.children).toEqual(node.frontmatter.children);
    expect(read!.frontmatter.routing_hints).toBe(
      node.frontmatter.routing_hints,
    );
    expect(read!.frontmatter.summary).toBe(node.frontmatter.summary);
    expect(read!.body).toBe(node.body);
  });

  test("children parse for both page: and node: reference forms", async () => {
    const node = makeNode({
      id: "mixed",
      frontmatter: {
        children: ["page:procs/git-flow", "node:procs", "page:alice"],
      },
      body: "mixed refs\n",
    });
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, "mixed");
    expect(read!.frontmatter.children).toEqual([
      "page:procs/git-flow",
      "node:procs",
      "page:alice",
    ]);
  });

  test("the children list IS the DAG edge — a page may be referenced by multiple parents", async () => {
    await writeNode(
      workspaceDir,
      makeNode({
        id: "team-a",
        frontmatter: { children: ["page:people/alice"] },
        body: "team a\n",
      }),
    );
    await writeNode(
      workspaceDir,
      makeNode({
        id: "team-b",
        frontmatter: { children: ["page:people/alice"] },
        body: "team b\n",
      }),
    );

    const a = await readNode(workspaceDir, "team-a");
    const b = await readNode(workspaceDir, "team-b");
    expect(a!.frontmatter.children).toContain("page:people/alice");
    expect(b!.frontmatter.children).toContain("page:people/alice");
  });

  test("renders frontmatter at the top with --- delimiters", async () => {
    const node = makeNode();
    await writeNode(workspaceDir, node);

    const raw = readFileSync(
      join(getTreeDir(workspaceDir), `${node.id}.md`),
      "utf-8",
    );
    expect(raw.startsWith("---\n")).toBe(true);
    expect(raw.split("---").length).toBeGreaterThanOrEqual(3);
    expect(raw).toContain("The people branch");
  });

  test("preserves an empty body", async () => {
    const node = makeNode({ body: "" });
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, node.id);
    expect(read!.body).toBe("");
  });

  test("preserves multiline body with embedded YAML-looking lines", async () => {
    const tricky = "key: value\n---\nnot-frontmatter\n";
    const node = makeNode({ id: "tricky", body: tricky });
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, node.id);
    expect(read!.body).toBe(tricky);
  });

  test("defaults children to [] for a node with empty frontmatter", async () => {
    const node = makeNode({
      id: "bare",
      frontmatter: { children: [] },
      body: "bare\n",
    });
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, "bare");
    expect(read!.frontmatter.children).toEqual([]);
    expect(read!.frontmatter.routing_hints).toBeUndefined();
    expect(read!.frontmatter.summary).toBeUndefined();
  });

  test("readNode returns null for an id that does not exist", async () => {
    const result = await readNode(workspaceDir, "nonexistent");
    expect(result).toBeNull();
  });

  test("readNode parses a hand-written node with no frontmatter as empty frontmatter + full body", async () => {
    const id = "no-frontmatter";
    const body = "Just some prose, no YAML.\n";
    writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), body, "utf-8");

    const read = await readNode(workspaceDir, id);
    expect(read).not.toBeNull();
    expect(read!.frontmatter.children).toEqual([]);
    expect(read!.body).toBe(body);
  });

  test("readNode throws on malformed YAML frontmatter", async () => {
    const id = "bad-yaml";
    // Unclosed bracket inside the frontmatter block — invalid YAML.
    const raw = "---\nchildren: [unterminated\n---\nbody\n";
    writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), raw, "utf-8");

    await expect(readNode(workspaceDir, id)).rejects.toThrow();
  });

  test("readNode throws on unknown frontmatter keys instead of silently dropping them", async () => {
    const id = "extra-keys";
    const raw = "---\nchildren: []\nunknown_field: oops\n---\nbody\n";
    writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), raw, "utf-8");

    await expect(readNode(workspaceDir, id)).rejects.toThrow();
  });

  test("writeNode overwrites an existing node", async () => {
    await writeNode(workspaceDir, makeNode({ body: "first\n" }));
    await writeNode(workspaceDir, makeNode({ body: "second\n" }));

    const read = await readNode(workspaceDir, "people");
    expect(read!.body).toBe("second\n");
  });

  test("writeNode creates parent directories for nested ids", async () => {
    const node = makeNode({ id: "people/colleagues" });
    await writeNode(workspaceDir, node);

    const filePath = join(getTreeDir(workspaceDir), "people", "colleagues.md");
    expect(existsSync(filePath)).toBe(true);

    const read = await readNode(workspaceDir, "people/colleagues");
    expect(read!.id).toBe("people/colleagues");
    expect(read!.body).toBe(node.body);
  });

  test("writeNode round-trips deeply nested ids", async () => {
    const node = makeNode({ id: "people/colleagues/alice" });
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, "people/colleagues/alice");
    expect(read!.id).toBe("people/colleagues/alice");
    expect(read!.frontmatter.children).toEqual(node.frontmatter.children);
    expect(read!.body).toBe(node.body);
  });

  test("writeNode + readNode round-trip the reserved _root id", async () => {
    const node = makeNode({
      id: ROOT_NODE_ID,
      frontmatter: { children: ["node:people"] },
      body: "root of the tree\n",
    });
    await writeNode(workspaceDir, node);

    const read = await readNode(workspaceDir, ROOT_NODE_ID);
    expect(read!.id).toBe(ROOT_NODE_ID);
    expect(read!.frontmatter.children).toEqual(["node:people"]);
  });

  test("writeNode rejects malicious ids and writes nothing at the escape target", async () => {
    await expect(
      writeNode(workspaceDir, makeNode({ id: "../escape" })),
    ).rejects.toThrow(/Invalid tree-node id/);

    // `../escape` would resolve to `<workspace>/memory/v3/escape.md`. Confirm
    // the validation throw fired before any I/O — no file at that target.
    expect(existsSync(join(workspaceDir, "memory", "v3", "escape.md"))).toBe(
      false,
    );
  });

  test("readNode rejects malicious ids", async () => {
    await expect(readNode(workspaceDir, "../escape")).rejects.toThrow(
      /Invalid tree-node id/,
    );
  });

  test("successful write produces no orphan tmp files", async () => {
    await writeNode(workspaceDir, makeNode());

    const remaining = readdirSync(getTreeDir(workspaceDir));
    const orphanTmps = remaining.filter((name) => name.includes(".tmp."));
    expect(orphanTmps).toEqual([]);
  });

  test("does not touch memory/concepts/", async () => {
    await writeNode(workspaceDir, makeNode({ id: "people/colleagues" }));

    expect(existsSync(join(workspaceDir, "memory", "concepts"))).toBe(false);
  });
});

// ---------------------------------------------------------------------------
// renderNodeContent
// ---------------------------------------------------------------------------

describe("renderNodeContent", () => {
  test("emits frontmatter block followed by body", () => {
    const rendered = renderNodeContent(makeNode());
    expect(rendered.startsWith("---\n")).toBe(true);
    expect(rendered).toContain("children:");
    expect(rendered).toContain("page:people/alice");
    expect(rendered.endsWith("The people branch of the memory tree.\n")).toBe(
      true,
    );
  });

  test("keeps the explicit children key even when empty", () => {
    const rendered = renderNodeContent(
      makeNode({ frontmatter: { children: [] }, body: "x\n" }),
    );
    expect(rendered).toContain("children: []");
  });
});

// ---------------------------------------------------------------------------
// listNodes
// ---------------------------------------------------------------------------

describe("listNodes", () => {
  test("returns ids (filename minus .md) for every node on disk", async () => {
    await writeNode(workspaceDir, makeNode({ id: "alice" }));
    await writeNode(workspaceDir, makeNode({ id: "bob" }));
    await writeNode(workspaceDir, makeNode({ id: "carol" }));

    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual(["alice", "bob", "carol"]);
  });

  test("excludes non-.md files in the tree directory", async () => {
    await writeNode(workspaceDir, makeNode({ id: "alice" }));

    const treeDir = getTreeDir(workspaceDir);
    writeFileSync(join(treeDir, "README.txt"), "ignore me", "utf-8");
    writeFileSync(join(treeDir, "image.png"), "fake", "utf-8");
    writeFileSync(join(treeDir, ".hidden"), "fake", "utf-8");

    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual(["alice"]);
  });

  test("walks subdirectories and returns nested ids in '/'-form", async () => {
    await writeNode(workspaceDir, makeNode({ id: "alice" }));
    await writeNode(workspaceDir, makeNode({ id: "people/bob" }));
    await writeNode(workspaceDir, makeNode({ id: "people/carol" }));
    await writeNode(workspaceDir, makeNode({ id: "arcs/2025-04/cutover" }));

    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual([
      "alice",
      "arcs/2025-04/cutover",
      "people/bob",
      "people/carol",
    ]);
  });

  test("skips hidden subdirectories and non-.md files inside nested dirs", async () => {
    await writeNode(workspaceDir, makeNode({ id: "people/alice" }));

    const treeDir = getTreeDir(workspaceDir);
    mkdirSync(join(treeDir, ".git"), { recursive: true });
    writeFileSync(join(treeDir, ".git", "config.md"), "fake", "utf-8");
    writeFileSync(join(treeDir, "people", "notes.txt"), "ignore", "utf-8");

    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual(["people/alice"]);
  });

  test("skips orphaned .tmp.* files at any depth", async () => {
    const treeDir = getTreeDir(workspaceDir);
    await writeNode(workspaceDir, makeNode({ id: "people/alice" }));

    writeFileSync(
      join(treeDir, "alice.md.tmp.123.abc-def"),
      "stranded",
      "utf-8",
    );
    writeFileSync(
      join(treeDir, "people", "bob.md.tmp.123.abc-def"),
      "stranded",
      "utf-8",
    );

    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual(["people/alice"]);
  });

  test("returns [] when the tree directory does not exist", async () => {
    rmSync(getTreeDir(workspaceDir), { recursive: true, force: true });

    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual([]);
  });

  test("returns [] when the tree directory is empty", async () => {
    const ids = await listNodes(workspaceDir);
    expect(ids).toEqual([]);
  });
});

// ---------------------------------------------------------------------------
// deleteNode
// ---------------------------------------------------------------------------

describe("deleteNode", () => {
  test("removes the node from disk", async () => {
    const node = makeNode();
    await writeNode(workspaceDir, node);
    expect(await readNode(workspaceDir, node.id)).not.toBeNull();

    await deleteNode(workspaceDir, node.id);
    expect(await readNode(workspaceDir, node.id)).toBeNull();
  });

  test("removes nested nodes", async () => {
    const node = makeNode({ id: "people/colleagues" });
    await writeNode(workspaceDir, node);

    await deleteNode(workspaceDir, "people/colleagues");
    expect(await readNode(workspaceDir, "people/colleagues")).toBeNull();
  });

  test("is idempotent — deleting a missing node does not throw", async () => {
    await deleteNode(workspaceDir, "never-existed");
    await deleteNode(workspaceDir, "never-existed");
  });

  test("does not affect other nodes", async () => {
    await writeNode(workspaceDir, makeNode({ id: "alice" }));
    await writeNode(workspaceDir, makeNode({ id: "bob" }));

    await deleteNode(workspaceDir, "alice");

    expect(await readNode(workspaceDir, "alice")).toBeNull();
    expect(await readNode(workspaceDir, "bob")).not.toBeNull();
  });
});

// diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts
// new file mode 100644
// index 00000000000..be13e489f8e
// --- /dev/null
// +++ b/assistant/src/memory/v3/tree-store.ts
// @@ -0,0 +1,370 @@
/**
 * Memory v3 — Tree node store.
 *
 * Owns the on-disk read/write contract for `memory/v3/tree/<id>.md`. Nodes may
 * live directly under `memory/v3/tree/` or nested in subdirectories (e.g.
 * `memory/v3/tree/people/colleagues.md`); the id encodes the relative path from
 * `tree/` minus the `.md` extension, using forward slashes as separators (so
 * `people/colleagues` is a valid id).
 *
 * The v3 tree is a DAG *overlay* over the existing flat `memory/concepts/`
 * pages — this module never touches `memory/concepts/`. Pages stay canonical
 * and shared; nodes reference pages and sub-nodes by `children` refs
 * (`page:<slug>` / `node:<id>`), which are the portable replacement for
 * filesystem symlinks.
 *
 * Each node is a YAML-frontmatter Markdown file: a `---`-delimited block
 * (`children`, optional `routing_hints` / `summary`) followed by the prose body
 * that is the node's full self-description. This module is the only v3
 * component that knows how to parse or render that format — every other v3
 * module routes through `readNode` / `writeNode` so the on-disk shape can
 * evolve without touching downstream callers.
 */
/**
 * Writes are atomic (temp + rename) so a crash mid-write leaves either the old
 * file or the new file in place — never a half-written node. The id machinery
 * mirrors v2's page-store `slugify` / `validateSlug` so node ids and page slugs
 * share the same filesystem-safe shape.
 */

import { randomUUID } from "node:crypto";
import {
  mkdir,
  readdir,
  readFile,
  rename,
  rm,
  writeFile,
} from "node:fs/promises";
import { dirname, join, relative, sep } from "node:path";

import { parse as parseYaml, stringify as stringifyYaml } from "yaml";

import { FRONTMATTER_REGEX } from "../../skills/frontmatter.js";
import { type TreeNode, TreeNodeFrontmatterSchema } from "./types.js";

/** Filename suffix for tree nodes. */
const NODE_EXTENSION = ".md";

/** Cap individual id-segment length so we stay well under filesystem limits. */
const MAX_ID_SEGMENT_LENGTH = 80;

/** Cap the full id (including any folder separators) to a sane bound. */
const MAX_ID_TOTAL_LENGTH = 200;

/** Each path segment must match this — same shape `slugify` produces. */
const ID_SEGMENT_REGEX = /^[a-z0-9](?:[a-z0-9-]*)$/;

/**
 * Reserved id for the root of the v3 tree. The root node is the entry point a
 * future migration authors first; reserving the id keeps the well-known handle
 * stable across the codebase.
 */
export const ROOT_NODE_ID = "_root";

/**
 * Convert an arbitrary input string into a filesystem-safe id **segment**.
 *
 * Returns a single path segment (no `/`). Path-shaped ids are constructed by
 * the authoring migration writing files at full paths; this helper is for
 * turning free-form text (e.g. a node label) into one clean segment.
 *
 * Rules:
 *   - Input is NFKD-normalized and combining marks are dropped *before*
 *     lowercasing, so accented or compatibility letters keep their base letter
 *     (`résumé` → `resume`) instead of leaving a hyphen mid-word.
 *   - Lowercase ASCII letters, digits, and hyphens only.
 *   - Every remaining non-ASCII / non-alphanumeric character (including `/`)
 *     collapses to a hyphen.
 *   - Consecutive hyphens collapse to one; leading/trailing hyphens trimmed.
 *   - Truncated to {@link MAX_ID_SEGMENT_LENGTH} characters (with trailing
 *     hyphen re-trimmed after truncation).
 *   - Empty results (e.g. emoji-only input) fall back to `node-<8 hex>` so the
 *     caller always gets a non-empty, write-safe segment.
 *
 * @param input - Free-form text to convert.
 * @returns A non-empty segment that matches {@link ID_SEGMENT_REGEX}.
 */
export function slugify(input: string): string {
  let slug = input
    // Decompose first: some compatibility characters only turn into plain
    // (possibly uppercase) letters after NFKD, so case-folding must come later.
    .normalize("NFKD")
    // Drop combining marks so "é" contributes "e", not "e-".
    .replace(/\p{M}+/gu, "")
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, "-")
    .replace(/-{2,}/g, "-")
    .replace(/^-+|-+$/g, "");

  if (slug.length > MAX_ID_SEGMENT_LENGTH) {
    // The cut may land right after a hyphen; re-trim so the segment stays clean.
    slug = slug.slice(0, MAX_ID_SEGMENT_LENGTH).replace(/-+$/, "");
  }

  if (!slug) {
    // Nothing usable survived — hand back a random placeholder instead of "".
    slug = `node-${randomUUID().slice(0, 8)}`;
  }

  return slug;
}

/**
 * Validate a node id — possibly path-shaped — that is about to cross the
 * storage boundary. Throws on any malformed or unsafe value.
 *
 * The on-disk tree treats ids as relative paths under `memory/v3/tree/`. A
 * malformed id (e.g. `..`, leading `/`, embedded null byte) could escape that
 * root via `path.join` if it slipped through, so we enforce shape here at every
 * read/write/delete entry point rather than relying on callers.
 *
 * The reserved {@link ROOT_NODE_ID} (`_root`) is accepted as a special case;
 * its leading underscore would otherwise fail {@link ID_SEGMENT_REGEX}.
 *
 * Rules:
 *   - Non-empty, ≤ {@link MAX_ID_TOTAL_LENGTH} chars.
 *   - Each `/`-separated segment matches {@link ID_SEGMENT_REGEX}
 *     (lowercase alphanum + hyphen, no leading hyphen, ≤80 chars).
 *   - No `..` segments, no empty segments (`a//b`), no leading or trailing `/`.
 *   - No `\` (Windows separator), no null bytes, no whitespace, no non-ASCII.
 *
 * @param id - Candidate node id.
 * @throws Error whose message starts with `Invalid tree-node id` on any
 *   violation.
 */
export function validateNodeId(id: string): void {
  if (typeof id !== "string" || id.length === 0) {
    throw new Error(`Invalid tree-node id: empty`);
  }
  // The reserved root id bypasses the per-segment shape rules below.
  if (id === ROOT_NODE_ID) {
    return;
  }
  if (id.length > MAX_ID_TOTAL_LENGTH) {
    throw new Error(
      `Invalid tree-node id: length ${id.length} exceeds max ${MAX_ID_TOTAL_LENGTH}: ${id}`,
    );
  }
  if (id.includes("\\")) {
    throw new Error(`Invalid tree-node id: backslash not allowed: ${id}`);
  }
  if (id.includes("\0")) {
    throw new Error(`Invalid tree-node id: null byte not allowed`);
  }
  if (/\s/.test(id)) {
    throw new Error(`Invalid tree-node id: whitespace not allowed: ${id}`);
  }
  if (id.startsWith("/") || id.endsWith("/")) {
    throw new Error(
      `Invalid tree-node id: leading or trailing '/' not allowed: ${id}`,
    );
  }
  for (const segment of id.split("/")) {
    if (segment.length === 0) {
      throw new Error(`Invalid tree-node id: empty path segment: ${id}`);
    }
    if (segment === "..") {
      throw new Error(`Invalid tree-node id: '..' segment not allowed: ${id}`);
    }
    if (segment.length > MAX_ID_SEGMENT_LENGTH) {
      throw new Error(
        `Invalid tree-node id: segment '${segment}' exceeds max ${MAX_ID_SEGMENT_LENGTH} chars: ${id}`,
      );
    }
    if (!ID_SEGMENT_REGEX.test(segment)) {
      throw new Error(
        `Invalid tree-node id: segment '${segment}' must match [a-z0-9][a-z0-9-]*: ${id}`,
      );
    }
  }
}

// ---------------------------------------------------------------------------
// Path helpers
// ---------------------------------------------------------------------------

/**
 * Directory that holds every tree node for a workspace:
 * `<workspaceDir>/memory/v3/tree`.
 */
export function getTreeDir(workspaceDir: string): string {
  return join(workspaceDir, "memory", "v3", "tree");
}
/**
 * Resolve the absolute path for a node id. Ids may contain `/` to indicate
 * folder hierarchy under `memory/v3/tree/`; `path.join` handles those correctly
 * on POSIX, and `validateNodeId` (called at every public entry point) rejects
 * shapes that could escape the tree root.
 */
function getNodePath(workspaceDir: string, id: string): string {
  return join(getTreeDir(workspaceDir), `${id}${NODE_EXTENSION}`);
}

/**
 * Compute the id for a tree-node file, given the tree root and the absolute
 * file path. Returns the path-relative location with `.md` stripped and
 * platform separators normalized to `/`. Tolerant of paths that don't end in
 * `.md` so callers walking arbitrary content can use it defensively.
 */
function idFromNodePath(treeRoot: string, filePath: string): string {
  const rel = relative(treeRoot, filePath);
  const withoutExt = rel.endsWith(NODE_EXTENSION)
    ? rel.slice(0, -NODE_EXTENSION.length)
    : rel;
  // Ids always use "/", whatever separator the platform's `path` module uses.
  return sep === "/" ? withoutExt : withoutExt.split(sep).join("/");
}

// ---------------------------------------------------------------------------
// Frontmatter parse / render
// ---------------------------------------------------------------------------

/**
 * Split raw file contents into (frontmatter, body). If no frontmatter block is
 * present the entire input is treated as body and an empty frontmatter block is
 * returned. In both cases the frontmatter goes through
 * `TreeNodeFrontmatterSchema`, so any unexpected shape — bad types, extra
 * junk — surfaces as a parse error to the caller, not silently dropped data.
 *
 * The schema's default guarantees `children` is always an array even on a
 * freshly created node with empty frontmatter.
 *
 * @param raw - Full file contents.
 * @returns The validated frontmatter and everything after the frontmatter
 *   block.
 * @throws When the YAML cannot be parsed or the parsed value fails schema
 *   validation.
 */
function parseNodeContent(raw: string): {
  frontmatter: TreeNode["frontmatter"];
  body: string;
} {
  const match = raw.match(FRONTMATTER_REGEX);
  if (!match) {
    return {
      frontmatter: TreeNodeFrontmatterSchema.parse({}),
      body: raw,
    };
  }
  const yamlBlock = match[1];
  // The body is whatever follows the matched frontmatter block.
  const body = raw.slice(match[0].length);
  // A frontmatter block whose YAML parses to null/undefined counts as `{}`.
  const parsed = parseYaml(yamlBlock) ?? {};
  return {
    frontmatter: TreeNodeFrontmatterSchema.parse(parsed),
    body,
  };
}

/**
 * Render a tree node back into the on-disk Markdown form. The output is always
 * frontmatter + body; even nodes with empty `children` keep the explicit YAML
 * key so callers see the canonical shape on round-trip.
 *
 * @throws When `node.frontmatter` fails schema validation.
 */
export function renderNodeContent(node: TreeNode): string {
  const frontmatter = TreeNodeFrontmatterSchema.parse(node.frontmatter);
  const yamlBlock = stringifyYaml(frontmatter, { indent: 2 }).trimEnd();
  return `---\n${yamlBlock}\n---\n${node.body}`;
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Read a single tree node. Returns `null` if the file does not exist.
 *
 * Any other read or parse failure (permission denied, malformed YAML,
 * frontmatter that fails schema validation) throws — unlike "missing", these
 * are programmer / data-corruption errors the caller needs to see.
 *
 * @param workspaceDir - Workspace root containing `memory/v3/tree/`.
 * @param id - Node id; validated with `validateNodeId` before any I/O.
 * @returns The parsed node, or `null` when no file exists for `id`.
 */
export async function readNode(
  workspaceDir: string,
  id: string,
): Promise<TreeNode | null> {
  validateNodeId(id);
  const path = getNodePath(workspaceDir, id);
  let raw: string;
  try {
    raw = await readFile(path, "utf-8");
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code === "ENOENT") {
      return null;
    }
    throw err;
  }
  const { frontmatter, body } = parseNodeContent(raw);
  return { id, frontmatter, body };
}
/**
 * Write a tree node atomically (temp file + rename). A crash between the temp
 * write and the rename leaves the prior file intact; a crash after the rename
 * leaves the new file. Readers therefore never observe a partial node.
 *
 * Parent directories are created on demand (`mkdir -p`) so nested-folder ids
 * like `people/colleagues` work without callers pre-creating the folder.
 *
 * @param workspaceDir - Workspace root containing `memory/v3/tree/`.
 * @param node - Node to persist; `node.id` is validated before any I/O.
 * @throws On an invalid id, frontmatter that fails validation, or any
 *   filesystem error.
 */
export async function writeNode(
  workspaceDir: string,
  node: TreeNode,
): Promise<void> {
  validateNodeId(node.id);
  const path = getNodePath(workspaceDir, node.id);
  // pid + UUID keeps temp names unique across concurrent writers.
  const tmpPath = `${path}.tmp.${process.pid}.${randomUUID()}`;
  const content = renderNodeContent(node);
  try {
    await mkdir(dirname(path), { recursive: true });
    await writeFile(tmpPath, content, "utf-8");
    await rename(tmpPath, path);
  } catch (err) {
    // Best-effort cleanup: if the write or the rename failed, remove the temp
    // file so stale temp files do not accumulate in the tree directory. A
    // failure of the cleanup itself is ignored; the original error is rethrown.
    await rm(tmpPath, { force: true }).catch(() => {});
    throw err;
  }
}

/**
 * Read a directory's entries with file-type information, or return `null`
 * when the directory does not exist. Every other error is rethrown.
 */
async function readDirEntries(dir: string) {
  try {
    return await readdir(dir, { withFileTypes: true });
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code === "ENOENT") {
      return null;
    }
    throw err;
  }
}

/**
 * List every tree-node id present on disk, walking subdirectories.
 *
 * Ids are returned sorted, in path-relative form with forward slashes as
 * separators (e.g. `people/colleagues`), which is the form `readNode` expects.
 * File names are not checked against `validateNodeId` here, so a hand-placed
 * file with a non-conforming name is still listed and `readNode` will reject
 * it.
 *
 * Hidden entries (name starts with `.`), non-`.md` files, and atomic-write
 * temp files (`.tmp.<pid>.<uuid>`) are skipped. If the tree/ directory does not
 * yet exist (fresh workspace pre-migration), returns `[]`.
 */
export async function listNodes(workspaceDir: string): Promise<string[]> {
  const root = getTreeDir(workspaceDir);
  const ids: string[] = [];
  // Directories still to visit. Visit order is irrelevant because the result
  // is sorted before returning.
  const pending: string[] = [root];

  let dir: string | undefined;
  while ((dir = pending.pop()) !== undefined) {
    const entries = await readDirEntries(dir);
    if (entries === null) {
      // Root missing → return []. A nested directory can only be missing if it
      // disappeared mid-walk (we only enqueue what readdir surfaced); skip it.
      if (dir === root) return [];
      continue;
    }

    for (const entry of entries) {
      if (entry.name.startsWith(".")) continue;
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        pending.push(fullPath);
        continue;
      }
      if (!entry.isFile()) continue;
      if (!entry.name.endsWith(NODE_EXTENSION)) continue;
      // Skip orphaned temp files left behind by a crashed atomic write.
      if (entry.name.includes(".tmp.")) continue;
      ids.push(idFromNodePath(root, fullPath));
    }
  }

  ids.sort();
  return ids;
}

/**
 * Delete a tree node. Idempotent — missing files are not an error.
 *
 * Any other failure (permission denied, etc.) throws so the caller can react.
 *
 * @param workspaceDir - Workspace root containing `memory/v3/tree/`.
 * @param id - Node id; validated with `validateNodeId` before any I/O.
 */
export async function deleteNode(
  workspaceDir: string,
  id: string,
): Promise<void> {
  validateNodeId(id);
  // `force: true` makes `rm` ignore a missing path; other errors still throw.
  await rm(getNodePath(workspaceDir, id), { force: true });
}

// diff --git a/assistant/src/memory/v3/types.ts b/assistant/src/memory/v3/types.ts
// new file mode 100644
// index 00000000000..c6ae766ec53
// --- /dev/null
// +++ b/assistant/src/memory/v3/types.ts
// @@ -0,0 +1,65 @@
// ---------------------------------------------------------------------------
// Memory v3 — Shared types
// ---------------------------------------------------------------------------
//
// Types shared across the v3 memory subsystem. Like v2, every value here
// crosses a serialization boundary — YAML frontmatter on disk — so it ships as
// a Zod schema with an inferred TypeScript type so runtime validation runs
// wherever a node is read.
//
// This file must not import from any other `memory/v3/*` module — it is the
// leaf of the v3 dependency graph.

import { z } from "zod";

// ---------------------------------------------------------------------------
// Tree nodes
// ---------------------------------------------------------------------------

/**
 * YAML frontmatter at the top of a v3 tree node (`memory/v3/tree/<id>.md`).
 *
 * The v3 tree is a DAG *overlay* over the existing flat `memory/concepts/`
 * pages. A node organizes a region of the graph: its markdown body is the
 * node's full self-description and `children` is the list of outgoing edges.
 *
 * `children` is the canonical, ordered list of child *references*. Each entry
 * is either:
 *   - `"page:<slug>"` — a leaf concept page (canonical content stays in
 *     `memory/concepts/<slug>.md`, shared and untouched by v3), or
 *   - `"node:<id>"` — a sub-node in the v3 tree.
 * The schema only requires each entry to be a string; the prefix convention is
 * not enforced here.
 *
 * This reference list IS the DAG edge — it is the portable replacement for the
 * filesystem symlinks an earlier design would have used. A page or node may be
 * referenced by more than one parent (hence DAG, not tree).
 *
 * `routing_hints` is a thin, hand-written line of cross-branch disambiguation
 * — e.g. "for *work* relationships see people/colleagues, not this node".
 * Kept deliberately small so it stays cheap to inject during routing.
 *
 * `summary` is the node's self-description headline (1-line); the markdown body
 * is the full self-description. Optional so a freshly authored node with only a
 * body still parses.
 *
 * The object is `.strict()`, so unknown keys are rejected rather than dropped.
 */
export const TreeNodeFrontmatterSchema = z
  .object({
    children: z.array(z.string()).default([]),
    routing_hints: z.string().optional(),
    summary: z.string().optional(),
  })
  .strict();

/** Validated frontmatter shape, inferred from {@link TreeNodeFrontmatterSchema}. */
export type TreeNodeFrontmatter = z.infer<typeof TreeNodeFrontmatterSchema>;

/**
 * A single tree node on disk. The id is the relative path from
 * `memory/v3/tree/` minus `.md`, using forward slashes — so `people` and
 * `people/colleagues` are both valid ids. The id is the stable identity used
 * in `children` references (`node:<id>`) and is the portable node handle a
 * future data-migration authors by hand.
 */
export type TreeNode = {
  id: string;
  frontmatter: TreeNodeFrontmatter;
  body: string;
};