From 61380fa19c50fa6b526ad9e1c3ae8b281c28e11b Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:39:35 -0400 Subject: [PATCH 01/21] feat(memory-v3): tree-node on-disk format + node store (#31971) Co-authored-by: Vellum Assistant --- .../memory/v3/__tests__/tree-store.test.ts | 529 ++++++++++++++++++ assistant/src/memory/v3/tree-store.ts | 370 ++++++++++++ assistant/src/memory/v3/types.ts | 65 +++ 3 files changed, 964 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/tree-store.test.ts create mode 100644 assistant/src/memory/v3/tree-store.ts create mode 100644 assistant/src/memory/v3/types.ts diff --git a/assistant/src/memory/v3/__tests__/tree-store.test.ts b/assistant/src/memory/v3/__tests__/tree-store.test.ts new file mode 100644 index 00000000000..3d582cd10d7 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/tree-store.test.ts @@ -0,0 +1,529 @@ +/** + * Tests for `assistant/src/memory/v3/tree-store.ts`. + * + * Coverage matrix: + * - slugify: lowercase / kebab-case / ascii / 80-char cap / empty fallback. + * - validateNodeId: accept set, reject set (path-traversal, malformed shapes), + * reserved `_root` accepted. + * - readNode / writeNode round-trip: frontmatter survives, body preserved. + * - children refs parse for both `page:` and `node:` forms. + * - malformed YAML / unknown frontmatter keys throw. + * - readNode on missing file: returns null. + * - writeNode atomicity: no orphan tmp on success, parent dirs created. + * - listNodes: walks subdirectories, returns nested ids in `/`-form, excludes + * hidden dirs / non-.md / temp files, missing dir → []. + * - deleteNode: nested-id round-trip, idempotent on missing. + * - renderNodeContent: frontmatter + body shape. + * - No change to memory/concepts/ (v3 lives under memory/v3/tree/). + * + * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`. 
+ */ + +import { + existsSync, + mkdirSync, + mkdtempSync, + readdirSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import { + deleteNode, + getTreeDir, + listNodes, + readNode, + renderNodeContent, + ROOT_NODE_ID, + slugify, + validateNodeId, + writeNode, +} from "../tree-store.js"; +import type { TreeNode } from "../types.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +let workspaceDir: string; + +beforeEach(() => { + workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-store-test-")); + // Mirror the workspace migration so readNode / writeNode have a target dir. + mkdirSync(getTreeDir(workspaceDir), { recursive: true }); +}); + +afterEach(() => { + if (existsSync(workspaceDir)) { + rmSync(workspaceDir, { recursive: true, force: true }); + } +}); + +function makeNode(overrides: Partial = {}): TreeNode { + return { + id: "people", + frontmatter: { + children: ["page:people/alice", "node:people/colleagues"], + routing_hints: "for work relationships see people/colleagues", + summary: "People I know.", + }, + body: "The people branch of the memory tree.\n", + ...overrides, + }; +} + +// --------------------------------------------------------------------------- +// slugify +// --------------------------------------------------------------------------- + +describe("slugify", () => { + test("lowercases ASCII letters", () => { + expect(slugify("AliceBob")).toBe("alicebob"); + }); + + test("converts spaces and punctuation to single hyphens", () => { + expect(slugify("Alice's Preferred IDE!")).toBe("alice-s-preferred-ide"); + }); + + test("collapses runs of separators to one hyphen", () => { + expect(slugify("foo ___ bar")).toBe("foo-bar"); + }); + + test("trims leading and 
trailing hyphens", () => { + expect(slugify("---hello world---")).toBe("hello-world"); + }); + + test("collapses '/' to hyphen — slugify produces a single segment", () => { + expect(slugify("People/Colleagues")).toBe("people-colleagues"); + }); + + test("caps slug length at 80 chars and re-trims trailing hyphen", () => { + const long = "a".repeat(120); + const slug = slugify(long); + expect(slug.length).toBe(80); + expect(slug.endsWith("-")).toBe(false); + }); + + test("falls back to a unique placeholder for empty inputs", () => { + const a = slugify(""); + const b = slugify("!!!"); + expect(a).toMatch(/^node-[a-f0-9]{8}$/); + expect(b).toMatch(/^node-[a-f0-9]{8}$/); + expect(a).not.toBe(b); + }); +}); + +// --------------------------------------------------------------------------- +// validateNodeId +// --------------------------------------------------------------------------- + +describe("validateNodeId", () => { + test.each([ + ["people"], + ["a"], + ["people-colleagues"], + ["people/alice"], + ["people/colleagues/alice"], + ["a/b/c/d/e"], + [ROOT_NODE_ID], + ])("accepts %p", (id) => { + expect(() => validateNodeId(id)).not.toThrow(); + }); + + test.each([ + ["empty string", ""], + ["leading slash", "/people"], + ["trailing slash", "people/"], + ["double slash", "people//alice"], + ["dot-dot segment", "people/../alice"], + ["pure dot-dot", ".."], + ["leading dot segment", ".hidden/alice"], + ["backslash", "people\\alice"], + ["null byte", "people\0evil"], + ["whitespace", "people alice"], + ["uppercase", "People"], + ["non-ascii", "café"], + ["leading hyphen", "-people"], + ["non-alphanumeric", "people!"], + ["leading underscore (only _root reserved)", "_other"], + ])("rejects %s (%p)", (_label, id) => { + expect(() => validateNodeId(id)).toThrow(/Invalid tree-node id/); + }); + + test("rejects ids longer than 200 chars", () => { + expect(() => validateNodeId("a".repeat(201))).toThrow( + /Invalid tree-node id/, + ); + }); + + test("rejects segments longer than 
80 chars even if total is under 200", () => { + expect(() => validateNodeId("a".repeat(81))).toThrow( + /Invalid tree-node id/, + ); + }); +}); + +// --------------------------------------------------------------------------- +// readNode / writeNode round-trip +// --------------------------------------------------------------------------- + +describe("writeNode + readNode round-trip", () => { + test("round-trips frontmatter and body verbatim", async () => { + const node = makeNode(); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, node.id); + expect(read).not.toBeNull(); + expect(read!.id).toBe(node.id); + expect(read!.frontmatter.children).toEqual(node.frontmatter.children); + expect(read!.frontmatter.routing_hints).toBe( + node.frontmatter.routing_hints, + ); + expect(read!.frontmatter.summary).toBe(node.frontmatter.summary); + expect(read!.body).toBe(node.body); + }); + + test("children parse for both page: and node: reference forms", async () => { + const node = makeNode({ + id: "mixed", + frontmatter: { + children: ["page:procs/git-flow", "node:procs", "page:alice"], + }, + body: "mixed refs\n", + }); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, "mixed"); + expect(read!.frontmatter.children).toEqual([ + "page:procs/git-flow", + "node:procs", + "page:alice", + ]); + }); + + test("the children list IS the DAG edge — a page may be referenced by multiple parents", async () => { + await writeNode( + workspaceDir, + makeNode({ + id: "team-a", + frontmatter: { children: ["page:people/alice"] }, + body: "team a\n", + }), + ); + await writeNode( + workspaceDir, + makeNode({ + id: "team-b", + frontmatter: { children: ["page:people/alice"] }, + body: "team b\n", + }), + ); + + const a = await readNode(workspaceDir, "team-a"); + const b = await readNode(workspaceDir, "team-b"); + expect(a!.frontmatter.children).toContain("page:people/alice"); + 
expect(b!.frontmatter.children).toContain("page:people/alice"); + }); + + test("renders frontmatter at the top with --- delimiters", async () => { + const node = makeNode(); + await writeNode(workspaceDir, node); + + const raw = readFileSync( + join(getTreeDir(workspaceDir), `${node.id}.md`), + "utf-8", + ); + expect(raw.startsWith("---\n")).toBe(true); + expect(raw.split("---").length).toBeGreaterThanOrEqual(3); + expect(raw).toContain("The people branch"); + }); + + test("preserves an empty body", async () => { + const node = makeNode({ body: "" }); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, node.id); + expect(read!.body).toBe(""); + }); + + test("preserves multiline body with embedded YAML-looking lines", async () => { + const tricky = "key: value\n---\nnot-frontmatter\n"; + const node = makeNode({ id: "tricky", body: tricky }); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, node.id); + expect(read!.body).toBe(tricky); + }); + + test("defaults children to [] for a node with empty frontmatter", async () => { + const node = makeNode({ + id: "bare", + frontmatter: { children: [] }, + body: "bare\n", + }); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, "bare"); + expect(read!.frontmatter.children).toEqual([]); + expect(read!.frontmatter.routing_hints).toBeUndefined(); + expect(read!.frontmatter.summary).toBeUndefined(); + }); + + test("readNode returns null for an id that does not exist", async () => { + const result = await readNode(workspaceDir, "nonexistent"); + expect(result).toBeNull(); + }); + + test("readNode parses a hand-written node with no frontmatter as empty frontmatter + full body", async () => { + const id = "no-frontmatter"; + const body = "Just some prose, no YAML.\n"; + writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), body, "utf-8"); + + const read = await readNode(workspaceDir, id); + expect(read).not.toBeNull(); + 
expect(read!.frontmatter.children).toEqual([]); + expect(read!.body).toBe(body); + }); + + test("readNode throws on malformed YAML frontmatter", async () => { + const id = "bad-yaml"; + // Unclosed bracket inside the frontmatter block — invalid YAML. + const raw = "---\nchildren: [unterminated\n---\nbody\n"; + writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), raw, "utf-8"); + + await expect(readNode(workspaceDir, id)).rejects.toThrow(); + }); + + test("readNode throws on unknown frontmatter keys instead of silently dropping them", async () => { + const id = "extra-keys"; + const raw = "---\nchildren: []\nunknown_field: oops\n---\nbody\n"; + writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), raw, "utf-8"); + + await expect(readNode(workspaceDir, id)).rejects.toThrow(); + }); + + test("writeNode overwrites an existing node", async () => { + await writeNode(workspaceDir, makeNode({ body: "first\n" })); + await writeNode(workspaceDir, makeNode({ body: "second\n" })); + + const read = await readNode(workspaceDir, "people"); + expect(read!.body).toBe("second\n"); + }); + + test("writeNode creates parent directories for nested ids", async () => { + const node = makeNode({ id: "people/colleagues" }); + await writeNode(workspaceDir, node); + + const filePath = join(getTreeDir(workspaceDir), "people", "colleagues.md"); + expect(existsSync(filePath)).toBe(true); + + const read = await readNode(workspaceDir, "people/colleagues"); + expect(read!.id).toBe("people/colleagues"); + expect(read!.body).toBe(node.body); + }); + + test("writeNode round-trips deeply nested ids", async () => { + const node = makeNode({ id: "people/colleagues/alice" }); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, "people/colleagues/alice"); + expect(read!.id).toBe("people/colleagues/alice"); + expect(read!.frontmatter.children).toEqual(node.frontmatter.children); + expect(read!.body).toBe(node.body); + }); + + test("writeNode + readNode round-trip 
the reserved _root id", async () => { + const node = makeNode({ + id: ROOT_NODE_ID, + frontmatter: { children: ["node:people"] }, + body: "root of the tree\n", + }); + await writeNode(workspaceDir, node); + + const read = await readNode(workspaceDir, ROOT_NODE_ID); + expect(read!.id).toBe(ROOT_NODE_ID); + expect(read!.frontmatter.children).toEqual(["node:people"]); + }); + + test("writeNode rejects malicious ids and writes nothing at the escape target", async () => { + await expect( + writeNode(workspaceDir, makeNode({ id: "../escape" })), + ).rejects.toThrow(/Invalid tree-node id/); + + // `../escape` would resolve to `/memory/v3/escape.md`. Confirm + // the validation throw fired before any I/O — no file at that target. + expect(existsSync(join(workspaceDir, "memory", "v3", "escape.md"))).toBe( + false, + ); + }); + + test("readNode rejects malicious ids", async () => { + await expect(readNode(workspaceDir, "../escape")).rejects.toThrow( + /Invalid tree-node id/, + ); + }); + + test("successful write produces no orphan tmp files", async () => { + await writeNode(workspaceDir, makeNode()); + + const remaining = readdirSync(getTreeDir(workspaceDir)); + const orphanTmps = remaining.filter((name) => name.includes(".tmp.")); + expect(orphanTmps).toEqual([]); + }); + + test("does not touch memory/concepts/", async () => { + await writeNode(workspaceDir, makeNode({ id: "people/colleagues" })); + + expect(existsSync(join(workspaceDir, "memory", "concepts"))).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// renderNodeContent +// --------------------------------------------------------------------------- + +describe("renderNodeContent", () => { + test("emits frontmatter block followed by body", () => { + const rendered = renderNodeContent(makeNode()); + expect(rendered.startsWith("---\n")).toBe(true); + expect(rendered).toContain("children:"); + expect(rendered).toContain("page:people/alice"); + 
expect(rendered.endsWith("The people branch of the memory tree.\n")).toBe( + true, + ); + }); + + test("keeps the explicit children key even when empty", () => { + const rendered = renderNodeContent( + makeNode({ frontmatter: { children: [] }, body: "x\n" }), + ); + expect(rendered).toContain("children: []"); + }); +}); + +// --------------------------------------------------------------------------- +// listNodes +// --------------------------------------------------------------------------- + +describe("listNodes", () => { + test("returns ids (filename minus .md) for every node on disk", async () => { + await writeNode(workspaceDir, makeNode({ id: "alice" })); + await writeNode(workspaceDir, makeNode({ id: "bob" })); + await writeNode(workspaceDir, makeNode({ id: "carol" })); + + const ids = await listNodes(workspaceDir); + expect(ids).toEqual(["alice", "bob", "carol"]); + }); + + test("excludes non-.md files in the tree directory", async () => { + await writeNode(workspaceDir, makeNode({ id: "alice" })); + + const treeDir = getTreeDir(workspaceDir); + writeFileSync(join(treeDir, "README.txt"), "ignore me", "utf-8"); + writeFileSync(join(treeDir, "image.png"), "fake", "utf-8"); + writeFileSync(join(treeDir, ".hidden"), "fake", "utf-8"); + + const ids = await listNodes(workspaceDir); + expect(ids).toEqual(["alice"]); + }); + + test("walks subdirectories and returns nested ids in '/'-form", async () => { + await writeNode(workspaceDir, makeNode({ id: "alice" })); + await writeNode(workspaceDir, makeNode({ id: "people/bob" })); + await writeNode(workspaceDir, makeNode({ id: "people/carol" })); + await writeNode(workspaceDir, makeNode({ id: "arcs/2025-04/cutover" })); + + const ids = await listNodes(workspaceDir); + expect(ids).toEqual([ + "alice", + "arcs/2025-04/cutover", + "people/bob", + "people/carol", + ]); + }); + + test("skips hidden subdirectories and non-.md files inside nested dirs", async () => { + await writeNode(workspaceDir, makeNode({ id: 
"people/alice" })); + + const treeDir = getTreeDir(workspaceDir); + mkdirSync(join(treeDir, ".git"), { recursive: true }); + writeFileSync(join(treeDir, ".git", "config.md"), "fake", "utf-8"); + writeFileSync(join(treeDir, "people", "notes.txt"), "ignore", "utf-8"); + + const ids = await listNodes(workspaceDir); + expect(ids).toEqual(["people/alice"]); + }); + + test("skips orphaned .tmp.* files at any depth", async () => { + const treeDir = getTreeDir(workspaceDir); + await writeNode(workspaceDir, makeNode({ id: "people/alice" })); + + writeFileSync( + join(treeDir, "alice.md.tmp.123.abc-def"), + "stranded", + "utf-8", + ); + writeFileSync( + join(treeDir, "people", "bob.md.tmp.123.abc-def"), + "stranded", + "utf-8", + ); + + const ids = await listNodes(workspaceDir); + expect(ids).toEqual(["people/alice"]); + }); + + test("returns [] when the tree directory does not exist", async () => { + rmSync(getTreeDir(workspaceDir), { recursive: true, force: true }); + + const ids = await listNodes(workspaceDir); + expect(ids).toEqual([]); + }); + + test("returns [] when the tree directory is empty", async () => { + const ids = await listNodes(workspaceDir); + expect(ids).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// deleteNode +// --------------------------------------------------------------------------- + +describe("deleteNode", () => { + test("removes the node from disk", async () => { + const node = makeNode(); + await writeNode(workspaceDir, node); + expect(await readNode(workspaceDir, node.id)).not.toBeNull(); + + await deleteNode(workspaceDir, node.id); + expect(await readNode(workspaceDir, node.id)).toBeNull(); + }); + + test("removes nested nodes", async () => { + const node = makeNode({ id: "people/colleagues" }); + await writeNode(workspaceDir, node); + + await deleteNode(workspaceDir, "people/colleagues"); + expect(await readNode(workspaceDir, "people/colleagues")).toBeNull(); + }); + + test("is 
idempotent — deleting a missing node does not throw", async () => { + await deleteNode(workspaceDir, "never-existed"); + await deleteNode(workspaceDir, "never-existed"); + }); + + test("does not affect other nodes", async () => { + await writeNode(workspaceDir, makeNode({ id: "alice" })); + await writeNode(workspaceDir, makeNode({ id: "bob" })); + + await deleteNode(workspaceDir, "alice"); + + expect(await readNode(workspaceDir, "alice")).toBeNull(); + expect(await readNode(workspaceDir, "bob")).not.toBeNull(); + }); +}); diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts new file mode 100644 index 00000000000..be13e489f8e --- /dev/null +++ b/assistant/src/memory/v3/tree-store.ts @@ -0,0 +1,370 @@ +/** + * Memory v3 — Tree node store. + * + * Owns the on-disk read/write contract for `memory/v3/tree/<id>.md`. Nodes may + * live directly under `memory/v3/tree/` or nested in subdirectories (e.g. + * `memory/v3/tree/people/colleagues.md`); the id encodes the relative path from + * `tree/` minus the `.md` extension, using forward slashes as separators (so + * `people/colleagues` is a valid id). + * + * The v3 tree is a DAG *overlay* over the existing flat `memory/concepts/` + * pages — this module never touches `memory/concepts/`. Pages stay canonical + * and shared; nodes reference pages and sub-nodes by `children` refs + * (`page:<slug>` / `node:<id>`), which are the portable replacement for + * filesystem symlinks. + * + * Each node is a YAML-frontmatter Markdown file: a `---`-delimited block + * (`children`, optional `routing_hints` / `summary`) followed by the prose body + * that is the node's full self-description. This module is the only v3 + * component that knows how to parse or render that format — every other v3 + * module routes through `readNode` / `writeNode` so the on-disk shape can + * evolve without touching downstream callers. 
+ * + * Writes are atomic (temp + rename) so a crash mid-write leaves either the old + * file or the new file in place — never a half-written node. The id machinery + * mirrors v2's page-store `slugify` / `validateSlug` so node ids and page slugs + * share the same filesystem-safe shape. + */ + +import { randomUUID } from "node:crypto"; +import { + mkdir, + readdir, + readFile, + rename, + rm, + writeFile, +} from "node:fs/promises"; +import { dirname, join, relative, sep } from "node:path"; + +import { parse as parseYaml, stringify as stringifyYaml } from "yaml"; + +import { FRONTMATTER_REGEX } from "../../skills/frontmatter.js"; +import { type TreeNode, TreeNodeFrontmatterSchema } from "./types.js"; + +/** Filename suffix for tree nodes. */ +const NODE_EXTENSION = ".md"; + +/** Cap individual id-segment length so we stay well under filesystem limits. */ +const MAX_ID_SEGMENT_LENGTH = 80; + +/** Cap the full id (including any folder separators) to a sane bound. */ +const MAX_ID_TOTAL_LENGTH = 200; + +/** Each path segment must match this — same shape `slugify` produces. */ +const ID_SEGMENT_REGEX = /^[a-z0-9](?:[a-z0-9-]*)$/; + +/** + * Reserved id for the root of the v3 tree. The root node is the entry point a + * future migration authors first; reserving the id keeps the well-known handle + * stable across the codebase. + */ +export const ROOT_NODE_ID = "_root"; + +/** + * Convert an arbitrary input string into a filesystem-safe id **segment**. + * + * Returns a single path segment (no `/`). Path-shaped ids are constructed by + * the authoring migration writing files at full paths; this helper is for + * turning free-form text (e.g. a node label) into one clean segment. + * + * Rules: + * - Lowercase ASCII letters, digits, and hyphens only. + * - Non-ASCII / non-alphanumeric characters (including `/`) collapse to hyphens. + * - Consecutive hyphens collapse to one; leading/trailing hyphens trimmed. 
+ * - Truncated to {@link MAX_ID_SEGMENT_LENGTH} characters (with trailing + * hyphen re-trimmed after truncation). + * - Empty inputs (e.g. emoji-only) fall back to `node-<8hex>` so the caller + * always gets a non-empty, write-safe segment. + */ +export function slugify(input: string): string { + let slug = input + .toLowerCase() + .normalize("NFKD") + .replace(/[^a-z0-9-]+/g, "-") + .replace(/-{2,}/g, "-") + .replace(/^-+|-+$/g, ""); + + if (slug.length > MAX_ID_SEGMENT_LENGTH) { + slug = slug.slice(0, MAX_ID_SEGMENT_LENGTH).replace(/-+$/, ""); + } + + if (!slug) { + slug = `node-${randomUUID().slice(0, 8)}`; + } + + return slug; +} + +/** + * Validate a node id — possibly path-shaped — that is about to cross the + * storage boundary. Throws on any malformed or unsafe value. + * + * The on-disk tree treats ids as relative paths under `memory/v3/tree/`. A + * malformed id (e.g. `..`, leading `/`, embedded null byte) could escape that + * root via `path.join` if it slipped through, so we enforce shape here at every + * read/write/delete entry point rather than relying on callers. + * + * The reserved {@link ROOT_NODE_ID} (`_root`) is accepted as a special case; + * its leading underscore would otherwise fail {@link ID_SEGMENT_REGEX}. + * + * Rules: + * - Non-empty, ≤ {@link MAX_ID_TOTAL_LENGTH} chars. + * - Each `/`-separated segment matches {@link ID_SEGMENT_REGEX} + * (lowercase alphanum + hyphen, no leading hyphen, ≤80 chars). + * - No `..` segments, no empty segments (`a//b`), no leading or trailing `/`. + * - No `\` (Windows separator), no null bytes, no whitespace, no non-ASCII. 
+ */ +export function validateNodeId(id: string): void { + if (typeof id !== "string" || id.length === 0) { + throw new Error(`Invalid tree-node id: empty`); + } + if (id === ROOT_NODE_ID) { + return; + } + if (id.length > MAX_ID_TOTAL_LENGTH) { + throw new Error( + `Invalid tree-node id: length ${id.length} exceeds max ${MAX_ID_TOTAL_LENGTH}: ${id}`, + ); + } + if (id.includes("\\")) { + throw new Error(`Invalid tree-node id: backslash not allowed: ${id}`); + } + if (id.includes("\0")) { + throw new Error(`Invalid tree-node id: null byte not allowed`); + } + if (/\s/.test(id)) { + throw new Error(`Invalid tree-node id: whitespace not allowed: ${id}`); + } + if (id.startsWith("/") || id.endsWith("/")) { + throw new Error( + `Invalid tree-node id: leading or trailing '/' not allowed: ${id}`, + ); + } + const segments = id.split("/"); + for (const segment of segments) { + if (segment.length === 0) { + throw new Error(`Invalid tree-node id: empty path segment: ${id}`); + } + if (segment === "..") { + throw new Error(`Invalid tree-node id: '..' segment not allowed: ${id}`); + } + if (segment.length > MAX_ID_SEGMENT_LENGTH) { + throw new Error( + `Invalid tree-node id: segment '${segment}' exceeds max ${MAX_ID_SEGMENT_LENGTH} chars: ${id}`, + ); + } + if (!ID_SEGMENT_REGEX.test(segment)) { + throw new Error( + `Invalid tree-node id: segment '${segment}' must match [a-z0-9][a-z0-9-]*: ${id}`, + ); + } + } +} + +// --------------------------------------------------------------------------- +// Path helpers +// --------------------------------------------------------------------------- + +export function getTreeDir(workspaceDir: string): string { + return join(workspaceDir, "memory", "v3", "tree"); +} + +/** + * Resolve the absolute path for a node id. 
Ids may contain `/` to indicate + * folder hierarchy under `memory/v3/tree/`; `path.join` handles those correctly + * on POSIX, and `validateNodeId` (called at every public entry point) rejects + * shapes that could escape the tree root. + */ +function getNodePath(workspaceDir: string, id: string): string { + return join(getTreeDir(workspaceDir), `${id}${NODE_EXTENSION}`); +} + +/** + * Compute the id for a tree-node file, given the tree root and the absolute + * file path. Returns the path-relative location with `.md` stripped and + * platform separators normalized to `/`. Tolerant of paths that don't end in + * `.md` so callers walking arbitrary content can use it defensively. + */ +function idFromNodePath(treeRoot: string, filePath: string): string { + const rel = relative(treeRoot, filePath); + const withoutExt = rel.endsWith(NODE_EXTENSION) + ? rel.slice(0, -NODE_EXTENSION.length) + : rel; + return sep === "/" ? withoutExt : withoutExt.split(sep).join("/"); +} + +// --------------------------------------------------------------------------- +// Frontmatter parse / render +// --------------------------------------------------------------------------- + +/** + * Split raw file contents into (frontmatter, body). If no frontmatter block is + * present the entire input is treated as body and an empty frontmatter block is + * returned (validated by `TreeNodeFrontmatterSchema` so any unexpected shape — + * bad types, extra junk — surfaces as a parse error to the caller, not silently + * dropped data). + * + * The schema's default guarantees `children` is always an array even on a + * freshly created node with empty frontmatter. 
+ */ +function parseNodeContent(raw: string): { + frontmatter: TreeNode["frontmatter"]; + body: string; +} { + const match = raw.match(FRONTMATTER_REGEX); + if (!match) { + return { + frontmatter: TreeNodeFrontmatterSchema.parse({}), + body: raw, + }; + } + const yamlBlock = match[1]; + const body = raw.slice(match[0].length); + const parsed = parseYaml(yamlBlock) ?? {}; + return { + frontmatter: TreeNodeFrontmatterSchema.parse(parsed), + body, + }; +} + +/** + * Render a tree node back into the on-disk Markdown form. The output is always + * frontmatter + body; even nodes with empty `children` keep the explicit YAML + * key so callers see the canonical shape on round-trip. + */ +export function renderNodeContent(node: TreeNode): string { + const frontmatter = TreeNodeFrontmatterSchema.parse(node.frontmatter); + const yamlBlock = stringifyYaml(frontmatter, { indent: 2 }).trimEnd(); + return `---\n${yamlBlock}\n---\n${node.body}`; +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/** + * Read a single tree node. Returns `null` if the file does not exist. + * + * Any other read or parse failure (permission denied, malformed YAML, + * frontmatter that fails schema validation) throws — unlike "missing", these + * are programmer / data-corruption errors the caller needs to see. + */ +export async function readNode( + workspaceDir: string, + id: string, +): Promise { + validateNodeId(id); + const path = getNodePath(workspaceDir, id); + let raw: string; + try { + raw = await readFile(path, "utf-8"); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return null; + } + throw err; + } + const { frontmatter, body } = parseNodeContent(raw); + return { id, frontmatter, body }; +} + +/** + * Write a tree node atomically (temp file + rename). 
A crash between the temp + * write and the rename leaves the prior file intact; a crash after the rename + * leaves the new file. Readers therefore never observe a partial node. + * + * Parent directories are created on demand (`mkdir -p`) so nested-folder ids + * like `people/colleagues` work without callers pre-creating the folder. + */ +export async function writeNode( + workspaceDir: string, + node: TreeNode, +): Promise { + validateNodeId(node.id); + const path = getNodePath(workspaceDir, node.id); + const tmpPath = `${path}.tmp.${process.pid}.${randomUUID()}`; + const content = renderNodeContent(node); + try { + await mkdir(dirname(path), { recursive: true }); + await writeFile(tmpPath, content, "utf-8"); + await rename(tmpPath, path); + } catch (err) { + // Best-effort cleanup: if the write or the rename failed part-way, remove + // the orphan tmp file so it does not accumulate in the tree/ directory + // (listNodes skips `.tmp.` files, but each one has a unique name and stays on disk). + await rm(tmpPath, { force: true }).catch(() => {}); + throw err; + } +} + +/** + * List every tree-node id present on disk, walking subdirectories. + * + * Ids are returned in path-relative form with forward slashes as separators + * (e.g. `people/colleagues`) so callers can pass them straight back to + * `readNode`. + * + * Hidden files and directories (name starts with `.`), non-`.md` files, and atomic- + * write temp files (`.tmp.<pid>.<uuid>`) are skipped. If the tree/ directory + * does not yet exist (fresh workspace pre-migration), returns `[]`. + */ +export async function listNodes(workspaceDir: string): Promise { + const root = getTreeDir(workspaceDir); + const ids: string[] = []; + const queue: string[] = [root]; + + while (queue.length > 0) { + const dir = queue.shift()!; + let entries; + try { + entries = await readdir(dir, { withFileTypes: true }); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + // Root missing → return []. 
Nested missing dir is impossible mid-walk + // (we only enqueue what readdir surfaced) but treat the same defensively. + if (dir === root) return []; + continue; + } + throw err; + } + + for (const entry of entries) { + if (entry.name.startsWith(".")) continue; + const fullPath = join(dir, entry.name); + if (entry.isDirectory()) { + queue.push(fullPath); + continue; + } + if (!entry.isFile()) continue; + if (!entry.name.endsWith(NODE_EXTENSION)) continue; + // Skip orphaned temp files left behind by a crashed atomic write. + if (entry.name.includes(".tmp.")) continue; + ids.push(idFromNodePath(root, fullPath)); + } + } + + ids.sort(); + return ids; +} + +/** + * Delete a tree node. Idempotent — missing files are not an error. + * + * Any other failure (permission denied, etc.) throws so the caller can react. + */ +export async function deleteNode( + workspaceDir: string, + id: string, +): Promise { + validateNodeId(id); + const path = getNodePath(workspaceDir, id); + try { + await rm(path); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + return; + } + throw err; + } +} diff --git a/assistant/src/memory/v3/types.ts b/assistant/src/memory/v3/types.ts new file mode 100644 index 00000000000..c6ae766ec53 --- /dev/null +++ b/assistant/src/memory/v3/types.ts @@ -0,0 +1,65 @@ +// --------------------------------------------------------------------------- +// Memory v3 — Shared types +// --------------------------------------------------------------------------- +// +// Types shared across the v3 memory subsystem. Like v2, every value here +// crosses a serialization boundary — YAML frontmatter on disk — so it ships as +// a Zod schema with an inferred TypeScript type so runtime validation runs +// wherever a node is read. +// +// This file must not import from any other `memory/v3/*` module — it is the +// leaf of the v3 dependency graph. 
+ +import { z } from "zod"; + +// --------------------------------------------------------------------------- +// Tree nodes +// --------------------------------------------------------------------------- + +/** + * YAML frontmatter at the top of a v3 tree node (`memory/v3/tree/.md`). + * + * The v3 tree is a DAG *overlay* over the existing flat `memory/concepts/` + * pages. A node organizes a region of the graph: its markdown body is the + * node's full self-description and `children` is the list of outgoing edges. + * + * `children` is the canonical, ordered list of child *references*. Each entry + * is either: + * - `"page:"` — a leaf concept page (canonical content stays in + * `memory/concepts/.md`, shared and untouched by v3), or + * - `"node:"` — a sub-node in the v3 tree. + * + * This reference list IS the DAG edge — it is the portable replacement for the + * filesystem symlinks an earlier design would have used. A page or node may be + * referenced by more than one parent (hence DAG, not tree). + * + * `routing_hints` is a thin, hand-written line of cross-branch disambiguation + * — e.g. "for *work* relationships see people/colleagues, not this node". + * Kept deliberately small so it stays cheap to inject during routing. + * + * `summary` is the node's self-description headline (1-line); the markdown body + * is the full self-description. Optional so a freshly authored node with only a + * body still parses. + */ +export const TreeNodeFrontmatterSchema = z + .object({ + children: z.array(z.string()).default([]), + routing_hints: z.string().optional(), + summary: z.string().optional(), + }) + .strict(); + +export type TreeNodeFrontmatter = z.infer; + +/** + * A single tree node on disk. The id is the relative path from + * `memory/v3/tree/` minus `.md`, using forward slashes — so `people` and + * `people/colleagues` are both valid ids. 
The id is the stable identity used + * in `children` references (`node:`) and is the portable node handle a + * future data-migration authors by hand. + */ +export type TreeNode = { + id: string; + frontmatter: TreeNodeFrontmatter; + body: string; +}; From be5eb0f067f77de930f7fe5cd69766e44218f7a0 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:40:19 -0400 Subject: [PATCH 02/21] feat(memory-v3): config schema + cheap/capable LLM call sites (#31972) Co-authored-by: Vellum Assistant --- assistant/src/__tests__/llm-resolver.test.ts | 86 +++++++++++-- assistant/src/config/call-site-defaults.ts | 4 + .../schemas/__tests__/memory-v2.test.ts | 109 +++++++++++++++- .../src/config/schemas/call-site-catalog.ts | 21 ++++ assistant/src/config/schemas/llm.ts | 3 + assistant/src/config/schemas/memory-v2.ts | 119 ++++++++++++++++++ assistant/src/config/schemas/memory.ts | 3 +- 7 files changed, 334 insertions(+), 11 deletions(-) diff --git a/assistant/src/__tests__/llm-resolver.test.ts b/assistant/src/__tests__/llm-resolver.test.ts index 9b0e9ad626e..86feaf8c2b6 100644 --- a/assistant/src/__tests__/llm-resolver.test.ts +++ b/assistant/src/__tests__/llm-resolver.test.ts @@ -2,7 +2,10 @@ import { describe, expect, test } from "bun:test"; import { z } from "zod"; -import { resolveCallSiteConfig, resolveDefaultProfileKey } from "../config/llm-resolver.js"; +import { + resolveCallSiteConfig, + resolveDefaultProfileKey, +} from "../config/llm-resolver.js"; import { type LLMCallSite, LLMSchema } from "../config/schemas/llm.js"; const fullDefault = { @@ -690,13 +693,28 @@ describe("resolveCallSiteConfig", () => { }); const callSites: LLMCallSite[] = [ - "mainAgent", "subagentSpawn", "heartbeatAgent", "filingAgent", - "compactionAgent", "analyzeConversation", "callAgent", - "memoryExtraction", "memoryConsolidation", "memoryRetrieval", - "memoryRouter", "recall", "conversationSummarization", - "commitMessage", "conversationStarters", "replySuggestion", - 
"conversationTitle", "identityIntro", "emptyStateGreeting", - "notificationDecision", "interactionClassifier", "inference", + "mainAgent", + "subagentSpawn", + "heartbeatAgent", + "filingAgent", + "compactionAgent", + "analyzeConversation", + "callAgent", + "memoryExtraction", + "memoryConsolidation", + "memoryRetrieval", + "memoryRouter", + "recall", + "conversationSummarization", + "commitMessage", + "conversationStarters", + "replySuggestion", + "conversationTitle", + "identityIntro", + "emptyStateGreeting", + "notificationDecision", + "interactionClassifier", + "inference", ]; for (const cs of callSites) { @@ -778,7 +796,10 @@ describe("resolveCallSiteConfig", () => { provider_connection: "anthropic-managed", }, profiles: { - fireworks: { provider: "fireworks", model: "accounts/fireworks/models/kimi-k2p5" }, + fireworks: { + provider: "fireworks", + model: "accounts/fireworks/models/kimi-k2p5", + }, }, activeProfile: "fireworks", }); @@ -874,3 +895,50 @@ describe("resolveDefaultProfileKey", () => { ); }); }); + +describe("memory v3 call sites resolve through the standard resolver", () => { + const llm = LLMSchema.parse({ + default: fullDefault, + profiles: { + balanced: { provider: "anthropic", model: "claude-sonnet-4-7" }, + "cost-optimized": { + provider: "anthropic", + model: "claude-haiku-4-5-20251001", + }, + }, + }); + + test("memoryV3Filter and memoryV3Descent resolve to the cost-optimized profile", () => { + expect(resolveDefaultProfileKey("memoryV3Filter", llm)).toBe( + "cost-optimized", + ); + expect(resolveDefaultProfileKey("memoryV3Descent", llm)).toBe( + "cost-optimized", + ); + expect(resolveCallSiteConfig("memoryV3Filter", llm).model).toBe( + "claude-haiku-4-5-20251001", + ); + expect(resolveCallSiteConfig("memoryV3Descent", llm).model).toBe( + "claude-haiku-4-5-20251001", + ); + }); + + test("memoryV3Gate resolves to the balanced (capable) profile", () => { + expect(resolveDefaultProfileKey("memoryV3Gate", llm)).toBe("balanced"); + 
expect(resolveCallSiteConfig("memoryV3Gate", llm).model).toBe( + "claude-sonnet-4-7", + ); + }); + + test("v3 call sites are addressable as call-site override keys", () => { + const overridden = LLMSchema.parse({ + default: fullDefault, + callSites: { + memoryV3Gate: { model: "claude-opus-4-7" }, + }, + }); + expect(resolveCallSiteConfig("memoryV3Gate", overridden).model).toBe( + "claude-opus-4-7", + ); + }); +}); diff --git a/assistant/src/config/call-site-defaults.ts b/assistant/src/config/call-site-defaults.ts index 36fbe925750..e988dbe2e9a 100644 --- a/assistant/src/config/call-site-defaults.ts +++ b/assistant/src/config/call-site-defaults.ts @@ -47,6 +47,10 @@ export const CALL_SITE_DEFAULTS: Record = { memoryV2Migration: { profile: "cost-optimized" }, memoryV2Sweep: { profile: "cost-optimized" }, memoryV2Consolidation: { profile: "balanced" }, + // memory v3: cheap filter + descent, capable gate. + memoryV3Filter: { profile: "cost-optimized" }, + memoryV3Descent: { profile: "cost-optimized" }, + memoryV3Gate: { profile: "balanced" }, conversationSummarization: { profile: "cost-optimized" }, conversationTitle: { profile: "cost-optimized" }, approvalCopy: { profile: "cost-optimized" }, diff --git a/assistant/src/config/schemas/__tests__/memory-v2.test.ts b/assistant/src/config/schemas/__tests__/memory-v2.test.ts index ef55cca2c9c..5ca74e76bba 100644 --- a/assistant/src/config/schemas/__tests__/memory-v2.test.ts +++ b/assistant/src/config/schemas/__tests__/memory-v2.test.ts @@ -1,7 +1,7 @@ import { describe, expect, test } from "bun:test"; import { MemoryConfigSchema } from "../memory.js"; -import { MemoryV2ConfigSchema } from "../memory-v2.js"; +import { MemoryV2ConfigSchema, MemoryV3ConfigSchema } from "../memory-v2.js"; describe("MemoryV2ConfigSchema", () => { test("parses an empty object to documented defaults", () => { @@ -212,6 +212,113 @@ describe("MemoryV2ConfigSchema", () => { }); }); +describe("MemoryV3ConfigSchema", () => { + test("parses an empty 
object to documented defaults", () => { + const parsed = MemoryV3ConfigSchema.parse({}); + expect(parsed).toEqual({ + enabled: false, + shadow: false, + passCap: 3, + breadthBudget: 6, + maxDepth: 6, + denseQuota: { activeDomain: 30, offDomain: 8 }, + lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true }, + ks: [5, 10, 25, 50], + }); + }); + + test("parses undefined to the same defaults (top-level .default)", () => { + expect(MemoryV3ConfigSchema.parse(undefined)).toEqual( + MemoryV3ConfigSchema.parse({}), + ); + }); + + test("defaults to disabled for backwards compatibility", () => { + expect(MemoryV3ConfigSchema.parse({}).enabled).toBe(false); + expect(MemoryV3ConfigSchema.parse({}).shadow).toBe(false); + }); + + test("accepts explicit scalar overrides", () => { + const parsed = MemoryV3ConfigSchema.parse({ + enabled: true, + shadow: true, + passCap: 5, + breadthBudget: 10, + maxDepth: 8, + }); + expect(parsed.enabled).toBe(true); + expect(parsed.shadow).toBe(true); + expect(parsed.passCap).toBe(5); + expect(parsed.breadthBudget).toBe(10); + expect(parsed.maxDepth).toBe(8); + }); + + test("accepts explicit denseQuota override", () => { + const parsed = MemoryV3ConfigSchema.parse({ + denseQuota: { activeDomain: 50, offDomain: 12 }, + }); + expect(parsed.denseQuota).toEqual({ activeDomain: 50, offDomain: 12 }); + }); + + test("accepts a partial lanes override and defaults the rest", () => { + const parsed = MemoryV3ConfigSchema.parse({ lanes: { dense: false } }); + expect(parsed.lanes).toEqual({ + hot: true, + sparse: true, + dense: false, + tree: true, + edges: true, + }); + }); + + test("accepts an explicit ks override", () => { + const parsed = MemoryV3ConfigSchema.parse({ ks: [1, 3, 7] }); + expect(parsed.ks).toEqual([1, 3, 7]); + }); + + test("rejects a non-boolean enabled", () => { + expect(() => MemoryV3ConfigSchema.parse({ enabled: "yes" })).toThrow(); + }); + + test("rejects a non-integer passCap", () => { + expect(() => 
MemoryV3ConfigSchema.parse({ passCap: 2.5 })).toThrow(); + }); + + test("rejects non-number ks entries", () => { + expect(() => MemoryV3ConfigSchema.parse({ ks: ["a"] })).toThrow(); + }); +}); + +describe("MemoryConfigSchema integration with v3 block", () => { + test("includes a v3 block defaulting to disabled when v3 is omitted", () => { + const parsed = MemoryConfigSchema.parse({}); + expect(parsed.v3).toBeDefined(); + expect(parsed.v3.enabled).toBe(false); + expect(parsed.v3.shadow).toBe(false); + expect(parsed.v3.passCap).toBe(3); + expect(parsed.v3.lanes.dense).toBe(true); + expect(parsed.v3.ks).toEqual([5, 10, 25, 50]); + }); + + test("leaves pre-existing configs (no v3 key) otherwise unchanged", () => { + // A config authored before v3 existed parses fine and its v2 block is + // untouched; the v3 block is purely additive. + const parsed = MemoryConfigSchema.parse({ v2: { top_k: 50 } }); + expect(parsed.v2.top_k).toBe(50); + expect(parsed.v3.enabled).toBe(false); + }); + + test("propagates v3 overrides through MemoryConfigSchema", () => { + const parsed = MemoryConfigSchema.parse({ + v3: { enabled: true, passCap: 4 }, + }); + expect(parsed.v3.enabled).toBe(true); + expect(parsed.v3.passCap).toBe(4); + // Non-overridden v3 fields keep their defaults. 
+ expect(parsed.v3.maxDepth).toBe(6); + }); +}); + describe("MemoryConfigSchema integration with v2 block", () => { test("parses an empty memory config and includes a v2 block with defaults", () => { const parsed = MemoryConfigSchema.parse({}); diff --git a/assistant/src/config/schemas/call-site-catalog.ts b/assistant/src/config/schemas/call-site-catalog.ts index 5552889d7cb..7d0417b4f1f 100644 --- a/assistant/src/config/schemas/call-site-catalog.ts +++ b/assistant/src/config/schemas/call-site-catalog.ts @@ -121,6 +121,27 @@ const CATALOG_RECORD: CatalogRecord = { "Selects which concept pages to inject for the next agent turn by routing over a cached page index.", domain: "memory", }, + memoryV3Filter: { + id: "memoryV3Filter", + displayName: "Memory V3 Filter", + description: + "Cheaply filters the V3 multi-lane candidate set before descent.", + domain: "memory", + }, + memoryV3Descent: { + id: "memoryV3Descent", + displayName: "Memory V3 Descent", + description: + "Drives the V3 bounded-descent traversal through the memory tree.", + domain: "memory", + }, + memoryV3Gate: { + id: "memoryV3Gate", + displayName: "Memory V3 Gate", + description: + "Final capable gate that decides which V3 candidates are injected for the next turn.", + domain: "memory", + }, memoryV2Consolidation: { id: "memoryV2Consolidation", displayName: "Memory V2 Consolidation", diff --git a/assistant/src/config/schemas/llm.ts b/assistant/src/config/schemas/llm.ts index 10103b86b1d..e6a53c85fae 100644 --- a/assistant/src/config/schemas/llm.ts +++ b/assistant/src/config/schemas/llm.ts @@ -49,6 +49,9 @@ export const LLMCallSiteEnum = z.enum([ "memoryV2Migration", "memoryV2Sweep", "memoryRouter", + "memoryV3Filter", + "memoryV3Descent", + "memoryV3Gate", "memoryV2Consolidation", "memoryRetrospective", "recall", diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts index 45a076a778e..11360a89e31 100644 --- a/assistant/src/config/schemas/memory-v2.ts +++ 
b/assistant/src/config/schemas/memory-v2.ts @@ -388,3 +388,122 @@ export const MemoryV2ConfigSchema = z }); export type MemoryV2Config = z.infer; + +/** + * Memory v3 (multi-lane, bounded-descent retrieval) configuration. + * + * Additive scaffolding only — defaults to `enabled: false` so existing + * configs are untouched and the v3 retrieval loop stays inert until later + * PRs wire it up. Every field carries a default and the whole block is + * `.default(...)`-wrapped so a config that omits `memory.v3` entirely still + * parses to these documented defaults. + */ +export const MemoryV3ConfigSchema = z + .object({ + enabled: z + .boolean({ error: "memory.v3.enabled must be a boolean" }) + .default(false) + .describe( + "Whether the v3 memory subsystem (multi-lane bounded-descent retrieval) is enabled. Off by default until the v3 loop is wired up.", + ), + shadow: z + .boolean({ error: "memory.v3.shadow must be a boolean" }) + .default(false) + .describe( + "Live-shadow toggle: when on, the v3 retrieval loop runs alongside the active path for comparison without affecting injected context. 
Consumed by a later PR.", + ), + passCap: z + .number({ error: "memory.v3.passCap must be a number" }) + .int("memory.v3.passCap must be an integer") + .default(3) + .describe( + "Maximum number of retrieval passes (router → descent rounds) the v3 loop may run per turn.", + ), + breadthBudget: z + .number({ error: "memory.v3.breadthBudget must be a number" }) + .int("memory.v3.breadthBudget must be an integer") + .default(6) + .describe( + "Per-pass breadth budget — the number of frontier candidates the v3 loop may expand at each step.", + ), + maxDepth: z + .number({ error: "memory.v3.maxDepth must be a number" }) + .int("memory.v3.maxDepth must be an integer") + .default(6) + .describe( + "Maximum descent depth the v3 loop traverses through the memory tree before stopping.", + ), + denseQuota: z + .object({ + activeDomain: z + .number({ + error: "memory.v3.denseQuota.activeDomain must be a number", + }) + .describe( + "Dense-lane candidate quota allocated to the conversation's active domain.", + ), + offDomain: z + .number({ error: "memory.v3.denseQuota.offDomain must be a number" }) + .describe( + "Dense-lane candidate quota allocated to off-domain (exploratory) retrieval.", + ), + }) + .default({ activeDomain: 30, offDomain: 8 }) + .describe( + "Dense-lane candidate quotas split between the active domain and off-domain exploration.", + ), + lanes: z + .object({ + hot: z + .boolean() + .default(true) + .describe("Whether the hot (recently-touched) retrieval lane is on."), + sparse: z + .boolean() + .default(true) + .describe("Whether the sparse (BM25-style keyword) lane is on."), + dense: z + .boolean() + .default(true) + .describe("Whether the dense (embedding-similarity) lane is on."), + tree: z + .boolean() + .default(true) + .describe("Whether the tree (hierarchical descent) lane is on."), + edges: z + .boolean() + .default(true) + .describe("Whether the edges (graph-adjacency) lane is on."), + }) + .default({ + hot: true, + sparse: true, + dense: true, + 
tree: true, + edges: true, + }) + .describe( + "Per-lane on/off toggles for the v3 multi-lane retrieval fanout. All lanes on by default.", + ), + ks: z + .array(z.number({ error: "memory.v3.ks entries must be numbers" })) + .default([5, 10, 25, 50]) + .describe( + "Evaluation top-K cutoffs the v3 loop reports metrics at (e.g. recall@K).", + ), + }) + .default({ + enabled: false, + shadow: false, + passCap: 3, + breadthBudget: 6, + maxDepth: 6, + denseQuota: { activeDomain: 30, offDomain: 8 }, + lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true }, + ks: [5, 10, 25, 50], + }) + .describe( + "Memory v3 — multi-lane bounded-descent retrieval. Additive scaffolding, disabled by default.", + ); + +export type MemoryV3Config = z.infer; diff --git a/assistant/src/config/schemas/memory.ts b/assistant/src/config/schemas/memory.ts index 4a3822ebb06..4ba15e3b044 100644 --- a/assistant/src/config/schemas/memory.ts +++ b/assistant/src/config/schemas/memory.ts @@ -16,7 +16,7 @@ import { MemorySegmentationConfigSchema, QdrantConfigSchema, } from "./memory-storage.js"; -import { MemoryV2ConfigSchema } from "./memory-v2.js"; +import { MemoryV2ConfigSchema, MemoryV3ConfigSchema } from "./memory-v2.js"; export const MemoryConfigSchema = z .object({ @@ -50,6 +50,7 @@ export const MemoryConfigSchema = z MemorySummarizationConfigSchema.parse({}), ), v2: MemoryV2ConfigSchema.default(MemoryV2ConfigSchema.parse({})), + v3: MemoryV3ConfigSchema.default(MemoryV3ConfigSchema.parse({})), retrospective: MemoryRetrospectiveConfigSchema.default( MemoryRetrospectiveConfigSchema.parse({}), ), From 32394ad3d236a23b0266b9d7a7041b7a0ce9e254 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:45:39 -0400 Subject: [PATCH 03/21] feat(memory-v3): curated edge-expansion lane (#31973) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/edges.test.ts | 342 ++++++++++++++++++ assistant/src/memory/v3/edges.ts | 125 +++++++ 2 files changed, 467 insertions(+) 
create mode 100644 assistant/src/memory/v3/__tests__/edges.test.ts create mode 100644 assistant/src/memory/v3/edges.ts diff --git a/assistant/src/memory/v3/__tests__/edges.test.ts b/assistant/src/memory/v3/__tests__/edges.test.ts new file mode 100644 index 00000000000..ba2656fc4e6 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/edges.test.ts @@ -0,0 +1,342 @@ +/** + * Tests for `assistant/src/memory/v3/edges.ts` — the curated edge-expansion + * lane. + * + * Coverage matrix: + * - 1-hop and 2-hop outgoing expansion from a single seed. + * - Default hops (2) when omitted. + * - Seed excluded from its own `pulled`. + * - Multiple seeds: top-level `pulled` is the union; per-seed expansions + * attribute correctly; duplicate seeds collapse. + * - `extraAdjacency` merges with the curated graph during traversal. + * - `extraAdjacency` bridges across hops (curated → extra → curated). + * - Cycles in the curated graph (and via extraAdjacency) terminate, bounded + * by hops + the visited set. + * - Empty seeds / orphan seed → empty result. + * - Provider-free: the only I/O is reading fixture concept pages. + * + * Tests live in temp workspaces (mkdtemp) and never touch `~/.vellum/`. + */ + +import { existsSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import { invalidateEdgeIndex } from "../../v2/edge-index.js"; +import { writePage } from "../../v2/page-store.js"; +import type { ConceptPage } from "../../v2/types.js"; +import { expandEdges } from "../edges.js"; + +let workspaceDir: string; + +beforeEach(() => { + workspaceDir = mkdtempSync(join(tmpdir(), "vellum-memory-v3-edges-")); +}); + +afterEach(() => { + // The v2 edge index caches module-locally; clear it so the next test's fresh + // workspace doesn't read a stale snapshot. 
+ invalidateEdgeIndex(); + if (existsSync(workspaceDir)) { + rmSync(workspaceDir, { recursive: true, force: true }); + } +}); + +function makePage(slug: string, edges: string[] = []): ConceptPage { + return { + slug, + frontmatter: { edges, ref_files: [], ref_urls: [] }, + body: "", + }; +} + +/** Write a small chain/graph of pages by `{ slug: edges }` map. */ +async function writeGraph(graph: Record): Promise { + for (const [slug, edges] of Object.entries(graph)) { + await writePage(workspaceDir, makePage(slug, edges)); + } +} + +// --------------------------------------------------------------------------- +// Single-seed expansion +// --------------------------------------------------------------------------- + +describe("expandEdges — single seed", () => { + test("1-hop expansion pulls only direct out-neighbors", async () => { + // alice -> bob -> carol + await writeGraph({ alice: ["bob"], bob: ["carol"], carol: [] }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 1, + }); + + expect([...pulled].sort()).toEqual(["bob"]); + expect(expansions).toEqual([{ from: "alice", pulled: ["bob"] }]); + }); + + test("2-hop expansion pulls the 2-hop frontier", async () => { + // alice -> bob -> carol + await writeGraph({ alice: ["bob"], bob: ["carol"], carol: [] }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 2, + }); + + expect([...pulled].sort()).toEqual(["bob", "carol"]); + expect(expansions).toEqual([{ from: "alice", pulled: ["bob", "carol"] }]); + }); + + test("defaults to 2 hops when hops is omitted", async () => { + await writeGraph({ alice: ["bob"], bob: ["carol"], carol: ["dave"] }); + + const { pulled } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + }); + + // 2-hop reach from alice: bob (1) + carol (2); dave (3) is out of budget. 
+ expect([...pulled].sort()).toEqual(["bob", "carol"]); + }); + + test("excludes the seed itself from pulled", async () => { + // Self-referential-ish: a -> b -> a would put `a` back in reach, but the + // seed must never appear in its own pulled set. + await writeGraph({ alice: ["bob"], bob: ["alice"] }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 2, + }); + + expect(pulled.has("alice")).toBe(false); + expect([...pulled].sort()).toEqual(["bob"]); + expect(expansions[0]!.pulled).not.toContain("alice"); + }); + + test("orphan seed (no outgoing edges) yields an empty expansion", async () => { + await writeGraph({ alice: [] }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + }); + + expect(pulled.size).toBe(0); + expect(expansions).toEqual([{ from: "alice", pulled: [] }]); + }); + + test("edges are directed — incoming neighbors are never pulled", async () => { + // bob -> alice. Seeding alice must NOT pull bob (that's an in-edge). 
+ await writeGraph({ bob: ["alice"], alice: [] }); + + const { pulled } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 2, + }); + + expect(pulled.size).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Multiple seeds +// --------------------------------------------------------------------------- + +describe("expandEdges — multiple seeds", () => { + test("top-level pulled is the union across seeds", async () => { + await writeGraph({ + alice: ["bob"], + bob: [], + carol: ["dave"], + dave: [], + }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice", "carol"], + hops: 1, + }); + + expect([...pulled].sort()).toEqual(["bob", "dave"]); + expect(expansions).toEqual([ + { from: "alice", pulled: ["bob"] }, + { from: "carol", pulled: ["dave"] }, + ]); + }); + + test("a slug pulled by two seeds appears once in pulled, once per expansion", async () => { + // alice -> shared, carol -> shared + await writeGraph({ alice: ["shared"], carol: ["shared"], shared: [] }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice", "carol"], + hops: 1, + }); + + expect([...pulled]).toEqual(["shared"]); + expect(expansions).toEqual([ + { from: "alice", pulled: ["shared"] }, + { from: "carol", pulled: ["shared"] }, + ]); + }); + + test("duplicate seeds collapse to a single expansion entry", async () => { + await writeGraph({ alice: ["bob"], bob: [] }); + + const { expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice", "alice"], + hops: 1, + }); + + expect(expansions).toEqual([{ from: "alice", pulled: ["bob"] }]); + }); + + test("empty seed set yields an empty result", async () => { + await writeGraph({ alice: ["bob"], bob: [] }); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: [], + }); + + expect(pulled.size).toBe(0); + expect(expansions).toEqual([]); + }); +}); + +// 
--------------------------------------------------------------------------- +// extraAdjacency injection seam +// --------------------------------------------------------------------------- + +describe("expandEdges — extraAdjacency", () => { + test("merges injected out-edges with the curated graph", async () => { + // Curated: alice -> bob. Injected: alice -> extra. + await writeGraph({ alice: ["bob"], bob: [], extra: [] }); + + const extraAdjacency = new Map>([ + ["alice", new Set(["extra"])], + ]); + + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 1, + extraAdjacency, + }); + + expect([...pulled].sort()).toEqual(["bob", "extra"]); + expect(expansions).toEqual([{ from: "alice", pulled: ["bob", "extra"] }]); + }); + + test("injected edges bridge across hops (curated -> extra -> curated)", async () => { + // Curated: alice -> bob, learned -> dave. Injected: bob -> learned. + // 2-hop reach: bob (curated, hop 1) -> learned (extra, hop 2)... + // and learned -> dave is hop 3, out of a 2-hop budget. 
+ await writeGraph({ + alice: ["bob"], + bob: [], + learned: ["dave"], + dave: [], + }); + + const extraAdjacency = new Map>([ + ["bob", new Set(["learned"])], + ]); + + const twoHop = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 2, + extraAdjacency, + }); + expect([...twoHop.pulled].sort()).toEqual(["bob", "learned"]); + + const threeHop = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 3, + extraAdjacency, + }); + expect([...threeHop.pulled].sort()).toEqual(["bob", "dave", "learned"]); + }); + + test("absent extraAdjacency leaves the curated walk unchanged", async () => { + await writeGraph({ alice: ["bob"], bob: ["carol"], carol: [] }); + + const { pulled } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 2, + }); + + expect([...pulled].sort()).toEqual(["bob", "carol"]); + }); +}); + +// --------------------------------------------------------------------------- +// Cycle safety +// --------------------------------------------------------------------------- + +describe("expandEdges — cycle safety", () => { + test("a cycle in the curated graph terminates and does not loop", async () => { + // alice -> bob -> carol -> alice (3-cycle). + await writeGraph({ + alice: ["bob"], + bob: ["carol"], + carol: ["alice"], + }); + + // A generous hop budget would loop forever without a visited set. + const { pulled, expansions } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 100, + }); + + // Reaches bob and carol; alice (the seed) is excluded even though the + // cycle points back at it. + expect([...pulled].sort()).toEqual(["bob", "carol"]); + expect(expansions[0]!.pulled).not.toContain("alice"); + }); + + test("a cycle introduced via extraAdjacency also terminates", async () => { + // Curated: alice -> bob. Injected cycle: bob -> alice. 
+ await writeGraph({ alice: ["bob"], bob: [] }); + + const extraAdjacency = new Map>([ + ["bob", new Set(["alice"])], + ]); + + const { pulled } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 100, + extraAdjacency, + }); + + expect([...pulled].sort()).toEqual(["bob"]); + }); + + test("a self-loop edge does not loop or pull the seed", async () => { + // alice -> alice (self-loop is dropped by the index, but guard anyway). + await writeGraph({ alice: ["alice", "bob"], bob: [] }); + + const { pulled } = await expandEdges({ + workspaceDir, + seeds: ["alice"], + hops: 2, + }); + + expect(pulled.has("alice")).toBe(false); + expect([...pulled].sort()).toEqual(["bob"]); + }); +}); diff --git a/assistant/src/memory/v3/edges.ts b/assistant/src/memory/v3/edges.ts new file mode 100644 index 00000000000..90f1a5b4bba --- /dev/null +++ b/assistant/src/memory/v3/edges.ts @@ -0,0 +1,125 @@ +/** + * Memory v3 — Curated edge-expansion lane. + * + * Given a set of confident seed slugs, pull their 1–2 hop *outgoing* + * neighborhood from the curated `edges:` graph (each concept page's + * frontmatter `edges:` list, surfaced by v2's `getEdgeIndex`). This is a + * provider-free, read-only structural expansion — no LLM, no scoring. It + * answers "given that we're confident about A, what does A's curated graph + * say we should also pull in?". + * + * The optional `extraAdjacency` parameter is the seam a later PR uses to inject + * above-threshold *weighted auto-edges* (edges the system learned, not ones a + * human curated) WITHOUT modifying this module. When supplied, it is treated as + * additional out-edges merged with the curated graph during traversal: the + * effective out-neighborhood of a node is `curated[node] ∪ extraAdjacency[node]`. 
+ * + * The result is the union of every seed's reachable neighborhood (`pulled`, + * with seeds themselves excluded) plus a per-seed `EdgeExpansion[]` trace so a + * harness can attribute each pulled slug to the seed it came from. + */ + +import { getEdgeIndex, getReachable } from "../v2/edge-index.js"; +import type { EdgeExpansion } from "../v2/harness/trace.js"; + +/** Default hop budget. The design calls for a 1–2 hop walk; 2 is the ceiling. */ +const DEFAULT_HOPS = 2; + +export interface ExpandEdgesArgs { + workspaceDir: string; + /** Confident seed slugs to expand from. */ + seeds: Iterable; + /** Hop budget for the outgoing walk. Defaults to {@link DEFAULT_HOPS}. */ + hops?: number; + /** + * Extra *outgoing* adjacency (`from → Set`) merged with the curated graph + * during traversal. The injection seam for learned weighted auto-edges; this + * module never reads or thresholds weights itself — the caller pre-filters to + * above-threshold edges before passing them in. + */ + extraAdjacency?: ReadonlyMap>; +} + +export interface ExpandEdgesResult { + /** Union of every seed's reachable neighborhood, seeds excluded. */ + pulled: Set; + /** Per-seed attribution: which slugs each seed pulled in. */ + expansions: EdgeExpansion[]; +} + +/** + * BFS the outgoing neighborhood of `seed` within `hops`, walking the union of + * the curated `outgoing` adjacency and any `extraAdjacency`. Mirrors v2's + * `getReachable` semantics — start excluded, bounded by `hops` and a visited + * set so cycles can't loop — but over a merged adjacency view. 
+ */ +function reachableMerged( + curated: ReadonlyMap>, + extra: ReadonlyMap>, + seed: string, + hops: number, +): Set { + const result = new Set(); + if (hops <= 0) return result; + + const visited = new Set([seed]); + let frontier: string[] = [seed]; + + for (let depth = 0; depth < hops && frontier.length > 0; depth++) { + const next: string[] = []; + for (const node of frontier) { + const curatedNeighbors = curated.get(node); + const extraNeighbors = extra.get(node); + for (const neighbors of [curatedNeighbors, extraNeighbors]) { + if (!neighbors) continue; + for (const neighbor of neighbors) { + if (visited.has(neighbor)) continue; + visited.add(neighbor); + result.add(neighbor); + next.push(neighbor); + } + } + } + frontier = next; + } + + return result; +} + +/** + * Expand a set of confident seed slugs to their 1–2 hop curated neighborhood. + * + * Each seed produces one `EdgeExpansion { from, pulled }` entry (sorted slugs + * for deterministic output); the seed itself is never in its own `pulled`. The + * top-level `pulled` set is the union across all seeds — a slug pulled by more + * than one seed appears once there but in each contributing seed's expansion. + * + * Provider-free and read-only: the only I/O is `getEdgeIndex`, which reads + * concept-page frontmatter from disk (and caches module-locally in v2). + */ +export async function expandEdges( + args: ExpandEdgesArgs, +): Promise { + const { workspaceDir, seeds, hops = DEFAULT_HOPS, extraAdjacency } = args; + + const index = await getEdgeIndex(workspaceDir); + const pulled = new Set(); + const expansions: EdgeExpansion[] = []; + + // De-dupe seeds while preserving first-seen order for a stable trace. + const seenSeeds = new Set(); + + for (const seed of seeds) { + if (seenSeeds.has(seed)) continue; + seenSeeds.add(seed); + + const reachable = extraAdjacency + ? 
reachableMerged(index.outgoing, extraAdjacency, seed, hops) + : getReachable(index, seed, hops, "out"); + + expansions.push({ from: seed, pulled: [...reachable].sort() }); + for (const slug of reachable) pulled.add(slug); + } + + return { pulled, expansions }; +} From 5aa678b221ec8736220c78f8657b411922aa79aa Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:45:43 -0400 Subject: [PATCH 04/21] feat(memory-v3): write-path job types + config (no behavior) (#31974) Co-authored-by: Vellum Assistant --- .../schemas/__tests__/memory-v2.test.ts | 34 +++++++++++++++++ assistant/src/config/schemas/memory-v2.ts | 37 +++++++++++++++++++ .../__tests__/jobs-store-job-classes.test.ts | 20 +++++++++- assistant/src/memory/jobs-store.ts | 3 ++ 4 files changed, 93 insertions(+), 1 deletion(-) diff --git a/assistant/src/config/schemas/__tests__/memory-v2.test.ts b/assistant/src/config/schemas/__tests__/memory-v2.test.ts index 5ca74e76bba..acdbc7dbb26 100644 --- a/assistant/src/config/schemas/__tests__/memory-v2.test.ts +++ b/assistant/src/config/schemas/__tests__/memory-v2.test.ts @@ -224,6 +224,11 @@ describe("MemoryV3ConfigSchema", () => { denseQuota: { activeDomain: 30, offDomain: 8 }, lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true }, ks: [5, 10, 25, 50], + write: { + enabled: false, + consolidateIntervalMs: 3600000, + coactivation: false, + }, }); }); @@ -287,6 +292,30 @@ describe("MemoryV3ConfigSchema", () => { test("rejects non-number ks entries", () => { expect(() => MemoryV3ConfigSchema.parse({ ks: ["a"] })).toThrow(); }); + + test("parses the write subtree to safe off defaults when omitted", () => { + const parsed = MemoryV3ConfigSchema.parse({}); + expect(parsed.write).toEqual({ + enabled: false, + consolidateIntervalMs: 3600000, + coactivation: false, + }); + }); + + test("accepts a partial write override and defaults the rest", () => { + const parsed = MemoryV3ConfigSchema.parse({ write: { enabled: true } }); + 
expect(parsed.write).toEqual({ + enabled: true, + consolidateIntervalMs: 3600000, + coactivation: false, + }); + }); + + test("rejects a non-integer write.consolidateIntervalMs", () => { + expect(() => + MemoryV3ConfigSchema.parse({ write: { consolidateIntervalMs: 1.5 } }), + ).toThrow(); + }); }); describe("MemoryConfigSchema integration with v3 block", () => { @@ -298,6 +327,11 @@ describe("MemoryConfigSchema integration with v3 block", () => { expect(parsed.v3.passCap).toBe(3); expect(parsed.v3.lanes.dense).toBe(true); expect(parsed.v3.ks).toEqual([5, 10, 25, 50]); + expect(parsed.v3.write).toEqual({ + enabled: false, + consolidateIntervalMs: 3600000, + coactivation: false, + }); }); test("leaves pre-existing configs (no v3 key) otherwise unchanged", () => { diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts index 11360a89e31..433267cc05e 100644 --- a/assistant/src/config/schemas/memory-v2.ts +++ b/assistant/src/config/schemas/memory-v2.ts @@ -491,6 +491,38 @@ export const MemoryV3ConfigSchema = z .describe( "Evaluation top-K cutoffs the v3 loop reports metrics at (e.g. recall@K).", ), + write: z + .object({ + enabled: z + .boolean({ error: "memory.v3.write.enabled must be a boolean" }) + .default(false) + .describe( + "Whether v3 consolidation owns the shared-buffer drain + tree build. Off by default — v2 consolidation stays the sole buffer-drainer. Does NOT introduce a separate buffer.", + ), + consolidateIntervalMs: z + .number({ + error: "memory.v3.write.consolidateIntervalMs must be a number", + }) + .int("memory.v3.write.consolidateIntervalMs must be an integer") + .default(3600000) + .describe( + "Interval, in milliseconds, between scheduled v3 consolidation runs once the v3 write path owns the drain. 
Default 1 hour.", + ), + coactivation: z + .boolean({ error: "memory.v3.write.coactivation must be a boolean" }) + .default(false) + .describe( + "Whether v3 consolidation learns co-activation edges during the tree build. Off by default; consumed by a later PR.", + ), + }) + .default({ + enabled: false, + consolidateIntervalMs: 3600000, + coactivation: false, + }) + .describe( + "Memory v3 write-path configuration. All default-off scaffolding — controls whether v3 consolidation owns the shared-buffer drain + tree build. Consumed by later PRs.", + ), }) .default({ enabled: false, @@ -501,6 +533,11 @@ export const MemoryV3ConfigSchema = z denseQuota: { activeDomain: 30, offDomain: 8 }, lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true }, ks: [5, 10, 25, 50], + write: { + enabled: false, + consolidateIntervalMs: 3600000, + coactivation: false, + }, }) .describe( "Memory v3 — multi-lane bounded-descent retrieval. Additive scaffolding, disabled by default.", diff --git a/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts b/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts index 7950e93758b..09fc33779b9 100644 --- a/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts +++ b/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts @@ -1,6 +1,24 @@ import { describe, expect, test } from "bun:test"; -import { EMBED_JOB_TYPES, SLOW_LLM_JOB_TYPES } from "../jobs-store.js"; +import { + EMBED_JOB_TYPES, + type MemoryJobType, + SLOW_LLM_JOB_TYPES, +} from "../jobs-store.js"; + +describe("memory v3 job types", () => { + test("the v3 job-type literals are members of MemoryJobType", () => { + // Compile-time assignability is enforced by `tsc --noEmit`; the runtime + // assertion keeps the literals visible to the test runner. These types are + // inert scaffolding until their handlers land in later PRs. 
+ const v3JobTypes: MemoryJobType[] = [ + "memory_v3_consolidate", + "memory_v3_index_maintenance", + "memory_v3_edge_learning", + ]; + expect(new Set(v3JobTypes).size).toBe(3); + }); +}); describe("memory job classes", () => { test("EMBED_JOB_TYPES and SLOW_LLM_JOB_TYPES are disjoint", () => { diff --git a/assistant/src/memory/jobs-store.ts b/assistant/src/memory/jobs-store.ts index 930f20cdad6..fd672f75e88 100644 --- a/assistant/src/memory/jobs-store.ts +++ b/assistant/src/memory/jobs-store.ts @@ -44,6 +44,9 @@ export type MemoryJobType = | "memory_v2_migrate" | "memory_v2_reembed" | "memory_v2_activation_recompute" + | "memory_v3_consolidate" + | "memory_v3_index_maintenance" + | "memory_v3_edge_learning" | "memory_retrospective"; export const EMBED_JOB_TYPES: MemoryJobType[] = [ From 7bd5de2890493dd1aeed163906af1df585f7c48f Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:46:18 -0400 Subject: [PATCH 05/21] feat(memory-v3): gate decision (ready/more) + final selection (#31975) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/gate.test.ts | 344 ++++++++++++++++++ assistant/src/memory/v3/gate.ts | 275 ++++++++++++++ 2 files changed, 619 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/gate.test.ts create mode 100644 assistant/src/memory/v3/gate.ts diff --git a/assistant/src/memory/v3/__tests__/gate.test.ts b/assistant/src/memory/v3/__tests__/gate.test.ts new file mode 100644 index 00000000000..bbdd63b947c --- /dev/null +++ b/assistant/src/memory/v3/__tests__/gate.test.ts @@ -0,0 +1,344 @@ +/** + * Tests for `assistant/src/memory/v3/gate.ts`. + * + * Coverage matrix: + * - ready + selection → selection maps from candidates, in model order, and + * includes sticky slugs even when the model omits them. + * - more + questions → `decision.questions` surfaced; selection still returned. + * - more with no/blank questions → decision is `{ decision: "more" }` (no + * empty `questions` array). 
+ * - provider === null (no provider configured) → fail-safe: ready, all + * candidates selected, sticky present. + * - provider throws → fail-safe (ready, all candidates). + * - missing tool_use block → fail-safe (ready, all candidates). + * - tool input failing schema → fail-safe (ready, all candidates). + * - model selecting a slug outside the candidate set → dropped. + * - request shape: forced tool_choice on `decide_selection`, candidate set in + * the user message, abort signal forwarded. + * + * The provider is injected via `runGate({ provider })` — no real LLM, no + * network, no `mock.module`. `~/.vellum/` is never touched. + */ + +import { describe, expect, test } from "bun:test"; + +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, + ToolDefinition, +} from "../../../providers/types.js"; +import type { RetrievalInput } from "../../v2/harness/retriever.js"; +import { runGate } from "../gate.js"; + +// --------------------------------------------------------------------------- +// Helpers. +// --------------------------------------------------------------------------- + +interface ProviderCall { + messages: Message[]; + tools: ToolDefinition[] | undefined; + systemPrompt: string | undefined; + options: SendMessageOptions | undefined; +} + +/** + * A stub provider that records its calls and returns a fixed response. + * Honors an already-aborted signal by throwing an AbortError so signal + * forwarding can be asserted. + */ +function makeProvider( + response: ProviderResponse, + calls: ProviderCall[], +): Provider { + return { + name: "stub", + sendMessage: async (messages, tools, systemPrompt, options) => { + calls.push({ messages, tools, systemPrompt, options }); + if (options?.signal?.aborted) { + const err = new Error("aborted"); + err.name = "AbortError"; + throw err; + } + return response; + }, + }; +} + +/** A provider whose sendMessage always throws. 
*/ +function makeThrowingProvider(): Provider { + return { + name: "throwing-stub", + sendMessage: async () => { + throw new Error("boom"); + }, + }; +} + +function gateToolResponse(input: Record): ProviderResponse { + return { + model: "stub-model", + stopReason: "tool_use", + usage: { inputTokens: 0, outputTokens: 0 }, + content: [ + { type: "tool_use", id: "tu-1", name: "decide_selection", input }, + ], + }; +} + +/** A response with no tool_use block (e.g. the model emitted only text). */ +function textOnlyResponse(): ProviderResponse { + return { + model: "stub-model", + stopReason: "end_turn", + usage: { inputTokens: 0, outputTokens: 0 }, + content: [{ type: "text", text: "no tool here" }], + }; +} + +/** Minimal `RetrievalInput` — the gate only reads `nowText` and `signal`. */ +function makeInput(overrides?: Partial): RetrievalInput { + return { + workspaceDir: "/tmp/does-not-matter", + recentTurnPairs: [], + nowText: "2026-05-25 10:00 PT", + priorEverInjected: [], + config: {} as unknown as RetrievalInput["config"], + ...overrides, + }; +} + +// --------------------------------------------------------------------------- +// Tests. +// --------------------------------------------------------------------------- + +describe("runGate — ready decision", () => { + test("maps model selection to slugs in order and includes sticky", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + // Model selects b, a (its own order). Sticky `c` is omitted by the + // model but must survive in the final selection. + gateToolResponse({ decision: "ready", selected_slugs: ["b", "a"] }), + calls, + ); + + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b", "c"]), + sticky: new Set(["c"]), + passNumber: 1, + provider, + }); + + expect(result.decision).toEqual({ decision: "ready" }); + // Model order preserved (b, a), then omitted sticky appended (c). 
+ expect(result.selectedSlugs).toEqual(["b", "a", "c"]); + expect(calls).toHaveLength(1); + }); + + test("forces tool_choice on decide_selection and surfaces candidates", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + gateToolResponse({ decision: "ready", selected_slugs: ["a"] }), + calls, + ); + + await runGate({ + input: makeInput({ nowText: "NOW-MARKER" }), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 3, + provider, + }); + + const call = calls[0]; + expect(call.options?.config?.tool_choice).toEqual({ + type: "tool", + name: "decide_selection", + }); + expect(call.options?.config?.callSite).toBe("memoryV3Gate"); + expect(call.tools?.[0].name).toBe("decide_selection"); + const userText = call.messages[0].content + .map((b) => (b.type === "text" ? b.text : "")) + .join("\n"); + expect(userText).toContain("NOW-MARKER"); + expect(userText).toContain("a"); + expect(userText).toContain("b"); + }); + + test("drops a model-selected slug outside the candidate set", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + gateToolResponse({ decision: "ready", selected_slugs: ["a", "ghost"] }), + calls, + ); + + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 1, + provider, + }); + + expect(result.selectedSlugs).toEqual(["a"]); + }); + + test("forwards an abort signal to the provider call", async () => { + const calls: ProviderCall[] = []; + const controller = new AbortController(); + controller.abort(); + const provider = makeProvider( + gateToolResponse({ decision: "ready", selected_slugs: ["a"] }), + calls, + ); + + // Aborted signal makes the stub throw → gate fails open (ready, all). 
+ const result = await runGate({ + input: makeInput({ signal: controller.signal }), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 1, + provider, + }); + + expect(calls[0].options?.signal).toBe(controller.signal); + expect(result.decision).toEqual({ decision: "ready" }); + expect(result.selectedSlugs).toEqual(["a", "b"]); + }); +}); + +describe("runGate — more decision", () => { + test("surfaces generated follow-up questions", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + gateToolResponse({ + decision: "more", + selected_slugs: ["a"], + questions: ["What is the user's deadline?", "Who else is involved?"], + }), + calls, + ); + + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 1, + provider, + }); + + expect(result.decision).toEqual({ + decision: "more", + questions: ["What is the user's deadline?", "Who else is involved?"], + }); + // Selection is still returned alongside the "more" verdict. 
+ expect(result.selectedSlugs).toEqual(["a"]); + }); + + test("omits questions array when the model gave none (or only blanks)", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + gateToolResponse({ + decision: "more", + selected_slugs: ["a"], + questions: [" ", ""], + }), + calls, + ); + + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a"]), + sticky: new Set(), + passNumber: 1, + provider, + }); + + expect(result.decision).toEqual({ decision: "more" }); + }); + + test("preserves sticky even on a more decision", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + gateToolResponse({ + decision: "more", + selected_slugs: ["a"], + questions: ["follow-up?"], + }), + calls, + ); + + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "sticky-page"]), + sticky: new Set(["sticky-page"]), + passNumber: 1, + provider, + }); + + expect(result.selectedSlugs).toContain("sticky-page"); + }); +}); + +describe("runGate — fail-safe", () => { + test("provider === null selects all candidates with sticky and ready", async () => { + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b", "c"]), + sticky: new Set(["c"]), + passNumber: 1, + provider: null, + }); + + expect(result.decision).toEqual({ decision: "ready" }); + expect([...result.selectedSlugs].sort()).toEqual(["a", "b", "c"]); + expect(result.selectedSlugs).toContain("c"); + }); + + test("provider throw falls back to ready + all candidates", async () => { + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 1, + provider: makeThrowingProvider(), + }); + + expect(result.decision).toEqual({ decision: "ready" }); + expect([...result.selectedSlugs].sort()).toEqual(["a", "b"]); + }); + + test("missing tool_use block falls back to ready + all candidates", async () => { + const calls: ProviderCall[] = []; + 
const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 1, + provider: makeProvider(textOnlyResponse(), calls), + }); + + expect(result.decision).toEqual({ decision: "ready" }); + expect([...result.selectedSlugs].sort()).toEqual(["a", "b"]); + }); + + test("schema-mismatched tool input falls back to ready + all candidates", async () => { + const calls: ProviderCall[] = []; + const result = await runGate({ + input: makeInput(), + candidates: new Set(["a", "b"]), + sticky: new Set(), + passNumber: 1, + // `decision` is required; missing it fails the Zod schema. + provider: makeProvider( + gateToolResponse({ selected_slugs: ["a"] }), + calls, + ), + }); + + expect(result.decision).toEqual({ decision: "ready" }); + expect([...result.selectedSlugs].sort()).toEqual(["a", "b"]); + }); +}); diff --git a/assistant/src/memory/v3/gate.ts b/assistant/src/memory/v3/gate.ts new file mode 100644 index 00000000000..4abae9452ef --- /dev/null +++ b/assistant/src/memory/v3/gate.ts @@ -0,0 +1,275 @@ +/** + * Memory v3 — selection gate. + * + * The gate is the final step of one retrieval pass. After the scouts, the tree + * walk, the edge expansion, and the sticky carry-over have each contributed + * candidate page slugs, the gate makes one capable LLM call over the *unioned* + * candidate set and decides: + * + * - **ready** — finalize the selection and inject for the next reply, or + * - **more** — the candidates don't yet cover the turn; emit follow-up + * questions that seed the next pass. These questions are the gate's own + * *generated* queries (a refined sub-question), NOT a replay of the + * original user message — the loop feeds them back to the scouts/tree on + * the next iteration. + * + * The gate also returns the final ordered `selectedSlugs` (the order the model + * returned, with sticky slugs guaranteed present). 
Sticky pages are never + * dropped: they were injected on a prior turn and removing them mid-conversation + * would silently amnesia the assistant, so we union them back in even when the + * model omits them. + * + * Scope — brief generation is deferred. The full v3 design pairs the selection + * with a ~1000-token voice brief, but that brief is only consumed when v3 is + * actually injected (a later cutover). In shadow mode the harness injects v2 + * and only compares selections, so this module produces the selection + + * `GateDecision` only — matching what the harness trace already models. The + * brief-generation seam is marked below; do not build voice synthesis here. + * + * Fail-safe. If no provider is configured or the provider call errors/returns + * an unusable response, the gate fails *open*: it returns + * `decision: { decision: "ready" }` and selects every candidate. A retrieval + * loop that can't reach the model should still inject what it found rather than + * inject nothing. + * + * This module is currently unwired — a later PR composes it into the loop. + */ + +import { z } from "zod"; + +import { + extractToolUse, + getConfiguredProvider, +} from "../../providers/provider-send-message.js"; +import type { + Message, + Provider, + ToolDefinition, +} from "../../providers/types.js"; +import { getLogger } from "../../util/logger.js"; +import type { RetrievalInput } from "../v2/harness/retriever.js"; +import type { GateDecision } from "../v2/harness/trace.js"; + +const log = getLogger("memory-v3-gate"); + +/** Tool name forced via `tool_choice`. Shared constant so tests can match it. */ +const GATE_TOOL_NAME = "decide_selection"; + +/** + * Arguments to one gate invocation. + * + * `candidates` is the accumulated candidate set for this pass — the union of + * scouts-kept, tree pages, edge-pulled, and sticky slugs. 
`sticky` is the + * subset that was injected on a prior turn and must survive: it is always a + * subset of `candidates` in practice, but the gate unions it back into both + * the prompt and the final selection defensively. + */ +export interface RunGateArgs { + input: RetrievalInput; + candidates: Set; + sticky: Set; + passNumber: number; + /** + * Provider override seam for tests. Production leaves this unset and the + * gate resolves `getConfiguredProvider("memoryV3Gate")`. `null` is distinct + * from `undefined`: passing `null` simulates "no provider configured" and + * exercises the fail-safe path without resolving the real registry. + */ + provider?: Provider | null; +} + +export interface RunGateResult { + decision: GateDecision; + /** Final page slugs in the model's returned order; sticky guaranteed present. */ + selectedSlugs: string[]; +} + +/** + * Build the forced tool definition. `selected_slugs` is the ordered final + * selection; `decision` is the ready/more verdict; `questions` carries the + * generated follow-up queries on "more" (ignored on "ready"). Mirrors the + * forced-tool pattern of v2's `select_pages_to_inject`. + */ +function buildGateTool(candidateSlugs: readonly string[]): ToolDefinition { + return { + name: GATE_TOOL_NAME, + description: + "Decide whether the accumulated candidate pages are sufficient to answer " + + "the next turn. Return decision='ready' with the final ordered selection " + + "when the candidates cover the turn; return decision='more' with one or " + + "more generated follow-up questions (NOT the original message) to seed " + + "another retrieval pass when coverage is incomplete.", + input_schema: { + type: "object", + properties: { + decision: { type: "string", enum: ["ready", "more"] }, + selected_slugs: { + type: "array", + items: { type: "string", enum: [...candidateSlugs] }, + description: + "Final ordered page slugs to inject. 
Choose only from the candidate set.", + }, + questions: { + type: "array", + items: { type: "string" }, + description: + "When decision='more', the generated follow-up questions seeding the next pass.", + }, + }, + required: ["decision"], + }, + }; +} + +const GateToolResultSchema = z.object({ + decision: z.enum(["ready", "more"]), + selected_slugs: z.array(z.string()).optional(), + questions: z.array(z.string()).optional(), +}); + +/** + * Order a slug selection: keep the model's returned order, restricted to the + * candidate set, then append any sticky slugs the model omitted (sticky is + * never dropped). De-duplicates while preserving first-seen order. + */ +function orderSelection( + modelSlugs: readonly string[], + candidates: Set, + sticky: Set, +): string[] { + const seen = new Set(); + const out: string[] = []; + for (const slug of modelSlugs) { + if (!candidates.has(slug)) continue; // model can only pick from candidates + if (seen.has(slug)) continue; + seen.add(slug); + out.push(slug); + } + for (const slug of sticky) { + if (seen.has(slug)) continue; + seen.add(slug); + out.push(slug); + } + return out; +} + +/** + * Fail-safe result: inject every candidate and declare the pass ready. Used + * when the provider is unavailable or the call cannot produce a usable + * decision. Ordering puts sticky last via `orderSelection` with an empty + * model selection, so candidates come first then any sticky not already in + * the set. + */ +function failSafe(candidates: Set, sticky: Set): RunGateResult { + return { + decision: { decision: "ready" }, + selectedSlugs: orderSelection([...candidates], candidates, sticky), + }; +} + +/** + * Run the gate for one pass. + * + * Makes one forced-tool LLM call over the candidate set and maps the result to + * a `GateDecision` plus the final ordered selection. Sticky slugs are always + * present in the selection. 
Any failure (no provider, provider throw, missing + * tool_use, schema mismatch) falls back to selecting all candidates with a + * "ready" decision. + */ +export async function runGate(args: RunGateArgs): Promise { + const { input, candidates, sticky, passNumber } = args; + + const candidateSlugs = [...candidates]; + + // Resolve the provider. A `provider` key in args (including explicit `null`) + // takes precedence so tests inject a stub; production omits it and resolves + // the configured `memoryV3Gate` call site. + const provider = + args.provider !== undefined + ? args.provider + : await getConfiguredProvider("memoryV3Gate"); + + if (!provider) { + log.warn("memoryV3Gate provider unavailable; gate failing open (ready)"); + return failSafe(candidates, sticky); + } + + const systemPrompt = + "You are the final selection gate for a memory-retrieval loop. You are " + + "given the candidate concept pages gathered so far for the current turn. " + + "Decide whether they are sufficient to answer the next reply."; + + const stickySlugs = [...sticky]; + const userMsg: Message = { + role: "user", + content: [ + { + type: "text", + text: `\n${input.nowText}\n`, + }, + { + type: "text", + text: + `${passNumber}\n\n` + + `\n${stickySlugs.join("\n")}\n\n\n` + + `\n${candidateSlugs.join("\n")}\n`, + }, + ], + }; + + const gateTool = buildGateTool(candidateSlugs); + + let response; + try { + response = await provider.sendMessage([userMsg], [gateTool], systemPrompt, { + config: { + callSite: "memoryV3Gate" as const, + tool_choice: { type: "tool" as const, name: GATE_TOOL_NAME }, + }, + ...(input.signal ? 
{ signal: input.signal } : {}), + }); + } catch (err) { + log.warn({ err }, "Gate provider call threw; failing open (ready)"); + return failSafe(candidates, sticky); + } + + const toolBlock = extractToolUse(response); + if (!toolBlock || toolBlock.name !== GATE_TOOL_NAME) { + log.warn( + { stopReason: response.stopReason }, + "Gate model returned no decide_selection tool_use; failing open (ready)", + ); + return failSafe(candidates, sticky); + } + + const parsed = GateToolResultSchema.safeParse(toolBlock.input); + if (!parsed.success) { + log.warn( + { error: parsed.error.message }, + "Gate tool input did not match schema; failing open (ready)", + ); + return failSafe(candidates, sticky); + } + + const selectedSlugs = orderSelection( + parsed.data.selected_slugs ?? [], + candidates, + sticky, + ); + + if (parsed.data.decision === "more") { + const questions = (parsed.data.questions ?? []).filter( + (q) => q.trim().length > 0, + ); + const decision: GateDecision = + questions.length > 0 + ? { decision: "more", questions } + : { decision: "more" }; + return { decision, selectedSlugs }; + } + + // brief generation lands at cutover (P5) — shadow mode injects v2, so this + // gate produces only the selection + decision. Do NOT synthesize a voice + // brief here. 
+ return { decision: { decision: "ready" }, selectedSlugs }; +} From 5df253f2103bd8353299879ad94d5285d874e130 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:47:24 -0400 Subject: [PATCH 06/21] feat(memory-v3): tree index with DAG adjacency + cache (#31976) Co-authored-by: Vellum Assistant --- .../memory/v3/__tests__/tree-index.test.ts | 280 ++++++++++++++++++ assistant/src/memory/v3/tree-index.ts | 237 +++++++++++++++ assistant/src/memory/v3/tree-store.ts | 3 + 3 files changed, 520 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/tree-index.test.ts create mode 100644 assistant/src/memory/v3/tree-index.ts diff --git a/assistant/src/memory/v3/__tests__/tree-index.test.ts b/assistant/src/memory/v3/__tests__/tree-index.test.ts new file mode 100644 index 00000000000..536e1586c00 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/tree-index.test.ts @@ -0,0 +1,280 @@ +/** + * Tests for `assistant/src/memory/v3/tree-index.ts`. + * + * Coverage matrix: + * - getTreeIndex builds correct DAG adjacency on a fixture tree + * (root → 2 sub-nodes → page leaves; one node referenced by two parents). + * - childrenByNode preserves children order and parses page:/node: refs. + * - parentsByNode / pageParents reverse adjacency, incl. a 2-parent node. + * - root detection: reserved `_root` wins; single-parentless fallback; + * ambiguous fallback warns + picks deterministically. + * - dangling refs retained (structural-only build). + * - malformed child refs dropped. + * - cache hit returns the same object; invalidateTreeIndex forces a rebuild. + * - writeNode / deleteNode invalidate the cache. + * + * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`. 
+ */ + +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import { getTreeIndex, invalidateTreeIndex } from "../tree-index.js"; +import { + deleteNode, + getTreeDir, + ROOT_NODE_ID, + writeNode, +} from "../tree-store.js"; +import type { TreeNode } from "../types.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +let workspaceDir: string; + +beforeEach(() => { + workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-index-test-")); +}); + +afterEach(() => { + invalidateTreeIndex(); + rmSync(workspaceDir, { recursive: true, force: true }); +}); + +function node(id: string, children: string[], body = `body ${id}`): TreeNode { + return { id, frontmatter: { children }, body }; +} + +/** + * Seed a fixture DAG: + * _root → node:people, node:projects + * people → page:alice, node:shared + * projects → page:apollo, node:shared ← shared has two parents (DAG) + * shared → page:shared-page + * + * writeNode invalidates the cache as a side effect, so we invalidate once more + * at the end to leave a clean slate for the test body's first getTreeIndex. 
+ */ +async function seedFixture(): Promise { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["node:people", "node:projects"]), + ); + await writeNode(workspaceDir, node("people", ["page:alice", "node:shared"])); + await writeNode( + workspaceDir, + node("projects", ["page:apollo", "node:shared"]), + ); + await writeNode(workspaceDir, node("shared", ["page:shared-page"])); + invalidateTreeIndex(); +} + +// --------------------------------------------------------------------------- +// DAG adjacency +// --------------------------------------------------------------------------- + +describe("getTreeIndex — DAG adjacency", () => { + test("builds forward adjacency preserving children order and ref kinds", async () => { + await seedFixture(); + const index = await getTreeIndex(workspaceDir); + + expect(index.childrenByNode.get(ROOT_NODE_ID)).toEqual([ + { kind: "node", ref: "people" }, + { kind: "node", ref: "projects" }, + ]); + expect(index.childrenByNode.get("people")).toEqual([ + { kind: "page", ref: "alice" }, + { kind: "node", ref: "shared" }, + ]); + expect(index.childrenByNode.get("shared")).toEqual([ + { kind: "page", ref: "shared-page" }, + ]); + }); + + test("builds node reverse adjacency incl. a node with two parents", async () => { + await seedFixture(); + const index = await getTreeIndex(workspaceDir); + + expect(index.parentsByNode.get("people")).toEqual(new Set([ROOT_NODE_ID])); + expect(index.parentsByNode.get("projects")).toEqual( + new Set([ROOT_NODE_ID]), + ); + // `shared` is referenced by both `people` and `projects` → DAG. 
+ expect(index.parentsByNode.get("shared")).toEqual( + new Set(["people", "projects"]), + ); + }); + + test("builds page reverse adjacency keyed by page slug", async () => { + await seedFixture(); + const index = await getTreeIndex(workspaceDir); + + expect(index.pageParents.get("alice")).toEqual(new Set(["people"])); + expect(index.pageParents.get("apollo")).toEqual(new Set(["projects"])); + expect(index.pageParents.get("shared-page")).toEqual(new Set(["shared"])); + }); + + test("populates nodes map with every readable node", async () => { + await seedFixture(); + const index = await getTreeIndex(workspaceDir); + + expect([...index.nodes.keys()].sort()).toEqual([ + ROOT_NODE_ID, + "people", + "projects", + "shared", + ]); + expect(index.nodes.get("shared")?.body).toBe("body shared"); + }); + + test("retains dangling refs (structural-only build, no existence check)", async () => { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["node:missing-node", "page:missing-page"]), + ); + invalidateTreeIndex(); + const index = await getTreeIndex(workspaceDir); + + // Forward edge retained even though no such node/page file exists. + expect(index.childrenByNode.get(ROOT_NODE_ID)).toEqual([ + { kind: "node", ref: "missing-node" }, + { kind: "page", ref: "missing-page" }, + ]); + // Reverse adjacency retained too — validation (a later PR) reports these. 
+ expect(index.parentsByNode.get("missing-node")).toEqual( + new Set([ROOT_NODE_ID]), + ); + expect(index.pageParents.get("missing-page")).toEqual( + new Set([ROOT_NODE_ID]), + ); + }); + + test("drops malformed child refs (no page:/node: prefix)", async () => { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["page:ok", "bogus-no-prefix", "node:", "page:"]), + ); + invalidateTreeIndex(); + const index = await getTreeIndex(workspaceDir); + + expect(index.childrenByNode.get(ROOT_NODE_ID)).toEqual([ + { kind: "page", ref: "ok" }, + ]); + }); +}); + +// --------------------------------------------------------------------------- +// Root detection +// --------------------------------------------------------------------------- + +describe("getTreeIndex — root detection", () => { + test("prefers the reserved _root node when present", async () => { + await seedFixture(); + const index = await getTreeIndex(workspaceDir); + expect(index.root).toBe(ROOT_NODE_ID); + }); + + test("falls back to the single parentless node when no _root", async () => { + await writeNode(workspaceDir, node("top", ["node:child"])); + await writeNode(workspaceDir, node("child", [])); + invalidateTreeIndex(); + const index = await getTreeIndex(workspaceDir); + expect(index.root).toBe("top"); + }); + + test("ambiguous root warns and picks ASCII-smallest deterministically", async () => { + // Two parentless nodes, no _root → ambiguous. 
+ await writeNode(workspaceDir, node("zeta", [])); + await writeNode(workspaceDir, node("alpha", [])); + invalidateTreeIndex(); + const index = await getTreeIndex(workspaceDir); + expect(index.root).toBe("alpha"); + }); + + test("empty workspace yields _root", async () => { + const index = await getTreeIndex(workspaceDir); + expect(index.root).toBe(ROOT_NODE_ID); + expect(index.nodes.size).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Cache behavior +// --------------------------------------------------------------------------- + +describe("getTreeIndex — cache", () => { + test("cache hit returns the same object reference", async () => { + await seedFixture(); + const first = await getTreeIndex(workspaceDir); + const second = await getTreeIndex(workspaceDir); + expect(second).toBe(first); + }); + + test("invalidateTreeIndex forces a rebuild", async () => { + await seedFixture(); + const first = await getTreeIndex(workspaceDir); + invalidateTreeIndex(workspaceDir); + const second = await getTreeIndex(workspaceDir); + expect(second).not.toBe(first); + // Same structural content though. 
+ expect([...second.nodes.keys()].sort()).toEqual( + [...first.nodes.keys()].sort(), + ); + }); + + test("scoped invalidation only clears the matching workspace", async () => { + await seedFixture(); + const first = await getTreeIndex(workspaceDir); + invalidateTreeIndex("/some/other/workspace"); + const second = await getTreeIndex(workspaceDir); + expect(second).toBe(first); + }); + + test("writeNode invalidates the cache", async () => { + await seedFixture(); + const first = await getTreeIndex(workspaceDir); + await writeNode(workspaceDir, node("newcomer", [])); + const second = await getTreeIndex(workspaceDir); + expect(second).not.toBe(first); + expect(second.nodes.has("newcomer")).toBe(true); + }); + + test("deleteNode invalidates the cache", async () => { + await seedFixture(); + const first = await getTreeIndex(workspaceDir); + expect(first.nodes.has("shared")).toBe(true); + await deleteNode(workspaceDir, "shared"); + const second = await getTreeIndex(workspaceDir); + expect(second).not.toBe(first); + expect(second.nodes.has("shared")).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// Read failures +// --------------------------------------------------------------------------- + +describe("getTreeIndex — robustness", () => { + test("ignores a missing tree dir (fresh workspace) → empty index", async () => { + // No nodes written; getTreeDir not even created. + const index = await getTreeIndex(workspaceDir); + expect(index.nodes.size).toBe(0); + expect(index.childrenByNode.size).toBe(0); + expect(index.parentsByNode.size).toBe(0); + expect(index.pageParents.size).toBe(0); + }); + + test("tree dir present but empty → empty index", async () => { + // Materialize the dir without any node files. 
+ rmSync(getTreeDir(workspaceDir), { recursive: true, force: true }); + await writeNode(workspaceDir, node("only", [])); + await deleteNode(workspaceDir, "only"); + invalidateTreeIndex(); + const index = await getTreeIndex(workspaceDir); + expect(index.nodes.size).toBe(0); + }); +}); diff --git a/assistant/src/memory/v3/tree-index.ts b/assistant/src/memory/v3/tree-index.ts new file mode 100644 index 00000000000..b47d43dac9e --- /dev/null +++ b/assistant/src/memory/v3/tree-index.ts @@ -0,0 +1,237 @@ +/** + * Memory v3 — Tree index (DAG build + cache). + * + * The v3 tree is a DAG *overlay* over the flat `memory/concepts/` pages: every + * node carries an ordered `children` list whose entries are either + * `"page:"` (a leaf concept page, canonical content untouched by v3) or + * `"node:"` (a sub-node in the tree). A page or node may be referenced by + * more than one parent — hence DAG, not tree. + * + * This module scans every node on disk and materializes that edge list into + * forward and reverse adjacency maps so downstream routing/validation can walk + * the graph without re-reading the filesystem: + * - `childrenByNode` — node id → ordered child refs (forward edges). + * - `parentsByNode` — node id → set of parent node ids (reverse edges for + * `node:` children). + * - `pageParents` — page slug → set of parent node ids (reverse edges for + * `page:` children). + * + * The build is **structural only**: it never verifies that a referenced page + * or node actually exists. Dangling refs are retained in the adjacency maps so + * a later validation pass can report them. Root detection prefers the reserved + * `_root` id; absent that it picks the single node with no parents (warning and + * picking deterministically if the choice is ambiguous). + * + * The build is cached module-locally per `workspaceDir`, mirroring + * `../v2/page-index.ts`. 
Callers must invalidate via `invalidateTreeIndex` when + * tree nodes change — `tree-store.ts`'s `writeNode` / `deleteNode` already do. + */ + +import { getLogger } from "../../util/logger.js"; +import { listNodes, readNode, ROOT_NODE_ID } from "./tree-store.js"; +import type { TreeNode } from "./types.js"; + +const log = getLogger("memory-v3-tree-index"); + +/** Prefix marking a child ref that targets a leaf concept page. */ +const PAGE_REF_PREFIX = "page:"; + +/** Prefix marking a child ref that targets a sub-node in the tree. */ +const NODE_REF_PREFIX = "node:"; + +/** + * A single parsed `children` entry. `kind` distinguishes a leaf concept page + * (`"page"`) from a sub-node (`"node"`); `ref` is the bare slug or node id with + * the `page:` / `node:` prefix stripped. + */ +export interface ChildRef { + kind: "page" | "node"; + ref: string; +} + +/** + * Snapshot of the v3 tree DAG for one workspace. + * + * `nodes` is every readable node keyed by id. The three adjacency maps are + * derived from each node's `children`: + * - `childrenByNode` — forward edges, preserving `children` order. + * - `parentsByNode` — reverse edges restricted to `node:` children. + * - `pageParents` — reverse edges restricted to `page:` children, keyed by + * page slug. + * + * `root` is the entry-point node id (`_root` when present). Dangling refs (a + * `node:`/`page:` target with no on-disk file) are retained throughout — + * validation, not the index build, is responsible for surfacing them. + */ +export interface TreeIndex { + nodes: Map; + childrenByNode: Map>; + parentsByNode: Map>; + pageParents: Map>; + root: string; +} + +interface CachedIndex { + workspaceDir: string; + index: TreeIndex; +} + +let cache: CachedIndex | null = null; + +/** + * Parse a raw `children` entry into a {@link ChildRef}. 
Returns `null` for any + * entry that does not carry a recognized `page:` / `node:` prefix or whose ref + * body is empty — those are malformed and dropped (with a warn) rather than + * faithfully threaded through adjacency. + */ +function parseChildRef(raw: string): ChildRef | null { + if (raw.startsWith(PAGE_REF_PREFIX)) { + const ref = raw.slice(PAGE_REF_PREFIX.length); + return ref.length > 0 ? { kind: "page", ref } : null; + } + if (raw.startsWith(NODE_REF_PREFIX)) { + const ref = raw.slice(NODE_REF_PREFIX.length); + return ref.length > 0 ? { kind: "node", ref } : null; + } + return null; +} + +/** Append `parent` to the parent-set for `key`, creating the set on demand. */ +function addParent( + map: Map>, + key: string, + parent: string, +): void { + let parents = map.get(key); + if (!parents) { + parents = new Set(); + map.set(key, parents); + } + parents.add(parent); +} + +/** + * Pick the root node id from the materialized adjacency. Prefers the reserved + * {@link ROOT_NODE_ID} when a node with that id exists. Otherwise the root is + * the single node with no parents; if several nodes are parentless the choice + * is ambiguous, so warn and pick the ASCII-smallest id for determinism. With no + * nodes at all the root is `_root` (the well-known handle a migration authors + * first), matching the empty-workspace contract. + */ +function pickRoot( + nodes: Map, + parentsByNode: Map>, +): string { + if (nodes.has(ROOT_NODE_ID)) { + return ROOT_NODE_ID; + } + + const parentless = [...nodes.keys()].filter( + (id) => !parentsByNode.has(id) || parentsByNode.get(id)!.size === 0, + ); + parentless.sort(); + + if (parentless.length === 1) { + return parentless[0]; + } + if (parentless.length === 0) { + return ROOT_NODE_ID; + } + log.warn( + { parentless }, + "Ambiguous tree root — no '_root' node and multiple parentless nodes; picking ASCII-smallest deterministically", + ); + return parentless[0]; +} + +/** + * Return a `TreeIndex` for `workspaceDir`. 
Cached module-locally; the cache is + * invalidated by `invalidateTreeIndex` (called by `tree-store.ts` hooks when + * nodes change). + * + * Cold builds list every node and read them in parallel, dropping any whose + * read rejects with a warn so one broken node never blocks the rest of the + * index. Each readable node's `children` is parsed into {@link ChildRef}s and + * threaded into forward (`childrenByNode`) and reverse (`parentsByNode` / + * `pageParents`) adjacency. The build is structural only — referenced + * pages/nodes are never verified to exist; dangling refs are retained for a + * later validation pass. + */ +export async function getTreeIndex(workspaceDir: string): Promise { + if (cache && cache.workspaceDir === workspaceDir) { + return cache.index; + } + + const ids = await listNodes(workspaceDir); + + // Read every node in parallel; nodes whose read rejects are dropped with a + // warn so a single broken node never blocks the rest of the index. + const settled = await Promise.allSettled( + ids.map((id) => readNode(workspaceDir, id)), + ); + + const nodes = new Map(); + const childrenByNode = new Map>(); + const parentsByNode = new Map>(); + const pageParents = new Map>(); + + for (let i = 0; i < settled.length; i++) { + const result = settled[i]; + const id = ids[i]; + if (result.status === "rejected") { + log.warn( + { id, err: result.reason }, + "Dropping tree node from index — read failed", + ); + continue; + } + const node = result.value; + // `readNode` returns null only on ENOENT; a node listed by `listNodes` + // that vanishes between list and read is a benign race — drop it silently. + if (!node) continue; + nodes.set(id, node); + } + + // Build adjacency in a second pass so every node is registered first — that + // keeps a deterministic, list-order iteration independent of read timing. 
+ for (const node of nodes.values()) { + const childRefs: ChildRef[] = []; + for (const raw of node.frontmatter.children) { + const parsed = parseChildRef(raw); + if (!parsed) { + log.warn( + { id: node.id, raw }, + "Dropping malformed child ref — expected 'page:' or 'node:'", + ); + continue; + } + childRefs.push(parsed); + const reverse = parsed.kind === "node" ? parentsByNode : pageParents; + addParent(reverse, parsed.ref, node.id); + } + childrenByNode.set(node.id, childRefs); + } + + const root = pickRoot(nodes, parentsByNode); + + const index: TreeIndex = { + nodes, + childrenByNode, + parentsByNode, + pageParents, + root, + }; + cache = { workspaceDir, index }; + return index; +} + +/** + * Clear the cached index. Pass `workspaceDir` to scope invalidation to a + * specific cache entry; omit it to clear unconditionally. + */ +export function invalidateTreeIndex(workspaceDir?: string): void { + if (!cache) return; + if (workspaceDir === undefined || cache.workspaceDir === workspaceDir) { + cache = null; + } +} diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts index be13e489f8e..55dc023f2fd 100644 --- a/assistant/src/memory/v3/tree-store.ts +++ b/assistant/src/memory/v3/tree-store.ts @@ -40,6 +40,7 @@ import { dirname, join, relative, sep } from "node:path"; import { parse as parseYaml, stringify as stringifyYaml } from "yaml"; import { FRONTMATTER_REGEX } from "../../skills/frontmatter.js"; +import { invalidateTreeIndex } from "./tree-index.js"; import { type TreeNode, TreeNodeFrontmatterSchema } from "./types.js"; /** Filename suffix for tree nodes. 
*/ @@ -296,6 +297,7 @@ export async function writeNode( await rm(tmpPath, { force: true }).catch(() => {}); throw err; } + invalidateTreeIndex(workspaceDir); } /** @@ -367,4 +369,5 @@ export async function deleteNode( } throw err; } + invalidateTreeIndex(workspaceDir); } From 00d7812a76e2893e311db79529fc433c59961598 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:49:31 -0400 Subject: [PATCH 07/21] feat(memory-v3): always-on scouts over the v2 substrate (#31977) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/scouts.test.ts | 390 +++++++++++++++++ assistant/src/memory/v3/scouts.ts | 392 ++++++++++++++++++ 2 files changed, 782 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/scouts.test.ts create mode 100644 assistant/src/memory/v3/scouts.ts diff --git a/assistant/src/memory/v3/__tests__/scouts.test.ts b/assistant/src/memory/v3/__tests__/scouts.test.ts new file mode 100644 index 00000000000..1cc34678ba7 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/scouts.test.ts @@ -0,0 +1,390 @@ +/** + * Tests for `assistant/src/memory/v3/scouts.ts`. + * + * The scout lanes read the v2 substrate (page index, injection-event EMA, + * Qdrant hybrid query, BM25, dense embed + calibration). Every one of those is + * stubbed via `mock.module` so the suite needs no real Qdrant, embedding + * backend, or LLM — and the SQLite-backed EMA is replaced by a hand-fed score + * map, so the injected `db` is an opaque sentinel the lane never dereferences. + * + * Coverage: + * - hot lane: ranks the EMA score map desc, marks every hit sticky. + * - sparse lane: reads sparseScore, ranks desc, flags near-exact hits + * sticky + tree-bypass. + * - dense lane: per-subtree quota caps off-domain hits; MMR diversifies. + * - lane toggles: each disabled lane is fully suppressed (no ScoutResult). + * - empty query / empty corpus short-circuits. + * - honors AbortSignal. 
+ */ + +import { beforeEach, describe, expect, mock, test } from "bun:test"; + +import type { PageIndex } from "../../v2/page-index.js"; +import type { ConceptPageQueryResult } from "../../v2/qdrant.js"; + +// --------------------------------------------------------------------------- +// Substrate stubs — installed before importing the module under test. +// --------------------------------------------------------------------------- + +// Per-call programmable substrate state. Each test rewires these before +// calling runScouts; the mock factories below close over the live refs. +let injectionScores = new Map(); +let pageSlugs: string[] = []; +let hybridHits: ConceptPageQueryResult[] = []; +let embedCalls = 0; + +mock.module("../../v2/injection-events.js", () => ({ + computeInjectionScores: () => injectionScores, +})); + +mock.module("../../v2/page-index.js", () => ({ + getPageIndex: async (): Promise => ({ + entries: pageSlugs.map((slug, i) => ({ + id: i + 1, + slug, + summary: "", + edges: [], + modifiedAt: 0, + })), + bySlug: new Map(), + byId: new Map(), + rendered: "", + }), +})); + +mock.module("../../v2/qdrant.js", () => ({ + hybridQueryConceptPages: async (): Promise => + hybridHits, +})); + +mock.module("../../v2/sparse-bm25.js", () => ({ + // Non-empty indices so the sparse/dense lanes don't short-circuit on an + // "empty query embedding". The values are irrelevant — the stubbed Qdrant + // query ignores them and returns `hybridHits` directly. + generateBm25QueryEmbedding: (text: string) => + text.trim().length > 0 + ? 
{ indices: [1], values: [1] } + : { indices: [], values: [] }, +})); + +mock.module("../../embedding-backend.js", () => ({ + embedWithBackend: async () => { + embedCalls += 1; + return { provider: "local", model: "stub", vectors: [[0.1, 0.2, 0.3]] }; + }, +})); + +mock.module("../../anisotropy.js", () => ({ + applyCorrectionIfCalibrated: async (vec: number[]) => vec, +})); + +const { runScouts } = await import("../scouts.js"); +import type { RetrievalInput } from "../../v2/harness/retriever.js"; +import type { ScoutDeps } from "../scouts.js"; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +const DB_SENTINEL = { __opaque: true } as unknown as ScoutDeps["db"]; +const DEPS: ScoutDeps = { db: DB_SENTINEL }; + +type Lanes = { hot: boolean; sparse: boolean; dense: boolean }; + +function makeInput(opts?: { + userMessage?: string; + nowText?: string; + lanes?: Partial; + denseQuota?: { activeDomain: number; offDomain: number }; + signal?: AbortSignal; +}): RetrievalInput { + const lanes = { + hot: true, + sparse: true, + dense: true, + tree: true, + edges: true, + ...opts?.lanes, + }; + const config = { + memory: { + v3: { + lanes, + denseQuota: opts?.denseQuota ?? { activeDomain: 30, offDomain: 8 }, + }, + }, + } as unknown as RetrievalInput["config"]; + return { + workspaceDir: "/tmp/ws", + recentTurnPairs: [ + { assistantMessage: "", userMessage: opts?.userMessage ?? "tell me" }, + ], + nowText: opts?.nowText ?? 
"now context", + priorEverInjected: [], + config, + signal: opts?.signal, + }; +} + +function hit( + slug: string, + scores: Partial, +): ConceptPageQueryResult { + return { slug, ...scores }; +} + +beforeEach(() => { + injectionScores = new Map(); + pageSlugs = []; + hybridHits = []; + embedCalls = 0; +}); + +// --------------------------------------------------------------------------- +// Hot lane +// --------------------------------------------------------------------------- + +describe("runScouts — hot lane", () => { + test("ranks EMA scores desc and marks every hit sticky", async () => { + pageSlugs = ["people/alice", "work/proj", "essentials"]; + injectionScores = new Map([ + ["work/proj", 0.2], + ["people/alice", 0.9], + ]); + + const { scouts, sticky } = await runScouts( + makeInput({ lanes: { sparse: false, dense: false } }), + DEPS, + ); + + const hot = scouts.find((s) => s.lane === "hot"); + expect(hot?.slugs).toEqual(["people/alice", "work/proj"]); + expect(hot?.scoreBySlug).toEqual({ "people/alice": 0.9, "work/proj": 0.2 }); + expect([...sticky].sort()).toEqual(["people/alice", "work/proj"]); + }); + + test("empty corpus yields no hot ScoutResult", async () => { + pageSlugs = []; + const { scouts } = await runScouts( + makeInput({ lanes: { sparse: false, dense: false } }), + DEPS, + ); + expect(scouts.find((s) => s.lane === "hot")).toBeUndefined(); + }); + + test("no EMA events yields no hot ScoutResult", async () => { + pageSlugs = ["a", "b"]; + injectionScores = new Map(); + const { scouts, sticky } = await runScouts( + makeInput({ lanes: { sparse: false, dense: false } }), + DEPS, + ); + expect(scouts.find((s) => s.lane === "hot")).toBeUndefined(); + expect(sticky.size).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Sparse lane +// --------------------------------------------------------------------------- + +describe("runScouts — sparse lane", () => { + test("reads sparseScore, ranks desc, 
flags near-exact sticky + bypass", async () => { + hybridHits = [ + hit("docs/readme", { sparseScore: 4.0 }), + hit("docs/api", { sparseScore: 3.9 }), // within 90% of top -> near-exact + hit("misc/note", { sparseScore: 1.0 }), // below threshold + hit("dense/only", { denseScore: 0.8 }), // no sparseScore -> dropped + ]; + + const { scouts, sticky, bypass } = await runScouts( + makeInput({ lanes: { hot: false, dense: false } }), + DEPS, + ); + + const sparse = scouts.find((s) => s.lane === "sparse"); + expect(sparse?.slugs).toEqual(["docs/readme", "docs/api", "misc/note"]); + // Near-exact: readme (top) and api (>= 90% of top). Not misc/note. + expect([...sticky].sort()).toEqual(["docs/api", "docs/readme"]); + expect([...bypass].sort()).toEqual(["docs/api", "docs/readme"]); + }); + + test("no sparse hits yields no sparse ScoutResult", async () => { + hybridHits = [hit("dense/only", { denseScore: 0.5 })]; + const { scouts, sticky, bypass } = await runScouts( + makeInput({ lanes: { hot: false, dense: false } }), + DEPS, + ); + expect(scouts.find((s) => s.lane === "sparse")).toBeUndefined(); + expect(sticky.size).toBe(0); + expect(bypass.size).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Dense lane +// --------------------------------------------------------------------------- + +describe("runScouts — dense lane", () => { + test("embeds the query and emits dense hits ranked by denseScore", async () => { + hybridHits = [ + hit("work/a", { denseScore: 0.9 }), + hit("work/b", { denseScore: 0.7 }), + ]; + const { scouts } = await runScouts( + makeInput({ lanes: { hot: false, sparse: false } }), + DEPS, + ); + expect(embedCalls).toBe(1); + const dense = scouts.find((s) => s.lane === "dense"); + expect(dense?.slugs[0]).toBe("work/a"); + expect(dense?.scoreBySlug).toEqual({ "work/a": 0.9, "work/b": 0.7 }); + }); + + test("per-subtree quota caps off-domain hits", async () => { + // Active domain = top hit's domain = 
"work". Off-domain quota = 1. + hybridHits = [ + hit("work/a", { denseScore: 0.99 }), + hit("work/b", { denseScore: 0.98 }), + hit("work/c", { denseScore: 0.97 }), + hit("people/x", { denseScore: 0.5 }), // off-domain, claims the 1 slot + hit("notes/y", { denseScore: 0.4 }), // off-domain, over quota -> dropped + hit("misc/z", { denseScore: 0.3 }), // off-domain, over quota -> dropped + ]; + const { scouts } = await runScouts( + makeInput({ + lanes: { hot: false, sparse: false }, + denseQuota: { activeDomain: 30, offDomain: 1 }, + }), + DEPS, + ); + const dense = scouts.find((s) => s.lane === "dense"); + const slugs = dense?.slugs ?? []; + // All three work/* survive (active quota 30); exactly one off-domain hit. + expect(slugs.filter((s) => s.startsWith("work/")).length).toBe(3); + const offDomain = slugs.filter((s) => !s.startsWith("work/")); + expect(offDomain).toEqual(["people/x"]); + }); + + test("active-domain quota caps same-subtree hits", async () => { + hybridHits = [ + hit("work/a", { denseScore: 0.99 }), + hit("work/b", { denseScore: 0.98 }), + hit("work/c", { denseScore: 0.97 }), // over active quota 2 -> dropped + hit("people/x", { denseScore: 0.5 }), + ]; + const { scouts } = await runScouts( + makeInput({ + lanes: { hot: false, sparse: false }, + denseQuota: { activeDomain: 2, offDomain: 8 }, + }), + DEPS, + ); + const slugs = scouts.find((s) => s.lane === "dense")?.slugs ?? []; + expect(slugs.filter((s) => s.startsWith("work/")).length).toBe(2); + expect(slugs).toContain("people/x"); + }); + + test("MMR interleaves subtrees rather than emitting a same-subtree run", async () => { + // Five work/* then one people/* of comparable relevance. Pure score order + // would bury people/x last; MMR should pull it forward once work/ is + // over-represented. 
+ hybridHits = [ + hit("work/a", { denseScore: 0.95 }), + hit("work/b", { denseScore: 0.94 }), + hit("work/c", { denseScore: 0.93 }), + hit("work/d", { denseScore: 0.92 }), + hit("people/x", { denseScore: 0.9 }), + ]; + const { scouts } = await runScouts( + makeInput({ + lanes: { hot: false, sparse: false }, + denseQuota: { activeDomain: 30, offDomain: 8 }, + }), + DEPS, + ); + const slugs = scouts.find((s) => s.lane === "dense")?.slugs ?? []; + // people/x is not stranded at the very end despite the lowest raw score. + expect(slugs.indexOf("people/x")).toBeLessThan(slugs.length - 1); + }); + + test("no dense hits yields no dense ScoutResult", async () => { + hybridHits = [hit("sparse/only", { sparseScore: 2.0 })]; + const { scouts } = await runScouts( + makeInput({ lanes: { hot: false, sparse: false } }), + DEPS, + ); + expect(scouts.find((s) => s.lane === "dense")).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// Lane toggles +// --------------------------------------------------------------------------- + +describe("runScouts — lane toggles", () => { + test("disabling a lane suppresses its ScoutResult", async () => { + pageSlugs = ["a"]; + injectionScores = new Map([["a", 1]]); + hybridHits = [hit("docs/a", { sparseScore: 2.0, denseScore: 0.5 })]; + + const all = await runScouts(makeInput(), DEPS); + expect(all.scouts.map((s) => s.lane).sort()).toEqual([ + "dense", + "hot", + "sparse", + ]); + + const hotOnly = await runScouts( + makeInput({ lanes: { sparse: false, dense: false } }), + DEPS, + ); + expect(hotOnly.scouts.map((s) => s.lane)).toEqual(["hot"]); + // Dense embed must not run when the dense lane is off. 
+ embedCalls = 0; + await runScouts(makeInput({ lanes: { dense: false } }), DEPS); + expect(embedCalls).toBe(0); + }); + + test("all lanes off yields empty result", async () => { + pageSlugs = ["a"]; + injectionScores = new Map([["a", 1]]); + hybridHits = [hit("docs/a", { sparseScore: 2.0, denseScore: 0.5 })]; + const { scouts, sticky, bypass } = await runScouts( + makeInput({ lanes: { hot: false, sparse: false, dense: false } }), + DEPS, + ); + expect(scouts).toEqual([]); + expect(sticky.size).toBe(0); + expect(bypass.size).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Misc +// --------------------------------------------------------------------------- + +describe("runScouts — misc", () => { + test("empty query text skips sparse and dense lanes", async () => { + pageSlugs = ["a"]; + injectionScores = new Map([["a", 1]]); + hybridHits = [hit("docs/a", { sparseScore: 2.0, denseScore: 0.5 })]; + const { scouts } = await runScouts( + makeInput({ userMessage: " ", nowText: " " }), + DEPS, + ); + // Hot lane is query-independent and still fires; sparse/dense are gated off. 
+ expect(scouts.map((s) => s.lane)).toEqual(["hot"]); + expect(embedCalls).toBe(0); + }); + + test("honors an already-aborted signal", async () => { + const controller = new AbortController(); + controller.abort(); + pageSlugs = ["a"]; + injectionScores = new Map([["a", 1]]); + await expect( + runScouts(makeInput({ signal: controller.signal }), DEPS), + ).rejects.toThrow(); + }); +}); diff --git a/assistant/src/memory/v3/scouts.ts b/assistant/src/memory/v3/scouts.ts new file mode 100644 index 00000000000..03e99ff529f --- /dev/null +++ b/assistant/src/memory/v3/scouts.ts @@ -0,0 +1,392 @@ +// --------------------------------------------------------------------------- +// Memory v3 — Always-on scout lanes (hot / sparse / dense) +// --------------------------------------------------------------------------- +// +// The v3 retrieval loop opens each pass by fanning out a small set of cheap, +// always-on "scout" lanes over the v2 read-substrate. Scouts surface candidate +// concept-page slugs from three complementary signals before any LLM judging +// (the dense judge lives in a later PR) or tree descent runs: +// +// - hot: corpus-global access-frequency EMA via `computeInjectionScores`. +// Retriever-agnostic — v2 keeps writing `memory_v2_injection_events`, +// so a page the user has been touching is "hot" regardless of which +// retriever surfaced it. Hits are marked **sticky** so the downstream +// gate keeps them in the running. +// - sparse: BM25 keyword match. Near-exact (high-score) hits are both +// **sticky** and **tree-bypass** — a literal keyword hit is a strong +// enough signal that we shouldn't make the slug earn its place by +// walking the tree. +// - dense: embedding-similarity match, then an asymmetric per-subtree quota +// (generous active-domain slice, thin off-domain slice) plus MMR for +// diversity so a single dominant subtree can't crowd out the slate. +// +// Each lane is individually toggleable via `config.memory.v3.lanes`. 
// This module
// performs **no** LLM calls and writes nothing — it is a pure read over the v2
// substrate. A later PR composes `runScouts` into the full descent loop.

import type { AssistantConfig } from "../../config/types.js";
import { applyCorrectionIfCalibrated } from "../anisotropy.js";
import type { DrizzleDb } from "../db-connection.js";
import { embedWithBackend } from "../embedding-backend.js";
import type { RetrievalInput } from "../v2/harness/retriever.js";
import type { ScoutResult } from "../v2/harness/trace.js";
import { computeInjectionScores } from "../v2/injection-events.js";
import { getPageIndex } from "../v2/page-index.js";
import { hybridQueryConceptPages } from "../v2/qdrant.js";
import { generateBm25QueryEmbedding } from "../v2/sparse-bm25.js";

/** Result of running the always-on scout fanout for one pass. */
export interface RunScoutsResult {
  /** Per-lane contributions, one entry per *enabled* lane that produced hits. */
  scouts: ScoutResult[];
  /**
   * Slugs the downstream gate should keep in the running regardless of later
   * scoring — hot hits and near-exact sparse hits.
   */
  sticky: Set<string>;
  /**
   * Slugs strong enough (near-exact sparse) to skip the tree-descent gate
   * entirely. A subset of `sticky`.
   */
  bypass: Set<string>;
}

/** Substrate dependencies injected for testability. */
export interface ScoutDeps {
  db: DrizzleDb;
}

// ---------------------------------------------------------------------------
// Tunables
// ---------------------------------------------------------------------------

/**
 * Per-lane hit cap before quota/diversity post-processing. The lanes are
 * always-on and run every pass, so a generous-but-bounded cap keeps the dense
 * Qdrant round-trip and the per-lane bookkeeping cheap while still giving the
 * quota/MMR step enough raw candidates to choose from.
+ */ +const LANE_QUERY_LIMIT = 100; + +/** + * Sparse score at or above which a hit is treated as **near-exact** — sticky + * and tree-bypass. BM25 scores are unbounded above and corpus-relative, so the + * threshold is taken relative to the top sparse hit in the same pass rather + * than as a fixed magnitude: a hit within this fraction of the best sparse + * score for the query is "near-exact". A lone strong hit (it is its own max) + * always qualifies. + */ +const SPARSE_NEAR_EXACT_FRACTION = 0.9; + +/** + * MMR trade-off: `λ · relevance − (1 − λ) · redundancy`. Closer to 1 favors + * raw dense relevance; lower values push harder for subtree diversity. 0.7 + * keeps relevance in the driver's seat while still breaking up runs of + * same-subtree hits. + */ +const DENSE_MMR_LAMBDA = 0.7; + +// --------------------------------------------------------------------------- +// Public entry point +// --------------------------------------------------------------------------- + +/** + * Run the always-on scout lanes for one retrieval pass. + * + * `queryText` is derived from the last user turn in `input.recentTurnPairs` + * joined with `input.nowText` — the same shape the v2 router/activation path + * embeds. Disabled lanes (per `config.memory.v3.lanes`) are skipped entirely: + * no substrate call, no `ScoutResult` entry. + * + * Honors `input.signal` — aborts between lanes and around the dense embed. + */ +export async function runScouts( + input: RetrievalInput, + deps: ScoutDeps, +): Promise { + const { config, signal } = input; + const lanes = config.memory.v3.lanes; + const queryText = deriveQueryText(input); + + const scouts: ScoutResult[] = []; + const sticky = new Set(); + const bypass = new Set(); + + // Hot lane — corpus-global EMA over the full slug universe. Cheap (single + // SQL pass) so it runs first and seeds sticky. 
+ if (lanes.hot) { + signal?.throwIfAborted(); + const hot = await runHotLane(input, deps); + if (hot) { + scouts.push(hot); + for (const slug of hot.slugs) sticky.add(slug); + } + } + + // Sparse lane — BM25 keyword match. Near-exact hits seed sticky + bypass. + if (lanes.sparse && queryText.length > 0) { + signal?.throwIfAborted(); + const sparse = await runSparseLane(queryText, signal); + if (sparse) { + scouts.push(sparse.result); + for (const slug of sparse.nearExact) { + sticky.add(slug); + bypass.add(slug); + } + } + } + + // Dense lane — embedding similarity, then per-subtree quota + MMR. + if (lanes.dense && queryText.length > 0) { + signal?.throwIfAborted(); + const dense = await runDenseLane(queryText, config, signal); + if (dense) scouts.push(dense); + } + + return { scouts, sticky, bypass }; +} + +// --------------------------------------------------------------------------- +// Query-text derivation +// --------------------------------------------------------------------------- + +/** + * Build the scout query text from the just-arrived user turn plus the NOW + * context. Mirrors the v2 activation path (`selectCandidates`): join the + * non-empty channels with a newline. The last `recentTurnPairs` entry's + * `userMessage` is the turn being routed. + */ +function deriveQueryText(input: RetrievalInput): string { + const lastPair = input.recentTurnPairs[input.recentTurnPairs.length - 1]; + const userText = lastPair?.userMessage ?? 
""; + return [userText, input.nowText] + .filter((s) => s.trim().length > 0) + .join("\n") + .trim(); +} + +// --------------------------------------------------------------------------- +// Hot lane +// --------------------------------------------------------------------------- + +async function runHotLane( + input: RetrievalInput, + deps: ScoutDeps, +): Promise { + const index = await getPageIndex(input.workspaceDir); + const allSlugs = index.entries.map((e) => e.slug); + if (allSlugs.length === 0) return null; + + const now = Date.now(); + const scores = computeInjectionScores(deps.db, allSlugs, now); + if (scores.size === 0) return null; + + // Slugs with no events in the read window are omitted by + // `computeInjectionScores`, so every entry here has score > 0. + const ranked = [...scores.entries()].sort((a, b) => sortByScoreDesc(a, b)); + const slugs = ranked.map(([slug]) => slug); + const scoreBySlug = Object.fromEntries(ranked); + return { lane: "hot", slugs, scoreBySlug }; +} + +// --------------------------------------------------------------------------- +// Sparse lane +// --------------------------------------------------------------------------- + +async function runSparseLane( + queryText: string, + signal: AbortSignal | undefined, +): Promise<{ result: ScoutResult; nearExact: string[] } | null> { + const sparse = generateBm25QueryEmbedding(queryText); + if (sparse.indices.length === 0) return null; + + // Dense channel intentionally empty — this lane is BM25-only. `skipSparse: + // false` keeps the sparse round-trip on; we read `sparseScore` and ignore + // any dense scores the query happens to surface. 
+ const hits = await hybridQueryConceptPages( + [], + sparse, + LANE_QUERY_LIMIT, + undefined, + { + skipSparse: false, + }, + ); + signal?.throwIfAborted(); + + const scored = hits + .map((hit) => ({ slug: hit.slug, score: hit.sparseScore })) + .filter((h): h is { slug: string; score: number } => h.score !== undefined) + .sort((a, b) => b.score - a.score); + if (scored.length === 0) return null; + + const slugs = scored.map((h) => h.slug); + const scoreBySlug = Object.fromEntries(scored.map((h) => [h.slug, h.score])); + + // Near-exact: within SPARSE_NEAR_EXACT_FRACTION of the top sparse score. + const topScore = scored[0].score; + const threshold = topScore * SPARSE_NEAR_EXACT_FRACTION; + const nearExact = scored + .filter((h) => topScore > 0 && h.score >= threshold) + .map((h) => h.slug); + + return { result: { lane: "sparse", slugs, scoreBySlug }, nearExact }; +} + +// --------------------------------------------------------------------------- +// Dense lane +// --------------------------------------------------------------------------- + +async function runDenseLane( + queryText: string, + config: AssistantConfig, + signal: AbortSignal | undefined, +): Promise { + // Embed + apply anisotropy correction, mirroring v2 activation's read path. 
+ const embedded = await embedWithBackend(config, [queryText], { signal }); + const dense = await applyCorrectionIfCalibrated( + embedded.vectors[0], + embedded.provider, + embedded.model, + ); + signal?.throwIfAborted(); + + const sparse = generateBm25QueryEmbedding(queryText); + const hits = await hybridQueryConceptPages(dense, sparse, LANE_QUERY_LIMIT); + signal?.throwIfAborted(); + + const scored = hits + .map((hit) => ({ slug: hit.slug, score: hit.denseScore })) + .filter((h): h is { slug: string; score: number } => h.score !== undefined) + .sort((a, b) => b.score - a.score); + if (scored.length === 0) return null; + + const selected = applyQuotaAndMmr(scored, config.memory.v3); + if (selected.length === 0) return null; + + const slugs = selected.map((h) => h.slug); + const scoreBySlug = Object.fromEntries( + selected.map((h) => [h.slug, h.score]), + ); + return { lane: "dense", slugs, scoreBySlug }; +} + +interface ScoredSlug { + slug: string; + score: number; +} + +/** + * Apply the asymmetric per-subtree quota then MMR re-ranking to the dense hits. + * + * Quota: the conversation's **active domain** is the top-path segment of the + * single highest-scoring dense hit. That domain gets a generous slice + * (`denseQuota.activeDomain`); every other (off-)domain shares a thin slice + * (`denseQuota.offDomain`) so exploratory hits aren't fully starved but can't + * dominate either. Quotas are per-domain caps applied in score-descending + * order. + * + * MMR: re-rank the quota-passing pool by `λ · relevance − (1 − λ) · redundancy` + * where redundancy is how represented the candidate's subtree already is in the + * selected slate. Without per-page embeddings we use subtree co-membership as + * the diversity signal — same subtree ⇒ maximally redundant. This breaks up + * runs of same-subtree hits without an extra Qdrant round-trip. 
+ */ +function applyQuotaAndMmr( + scored: readonly ScoredSlug[], + v3: AssistantConfig["memory"]["v3"], +): ScoredSlug[] { + if (scored.length === 0) return []; + + const activeDomain = domainOf(scored[0].slug); + const { activeDomain: activeQuota, offDomain: offQuota } = v3.denseQuota; + + // Per-subtree quota: active domain gets activeQuota slots; all off-domain + // hits compete for a shared offQuota pool. Walk in score-desc order so the + // strongest hits claim each quota first. + const perDomainCount = new Map(); + let offDomainCount = 0; + const quotaPassing: ScoredSlug[] = []; + for (const hit of scored) { + const domain = domainOf(hit.slug); + if (domain === activeDomain) { + const used = perDomainCount.get(domain) ?? 0; + if (used >= activeQuota) continue; + perDomainCount.set(domain, used + 1); + } else { + if (offDomainCount >= offQuota) continue; + offDomainCount += 1; + } + quotaPassing.push(hit); + } + + return mmrReorder(quotaPassing, DENSE_MMR_LAMBDA); +} + +/** + * Greedy MMR over a score-ranked pool using subtree co-membership as the + * redundancy signal. Each pick maximizes + * `λ · normalizedScore − (1 − λ) · subtreeShareInSelected`, so once a subtree + * is well-represented its remaining members are deprioritized in favor of + * fresh subtrees of comparable relevance. Pure / deterministic. + */ +function mmrReorder(pool: readonly ScoredSlug[], lambda: number): ScoredSlug[] { + if (pool.length <= 1) return [...pool]; + + // Normalize relevance to [0, 1] by the pool max so it shares a scale with the + // redundancy term (also [0, 1]). All-zero scores collapse to pure diversity. + const maxScore = pool[0].score; + const relevance = (hit: ScoredSlug): number => + maxScore > 0 ? 
hit.score / maxScore : 0; + + const remaining = [...pool]; + const selected: ScoredSlug[] = []; + const selectedDomainCount = new Map(); + + while (remaining.length > 0) { + let bestIdx = 0; + let bestMmr = -Infinity; + for (let i = 0; i < remaining.length; i++) { + const hit = remaining[i]; + const domain = domainOf(hit.slug); + const share = + selected.length === 0 + ? 0 + : (selectedDomainCount.get(domain) ?? 0) / selected.length; + const mmr = lambda * relevance(hit) - (1 - lambda) * share; + if (mmr > bestMmr) { + bestMmr = mmr; + bestIdx = i; + } + } + const [pick] = remaining.splice(bestIdx, 1); + selected.push(pick); + const domain = domainOf(pick.slug); + selectedDomainCount.set(domain, (selectedDomainCount.get(domain) ?? 0) + 1); + } + + return selected; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * The "domain" (subtree) of a page slug — its top path segment. Slugs are + * path-relative with `/` separators (e.g. `people/alice` → `people`); a flat + * slug (`essentials`) is its own domain. + */ +function domainOf(slug: string): string { + const slash = slug.indexOf("/"); + return slash === -1 ? slug : slug.slice(0, slash); +} + +/** Score-desc with a stable slug-ASCII tiebreak. */ +function sortByScoreDesc( + a: readonly [string, number], + b: readonly [string, number], +): number { + if (b[1] !== a[1]) return b[1] - a[1]; + return a[0] < b[0] ? -1 : a[0] > b[0] ? 
1 : 0; +} From eabe526e7204c7aaf2930a21dd008b5e04e81cde Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:53:50 -0400 Subject: [PATCH 08/21] feat(memory-v3): compose node index from children + routing hints (#31978) Co-authored-by: Vellum Assistant --- .../v3/__tests__/index-composition.test.ts | 233 ++++++++++++++++++ assistant/src/memory/v3/index-composition.ts | 113 +++++++++ 2 files changed, 346 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/index-composition.test.ts create mode 100644 assistant/src/memory/v3/index-composition.ts diff --git a/assistant/src/memory/v3/__tests__/index-composition.test.ts b/assistant/src/memory/v3/__tests__/index-composition.test.ts new file mode 100644 index 00000000000..aa08205e35f --- /dev/null +++ b/assistant/src/memory/v3/__tests__/index-composition.test.ts @@ -0,0 +1,233 @@ +/** + * Tests for `assistant/src/memory/v3/index-composition.ts`. + * + * `composeNodeIndex` is a pure function over an already-built `TreeIndex` and + * `PageIndex`, so these tests hand-build both fixtures (no filesystem / no I/O) + * and assert on the rendered string. + * + * Coverage matrix: + * - mixed node:/page: children render one summary line each, in authored + * order, with the node's routing hints appended as a trailer. + * - a `page:` ref whose slug is absent from the index is silently omitted. + * - a `node:` ref whose id is absent from the tree is silently omitted. + * - empty / missing children → just the routing hints, or the empty string + * when there are none either. + * - a `node:` child with no summary falls back to the first non-empty body + * line; with neither, only its header is emitted. 
+ */ + +import { describe, expect, test } from "bun:test"; + +import type { PageIndex, PageIndexEntry } from "../../v2/page-index.js"; +import { composeNodeIndex } from "../index-composition.js"; +import type { ChildRef, TreeIndex } from "../tree-index.js"; +import type { TreeNode } from "../types.js"; + +// --------------------------------------------------------------------------- +// Fixture builders +// --------------------------------------------------------------------------- + +function treeNode( + id: string, + opts: { summary?: string; routing_hints?: string; body?: string } = {}, +): TreeNode { + return { + id, + frontmatter: { + children: [], + summary: opts.summary, + routing_hints: opts.routing_hints, + }, + body: opts.body ?? "", + }; +} + +/** + * Build a `TreeIndex` from a list of nodes and an explicit child-ref list for + * the node under test. Only the fields `composeNodeIndex` reads (`nodes`, + * `childrenByNode`) are populated; the reverse-adjacency maps are left empty. 
+ */ +function treeIndex( + nodes: TreeNode[], + childrenByNode: Record, +): TreeIndex { + return { + nodes: new Map(nodes.map((n) => [n.id, n])), + childrenByNode: new Map(Object.entries(childrenByNode)), + parentsByNode: new Map(), + pageParents: new Map(), + root: "_root", + }; +} + +function pageEntry(slug: string, summary: string): PageIndexEntry { + return { id: 1, slug, summary, edges: [], modifiedAt: 0 }; +} + +function pageIndex(entries: PageIndexEntry[]): PageIndex { + return { + entries, + bySlug: new Map(entries.map((e) => [e.slug, e])), + byId: new Map(entries.map((e) => [e.id, e])), + rendered: "", + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("composeNodeIndex", () => { + test("composes mixed node:/page: children in authored order with routing hints", () => { + const tree = treeIndex( + [ + treeNode("people", { + summary: "People you know", + routing_hints: "for work contacts see node:colleagues", + }), + treeNode("colleagues", { summary: "Work relationships" }), + ], + { + people: [ + { kind: "node", ref: "colleagues" }, + { kind: "page", ref: "alice" }, + ], + }, + ); + const pages = pageIndex([ + pageEntry("alice", "Alice — neighbor and friend"), + ]); + + const block = composeNodeIndex("people", tree, pages); + + expect(block).toBe( + [ + "[node:colleagues] Work relationships", + "[page:alice] Alice — neighbor and friend", + "Routing hints: for work contacts see node:colleagues", + ].join("\n"), + ); + }); + + test("emits children in authored order regardless of map insertion", () => { + const tree = treeIndex( + [treeNode("a", { summary: "Node A" }), treeNode("root", {})], + { + root: [ + { kind: "page", ref: "zeta" }, + { kind: "node", ref: "a" }, + { kind: "page", ref: "beta" }, + ], + }, + ); + const pages = pageIndex([ + pageEntry("beta", "Beta page"), + pageEntry("zeta", "Zeta page"), + ]); + + 
const block = composeNodeIndex("root", tree, pages); + + expect(block).toBe( + [ + "[page:zeta] Zeta page", + "[node:a] Node A", + "[page:beta] Beta page", + ].join("\n"), + ); + }); + + test("silently omits a page ref absent from the index", () => { + const tree = treeIndex([treeNode("root", {})], { + root: [ + { kind: "page", ref: "present" }, + { kind: "page", ref: "missing" }, + ], + }); + const pages = pageIndex([pageEntry("present", "I exist")]); + + const block = composeNodeIndex("root", tree, pages); + + expect(block).toBe("[page:present] I exist"); + }); + + test("silently omits a node ref absent from the tree", () => { + const tree = treeIndex([treeNode("present", { summary: "Here" })], { + root: [ + { kind: "node", ref: "present" }, + { kind: "node", ref: "ghost" }, + ], + }); + const pages = pageIndex([]); + + const block = composeNodeIndex("root", tree, pages); + + expect(block).toBe("[node:present] Here"); + }); + + test("empty children → just the routing hints", () => { + const tree = treeIndex( + [treeNode("leaf", { routing_hints: "this is a leaf branch" })], + { leaf: [] }, + ); + + const block = composeNodeIndex("leaf", tree, pageIndex([])); + + expect(block).toBe("Routing hints: this is a leaf branch"); + }); + + test("no children and no routing hints → empty string", () => { + const tree = treeIndex([treeNode("bare", {})], { bare: [] }); + + expect(composeNodeIndex("bare", tree, pageIndex([]))).toBe(""); + }); + + test("node with no childrenByNode entry composes from routing hints alone", () => { + const tree = treeIndex( + [treeNode("orphan", { routing_hints: "hint only" })], + {}, + ); + + expect(composeNodeIndex("orphan", tree, pageIndex([]))).toBe( + "Routing hints: hint only", + ); + }); + + test("node child with no summary falls back to first non-empty body line", () => { + const tree = treeIndex( + [ + treeNode("root", {}), + treeNode("bodyonly", { + body: "\n \nFirst real line\nSecond line", + }), + ], + { root: [{ kind: "node", ref: 
"bodyonly" }] }, + ); + + const block = composeNodeIndex("root", tree, pageIndex([])); + + expect(block).toBe("[node:bodyonly] First real line"); + }); + + test("node child with empty summary string falls back to body line", () => { + const tree = treeIndex( + [ + treeNode("root", {}), + treeNode("blank", { summary: " ", body: "fallback line" }), + ], + { root: [{ kind: "node", ref: "blank" }] }, + ); + + expect(composeNodeIndex("root", tree, pageIndex([]))).toBe( + "[node:blank] fallback line", + ); + }); + + test("node child with neither summary nor body emits only its header", () => { + const tree = treeIndex( + [treeNode("root", {}), treeNode("empty", { body: " \n\t" })], + { root: [{ kind: "node", ref: "empty" }] }, + ); + + expect(composeNodeIndex("root", tree, pageIndex([]))).toBe("[node:empty]"); + }); +}); diff --git a/assistant/src/memory/v3/index-composition.ts b/assistant/src/memory/v3/index-composition.ts new file mode 100644 index 00000000000..e1c16e3da8c --- /dev/null +++ b/assistant/src/memory/v3/index-composition.ts @@ -0,0 +1,113 @@ +/** + * Memory v3 — Compositional index rendering. + * + * A v3 tree node has no stored "index" of its own. Instead, a parent node's + * index is *composed at read time* by concatenating one description line per + * child (a `node:` sub-node's summary or a `page:` leaf's summary) plus a thin + * `Routing hints:` trailer drawn from the node's own frontmatter. Nothing here + * is persisted — the block is generated fresh every time a descent prompt needs + * it, so it always reflects the current state of the children. + * + * {@link composeNodeIndex} is a **pure function** over an already-built + * {@link TreeIndex} (from `tree-index.ts`) and {@link PageIndex} (from + * `../v2/page-index.ts`). It does no I/O: the tree walk / driver PR is + * responsible for building those indices and feeding them in. 
+ * + * Resolution rules, per child ref of `nodeId` (in authored order): + * - `kind:"node"` → look up the child in `tree.nodes`; emit + * `"[node:<id>] <summary>"` where summary is the child's + * `frontmatter.summary` if non-empty, else the first non-empty line of its + * body. A node with neither still emits its header (`"[node:<id>]"`). + * - `kind:"page"` → look up `pages.bySlug.get(ref)`; emit + * `"[page:<slug>] <summary>"`. + * - Either lookup missing → emit nothing for that ref. Reporting dangling + * refs is validation's job, not this renderer's. + * + * The node's own `routing_hints` (when present) are appended last under a + * `Routing hints:` trailer. A node with no resolvable children and no routing + * hints composes to the empty string. + */ + +import type { PageIndex } from "../v2/page-index.js"; +import type { TreeIndex } from "./tree-index.js"; +import type { TreeNode } from "./types.js"; + +/** Trailer label introducing a node's own routing hints. */ +const ROUTING_HINTS_LABEL = "Routing hints:"; + +/** + * Resolve a node's display summary: its frontmatter `summary` if non-empty, + * otherwise the first non-empty line of its body, otherwise the empty string. + * Whitespace is trimmed so a leading blank line in the body never wins. + */ +function nodeSummary(node: TreeNode): string { + const summary = node.frontmatter.summary?.trim(); + if (summary) return summary; + for (const line of node.body.split("\n")) { + const trimmed = line.trim(); + if (trimmed) return trimmed; + } + return ""; +} + +/** + * Render one child ref into its index line, or `null` when the ref's target is + * absent from the supplied indices (validation owns reporting those). + * + * A resolvable `node:` child always yields a line — its header (`[node:<id>]`) + * with a trailing summary when one exists. A `page:` child yields + * `[page:<slug>] <summary>`; the v2 page index already truncates `summary`.
+ */ +function renderChild( + kind: "page" | "node", + ref: string, + tree: TreeIndex, + pages: PageIndex, +): string | null { + if (kind === "node") { + const child = tree.nodes.get(ref); + if (!child) return null; + const summary = nodeSummary(child); + return summary ? `[node:${ref}] ${summary}` : `[node:${ref}]`; + } + const entry = pages.bySlug.get(ref); + if (!entry) return null; + return `[page:${ref}] ${entry.summary}`; +} + +/** + * Compose the prompt-ready index block for `nodeId` from its children's + * descriptions plus the node's own routing hints. + * + * Pure and deterministic: children are emitted in authored order (the order + * `tree.childrenByNode` preserves from the node's `children` frontmatter), refs + * whose targets are absent are silently skipped, and the node's + * `routing_hints` (if present) are appended under a {@link ROUTING_HINTS_LABEL} + * trailer. A node with no entry in `childrenByNode`, no resolvable children, + * and no routing hints composes to the empty string. + * + * The result is a plain string with no trailing newline, suitable to drop + * directly into an LLM descent prompt. + */ +export function composeNodeIndex( + nodeId: string, + tree: TreeIndex, + pages: PageIndex, +): string { + const blocks: string[] = []; + + const childRefs = tree.childrenByNode.get(nodeId) ?? 
[]; + for (const { kind, ref } of childRefs) { + const line = renderChild(kind, ref, tree, pages); + if (line !== null) blocks.push(line); + } + + const routingHints = tree.nodes + .get(nodeId) + ?.frontmatter.routing_hints?.trim(); + if (routingHints) { + blocks.push(`${ROUTING_HINTS_LABEL} ${routingHints}`); + } + + return blocks.join("\n"); +} From 6df7704c96c79a0caf73cf56b80b0a95254bc423 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 02:54:13 -0400 Subject: [PATCH 09/21] feat(memory-v3): fast filter judging dense hits (sticky bypass) (#31979) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/filter.test.ts | 338 ++++++++++++++++++ assistant/src/memory/v3/filter.ts | 258 +++++++++++++ 2 files changed, 596 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/filter.test.ts create mode 100644 assistant/src/memory/v3/filter.ts diff --git a/assistant/src/memory/v3/__tests__/filter.test.ts b/assistant/src/memory/v3/__tests__/filter.test.ts new file mode 100644 index 00000000000..25a78c76df3 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/filter.test.ts @@ -0,0 +1,338 @@ +/** + * Tests for `assistant/src/memory/v3/filter.ts`. + * + * Coverage matrix: + * - keep-subset → kept = bypass ∪ judged-kept; dropped = judged minus kept; + * bypass slugs are never judged. + * - model keeping a slug outside the judged set → dropped. + * - empty dense → no LLM call, kept = bypass-relevant only. + * - dense entirely covered by bypass → no LLM call (nothing to judge). + * - provider === null (no provider configured) → fail-open: keep all dense, + * failureReason = "no_provider". + * - provider throws → fail-open (keep all, failureReason = "api_error"). + * - missing tool_use block → fail-open (failureReason = "tool_use_missing"). + * - tool input failing schema → fail-open (failureReason = "schema_mismatch"). + * - request shape: forced tool_choice on `filter_dense_hits`, judged set in + * the user message, abort signal forwarded. 
+ * + * The provider is injected via `filterDenseHits({ provider })` — no real LLM, + * no network, no `mock.module`. `~/.vellum/` is never touched. + */ + +import { describe, expect, test } from "bun:test"; + +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, + ToolDefinition, +} from "../../../providers/types.js"; +import type { RetrievalInput } from "../../v2/harness/retriever.js"; +import type { ScoutResult } from "../../v2/harness/trace.js"; +import { filterDenseHits } from "../filter.js"; + +// --------------------------------------------------------------------------- +// Helpers. +// --------------------------------------------------------------------------- + +interface ProviderCall { + messages: Message[]; + tools: ToolDefinition[] | undefined; + systemPrompt: string | undefined; + options: SendMessageOptions | undefined; +} + +/** + * A stub provider that records its calls and returns a fixed response. + * Honors an already-aborted signal by throwing an AbortError so signal + * forwarding can be asserted. + */ +function makeProvider( + response: ProviderResponse, + calls: ProviderCall[], +): Provider { + return { + name: "stub", + sendMessage: async (messages, tools, systemPrompt, options) => { + calls.push({ messages, tools, systemPrompt, options }); + if (options?.signal?.aborted) { + const err = new Error("aborted"); + err.name = "AbortError"; + throw err; + } + return response; + }, + }; +} + +/** A provider whose sendMessage always throws. */ +function makeThrowingProvider(): Provider { + return { + name: "throwing-stub", + sendMessage: async () => { + throw new Error("boom"); + }, + }; +} + +/** A provider that must never be called (asserts no LLM round-trip happens). 
*/ +function makeNeverCalledProvider(): Provider { + return { + name: "never-called-stub", + sendMessage: async () => { + throw new Error("provider should not be called"); + }, + }; +} + +function filterToolResponse(input: Record): ProviderResponse { + return { + model: "stub-model", + stopReason: "tool_use", + usage: { inputTokens: 0, outputTokens: 0 }, + content: [ + { type: "tool_use", id: "tu-1", name: "filter_dense_hits", input }, + ], + }; +} + +/** A response with no tool_use block (e.g. the model emitted only text). */ +function textOnlyResponse(): ProviderResponse { + return { + model: "stub-model", + stopReason: "end_turn", + usage: { inputTokens: 0, outputTokens: 0 }, + content: [{ type: "text", text: "no tool here" }], + }; +} + +/** Minimal `RetrievalInput` — the filter only reads `nowText` and `signal`. */ +function makeInput(overrides?: Partial): RetrievalInput { + return { + workspaceDir: "/tmp/does-not-matter", + recentTurnPairs: [], + nowText: "2026-05-25 10:00 PT", + priorEverInjected: [], + config: {} as unknown as RetrievalInput["config"], + ...overrides, + }; +} + +function denseResult(slugs: string[]): ScoutResult { + return { lane: "dense", slugs }; +} + +// --------------------------------------------------------------------------- +// Tests. +// --------------------------------------------------------------------------- + +describe("filterDenseHits — judged keep/drop", () => { + test("kept = bypass ∪ judged-kept; bypass slugs are never judged", async () => { + const calls: ProviderCall[] = []; + // Dense surfaces a, b, c, plus bypass slug `x`. Model keeps a, c; drops b. + const provider = makeProvider( + filterToolResponse({ keep_slugs: ["c", "a"] }), + calls, + ); + + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b", "c", "x"]), + sticky: new Set(["x"]), + bypass: new Set(["x"]), + provider, + }); + + // bypass first (x), then judged-kept in model order (c, a). 
+ expect(result.kept).toEqual(["x", "c", "a"]); + // Only the non-bypass slugs are judged; b was dropped. + expect(result.trace.judged).toEqual(["a", "b", "c"]); + expect(result.trace.dropped).toEqual(["b"]); + expect(result.failureReason).toBeUndefined(); + expect(calls).toHaveLength(1); + // The bypass slug `x` was never shown to the model. + const userText = calls[0].messages[0].content + .map((b) => (b.type === "text" ? b.text : "")) + .join("\n"); + expect(userText).not.toContain("x"); + }); + + test("forces tool_choice on filter_dense_hits and surfaces judged candidates", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + filterToolResponse({ keep_slugs: ["a"] }), + calls, + ); + + await filterDenseHits({ + input: makeInput({ nowText: "NOW-MARKER" }), + dense: denseResult(["a", "b"]), + sticky: new Set(), + bypass: new Set(), + provider, + }); + + const call = calls[0]; + expect(call.options?.config?.tool_choice).toEqual({ + type: "tool", + name: "filter_dense_hits", + }); + expect(call.options?.config?.callSite).toBe("memoryV3Filter"); + expect(call.tools?.[0].name).toBe("filter_dense_hits"); + const userText = call.messages[0].content + .map((b) => (b.type === "text" ? 
b.text : "")) + .join("\n"); + expect(userText).toContain("NOW-MARKER"); + expect(userText).toContain("a"); + expect(userText).toContain("b"); + }); + + test("drops a model-kept slug outside the judged set", async () => { + const calls: ProviderCall[] = []; + const provider = makeProvider( + filterToolResponse({ keep_slugs: ["a", "ghost"] }), + calls, + ); + + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b"]), + sticky: new Set(), + bypass: new Set(), + provider, + }); + + expect(result.kept).toEqual(["a"]); + expect(result.trace.dropped).toEqual(["b"]); + }); + + test("forwards an abort signal to the provider call", async () => { + const calls: ProviderCall[] = []; + const controller = new AbortController(); + controller.abort(); + const provider = makeProvider( + filterToolResponse({ keep_slugs: ["a"] }), + calls, + ); + + // Aborted signal makes the stub throw → filter fails open (keep all). + const result = await filterDenseHits({ + input: makeInput({ signal: controller.signal }), + dense: denseResult(["a", "b"]), + sticky: new Set(), + bypass: new Set(), + provider, + }); + + expect(calls[0].options?.signal).toBe(controller.signal); + expect([...result.kept].sort()).toEqual(["a", "b"]); + expect(result.failureReason).toBe("api_error"); + }); +}); + +describe("filterDenseHits — no LLM call", () => { + test("empty dense → no call, kept = bypass-relevant only", async () => { + const provider = makeNeverCalledProvider(); + + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult([]), + sticky: new Set(["x"]), + bypass: new Set(["x"]), + provider, + }); + + expect(result.kept).toEqual(["x"]); + expect(result.trace).toEqual({ judged: [], dropped: [] }); + expect(result.failureReason).toBeUndefined(); + }); + + test("dense fully covered by bypass → no call (nothing to judge)", async () => { + const provider = makeNeverCalledProvider(); + + const result = await filterDenseHits({ + input: 
makeInput(), + dense: denseResult(["x", "y"]), + sticky: new Set(["x", "y"]), + bypass: new Set(["x", "y"]), + provider, + }); + + expect([...result.kept].sort()).toEqual(["x", "y"]); + expect(result.trace).toEqual({ judged: [], dropped: [] }); + }); +}); + +describe("filterDenseHits — fail-open", () => { + test("provider === null keeps all dense with failureReason no_provider", async () => { + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b", "c"]), + sticky: new Set(), + bypass: new Set(), + provider: null, + }); + + expect([...result.kept].sort()).toEqual(["a", "b", "c"]); + expect(result.trace.judged).toEqual(["a", "b", "c"]); + expect(result.trace.dropped).toEqual([]); + expect(result.failureReason).toBe("no_provider"); + }); + + test("fail-open still unions bypass slugs into kept", async () => { + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b", "x"]), + sticky: new Set(["x"]), + bypass: new Set(["x"]), + provider: null, + }); + + // bypass `x` first, then the judged-but-kept-by-fail-open slugs a, b. 
+ expect(result.kept).toEqual(["x", "a", "b"]); + expect(result.trace.judged).toEqual(["a", "b"]); + }); + + test("provider throw keeps all dense (failureReason api_error)", async () => { + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b"]), + sticky: new Set(), + bypass: new Set(), + provider: makeThrowingProvider(), + }); + + expect([...result.kept].sort()).toEqual(["a", "b"]); + expect(result.failureReason).toBe("api_error"); + }); + + test("missing tool_use block keeps all dense (failureReason tool_use_missing)", async () => { + const calls: ProviderCall[] = []; + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b"]), + sticky: new Set(), + bypass: new Set(), + provider: makeProvider(textOnlyResponse(), calls), + }); + + expect([...result.kept].sort()).toEqual(["a", "b"]); + expect(result.failureReason).toBe("tool_use_missing"); + }); + + test("schema-mismatched tool input keeps all dense (failureReason schema_mismatch)", async () => { + const calls: ProviderCall[] = []; + const result = await filterDenseHits({ + input: makeInput(), + dense: denseResult(["a", "b"]), + sticky: new Set(), + bypass: new Set(), + // `keep_slugs` is required; missing it fails the Zod schema. + provider: makeProvider(filterToolResponse({ wrong_key: ["a"] }), calls), + }); + + expect([...result.kept].sort()).toEqual(["a", "b"]); + expect(result.failureReason).toBe("schema_mismatch"); + }); +}); diff --git a/assistant/src/memory/v3/filter.ts b/assistant/src/memory/v3/filter.ts new file mode 100644 index 00000000000..79892178809 --- /dev/null +++ b/assistant/src/memory/v3/filter.ts @@ -0,0 +1,258 @@ +/** + * Memory v3 — fast dense-hit filter. + * + * The dense scout lane surfaces embedding-similarity candidates that span + * subtrees: some are meaningful cross-domain associations worth carrying into + * the gate, others are spurious near-neighbors that only crowd the slate. 
This + * module makes **one cheap LLM call** to keep the meaningful associations and + * drop the noise, *before* the more expensive selection gate runs. + * + * What it judges. Only the bounded dense candidate set (the scout lane is + * already capped at ~50–200 by quota/MMR — the filter never sees the whole + * corpus). Hot pages and near-exact sparse hits arrive via the scouts' + * `sticky` / `bypass` sets and are **never judged**: a literal keyword hit or a + * page the user has been touching is a strong enough signal that we shouldn't + * make it earn its place through a fallible cheap judgment. They are unioned + * straight into `kept`. + * + * Fail-open. If no provider is configured or the call errors / returns an + * unusable response, the filter keeps *all* dense candidates and surfaces a + * `failureReason` so the loop can record that the filter was bypassed. Dropping + * candidates on a model outage would silently starve retrieval; keeping them is + * the safe degradation (the downstream gate still narrows the slate). + * + * No LLM call when there is nothing to judge. An empty dense set short-circuits + * to `kept` = the bypass-relevant slugs (no judged additions), with no provider + * round-trip. + * + * This module is currently unwired — a later PR composes it into the loop. + */ + +import { z } from "zod"; + +import { + extractToolUse, + getConfiguredProvider, +} from "../../providers/provider-send-message.js"; +import type { + Message, + Provider, + ToolDefinition, +} from "../../providers/types.js"; +import { getLogger } from "../../util/logger.js"; +import type { RetrievalInput } from "../v2/harness/retriever.js"; +import type { ScoutResult } from "../v2/harness/trace.js"; + +const log = getLogger("memory-v3-filter"); + +/** Tool name forced via `tool_choice`. Shared constant so tests can match it. */ +const FILTER_TOOL_NAME = "filter_dense_hits"; + +/** + * Arguments to one filter invocation. 
/**
 * Arguments to one filter invocation.
 *
 * `dense` is the bounded dense scout result; only its slugs that are *not*
 * already in `bypass` are judged. `sticky` is the broader keep-in-the-running
 * set (hot + near-exact sparse); `bypass` is the subset strong enough to skip
 * judgment entirely. Bypass slugs that also appear in the dense lane are kept
 * unconditionally and never sent to the model.
 */
export interface FilterDenseHitsArgs {
  input: RetrievalInput;
  dense: ScoutResult;
  /** Accepted for API symmetry; `filterDenseHits` itself only reads `bypass`. */
  sticky: Set<string>;
  bypass: Set<string>;
  /**
   * Provider override seam for tests. Production leaves this unset and the
   * filter resolves `getConfiguredProvider("memoryV3Filter")`. `null` is
   * distinct from `undefined`: passing `null` simulates "no provider
   * configured" and exercises the fail-open path without resolving the real
   * registry.
   */
  provider?: Provider | null;
}

export interface FilterDenseHitsResult {
  /** Final kept slugs: bypass ∪ judged-kept. */
  kept: string[];
  /** Inspection trace: which dense slugs were judged and which were dropped. */
  trace: { judged: string[]; dropped: string[] };
  /**
   * Set when the filter could not judge and therefore failed open by keeping
   * all dense candidates. One of `"no_provider"`, `"api_error"`,
   * `"tool_use_missing"`, `"schema_mismatch"`; absent on a successful judgment.
   */
  failureReason?: string;
}

/**
 * Build the forced tool definition. `keep_slugs` is the model's subset of the
 * judged candidate set to retain; everything judged-but-not-kept is dropped.
 * The `enum` on the array items restricts the model to the slugs it was shown.
 *
 * @param judgedSlugs - Dense slugs that are being sent to the model.
 * @returns The tool definition passed to the provider alongside `tool_choice`.
 */
function buildFilterTool(judgedSlugs: readonly string[]): ToolDefinition {
  return {
    name: FILTER_TOOL_NAME,
    description:
      "From the candidate concept pages surfaced by embedding similarity for " +
      "the current turn, keep the ones that are meaningful associations worth " +
      "surfacing and drop the spurious near-neighbors. Return keep_slugs as the " +
      "subset to retain — choose only from the candidate set. Lean toward " +
      "keeping a plausible cross-domain association over dropping it.",
    input_schema: {
      type: "object",
      properties: {
        keep_slugs: {
          type: "array",
          items: { type: "string", enum: [...judgedSlugs] },
          description:
            "The subset of candidate page slugs to keep. Choose only from the candidate set.",
        },
      },
      required: ["keep_slugs"],
    },
  };
}

/** Runtime shape check for the tool input returned by the model. */
const FilterToolResultSchema = z.object({
  keep_slugs: z.array(z.string()),
});

/**
 * Compose the final result. `kept` = bypass slugs ∪ judged-kept (de-duplicated,
 * bypass first then judged-kept in the order given). `trace` records exactly
 * which dense slugs were judged and which of them are absent from `judgedKept`.
 *
 * @param bypass - Slugs kept unconditionally.
 * @param judged - Dense slugs that were (or would have been) sent to the model.
 * @param judgedKept - Subset of `judged` to keep; callers pass `judged` itself
 *   on the fail-open paths so nothing is reported as dropped.
 * @param failureReason - Included on the result only when defined.
 */
function buildResult(
  bypass: Set<string>,
  judged: readonly string[],
  judgedKept: readonly string[],
  failureReason?: string,
): FilterDenseHitsResult {
  // A Set iterates in insertion order, so spreading it yields bypass slugs
  // first, then each not-yet-seen judged-kept slug, exactly once.
  const kept = [...new Set<string>([...bypass, ...judgedKept])];
  const keptJudged = new Set<string>(judgedKept);
  const dropped = judged.filter((slug) => !keptJudged.has(slug));
  return {
    kept,
    trace: { judged: [...judged], dropped },
    ...(failureReason !== undefined ? { failureReason } : {}),
  };
}

/**
 * Run the fast dense-hit filter for one pass.
 *
 * Makes at most one forced-tool LLM call over the *judged* set (dense slugs not
 * already in `bypass`). Bypass slugs are kept unconditionally. On an empty
 * judged set no call is made. Any failure (no provider, provider throw, missing
 * tool_use, schema mismatch) fails open: every dense candidate is kept and a
 * `failureReason` is returned.
 *
 * @param args - See {@link FilterDenseHitsArgs}.
 * @returns The kept slugs plus a judged/dropped trace; never rejects because of
 *   a provider `sendMessage` failure (that path is caught and fails open).
 */
export async function filterDenseHits(
  args: FilterDenseHitsArgs,
): Promise<FilterDenseHitsResult> {
  const { input, dense, bypass } = args;

  // Dense slugs that bypass judgment (near-exact sparse / hot) are kept as-is;
  // only the remainder is judged.
  const judged = dense.slugs.filter((slug) => !bypass.has(slug));

  // Nothing to judge → no LLM call. Kept is just the bypass slugs.
  if (judged.length === 0) {
    return buildResult(bypass, judged, judged);
  }

  // Resolve the provider. A `provider` key in args (including explicit `null`)
  // takes precedence so tests inject a stub; production omits it and resolves
  // the configured `memoryV3Filter` call site.
  const provider =
    args.provider !== undefined
      ? args.provider
      : await getConfiguredProvider("memoryV3Filter");

  if (!provider) {
    log.warn(
      "memoryV3Filter provider unavailable; failing open (keeping all dense)",
    );
    return buildResult(bypass, judged, judged, "no_provider");
  }

  const systemPrompt =
    "You are a fast relevance filter for a memory-retrieval loop. You are given " +
    "candidate concept pages surfaced by embedding similarity for the current " +
    "turn. Keep the pages that are meaningful associations and drop the " +
    "spurious near-neighbors. When in doubt, keep.";

  // NOTE(review): both template literals below start and end with a bare "\n",
  // which suggests wrapper tags around the payload were lost somewhere upstream
  // of this copy — confirm against the committed file before relying on them.
  const userMsg: Message = {
    role: "user",
    content: [
      {
        type: "text",
        text: `\n${input.nowText}\n`,
      },
      {
        type: "text",
        text: `\n${judged.join("\n")}\n`,
      },
    ],
  };

  const filterTool = buildFilterTool(judged);

  let response;
  try {
    response = await provider.sendMessage(
      [userMsg],
      [filterTool],
      systemPrompt,
      {
        config: {
          callSite: "memoryV3Filter" as const,
          tool_choice: { type: "tool" as const, name: FILTER_TOOL_NAME },
        },
        // Only forward the abort signal when the caller supplied one.
        ...(input.signal ? { signal: input.signal } : {}),
      },
    );
  } catch (err) {
    log.warn({ err }, "Filter provider call threw; failing open (keep all)");
    return buildResult(bypass, judged, judged, "api_error");
  }

  const toolBlock = extractToolUse(response);
  if (!toolBlock || toolBlock.name !== FILTER_TOOL_NAME) {
    log.warn(
      { stopReason: response.stopReason },
      "Filter model returned no filter_dense_hits tool_use; failing open (keep all)",
    );
    return buildResult(bypass, judged, judged, "tool_use_missing");
  }

  const parsed = FilterToolResultSchema.safeParse(toolBlock.input);
  if (!parsed.success) {
    log.warn(
      { error: parsed.error.message },
      "Filter tool input did not match schema; failing open (keep all)",
    );
    return buildResult(bypass, judged, judged, "schema_mismatch");
  }

  // Restrict the model's keep set to the judged candidates (it can only keep
  // what it was shown), drop repeats, and preserve its returned order.
  const judgedSet = new Set<string>(judged);
  const judgedKept = [
    ...new Set<string>(
      parsed.data.keep_slugs.filter((slug) => judgedSet.has(slug)),
    ),
  ];

  return buildResult(bypass, judged, judgedKept);
}
b/assistant/src/memory/v3/__tests__/traversal.test.ts @@ -0,0 +1,395 @@ +/** + * Tests for `assistant/src/memory/v3/traversal.ts`. + * + * Provider-free: `descend` is always a deterministic stub. Coverage: + * - resolveChildren is a thin accessor (known node / leaf / unknown id). + * - linear descent collects the expected leaf pages and emits a TreeLevel per + * walked node in walk order. + * - a DAG (sub-node shared by two parents) is walked exactly once. + * - an injected cycle (A ↔ B) terminates. + * - breadthBudget caps the descents per level. + * - maxDepth halts the recursion at the right level. + * - seeds start the walk mid-tree (alongside / instead of the root). + * - reasoning from the descend result is threaded onto the level; defaults + * to "" when omitted. + * + * Fixtures are plain in-memory `TreeIndex` objects — no disk, no workspace. + */ + +import { describe, expect, test } from "bun:test"; + +import type { DescendResult } from "../traversal.js"; +import { resolveChildren, walkTree } from "../traversal.js"; +import type { ChildRef, TreeIndex } from "../tree-index.js"; + +// --------------------------------------------------------------------------- +// Fixture helpers +// --------------------------------------------------------------------------- + +function page(ref: string): ChildRef { + return { kind: "page", ref }; +} + +function node(ref: string): ChildRef { + return { kind: "node", ref }; +} + +/** + * Build a minimal in-memory `TreeIndex` from a forward-adjacency spec. Only + * `childrenByNode` and `root` are exercised by the traversal, so the reverse + * adjacency maps and `nodes` are left empty — the walk never reads them. + */ +function makeTree( + root: string, + childrenByNode: Record, +): TreeIndex { + return { + nodes: new Map(), + childrenByNode: new Map(Object.entries(childrenByNode)), + parentsByNode: new Map(), + pageParents: new Map(), + root, + }; +} + +/** Descend into every node child offered (mechanical "descend all" stub). 
*/ +function descendAll( + _nodeId: string, + children: ReadonlyArray, +): DescendResult { + return { descend: children.filter((c) => c.kind === "node") }; +} + +// --------------------------------------------------------------------------- +// resolveChildren +// --------------------------------------------------------------------------- + +describe("resolveChildren", () => { + test("returns the ordered child refs for a known node", () => { + const tree = makeTree("_root", { + _root: [node("a"), page("p")], + }); + expect(resolveChildren(tree, "_root")).toEqual([node("a"), page("p")]); + }); + + test("returns [] for a leaf / unknown node id", () => { + const tree = makeTree("_root", { _root: [] }); + expect(resolveChildren(tree, "missing")).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// Linear descent +// --------------------------------------------------------------------------- + +describe("walkTree — linear descent", () => { + test("collects expected leaf pages and emits a level per walked node", async () => { + // _root → node:a → node:b → page:leaf (plus a page on each level) + const tree = makeTree("_root", { + _root: [page("p-root"), node("a")], + a: [page("p-a"), node("b")], + b: [page("leaf")], + }); + + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 8, + descend: descendAll, + }); + + expect([...pages].sort()).toEqual(["leaf", "p-a", "p-root"]); + expect(levels.map((l) => l.node)).toEqual(["_root", "a", "b"]); + + expect(levels[0]).toMatchObject({ + node: "_root", + considered: ["a"], + descended: ["a"], + skipped: [], + reasoning: "", + }); + expect(levels[2]).toMatchObject({ + node: "b", + considered: [], + descended: [], + skipped: [], + }); + }); + + test("defaults start to tree.root", async () => { + const tree = makeTree("home", { + home: [page("only")], + }); + const { pages, levels } = await walkTree(tree, { + breadthBudget: 4, + maxDepth: 4, + 
descend: descendAll, + }); + expect([...pages]).toEqual(["only"]); + expect(levels.map((l) => l.node)).toEqual(["home"]); + }); +}); + +// --------------------------------------------------------------------------- +// DAG dedup +// --------------------------------------------------------------------------- + +describe("walkTree — DAG dedup", () => { + test("a sub-node shared by two parents is walked exactly once", async () => { + // _root → {node:left, node:right}; both → node:shared → page:s + const tree = makeTree("_root", { + _root: [node("left"), node("right")], + left: [node("shared")], + right: [node("shared")], + shared: [page("s")], + }); + + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 8, + descend: descendAll, + }); + + expect([...pages]).toEqual(["s"]); + // `shared` appears once even though both left and right descend into it. + const walked = levels.map((l) => l.node); + expect(walked.filter((n) => n === "shared")).toHaveLength(1); + expect(walked.sort()).toEqual(["_root", "left", "right", "shared"]); + }); +}); + +// --------------------------------------------------------------------------- +// Cycle termination +// --------------------------------------------------------------------------- + +describe("walkTree — cycle termination", () => { + test("an injected A ↔ B cycle terminates and walks each once", async () => { + const tree = makeTree("a", { + a: [node("b"), page("pa")], + b: [node("a"), page("pb")], + }); + + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 100, + descend: descendAll, + }); + + expect([...pages].sort()).toEqual(["pa", "pb"]); + const walked = levels.map((l) => l.node).sort(); + expect(walked).toEqual(["a", "b"]); + }); + + test("a self-loop terminates", async () => { + const tree = makeTree("solo", { + solo: [node("solo"), page("p")], + }); + const { pages, levels } = await walkTree(tree, { + breadthBudget: 4, + maxDepth: 100, + descend: descendAll, + }); + 
expect([...pages]).toEqual(["p"]); + expect(levels.map((l) => l.node)).toEqual(["solo"]); + }); +}); + +// --------------------------------------------------------------------------- +// Breadth budget +// --------------------------------------------------------------------------- + +describe("walkTree — breadthBudget", () => { + test("caps the descents per node and records the rest as skipped", async () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b"), node("c"), node("d")], + a: [page("pa")], + b: [page("pb")], + c: [page("pc")], + d: [page("pd")], + }); + + const { pages, levels } = await walkTree(tree, { + breadthBudget: 2, + maxDepth: 8, + descend: descendAll, + }); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.considered).toEqual(["a", "b", "c", "d"]); + expect(rootLevel.descended).toEqual(["a", "b"]); + expect(rootLevel.skipped).toEqual(["c", "d"]); + + // Only the first two children's pages are reached. + expect([...pages].sort()).toEqual(["pa", "pb"]); + expect(levels.map((l) => l.node).sort()).toEqual(["_root", "a", "b"]); + }); +}); + +// --------------------------------------------------------------------------- +// Depth budget +// --------------------------------------------------------------------------- + +describe("walkTree — maxDepth", () => { + test("halts recursion at the configured depth", async () => { + // _root(0) → a(1) → b(2) → c(3) + const tree = makeTree("_root", { + _root: [node("a")], + a: [node("b"), page("pa")], + b: [node("c"), page("pb")], + c: [page("pc")], + }); + + // maxDepth 1 walks depth 0 (_root) and depth 1 (a) only; b/c never walked. + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 1, + descend: descendAll, + }); + + expect(levels.map((l) => l.node)).toEqual(["_root", "a"]); + // `a`'s page is collected; b/c and their pages are not reached. 
+ expect([...pages]).toEqual(["pa"]); + }); + + test("maxDepth 0 walks only the start level", async () => { + const tree = makeTree("_root", { + _root: [node("a"), page("pr")], + a: [page("pa")], + }); + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 0, + descend: descendAll, + }); + expect(levels.map((l) => l.node)).toEqual(["_root"]); + expect([...pages]).toEqual(["pr"]); + }); +}); + +// --------------------------------------------------------------------------- +// Seeds +// --------------------------------------------------------------------------- + +describe("walkTree — seeds", () => { + test("seeds start the walk mid-tree alongside start", async () => { + const tree = makeTree("_root", { + _root: [node("a"), page("pr")], + a: [page("pa")], + mid: [page("pm"), node("deep")], + deep: [page("pd")], + }); + + const { pages, levels } = await walkTree(tree, { + seeds: ["mid"], + breadthBudget: 8, + maxDepth: 8, + descend: descendAll, + }); + + // Both the root branch and the seeded `mid` subtree are explored. 
+ expect([...pages].sort()).toEqual(["pa", "pd", "pm", "pr"]); + expect(levels.map((l) => l.node).sort()).toEqual([ + "_root", + "a", + "deep", + "mid", + ]); + }); + + test("a node that is both start and seed is walked once", async () => { + const tree = makeTree("dup", { + dup: [page("p")], + }); + const { levels } = await walkTree(tree, { + start: "dup", + seeds: ["dup"], + breadthBudget: 4, + maxDepth: 4, + descend: descendAll, + }); + expect(levels.map((l) => l.node)).toEqual(["dup"]); + }); +}); + +// --------------------------------------------------------------------------- +// Descend decision threading +// --------------------------------------------------------------------------- + +describe("walkTree — descend decision", () => { + test("threads the descend reasoning onto the level", async () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b")], + a: [page("pa")], + b: [page("pb")], + }); + + const descend = ( + _nodeId: string, + children: ReadonlyArray, + ): DescendResult => ({ + // Pick only "a". + descend: children.filter((c) => c.kind === "node" && c.ref === "a"), + reasoning: "a is more relevant", + }); + + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 8, + descend, + }); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.reasoning).toBe("a is more relevant"); + expect(rootLevel.descended).toEqual(["a"]); + expect(rootLevel.skipped).toEqual(["b"]); + expect([...pages]).toEqual(["pa"]); + }); + + test("ignores descend picks that were not offered node children", async () => { + const tree = makeTree("_root", { + _root: [node("a"), page("pr")], + a: [page("pa")], + }); + + const descend = (): DescendResult => ({ + // "ghost" was never offered; "pr" is a page, not a node child. 
+ descend: [node("ghost"), page("pr")], + }); + + const { pages, levels } = await walkTree(tree, { + breadthBudget: 8, + maxDepth: 8, + descend, + }); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.considered).toEqual(["a"]); + expect(rootLevel.descended).toEqual([]); + expect(rootLevel.skipped).toEqual(["a"]); + // No node descent happened; only the root's own page is collected. + expect([...pages]).toEqual(["pr"]); + expect(levels.map((l) => l.node)).toEqual(["_root"]); + }); + + test("dedups repeated descend picks before applying breadthBudget", async () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b")], + a: [page("pa")], + b: [page("pb")], + }); + + const descend = (): DescendResult => ({ + // "a" repeated should count once; budget of 2 then still admits "b". + descend: [node("a"), node("a"), node("b")], + }); + + const { levels } = await walkTree(tree, { + breadthBudget: 2, + maxDepth: 8, + descend, + }); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.descended).toEqual(["a", "b"]); + expect(rootLevel.skipped).toEqual([]); + }); +}); diff --git a/assistant/src/memory/v3/traversal.ts b/assistant/src/memory/v3/traversal.ts new file mode 100644 index 00000000000..2cd625f0a32 --- /dev/null +++ b/assistant/src/memory/v3/traversal.ts @@ -0,0 +1,194 @@ +/** + * Memory v3 — Tree traversal primitives. + * + * The *mechanical* half of the v3 read loop: a deterministic, provider-free + * walk over the {@link TreeIndex} DAG. The intelligence — *which* child nodes + * to recurse into at each level — is injected via the `descend` callback so + * this module stays pure and unit-testable without an LLM. The driver PR wires + * `descend` to the model's descend/skip decision; here `descend` is just a + * function `(nodeId, children) => chosen node-children`. 
+ * + * `walkTree` fans out from a `start` node and any `seeds`, level by level: + * - At each node it resolves the ordered child refs, hands them to `descend`, + * and recurses into the chosen `node:` children (capped by `breadthBudget`). + * - Every `page:` child encountered anywhere in the walk is collected into the + * returned `pages` set — pages are leaves, never recursed into. + * - A `visited` set keyed by canonical id (`node:`) dedups shared + * sub-nodes (the DAG case) and terminates cycles (A ↔ B). A node is walked + * at most once regardless of how many parents reference it. + * - `maxDepth` bounds how deep the recursion goes; the start/seed level is + * depth 0. + * + * Each walked node emits one {@link TreeLevel} (the `harness/trace.ts` shape) + * recording what was considered, descended, and skipped. `reasoning` is + * supplied by the `descend` callback (the driver attaches the model's stated + * reason); the mechanical walk defaults it to `""`. + * + * Processing is strictly level-by-level so `visited` mutations are never raced: + * within a level the per-node `descend` calls run concurrently (`Promise.all`), + * but the chosen children for the *next* level are only dedup'd and enqueued + * after the whole level resolves. + */ + +import type { TreeLevel } from "../v2/harness/trace.js"; +import type { ChildRef, TreeIndex } from "./tree-index.js"; + +/** + * The descend decision injected into {@link walkTree}. Given a node id and its + * ordered child refs, return the subset of *node* children to recurse into. The + * driver PR wires this to the LLM; tests pass a deterministic stub. + * + * Returning a `reasoning` string is optional — when present it is threaded into + * the emitted {@link TreeLevel}; absent, the level's `reasoning` defaults to + * `""`. Returned refs that are not `node:` children of `nodeId`, or that repeat, + * are ignored by the walk (it only recurses into distinct node children it + * actually offered). 
/**
 * The descend decision injected into {@link walkTree}. Given a node id and its
 * ordered child refs, return the subset of *node* children to recurse into.
 * May answer synchronously or with a promise.
 *
 * Returned refs that are not `node` children of `nodeId`, or that repeat, are
 * ignored by the walk.
 */
export type DescendDecision = (
  nodeId: string,
  children: ReadonlyArray<ChildRef>,
) => Promise<DescendResult> | DescendResult;

/**
 * The result of a {@link DescendDecision}. `descend` lists the `node:` children
 * chosen for recursion; `reasoning` is the optional rationale recorded on the
 * emitted level (defaults to `""` when omitted).
 */
export interface DescendResult {
  descend: ChildRef[];
  reasoning?: string;
}

/** Options controlling a {@link walkTree} run. */
export interface WalkOptions {
  /** Entry node id; defaults to `tree.root`. */
  start?: string;
  /** Extra node ids to start from in parallel with `start`. */
  seeds?: string[];
  /** Max `node:` children to descend into per node (after the `descend` pick). */
  breadthBudget: number;
  /** Max recursion depth; the start/seed level is depth 0. */
  maxDepth: number;
  /** Injected descend decision (the LLM hook). */
  descend: DescendDecision;
}

/** The result of a {@link walkTree} run. */
export interface WalkResult {
  /** Every `page:` slug encountered across the walk, dedup'd. */
  pages: Set<string>;
  /** One {@link TreeLevel} per walked node, in walk order. */
  levels: TreeLevel[];
}

/**
 * Resolve the ordered child refs for `nodeId`. Thin accessor over
 * `tree.childrenByNode`; returns an empty array for an unknown / leaf node id so
 * callers never branch on `undefined`.
 */
export function resolveChildren(
  tree: TreeIndex,
  nodeId: string,
): ReadonlyArray<ChildRef> {
  return tree.childrenByNode.get(nodeId) ?? [];
}

/** Canonical visited-set key for a node id. */
function nodeKey(nodeId: string): string {
  return `node:${nodeId}`;
}

/**
 * Walk the {@link TreeIndex} graph from `start` (default `tree.root`) plus any
 * `seeds`, level by level, driven by the injected `descend` decision.
 *
 * - Every `page` child of a walked node is added to `pages`.
 * - Each walked node emits one {@link TreeLevel}, in walk order.
 * - A node is walked at most once: a visited set dedups shared sub-nodes and
 *   terminates cycles. Note that a level's `descended` list still names a
 *   chosen child that was already visited; it is simply not enqueued again.
 * - Levels deeper than `maxDepth` are not walked (start/seed level is depth 0).
 *
 * @param tree - The index to walk; only `root` and `childrenByNode` are read.
 * @param opts - See {@link WalkOptions}.
 * @returns The collected leaf `pages` and the per-node `levels` trace.
 */
export async function walkTree(
  tree: TreeIndex,
  opts: WalkOptions,
): Promise<WalkResult> {
  const { breadthBudget, maxDepth, descend } = opts;
  const start = opts.start ?? tree.root;

  const pages = new Set<string>();
  const levels: TreeLevel[] = [];
  const visited = new Set<string>();

  // Seed the frontier with `start` + `seeds`, dedup'd and marked visited up
  // front so a node that is both the start and a seed is walked once.
  let frontier: string[] = [];
  for (const id of [start, ...(opts.seeds ?? [])]) {
    const key = nodeKey(id);
    if (visited.has(key)) continue;
    visited.add(key);
    frontier.push(id);
  }

  // Depth 0 is the start/seed level; stop once we'd exceed `maxDepth`.
  for (let depth = 0; depth <= maxDepth && frontier.length > 0; depth++) {
    // Ask `descend` about every node on this level concurrently. `visited` is
    // only mutated after the whole level settles, so there is nothing to race.
    const levelResults = await Promise.all(
      frontier.map(async (nodeId) => {
        const children = resolveChildren(tree, nodeId);
        const result = await descend(nodeId, children);
        return { nodeId, children, result };
      }),
    );

    const nextFrontier: string[] = [];

    for (const { nodeId, children, result } of levelResults) {
      // Collect every page child of this node as a leaf hit.
      for (const child of children) {
        if (child.kind === "page") pages.add(child.ref);
      }

      // The node children this node legitimately offered, in order. The
      // descend pick is intersected with this so bogus or duplicate refs can't
      // make the walk recurse into something that was never offered.
      const offeredNodes = children.filter((c) => c.kind === "node");
      const offeredRefs = new Set<string>(offeredNodes.map((c) => c.ref));

      // Honor the pick in the order returned: node-kind only, actually
      // offered, de-duplicated, and capped by `breadthBudget`. Duplicates are
      // skipped *before* the budget check so a repeat never consumes budget.
      const descended: string[] = [];
      const descendedSet = new Set<string>();
      for (const choice of result.descend) {
        if (choice.kind !== "node") continue;
        if (!offeredRefs.has(choice.ref)) continue;
        if (descendedSet.has(choice.ref)) continue;
        if (descended.length >= breadthBudget) break;
        descendedSet.add(choice.ref);
        descended.push(choice.ref);
      }

      const considered = offeredNodes.map((c) => c.ref);
      const skipped = considered.filter((ref) => !descendedSet.has(ref));

      levels.push({
        node: nodeId,
        considered,
        descended,
        skipped,
        reasoning: result.reasoning ?? "",
      });

      // Enqueue chosen children for the next level, marking them visited so a
      // shared sub-node or a cycle is enqueued at most once across the walk.
      for (const ref of descended) {
        const key = nodeKey(ref);
        if (visited.has(key)) continue;
        visited.add(key);
        nextFrontier.push(ref);
      }
    }

    frontier = nextFrontier;
  }

  return { pages, levels };
}
+ * + * Coverage matrix — one fixture per defect category plus a clean-tree control: + * - clean tree → every list empty, every count 0. + * - danglingChildRefs → a `node:` ref and a `page:` ref to absent targets. + * - orphanPages → a concept page on disk not wired into the tree; synthetic + * page-index entries (none here) and reachable pages excluded. + * - cycles → A → B → A back-edge detected during the full descent. + * - staleIndex → a parent node whose mtime predates a `node:` child's mtime. + * - unknownEdgeTargets → a page `edges:` entry pointing at a missing slug. + * + * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`. + * mtimes are pinned with `utimes` so the freshness check is deterministic and + * independent of write ordering / filesystem timestamp granularity. + */ + +import { mkdtempSync, rmSync } from "node:fs"; +import { utimes } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import { invalidateEdgeIndex } from "../../v2/edge-index.js"; +import { invalidatePageIndex } from "../../v2/page-index.js"; +import { writePage } from "../../v2/page-store.js"; +import type { ConceptPage } from "../../v2/types.js"; +import { invalidateTreeIndex } from "../tree-index.js"; +import { getTreeDir, ROOT_NODE_ID, writeNode } from "../tree-store.js"; +import type { TreeNode } from "../types.js"; +import { validateTree } from "../validate.js"; + +let workspaceDir: string; + +beforeEach(() => { + workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-validate-test-")); +}); + +afterEach(() => { + invalidateTreeIndex(); + invalidatePageIndex(); + invalidateEdgeIndex(); + rmSync(workspaceDir, { recursive: true, force: true }); +}); + +function node(id: string, children: string[], body = `body ${id}`): TreeNode { + return { id, frontmatter: { children }, body }; +} + +function page(slug: string, edges: string[] = 
[]): ConceptPage { + return { + slug, + frontmatter: { edges, ref_files: [], ref_urls: [] }, + body: `body ${slug}`, + }; +} + +/** Pin a node file's mtime (and atime) to an explicit epoch-ms value. */ +async function setNodeMtime(id: string, mtimeMs: number): Promise { + const path = join(getTreeDir(workspaceDir), `${id}.md`); + const t = new Date(mtimeMs); + await utimes(path, t, t); +} + +/** + * Invalidate every cached index after seeding so the first `validateTree` of a + * test body sees the on-disk fixture rather than a stale cache. + */ +function resetCaches(): void { + invalidateTreeIndex(); + invalidatePageIndex(); + invalidateEdgeIndex(); +} + +describe("validateTree — clean tree", () => { + test("returns an empty report for a well-formed tree", async () => { + // _root → node:people → page:alice ; all refs resolve, alice reachable. + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:people"])); + await writeNode(workspaceDir, node("people", ["page:alice"])); + await writePage(workspaceDir, page("alice")); + // Parent newest so the freshness check never fires on a clean tree. 
+ await setNodeMtime("people", 1_000); + await setNodeMtime(ROOT_NODE_ID, 2_000); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.danglingChildRefs).toEqual([]); + expect(report.danglingChildRefCount).toBe(0); + expect(report.orphanPages).toEqual([]); + expect(report.orphanPageCount).toBe(0); + expect(report.cycles).toEqual([]); + expect(report.cycleCount).toBe(0); + expect(report.staleIndex).toEqual([]); + expect(report.staleIndexCount).toBe(0); + expect(report.unknownEdgeTargets).toEqual([]); + expect(report.unknownEdgeTargetCount).toBe(0); + }); +}); + +describe("validateTree — danglingChildRefs", () => { + test("flags node: and page: refs whose targets are missing", async () => { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["node:ghost", "page:missing-page"]), + ); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.danglingChildRefs).toEqual([ + { node: ROOT_NODE_ID, ref: "ghost", kind: "node" }, + { node: ROOT_NODE_ID, ref: "missing-page", kind: "page" }, + ]); + expect(report.danglingChildRefCount).toBe(2); + }); + + test("does not flag refs whose targets exist", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"])); + await writeNode(workspaceDir, node("child", ["page:alice"])); + await writePage(workspaceDir, page("alice")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.danglingChildRefs).toEqual([]); + }); +}); + +describe("validateTree — orphanPages", () => { + test("flags concept pages not reachable from the root", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["page:reached"])); + await writePage(workspaceDir, page("reached")); + await writePage(workspaceDir, page("orphan")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.orphanPages).toEqual(["orphan"]); + expect(report.orphanPageCount).toBe(1); + }); + + test("a page hanging off an 
unreachable node is still an orphan", async () => { + // `floating` is not referenced by _root, so its page child is unreachable. + await writeNode(workspaceDir, node(ROOT_NODE_ID, [])); + await writeNode(workspaceDir, node("floating", ["page:detached"])); + await writePage(workspaceDir, page("detached")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.orphanPages).toEqual(["detached"]); + }); +}); + +describe("validateTree — cycles", () => { + test("detects an A → B → A node cycle as a back-edge", async () => { + // _root → node:a → node:b → node:a (cycle closes on the b → a edge). + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:a"])); + await writeNode(workspaceDir, node("a", ["node:b"])); + await writeNode(workspaceDir, node("b", ["node:a"])); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.cycles).toEqual([{ from: "b", to: "a" }]); + expect(report.cycleCount).toBe(1); + }); + + test("a shared DAG sub-node (two parents, no cycle) is not a cycle", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:p1", "node:p2"])); + await writeNode(workspaceDir, node("p1", ["node:shared"])); + await writeNode(workspaceDir, node("p2", ["node:shared"])); + await writeNode(workspaceDir, node("shared", [])); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.cycles).toEqual([]); + }); +}); + +describe("validateTree — staleIndex", () => { + test("flags a node whose mtime predates a node: child's mtime", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"])); + await writeNode(workspaceDir, node("child", [])); + // Parent older than child → stale. 
+ await setNodeMtime(ROOT_NODE_ID, 1_000); + await setNodeMtime("child", 5_000); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.staleIndex).toEqual([ + { + node: ROOT_NODE_ID, + child: "child", + nodeMtimeMs: 1_000, + childMtimeMs: 5_000, + }, + ]); + expect(report.staleIndexCount).toBe(1); + }); + + test("a parent newer than its child is not stale", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"])); + await writeNode(workspaceDir, node("child", [])); + await setNodeMtime("child", 1_000); + await setNodeMtime(ROOT_NODE_ID, 5_000); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.staleIndex).toEqual([]); + }); +}); + +describe("validateTree — unknownEdgeTargets", () => { + test("flags a page edge pointing at a missing slug", async () => { + await writeNode(workspaceDir, node(ROOT_NODE_ID, ["page:alice"])); + await writePage(workspaceDir, page("alice", ["nonexistent"])); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.unknownEdgeTargets).toEqual([ + { from: "alice", to: "nonexistent" }, + ]); + expect(report.unknownEdgeTargetCount).toBe(1); + }); + + test("an edge to an existing page is not flagged", async () => { + await writeNode( + workspaceDir, + node(ROOT_NODE_ID, ["page:alice", "page:bob"]), + ); + await writePage(workspaceDir, page("alice", ["bob"])); + await writePage(workspaceDir, page("bob")); + resetCaches(); + + const report = await validateTree(workspaceDir); + + expect(report.unknownEdgeTargets).toEqual([]); + }); +}); diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts index 55dc023f2fd..86933c100ac 100644 --- a/assistant/src/memory/v3/tree-store.ts +++ b/assistant/src/memory/v3/tree-store.ts @@ -33,6 +33,7 @@ import { readFile, rename, rm, + stat, writeFile, } from "node:fs/promises"; import { dirname, join, relative, sep } from "node:path"; @@ -270,6 +271,26 @@ 
export async function readNode( return { id, frontmatter, body }; } +/** + * File mtime for a tree node, in epoch ms. Returns 0 when the file is missing + * or unreadable — callers treat 0 as "no mtime" (e.g. the validator's stale- + * index check reads a missing node as the oldest possible mtime so it never + * spuriously flags a parent against an absent child). Mirrors v2's + * `getPageMtimeMs`. + */ +export async function getNodeMtimeMs( + workspaceDir: string, + id: string, +): Promise { + validateNodeId(id); + try { + const s = await stat(getNodePath(workspaceDir, id)); + return s.mtimeMs; + } catch { + return 0; + } +} + /** * Write a tree node atomically (temp file + rename). A crash between the temp * write and the rename leaves the prior file intact; a crash after the rename diff --git a/assistant/src/memory/v3/validate.ts b/assistant/src/memory/v3/validate.ts new file mode 100644 index 00000000000..4a0acb2efc3 --- /dev/null +++ b/assistant/src/memory/v3/validate.ts @@ -0,0 +1,300 @@ +/** + * Memory v3 — Tree structure validator. + * + * The v3 tree is hand-authored by a data-migration during the v2 → v3 rollout + * (nodes reference pages and sub-nodes by `page:`/`node:` refs). Because the + * structure is authored, not derived, it can drift: a ref can dangle, a page + * can be left unwired, two nodes can reference each other into a cycle, a + * parent node's compositional summary can fall behind a freshly-edited child, + * or a page `edges:` entry can point at a slug with no page. + * + * `validateTree` is the read-only report the migration (and any later + * structure-health probe) runs to surface those defects. It is deliberately + * **non-throwing**: the migration is in progress, so an incomplete tree is + * expected — the report is informational, and the caller decides what (if + * anything) is fatal. It builds the three indices it needs (tree, page, edge), + * walks the DAG, and returns counts plus the offending ids for each category. 
+ * + * Categories: + * - `danglingChildRefs` — a node `children` entry (`node:`/`page:`) whose + * target node/page does not exist on disk. + * - `orphanPages` — concept pages present in the page index but not reachable + * from the tree root by descending every `node:` child. Informational while + * the migration is mid-flight (not every page is wired in yet). Synthetic + * page-index entries (skills, CLI commands) are excluded — they are never + * tree members. + * - `cycles` — back-edges found during a full DFS over `node:` adjacency + * (A → B → A). A cycle would make a naive descent loop forever. + * - `staleIndex` — a node whose own file mtime predates one of its `node:` + * children's mtime, hinting its compositional index/summary may be out of + * date relative to the child it composes. + * - `unknownEdgeTargets` — page `edges:` targets with no corresponding page + * index slug, reusing v2's `validateEdgeTargets`. + */ + +import { CLI_COMMAND_SLUG_PREFIX } from "../v2/cli-command-store.js"; +import { getEdgeIndex, validateEdgeTargets } from "../v2/edge-index.js"; +import { getPageIndex } from "../v2/page-index.js"; +import { SKILL_SLUG_PREFIX } from "../v2/skill-store.js"; +import { getTreeIndex, type TreeIndex } from "./tree-index.js"; +import { getNodeMtimeMs } from "./tree-store.js"; + +/** + * A `node:` child whose mtime is newer than the parent node that composes it. + * `node` is the parent, `child` the fresher child, and the two `*MtimeMs` + * fields are their epoch-ms mtimes (parent < child triggers the report). + */ +export interface StaleIndexEntry { + node: string; + child: string; + nodeMtimeMs: number; + childMtimeMs: number; +} + +/** + * Read-only health report over the v3 tree + its referenced pages/edges. + * Every list is sorted for deterministic output; `*Count` fields mirror the + * corresponding list length so callers can summarize without re-counting. 
+ */ +export interface TreeValidationReport { + /** `node:`/`page:` children whose target does not exist. */ + danglingChildRefs: Array<{ + node: string; + ref: string; + kind: "node" | "page"; + }>; + danglingChildRefCount: number; + /** Concept pages not reachable from the root by descending all node children. */ + orphanPages: string[]; + orphanPageCount: number; + /** Back-edges (`from → to`) closing a cycle during the full DFS descent. */ + cycles: Array<{ from: string; to: string }>; + cycleCount: number; + /** Nodes whose mtime predates a child node's mtime. */ + staleIndex: StaleIndexEntry[]; + staleIndexCount: number; + /** Page `edges:` targets with no corresponding page-index slug. */ + unknownEdgeTargets: Array<{ from: string; to: string }>; + unknownEdgeTargetCount: number; +} + +/** True when a page-index slug is a synthetic (non-concept-page) entry. */ +function isSyntheticSlug(slug: string): boolean { + return ( + slug.startsWith(SKILL_SLUG_PREFIX) || + slug.startsWith(CLI_COMMAND_SLUG_PREFIX) + ); +} + +/** + * Collect dangling `node:`/`page:` child refs: every node child whose target + * node id is absent from `tree.nodes`, and every page child whose slug is + * absent from `knownPageSlugs`. Sorted by `(node, kind, ref)`. + */ +function collectDanglingChildRefs( + tree: TreeIndex, + knownPageSlugs: ReadonlySet, +): Array<{ node: string; ref: string; kind: "node" | "page" }> { + const dangling: Array<{ node: string; ref: string; kind: "node" | "page" }> = + []; + for (const [nodeId, children] of tree.childrenByNode) { + for (const child of children) { + const exists = + child.kind === "node" + ? 
tree.nodes.has(child.ref) + : knownPageSlugs.has(child.ref); + if (!exists) { + dangling.push({ node: nodeId, ref: child.ref, kind: child.kind }); + } + } + } + dangling.sort( + (a, b) => + a.node.localeCompare(b.node) || + a.kind.localeCompare(b.kind) || + a.ref.localeCompare(b.ref), + ); + return dangling; +} + +/** + * Resolve the existing `node:` children of `nodeId`, in `children` order. Refs + * to absent nodes are skipped (those are reported separately as dangling) so + * the descent never recurses into a node that isn't on disk. + */ +function nodeChildrenOf(tree: TreeIndex, nodeId: string): string[] { + const children = tree.childrenByNode.get(nodeId) ?? []; + const out: string[] = []; + for (const child of children) { + if (child.kind === "node" && tree.nodes.has(child.ref)) { + out.push(child.ref); + } + } + return out; +} + +/** + * Full DFS over `node:` adjacency from `tree.root`. Returns the set of + * reachable node ids (for orphan-page reachability) and the back-edges that + * close a cycle. A back-edge is an edge into a node still on the active + * recursion stack (classic gray-node cycle detection); `visited` (black) + * prevents re-walking shared DAG sub-nodes. + */ +function descend(tree: TreeIndex): { + reachableNodes: Set; + cycles: Array<{ from: string; to: string }>; +} { + const reachableNodes = new Set(); + const onStack = new Set(); + const cycles: Array<{ from: string; to: string }> = []; + + // Iterative DFS with an explicit stack so deep trees don't blow the call + // stack. Each frame tracks its child cursor; we push a child frame, and on + // exhaustion pop the parent off the recursion stack (`onStack`). 
+ type Frame = { node: string; children: string[]; cursor: number }; + const stack: Frame[] = []; + + function enter(nodeId: string): void { + reachableNodes.add(nodeId); + onStack.add(nodeId); + stack.push({ + node: nodeId, + children: nodeChildrenOf(tree, nodeId), + cursor: 0, + }); + } + + if (tree.nodes.has(tree.root)) { + enter(tree.root); + } + + while (stack.length > 0) { + const frame = stack[stack.length - 1]; + if (frame.cursor >= frame.children.length) { + onStack.delete(frame.node); + stack.pop(); + continue; + } + const child = frame.children[frame.cursor++]; + if (onStack.has(child)) { + // Edge into an ancestor still on the stack → cycle-closing back-edge. + cycles.push({ from: frame.node, to: child }); + continue; + } + if (reachableNodes.has(child)) { + // Already fully explored via another parent (shared DAG sub-node). + continue; + } + enter(child); + } + + cycles.sort( + (a, b) => a.from.localeCompare(b.from) || a.to.localeCompare(b.to), + ); + return { reachableNodes, cycles }; +} + +/** + * Concept pages reachable from the tree: every `page:` child of a reachable + * node. Pages hanging off unreachable nodes are *not* counted reachable — they + * only become reachable once their parent chain links back to the root. + */ +function reachablePages( + tree: TreeIndex, + reachableNodes: ReadonlySet, +): Set { + const pages = new Set(); + for (const nodeId of reachableNodes) { + for (const child of tree.childrenByNode.get(nodeId) ?? []) { + if (child.kind === "page") pages.add(child.ref); + } + } + return pages; +} + +/** + * Nodes whose own mtime predates one of their `node:` children's mtime. A + * missing node file reads as mtime 0 (oldest), so the check never flags a + * parent against an absent child. Sorted by `(node, child)`. 
+ */ +async function collectStaleIndex( + workspaceDir: string, + tree: TreeIndex, +): Promise { + const ids = [...tree.nodes.keys()]; + const mtimes = new Map(); + await Promise.all( + ids.map(async (id) => { + mtimes.set(id, await getNodeMtimeMs(workspaceDir, id)); + }), + ); + + const stale: StaleIndexEntry[] = []; + for (const node of ids) { + const nodeMtimeMs = mtimes.get(node) ?? 0; + for (const child of nodeChildrenOf(tree, node)) { + const childMtimeMs = mtimes.get(child) ?? 0; + if (nodeMtimeMs < childMtimeMs) { + stale.push({ node, child, nodeMtimeMs, childMtimeMs }); + } + } + } + stale.sort( + (a, b) => a.node.localeCompare(b.node) || a.child.localeCompare(b.child), + ); + return stale; +} + +/** + * Validate the hand-authored v3 tree structure for `workspaceDir` and return a + * {@link TreeValidationReport}. Builds the tree, page, and edge indices, walks + * the DAG from the root, and reports the five defect categories. Never throws — + * it is a report, not an assertion. + */ +export async function validateTree( + workspaceDir: string, +): Promise { + const [tree, pageIndex, edgeIndex] = await Promise.all([ + getTreeIndex(workspaceDir), + getPageIndex(workspaceDir), + getEdgeIndex(workspaceDir), + ]); + + const knownPageSlugs = new Set(pageIndex.bySlug.keys()); + + // Kick off the stale-index mtime stats up front — it only depends on the + // tree, not on the DAG walk below — so its filesystem reads overlap the + // (synchronous) descent rather than running strictly after it. 
+ const staleIndexPromise = collectStaleIndex(workspaceDir, tree); + + const danglingChildRefs = collectDanglingChildRefs(tree, knownPageSlugs); + + const { reachableNodes, cycles } = descend(tree); + + const reached = reachablePages(tree, reachableNodes); + const orphanPages = [...knownPageSlugs] + .filter((slug) => !isSyntheticSlug(slug) && !reached.has(slug)) + .sort(); + + const staleIndex = await staleIndexPromise; + + // Edge graph is page-only; knownSlugs is the full page-index slug set so an + // edge pointing at a skill/CLI entry is not spuriously flagged unknown. + const unknownEdgeTargets = validateEdgeTargets( + edgeIndex, + knownPageSlugs, + ).missing; + + return { + danglingChildRefs, + danglingChildRefCount: danglingChildRefs.length, + orphanPages, + orphanPageCount: orphanPages.length, + cycles, + cycleCount: cycles.length, + staleIndex, + staleIndexCount: staleIndex.length, + unknownEdgeTargets, + unknownEdgeTargetCount: unknownEdgeTargets.length, + }; +} From 94ad28782f5fd2011c4594e03b41c9afd87422f7 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:02:11 -0400 Subject: [PATCH 12/21] feat(memory-v3): scout-seeded tree-walk descent driver (#31982) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/tree-walk.test.ts | 585 ++++++++++++++++++ assistant/src/memory/v3/tree-walk.ts | 406 ++++++++++++ 2 files changed, 991 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/tree-walk.test.ts create mode 100644 assistant/src/memory/v3/tree-walk.ts diff --git a/assistant/src/memory/v3/__tests__/tree-walk.test.ts b/assistant/src/memory/v3/__tests__/tree-walk.test.ts new file mode 100644 index 00000000000..5b45021b1dd --- /dev/null +++ b/assistant/src/memory/v3/__tests__/tree-walk.test.ts @@ -0,0 +1,585 @@ +/** + * Tests for `assistant/src/memory/v3/tree-walk.ts`. 
+ * + * The descent provider is always a scripted stub injected via the `provider` + * arg — no real LLM, no network, no `mock.module`, `~/.vellum/` untouched. The + * stub keys its scripted decision off the `` marker in the user + * message so one fixture provider can drive a whole multi-node walk with one + * call per visited node. + * + * Coverage: + * - scripted descent over a fixture tree collects the right leaf pages and + * records considered/descended/skipped + reasoning per node. + * - one descent call per *visited* node (not per offered child). + * - breadthBudget caps descents per node (skip the overflow). + * - maxDepth halts the walk. + * - scout page hits seed the start node set (deriveSeedNodes) so a subtree the + * root never reaches is still walked. + * - explicit seeds bias the start set. + * - scout hits are rendered into the descend prompt as pressure. + * - provider === null → fail-safe: descend nothing, walk still terminates and + * collects the pages it reached, reasoning records the failure. + * - leaf nodes (no node children) make no provider call. + * - request shape: forced tool_choice on `choose_branches`, abort signal + * forwarded. + */ + +import { describe, expect, test } from "bun:test"; + +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, + ToolDefinition, +} from "../../../providers/types.js"; +import type { RetrievalInput } from "../../v2/harness/retriever.js"; +import type { ScoutResult } from "../../v2/harness/trace.js"; +import type { PageIndex } from "../../v2/page-index.js"; +import type { ChildRef, TreeIndex } from "../tree-index.js"; +import { createDescender, deriveSeedNodes, runTreeWalk } from "../tree-walk.js"; +import type { TreeNode } from "../types.js"; + +// --------------------------------------------------------------------------- +// Fixture helpers. 
+// --------------------------------------------------------------------------- + +function page(ref: string): ChildRef { + return { kind: "page", ref }; +} + +function node(ref: string): ChildRef { + return { kind: "node", ref }; +} + +interface ProviderCall { + messages: Message[]; + tools: ToolDefinition[] | undefined; + systemPrompt: string | undefined; + options: SendMessageOptions | undefined; +} + +/** + * Build a tree node with the given children refs. `summary` defaults to the id + * so `composeNodeIndex` produces deterministic, inspectable lines. + */ +function makeNode(id: string, children: ChildRef[]): TreeNode { + return { + id, + frontmatter: { + children: children.map((c) => `${c.kind}:${c.ref}`), + summary: `summary of ${id}`, + }, + body: "", + }; +} + +/** + * Build an in-memory `TreeIndex` from a forward-adjacency spec, materializing + * `nodes`, `childrenByNode`, and the `pageParents` reverse edges (the only maps + * `tree-walk.ts` reads). `parentsByNode` is left empty — the driver never reads + * it. + */ +function makeTree( + root: string, + childrenByNode: Record, +): TreeIndex { + const nodes = new Map(); + const children = new Map>(); + const pageParents = new Map>(); + for (const [id, refs] of Object.entries(childrenByNode)) { + nodes.set(id, makeNode(id, refs)); + children.set(id, refs); + for (const ref of refs) { + if (ref.kind !== "page") continue; + let parents = pageParents.get(ref.ref); + if (!parents) { + parents = new Set(); + pageParents.set(ref.ref, parents); + } + parents.add(id); + } + } + return { + nodes, + childrenByNode: children, + parentsByNode: new Map(), + pageParents, + root, + }; +} + +/** Empty page index — the driver only needs `bySlug` for page summaries. 
*/ +function makePages(slugs: string[]): PageIndex { + const bySlug = new Map(); + const byId = new Map(); + let id = 1; + for (const slug of slugs) { + const entry = { + id, + slug, + summary: `summary of ${slug}`, + edges: [], + modifiedAt: 0, + }; + bySlug.set(slug, entry); + byId.set(id, entry); + id++; + } + return { entries: [...bySlug.values()], bySlug, byId, rendered: "" }; +} + +/** Minimal `RetrievalInput` carrying just the fields the driver reads. */ +function makeInput( + overrides?: Partial & { + breadthBudget?: number; + maxDepth?: number; + }, +): RetrievalInput { + const breadthBudget = overrides?.breadthBudget ?? 8; + const maxDepth = overrides?.maxDepth ?? 8; + const config = { + memory: { v3: { breadthBudget, maxDepth } }, + } as unknown as RetrievalInput["config"]; + const { breadthBudget: _b, maxDepth: _m, ...rest } = overrides ?? {}; + return { + workspaceDir: "/tmp/does-not-matter", + recentTurnPairs: [{ assistantMessage: "", userMessage: "tell me about a" }], + nowText: "2026-05-25 10:00 PT", + priorEverInjected: [], + config, + ...rest, + }; +} + +/** Pull the `` id out of a recorded descend prompt. */ +function nodeIdFromCall(call: ProviderCall): string | null { + for (const block of call.messages[0]?.content ?? []) { + if (block.type !== "text") continue; + const match = block.text.match(//); + if (match) return match[1]; + } + return null; +} + +/** + * A scripted descent provider. `script` maps a node id to the bare child-node + * ids to descend (and an optional reasoning string). Records every call and + * honors an already-aborted signal by throwing. 
+ */ +function makeProvider( + script: Record, + calls: ProviderCall[], +): Provider { + return { + name: "stub", + sendMessage: async (messages, tools, systemPrompt, options) => { + calls.push({ messages, tools, systemPrompt, options }); + if (options?.signal?.aborted) { + const err = new Error("aborted"); + err.name = "AbortError"; + throw err; + } + const nodeId = + nodeIdFromCall({ messages, tools, systemPrompt, options }) ?? ""; + const decision = script[nodeId] ?? { descend: [] }; + const input: Record = { descend: decision.descend }; + if (decision.reasoning !== undefined) + input.reasoning = decision.reasoning; + const response: ProviderResponse = { + model: "stub-model", + stopReason: "tool_use", + usage: { inputTokens: 0, outputTokens: 0 }, + content: [ + { + type: "tool_use", + id: `tu-${nodeId}`, + name: "choose_branches", + input, + }, + ], + }; + return response; + }, + }; +} + +// --------------------------------------------------------------------------- +// deriveSeedNodes +// --------------------------------------------------------------------------- + +describe("deriveSeedNodes", () => { + test("maps scout page slugs to their parent nodes via pageParents", () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b")], + a: [page("pa")], + b: [page("pb")], + }); + const scouts: ScoutResult[] = [{ lane: "sparse", slugs: ["pb"] }]; + expect(deriveSeedNodes(tree, scouts, [])).toEqual(["b"]); + }); + + test("unions explicit seeds first, then scout-derived parents, dedup'd", () => { + const tree = makeTree("_root", { + _root: [node("a")], + a: [page("pa")], + }); + const scouts: ScoutResult[] = [{ lane: "hot", slugs: ["pa", "pa"] }]; + // "a" is both an explicit seed and the parent of pa — appears once, seeds first. 
+ expect(deriveSeedNodes(tree, scouts, ["a", "x"])).toEqual(["a", "x"]); + }); + + test("ignores scout slugs with no parent node", () => { + const tree = makeTree("_root", { _root: [page("pr")] }); + const scouts: ScoutResult[] = [{ lane: "dense", slugs: ["orphan"] }]; + expect(deriveSeedNodes(tree, scouts, [])).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// runTreeWalk — scripted descent +// --------------------------------------------------------------------------- + +describe("runTreeWalk — scripted descent", () => { + test("collects the right leaf pages and records the descend/skip split", async () => { + // _root → {a, b}; a → leaf pa; b → leaf pb. Script descends only "a". + const tree = makeTree("_root", { + _root: [node("a"), node("b")], + a: [page("pa")], + b: [page("pb")], + }); + const pages = makePages(["pa", "pb"]); + const calls: ProviderCall[] = []; + const provider = makeProvider( + { _root: { descend: ["a"], reasoning: "a matches the turn" } }, + calls, + ); + + const { pages: collected, levels } = await runTreeWalk({ + input: makeInput(), + tree, + pages, + scouts: [], + seeds: [], + provider, + }); + + // Only the descended branch's page is collected. + expect([...collected]).toEqual(["pa"]); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.considered).toEqual(["a", "b"]); + expect(rootLevel.descended).toEqual(["a"]); + expect(rootLevel.skipped).toEqual(["b"]); + expect(rootLevel.reasoning).toBe("a matches the turn"); + + // _root walked (has node children) + a walked (leaf, no call). b skipped. 
+ expect(levels.map((l) => l.node).sort()).toEqual(["_root", "a"]); + }); + + test("makes exactly one descent call per visited node with node children", async () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b")], + a: [node("c"), page("pa")], + b: [page("pb")], + c: [page("pc")], + }); + const pages = makePages(["pa", "pb", "pc"]); + const calls: ProviderCall[] = []; + const provider = makeProvider( + { + _root: { descend: ["a", "b"] }, + a: { descend: ["c"] }, + // b and c are leaves of the descended set; c has no node children. + }, + calls, + ); + + await runTreeWalk({ + input: makeInput(), + tree, + pages, + scouts: [], + seeds: [], + provider, + }); + + // Calls happen for nodes that HAVE node children: _root, a. b (leaf) and + // c (leaf) are visited but short-circuit before the provider call. + const calledNodes = calls.map(nodeIdFromCall).sort(); + expect(calledNodes).toEqual(["_root", "a"]); + }); + + test("breadthBudget caps descents per node", async () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b"), node("c")], + a: [page("pa")], + b: [page("pb")], + c: [page("pc")], + }); + const pages = makePages(["pa", "pb", "pc"]); + const calls: ProviderCall[] = []; + // Model picks all three; budget 2 admits only the first two. 
+ const provider = makeProvider( + { _root: { descend: ["a", "b", "c"] } }, + calls, + ); + + const { pages: collected, levels } = await runTreeWalk({ + input: makeInput({ breadthBudget: 2 }), + tree, + pages, + scouts: [], + seeds: [], + provider, + }); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.descended).toEqual(["a", "b"]); + expect(rootLevel.skipped).toEqual(["c"]); + expect([...collected].sort()).toEqual(["pa", "pb"]); + }); + + test("maxDepth halts the walk", async () => { + const tree = makeTree("_root", { + _root: [node("a")], + a: [node("b"), page("pa")], + b: [page("pb")], + }); + const pages = makePages(["pa", "pb"]); + const calls: ProviderCall[] = []; + const provider = makeProvider( + { _root: { descend: ["a"] }, a: { descend: ["b"] } }, + calls, + ); + + const { pages: collected, levels } = await runTreeWalk({ + input: makeInput({ maxDepth: 1 }), + tree, + pages, + scouts: [], + seeds: [], + provider, + }); + + // Depth 0 (_root) and depth 1 (a) walked; b never reached. + expect(levels.map((l) => l.node)).toEqual(["_root", "a"]); + expect([...collected]).toEqual(["pa"]); + }); +}); + +// --------------------------------------------------------------------------- +// runTreeWalk — scout seeding +// --------------------------------------------------------------------------- + +describe("runTreeWalk — scout seeding", () => { + test("scout page hits seed a subtree the root never reaches", async () => { + // root only links to a; the "island" subtree is unreachable from root but a + // scout surfaced its leaf page, so deriveSeedNodes seeds `island`. 
+ const tree = makeTree("_root", { + _root: [node("a")], + a: [page("pa")], + island: [page("treasure")], + }); + const pages = makePages(["pa", "treasure"]); + const calls: ProviderCall[] = []; + const provider = makeProvider({ _root: { descend: ["a"] } }, calls); + + const scouts: ScoutResult[] = [{ lane: "dense", slugs: ["treasure"] }]; + const { pages: collected, levels } = await runTreeWalk({ + input: makeInput(), + tree, + pages, + scouts, + seeds: [], + provider, + }); + + // Both the root branch (pa) and the scout-seeded island (treasure) reached. + expect([...collected].sort()).toEqual(["pa", "treasure"]); + expect(levels.map((l) => l.node).sort()).toEqual(["_root", "a", "island"]); + }); + + test("explicit seeds bias the start set", async () => { + const tree = makeTree("_root", { + _root: [page("pr")], + mid: [page("pm")], + }); + const pages = makePages(["pr", "pm"]); + const calls: ProviderCall[] = []; + const provider = makeProvider({}, calls); + + const { pages: collected, levels } = await runTreeWalk({ + input: makeInput(), + tree, + pages, + scouts: [], + seeds: ["mid"], + provider, + }); + + expect([...collected].sort()).toEqual(["pm", "pr"]); + expect(levels.map((l) => l.node).sort()).toEqual(["_root", "mid"]); + }); + + test("renders scout hits into the descend prompt as pressure", async () => { + const tree = makeTree("_root", { + _root: [node("a"), node("b")], + a: [page("pa")], + b: [page("pb")], + }); + const pages = makePages(["pa", "pb"]); + const calls: ProviderCall[] = []; + const provider = makeProvider({ _root: { descend: ["a"] } }, calls); + + const scouts: ScoutResult[] = [{ lane: "sparse", slugs: ["pb"] }]; + await runTreeWalk({ + input: makeInput(), + tree, + pages, + // Pass scouts but no parent-seed match so the start set stays root-only; + // we only assert the prompt rendering here. 
+ scouts, + seeds: [], + provider, + }); + + const rootCall = calls.find((c) => nodeIdFromCall(c) === "_root")!; + const promptText = rootCall.messages[0].content + .filter((b) => b.type === "text") + .map((b) => (b as { text: string }).text) + .join("\n"); + expect(promptText).toContain(""); + expect(promptText).toContain("[sparse]: pb"); + }); +}); + +// --------------------------------------------------------------------------- +// runTreeWalk — fail-safe + request shape +// --------------------------------------------------------------------------- + +describe("runTreeWalk — fail-safe", () => { + test("provider null descends nothing but still terminates and collects reached pages", async () => { + const tree = makeTree("_root", { + _root: [node("a"), page("pr")], + a: [page("pa")], + }); + const pages = makePages(["pr", "pa"]); + + const { pages: collected, levels } = await runTreeWalk({ + input: makeInput(), + tree, + pages, + scouts: [], + seeds: [], + provider: null, + }); + + // Root's own page is collected; the undescended branch's page is not. + expect([...collected]).toEqual(["pr"]); + expect(levels.map((l) => l.node)).toEqual(["_root"]); + const rootLevel = levels[0]; + expect(rootLevel.descended).toEqual([]); + expect(rootLevel.skipped).toEqual(["a"]); + expect(rootLevel.reasoning).toContain("no provider"); + }); + + test("malformed tool input fails closed for that node", async () => { + const tree = makeTree("_root", { + _root: [node("a")], + a: [page("pa")], + }); + const pages = makePages(["pa"]); + const calls: ProviderCall[] = []; + // Provider returns a non-conforming tool input (descend is not an array). 
+ const provider: Provider = { + name: "bad-schema", + sendMessage: async (messages, tools, systemPrompt, options) => { + calls.push({ messages, tools, systemPrompt, options }); + return { + model: "stub-model", + stopReason: "tool_use", + usage: { inputTokens: 0, outputTokens: 0 }, + content: [ + { + type: "tool_use", + id: "tu-1", + name: "choose_branches", + input: { descend: "not-an-array" }, + }, + ], + }; + }, + }; + + const { levels } = await runTreeWalk({ + input: makeInput(), + tree, + pages, + scouts: [], + seeds: [], + provider, + }); + + const rootLevel = levels.find((l) => l.node === "_root")!; + expect(rootLevel.descended).toEqual([]); + expect(rootLevel.reasoning).toContain("validation"); + }); +}); + +describe("createDescender — request shape", () => { + test("forces tool_choice on choose_branches and forwards the abort signal", async () => { + const tree = makeTree("_root", { + _root: [node("a")], + a: [page("pa")], + }); + const pages = makePages(["pa"]); + const calls: ProviderCall[] = []; + const provider = makeProvider({ _root: { descend: ["a"] } }, calls); + + const reasoningByNode = new Map(); + const descender = createDescender( + { + input: makeInput({ signal: AbortSignal.timeout(10_000) }), + tree, + pages, + scouts: [], + seeds: [], + provider, + }, + reasoningByNode, + ); + + await descender("_root", [...tree.childrenByNode.get("_root")!]); + + expect(calls).toHaveLength(1); + const call = calls[0]; + expect(call.tools?.[0]?.name).toBe("choose_branches"); + expect(call.options?.config?.tool_choice).toEqual({ + type: "tool", + name: "choose_branches", + }); + expect(call.options?.config?.callSite).toBe("memoryV3Descent"); + expect(call.options?.signal).toBeDefined(); + }); + + test("a node with no node children makes no provider call", async () => { + const tree = makeTree("leaf", { leaf: [page("p")] }); + const pages = makePages(["p"]); + const calls: ProviderCall[] = []; + const provider = makeProvider({}, calls); + + const 
reasoningByNode = new Map(); + const descender = createDescender( + { input: makeInput(), tree, pages, scouts: [], seeds: [], provider }, + reasoningByNode, + ); + + const chosen = await descender("leaf", [ + ...tree.childrenByNode.get("leaf")!, + ]); + expect(chosen).toEqual([]); + expect(calls).toHaveLength(0); + expect(reasoningByNode.get("leaf")).toBe(""); + }); +}); diff --git a/assistant/src/memory/v3/tree-walk.ts b/assistant/src/memory/v3/tree-walk.ts new file mode 100644 index 00000000000..c387a2a5bc4 --- /dev/null +++ b/assistant/src/memory/v3/tree-walk.ts @@ -0,0 +1,406 @@ +/** + * Memory v3 — tree-walk model driver. + * + * The *intelligence* half of the v3 tree descent. `traversal.ts` owns the + * mechanical, provider-free walk (`walkTree`); this module supplies the + * per-node `descend` decision that walk injects, and wires the whole thing into + * a single `runTreeWalk` entry point. + * + * Per visited node the driver makes one cheap LLM call (`memoryV3Descent`) over + * the node's *composed* index — `composeNodeIndex` renders one line per child + * (sub-node summary or leaf page summary) plus the node's routing hints — and + * asks which child *nodes* to descend into. The prompt also carries the + * conversation context (the just-arrived turn + NOW) and the surviving scout + * hits, so descent is **scout-seeded but not scout-bound**: the model sees where + * the cheap lanes already landed, yet still feels pressure to descend branches + * the scouts missed. A driver that only ratified the scouts would re-introduce + * the recall cliff the tree walk exists to avoid. + * + * Scout seeding works at two layers: + * 1. **Start set** — `runTreeWalk` derives seed *node* ids from scout-surfaced + * *page* slugs via the tree's `pageParents` reverse edges (a scout hit on + * `page:foo` seeds every node that lists `page:foo` as a child), unioned + * with any explicit `seeds`. `walkTree` fans out from `tree.root` + seeds. + * 2. 
**Descend pressure** — the surviving scout slugs are rendered into every + * descend prompt so the model can prefer (but is not forced onto) branches + * that contain them. + * + * Reasoning capture. The `createDescender` signature returns plain `ChildRef[]` + * (the chosen node children) to match the driver contract; the model's stated + * rationale is written into a side map keyed by node id. `runTreeWalk` adapts + * the descender into `walkTree`'s `DescendResult`-returning hook by pairing each + * node's chosen children with its recorded reasoning, so every emitted + * `TreeLevel` carries the model's reason for its descend/skip split — making a + * wrong high-level skip observable rather than silent. + * + * Fail-safe. When no provider is configured (or a per-node call errors / returns + * an unusable response) the descender descends *nothing* for that node and + * records the reason. The walk still terminates and still collects every leaf + * page it reached before the failure; it just stops exploring deeper from the + * affected node. Failing closed (descend nothing) rather than open (descend all) + * keeps a broken provider from blowing the breadth budget across the whole tree. + * + * This module is currently unwired — a later PR composes it into the loop. 
import { z } from "zod";

import {
  extractToolUse,
  getConfiguredProvider,
} from "../../providers/provider-send-message.js";
import type {
  Message,
  Provider,
  ToolDefinition,
} from "../../providers/types.js";
import { getLogger } from "../../util/logger.js";
import type { RetrievalInput } from "../v2/harness/retriever.js";
import type { ScoutResult } from "../v2/harness/trace.js";
import type { PageIndex } from "../v2/page-index.js";
import { composeNodeIndex } from "./index-composition.js";
import type { WalkResult } from "./traversal.js";
import { walkTree } from "./traversal.js";
import type { ChildRef, TreeIndex } from "./tree-index.js";

const log = getLogger("memory-v3-tree-walk");

/** Tool name forced via `tool_choice`. Shared constant so tests can match it. */
const DESCEND_TOOL_NAME = "choose_branches";

/**
 * The descend decision the driver hands to `walkTree`.
 *
 * Resolves to the subset of `children` (node refs only) to recurse into. The
 * model's reasoning is deliberately *not* part of the return value: it is
 * threaded out-of-band through the side map populated by
 * {@link createDescender}, which keeps this signature a plain `ChildRef[]`
 * promise.
 */
export type Descender = (
  nodeId: string,
  children: ChildRef[],
) => Promise<ChildRef[]>;

/** Arguments to {@link createDescender}. */
export interface CreateDescenderArgs {
  input: RetrievalInput;
  tree: TreeIndex;
  pages: PageIndex;
  /** Surviving scout hits — rendered into the prompt as descend pressure. */
  scouts: ScoutResult[];
  /**
   * Explicit seed node ids. `runTreeWalk` unions them into the walk's start
   * set (via `deriveSeedNodes`); `createDescender` itself does not read them,
   * so they never appear in the descend prompt.
   */
  seeds: string[];
  /**
   * Provider override seam for tests. Production omits it and the descender
   * resolves `getConfiguredProvider("memoryV3Descent")` per call. Explicit
   * `null` is distinct from `undefined`: it simulates "no provider configured"
   * and exercises the fail-safe path without touching the real registry.
   */
  provider?: Provider | null;
}

/** Arguments to {@link runTreeWalk}. Identical to the descender's args. */
export type RunTreeWalkArgs = CreateDescenderArgs;

/**
 * The forced-tool input schema. `descend` lists the bare node ids the model
 * chose to recurse into; `reasoning` is its stated rationale for the
 * descend/skip split. Mirrors v2's `select_pages_to_inject` forced-tool shape.
 */
const DescendToolResultSchema = z.object({
  descend: z.array(z.string()),
  reasoning: z.string().optional(),
});

/**
 * Build the forced tool definition for one node.
 *
 * `descend` is constrained to the node ids actually offered as `node:`
 * children so the model can only pick from genuine branches (the walk filters
 * anyway, but constraining the schema keeps the model honest and the trace
 * clean).
 *
 * @param offeredNodeIds Bare ids of the node children offered at this node.
 *   The list is copied into the schema, never aliased.
 * @returns The `choose_branches` tool definition; only `descend` is required.
 */
function buildDescendTool(offeredNodeIds: readonly string[]): ToolDefinition {
  return {
    name: DESCEND_TOOL_NAME,
    description:
      "Choose which child nodes of the current memory-tree node to descend " +
      "into for the current turn. Prefer branches likely to contain pages " +
      "that bear on the turn; you may favor branches the scout hits point at, " +
      "but descend other promising branches too — missing a relevant subtree " +
      "is worse than descending an extra one. Return an empty list only when " +
      "no child node plausibly bears on the turn.",
    input_schema: {
      type: "object",
      properties: {
        descend: {
          type: "array",
          // An empty offer falls back to a bare string item schema so the
          // definition never carries an empty `enum`.
          items:
            offeredNodeIds.length > 0
              ? { type: "string", enum: [...offeredNodeIds] }
              : { type: "string" },
          description:
            "Bare ids of the child nodes to descend into. Choose only from " +
            "the offered node children.",
        },
        reasoning: {
          type: "string",
          description:
            "One short sentence: why these branches were descended and the " +
            "rest skipped.",
        },
      },
      required: ["descend"],
    },
  };
}
/**
 * Render the recent-turn + NOW context the descend prompt needs.
 *
 * Only the last entry of `input.recentTurnPairs` is rendered: its assistant
 * message (skipped when blank after trimming) followed by its user message.
 * `input.nowText` is passed through verbatim.
 *
 * NOTE(review): in this copy of the file the two sections are separated by
 * newlines only. If the repository version wraps them in named delimiters,
 * those were lost in transit — confirm against the repository.
 */
function renderConversationContext(input: RetrievalInput): string {
  const lines: string[] = [];
  const lastPair = input.recentTurnPairs[input.recentTurnPairs.length - 1];
  if (lastPair) {
    if (lastPair.assistantMessage.trim().length > 0) {
      lines.push(`[assistant]: ${lastPair.assistantMessage}`);
    }
    lines.push(`[user]: ${lastPair.userMessage}`);
  }
  return (
    `\n${input.nowText}\n\n\n` +
    `\n${lines.join("\n")}\n`
  );
}

/**
 * Render the surviving scout hits as descend pressure — the page slugs each
 * lane surfaced, one `[lane]: slug, slug` line per lane. Lanes without slugs
 * are dropped. Returns the empty string when nothing is left, so the caller
 * can omit the block entirely.
 *
 * NOTE(review): same caveat as above — the block is newline-wrapped here;
 * confirm whether a named delimiter is expected around it.
 */
function renderScoutHits(scouts: readonly ScoutResult[]): string {
  const lines: string[] = [];
  for (const scout of scouts) {
    if (scout.slugs.length === 0) continue;
    lines.push(`[${scout.lane}]: ${scout.slugs.join(", ")}`);
  }
  if (lines.length === 0) return "";
  return `\n${lines.join("\n")}\n`;
}

/** System prompt sent with every per-node descend call. */
const DESCENT_SYSTEM_PROMPT =
  "You are the descent driver for a hierarchical memory-retrieval walk. At each " +
  "node you see its child index (one line per child sub-node or leaf page) and " +
  "the current conversation turn. Choose which child *nodes* to descend into to " +
  "find the pages that bear on the next reply. Leaf pages are collected " +
  "automatically — you only decide which branches to explore deeper.";

/**
 * Fail-safe descend result: descend nothing, recording why on the side map.
 *
 * @param nodeId Node whose descend decision failed.
 * @param reasoning Human-readable reason stored under `nodeId`.
 * @param reasoningByNode Side map the walk later reads reasoning from; mutated.
 * @returns Always a fresh empty array.
 */
function failClosed(
  nodeId: string,
  reasoning: string,
  reasoningByNode: Map<string, string>,
): ChildRef[] {
  reasoningByNode.set(nodeId, reasoning);
  return [];
}
/**
 * Create the per-node descend decision driving {@link walkTree}.
 *
 * The returned function makes one forced-tool `memoryV3Descent` call per node
 * over its composed index and resolves to the chosen `node:` children. The
 * model's reasoning for each node is written into `reasoningByNode` (keyed by
 * node id) rather than returned, so the small `Descender` signature is
 * preserved and {@link runTreeWalk} can merge the reasoning into each level.
 *
 * Provider resolution honors `args.provider` (including explicit `null` for
 * the fail-safe path) and otherwise resolves the configured call site on every
 * call. Any failure — no provider, provider throw, missing/mismatched
 * tool_use, schema mismatch — fails closed (descend nothing) with the reason
 * recorded on the side map.
 *
 * @param args Walk inputs; `seeds` is not read here.
 * @param reasoningByNode Side map receiving one entry per visited node; mutated.
 * @returns The per-node descend function. Ids the model repeats are returned
 *   once, and ids that were not offered are dropped.
 */
export function createDescender(
  args: CreateDescenderArgs,
  reasoningByNode: Map<string, string>,
): Descender {
  const { input, tree, pages, scouts } = args;
  // Both blocks are node-independent, so render them once per walk.
  const conversationContext = renderConversationContext(input);
  const scoutHits = renderScoutHits(scouts);

  return async (nodeId: string, children: ChildRef[]): Promise<ChildRef[]> => {
    const offeredNodes = children.filter((c) => c.kind === "node");
    // No node children to descend — nothing to ask the model. Record an empty
    // reasoning so the level still reflects the (trivial) decision.
    if (offeredNodes.length === 0) {
      reasoningByNode.set(nodeId, "");
      return [];
    }

    // `undefined` means "use the configured provider"; explicit `null` means
    // "behave as if none is configured".
    const provider =
      args.provider !== undefined
        ? args.provider
        : await getConfiguredProvider("memoryV3Descent");
    if (!provider) {
      log.warn(
        { nodeId },
        "memoryV3Descent provider unavailable; descending nothing",
      );
      return failClosed(
        nodeId,
        "no provider configured — descended nothing",
        reasoningByNode,
      );
    }

    const indexBlock = composeNodeIndex(nodeId, tree, pages);
    const offeredNodeIds = offeredNodes.map((c) => c.ref);

    // NOTE(review): the index block is newline-wrapped in this copy of the
    // file; confirm whether a named delimiter is expected around it.
    const userMsg: Message = {
      role: "user",
      content: [
        { type: "text", text: conversationContext },
        {
          type: "text",
          text:
            (scoutHits ? `${scoutHits}\n\n` : "") +
            `\n${indexBlock}\n`,
        },
      ],
    };

    const descendTool = buildDescendTool(offeredNodeIds);

    let response: Awaited<ReturnType<Provider["sendMessage"]>>;
    try {
      response = await provider.sendMessage(
        [userMsg],
        [descendTool],
        DESCENT_SYSTEM_PROMPT,
        {
          config: {
            callSite: "memoryV3Descent" as const,
            tool_choice: { type: "tool" as const, name: DESCEND_TOOL_NAME },
          },
          // Forward the caller's abort signal only when one was supplied.
          ...(input.signal ? { signal: input.signal } : {}),
        },
      );
    } catch (err) {
      log.warn(
        { err, nodeId },
        "Descent provider call threw; descending nothing",
      );
      return failClosed(
        nodeId,
        "descent call failed — descended nothing",
        reasoningByNode,
      );
    }

    const toolBlock = extractToolUse(response);
    if (!toolBlock || toolBlock.name !== DESCEND_TOOL_NAME) {
      log.warn(
        { stopReason: response.stopReason, nodeId },
        "Descent model returned no choose_branches tool_use; descending nothing",
      );
      return failClosed(
        nodeId,
        "model returned no descend decision — descended nothing",
        reasoningByNode,
      );
    }

    const parsed = DescendToolResultSchema.safeParse(toolBlock.input);
    if (!parsed.success) {
      log.warn(
        { error: parsed.error.message, nodeId },
        "Descent tool input did not match schema; descending nothing",
      );
      return failClosed(
        nodeId,
        "descend decision failed validation — descended nothing",
        reasoningByNode,
      );
    }

    reasoningByNode.set(nodeId, parsed.data.reasoning ?? "");

    // Map the chosen bare ids back to the offered ChildRefs. Resolving against
    // the offered set keeps the returned refs canonical; unoffered ids are
    // dropped and an id the model repeats is only returned once.
    const offeredById = new Map<string, ChildRef>(
      offeredNodes.map((c): [string, ChildRef] => [c.ref, c]),
    );
    const picked = new Set<string>();
    const chosen: ChildRef[] = [];
    for (const id of parsed.data.descend) {
      const ref = offeredById.get(id);
      if (!ref || picked.has(id)) continue;
      picked.add(id);
      chosen.push(ref);
    }
    return chosen;
  };
}
/**
 * Derive the seed *node* ids for the walk from the surviving scout *page* hits.
 *
 * Scouts surface concept-page slugs; the tree's `pageParents` reverse edges map
 * each page slug to the node(s) that list it as a child. Seeding the walk at
 * those parent nodes starts it near where the cheap lanes already landed.
 * Explicit `seeds` are unioned in.
 *
 * Order is deterministic: explicit seeds first (in the given order), then
 * scout-derived parents in scout/slug order. Each id appears at most once;
 * slugs with no entry in `pageParents` contribute nothing.
 *
 * @param tree Tree index; only `pageParents` is read.
 * @param scouts Scout results whose `slugs` are looked up.
 * @param seeds Explicit seed node ids, emitted first.
 * @returns A fresh array of unique seed node ids.
 */
export function deriveSeedNodes(
  tree: TreeIndex,
  scouts: readonly ScoutResult[],
  seeds: readonly string[],
): string[] {
  const out: string[] = [];
  const seen = new Set<string>();
  const push = (id: string): void => {
    if (seen.has(id)) return;
    seen.add(id);
    out.push(id);
  };
  for (const id of seeds) push(id);
  for (const scout of scouts) {
    for (const slug of scout.slugs) {
      const parents = tree.pageParents.get(slug);
      if (!parents) continue;
      for (const parent of parents) push(parent);
    }
  }
  return out;
}

/**
 * Drive a full scout-seeded tree walk for one retrieval pass.
 *
 * Wires {@link createDescender} into {@link walkTree} with `breadthBudget` /
 * `maxDepth` read from `input.config.memory?.v3` (each defaulting to 6 when
 * absent) and the start set produced by {@link deriveSeedNodes}.
 *
 * The descender records reasoning into a node-keyed side map; the `descend`
 * hook passed to `walkTree` pairs each node's chosen children with that
 * recorded reason (or `""` when none was recorded).
 *
 * @returns Whatever `walkTree` resolves to for this tree and start set.
 */
export async function runTreeWalk(args: RunTreeWalkArgs): Promise<WalkResult> {
  const { input, tree, scouts, seeds } = args;
  const v3 = input.config.memory?.v3;
  const breadthBudget = v3?.breadthBudget ?? 6;
  const maxDepth = v3?.maxDepth ?? 6;

  const reasoningByNode = new Map<string, string>();
  const descender = createDescender(args, reasoningByNode);

  const seedNodes = deriveSeedNodes(tree, scouts, seeds);

  return walkTree(tree, {
    seeds: seedNodes,
    breadthBudget,
    maxDepth,
    descend: async (nodeId, children) => {
      // Copy `children` so the descender never holds the walk's own array.
      const descend = await descender(nodeId, [...children]);
      return { descend, reasoning: reasoningByNode.get(nodeId) ?? "" };
    },
  });
}
Read-only; the CLI uses it to print an indented tree with shared-DAG + re-entries marked. + tags: + - memory + responses: + "200": + description: Successful response + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: {} + additionalProperties: false + /v1/memory/v3/validate: + post: + operationId: memory_v3_validate_post + summary: Validate the memory v3 tree structure (read-only) + description: + Read-only structural validation of the hand-authored v3 tree DAG. Reports dangling child refs, orphan + pages, cycles, stale compositional indexes, and unknown edge targets. Writes nothing and runs no LLM — operators + dry-run it while the v2 → v3 migration is in flight. + tags: + - memory + responses: + "200": + description: Successful response + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: {} + additionalProperties: false /v1/messages: get: operationId: messages_get diff --git a/assistant/src/cli/commands/__tests__/memory-v3-render.test.ts b/assistant/src/cli/commands/__tests__/memory-v3-render.test.ts new file mode 100644 index 00000000000..343c921bc84 --- /dev/null +++ b/assistant/src/cli/commands/__tests__/memory-v3-render.test.ts @@ -0,0 +1,164 @@ +import { describe, expect, test } from "bun:test"; + +import type { + MemoryV3TreeResult, + MemoryV3ValidateResult, +} from "../../../runtime/routes/memory-v3-routes.js"; +import { + renderTree, + renderValidationReport, + reportHasDefects, +} from "../memory-v3-render.js"; + +function cleanReport(): MemoryV3ValidateResult { + return { + danglingChildRefs: [], + danglingChildRefCount: 0, + orphanPages: [], + orphanPageCount: 0, + cycles: [], + cycleCount: 0, + staleIndex: [], + staleIndexCount: 0, + unknownEdgeTargets: [], + unknownEdgeTargetCount: 0, + }; +} + +describe("memory v3 — renderValidationReport", () => { + test("renders 'none' for every empty category", () => { + const out = 
renderValidationReport(cleanReport()); + expect(out).toContain("Memory v3 Tree Validation"); + expect(out).toContain("Dangling child refs: none"); + expect(out).toContain("Orphan pages: none"); + expect(out).toContain("Cycles: none"); + expect(out).toContain("Stale index: none"); + expect(out).toContain("Unknown edge targets: none"); + }); + + test("renders counts and offending ids for each defect category", () => { + const report: MemoryV3ValidateResult = { + danglingChildRefs: [{ node: "people", ref: "ghost", kind: "node" }], + danglingChildRefCount: 1, + orphanPages: ["stray-page"], + orphanPageCount: 1, + cycles: [{ from: "a", to: "b" }], + cycleCount: 1, + staleIndex: [ + { node: "root", child: "people", nodeMtimeMs: 1, childMtimeMs: 2 }, + ], + staleIndexCount: 1, + unknownEdgeTargets: [{ from: "p1", to: "missing" }], + unknownEdgeTargetCount: 1, + }; + const out = renderValidationReport(report); + expect(out).toContain("Dangling child refs: 1"); + expect(out).toContain("people → node:ghost"); + expect(out).toContain("Orphan pages: 1"); + expect(out).toContain("- stray-page"); + expect(out).toContain("Cycles: 1"); + expect(out).toContain("a → b"); + expect(out).toContain("Stale index: 1"); + expect(out).toContain("root (older than child people)"); + expect(out).toContain("Unknown edge targets: 1"); + expect(out).toContain("p1 → missing"); + }); +}); + +describe("memory v3 — reportHasDefects", () => { + test("false for a clean report", () => { + expect(reportHasDefects(cleanReport())).toBe(false); + }); + + test("true when any single category is non-empty", () => { + const report = cleanReport(); + report.orphanPageCount = 1; + report.orphanPages = ["x"]; + expect(reportHasDefects(report)).toBe(true); + }); +}); + +describe("memory v3 — renderTree", () => { + test("renders an indented tree descending node and page children", () => { + const view: MemoryV3TreeResult = { + root: "_root", + nodes: [ + { + id: "_root", + children: [ + { kind: "node", ref: "people" 
}, + { kind: "page", ref: "overview" }, + ], + }, + { + id: "people", + children: [{ kind: "page", ref: "alice" }], + }, + ], + }; + const out = renderTree(view); + expect(out).toBe( + ["node:_root", " node:people", " page:alice", " page:overview"].join( + "\n", + ), + ); + }); + + test("marks a shared DAG sub-node as a re-entry rather than re-expanding", () => { + const view: MemoryV3TreeResult = { + root: "_root", + nodes: [ + { + id: "_root", + children: [ + { kind: "node", ref: "a" }, + { kind: "node", ref: "b" }, + ], + }, + { id: "a", children: [{ kind: "node", ref: "shared" }] }, + { id: "b", children: [{ kind: "node", ref: "shared" }] }, + { id: "shared", children: [{ kind: "page", ref: "leaf" }] }, + ], + }; + const out = renderTree(view); + // First reach under `a` expands; second reach under `b` is a marked re-entry. + expect(out).toContain(" node:a\n node:shared\n page:leaf"); + expect(out).toContain("node:shared (↑ already shown)"); + // The leaf page is expanded exactly once. 
+ expect(out.match(/page:leaf/g)?.length).toBe(1); + }); + + test("bounds output on a cycle instead of looping forever", () => { + const view: MemoryV3TreeResult = { + root: "_root", + nodes: [ + { id: "_root", children: [{ kind: "node", ref: "a" }] }, + { id: "a", children: [{ kind: "node", ref: "_root" }] }, + ], + }; + const out = renderTree(view); + expect(out).toContain("node:_root (↑ already shown)"); + }); + + test("flags a child ref whose target node is missing", () => { + const view: MemoryV3TreeResult = { + root: "_root", + nodes: [{ id: "_root", children: [{ kind: "node", ref: "ghost" }] }], + }; + const out = renderTree(view); + expect(out).toContain("node:ghost (missing)"); + }); + + test("lists nodes unreachable from the root", () => { + const view: MemoryV3TreeResult = { + root: "_root", + nodes: [ + { id: "_root", children: [] }, + { id: "floating", children: [] }, + ], + }; + const out = renderTree(view); + expect(out).toContain("Unreachable nodes (1):"); + expect(out).toContain("- node:floating"); + }); +}); diff --git a/assistant/src/cli/commands/memory-v3-render.ts b/assistant/src/cli/commands/memory-v3-render.ts new file mode 100644 index 00000000000..9cbb7a2f7b1 --- /dev/null +++ b/assistant/src/cli/commands/memory-v3-render.ts @@ -0,0 +1,133 @@ +/** + * Text rendering for `assistant memory v3 validate` and `... tree`. + * + * Both functions are pure presentation: they take the daemon route's response + * shape and return a terminal-ready string. They live CLI-side (mirroring + * `memory-v2-compare-render.ts`) and import only the response *types* from the + * daemon route — `cli/no-daemon-internals` permits type-only imports but + * forbids pulling in daemon runtime modules. + */ + +import type { + MemoryV3TreeResult, + MemoryV3ValidateResult, +} from "../../runtime/routes/memory-v3-routes.js"; + +/** + * Render a {@link MemoryV3ValidateResult} into a counts summary plus the + * offending ids for each non-empty category. 
/**
 * Turn a {@link MemoryV3ValidateResult} into terminal-ready text: a title, then
 * one `label: count` line per defect category followed by one bullet per
 * offending entry. A category whose count is zero shows `none` instead of the
 * number, so a clean tree is obvious at a glance.
 *
 * NOTE(review): the bullet prefix appears as a single leading space in this
 * copy of the file; whitespace may have been collapsed in transit — confirm.
 */
export function renderValidationReport(report: MemoryV3ValidateResult): string {
  const out: string[] = [
    "Memory v3 Tree Validation",
    "=========================",
  ];

  // Emits the summary line for one category, then its bullets in input order.
  const section = (label: string, count: number, bullets: string[]): void => {
    out.push(`${label}: ${count || "none"}`);
    for (const bullet of bullets) {
      out.push(` - ${bullet}`);
    }
  };

  section(
    "Dangling child refs",
    report.danglingChildRefCount,
    report.danglingChildRefs.map(
      (entry) => `${entry.node} → ${entry.kind}:${entry.ref}`,
    ),
  );
  section(
    "Orphan pages",
    report.orphanPageCount,
    report.orphanPages.map((slug) => `${slug}`),
  );
  section(
    "Cycles",
    report.cycleCount,
    report.cycles.map((edge) => `${edge.from} → ${edge.to}`),
  );
  section(
    "Stale index",
    report.staleIndexCount,
    report.staleIndex.map(
      (entry) => `${entry.node} (older than child ${entry.child})`,
    ),
  );
  section(
    "Unknown edge targets",
    report.unknownEdgeTargetCount,
    report.unknownEdgeTargets.map((edge) => `${edge.from} → ${edge.to}`),
  );

  return out.join("\n");
}

/**
 * Report whether any category of the validation result has a positive count.
 * Only the `*Count` fields are consulted, not the entry arrays. The CLI uses
 * the answer to choose a non-zero exit code so `validate` can gate a script.
 */
export function reportHasDefects(report: MemoryV3ValidateResult): boolean {
  const counts = [
    report.danglingChildRefCount,
    report.orphanPageCount,
    report.cycleCount,
    report.staleIndexCount,
    report.unknownEdgeTargetCount,
  ];
  return counts.some((count) => count > 0);
}
/**
 * Render a {@link MemoryV3TreeResult} as an indented tree rooted at `view.root`,
 * descending `node:` children depth-first.
 *
 * - A node reached more than once (shared DAG sub-node, or a cycle's
 *   back-edge) is expanded the first time only; later reaches print a
 *   `(↑ already shown)` marker, which also keeps output finite on a cycle.
 * - A `node:` ref with no matching entry in `view.nodes` prints `(missing)`.
 * - `page:` children are printed as leaves one level below their parent.
 * - Nodes present in `view.nodes` but never expanded from the root are listed
 *   (sorted) under a trailing "Unreachable nodes" section.
 * - A view with no nodes at all renders as `(empty tree)`.
 *
 * NOTE(review): the indent unit and bullet prefix appear as a single space in
 * this copy of the file; whitespace may have been collapsed in transit —
 * confirm against the repository.
 */
export function renderTree(view: MemoryV3TreeResult): string {
  const nodesById = new Map<string, MemoryV3TreeResult["nodes"][number]>();
  for (const node of view.nodes) {
    nodesById.set(node.id, node);
  }

  const lines: string[] = [];
  const expanded = new Set<string>();

  const walk = (nodeId: string, depth: number): void => {
    const indent = " ".repeat(depth);
    const node = nodesById.get(nodeId);

    if (!node) {
      lines.push(`${indent}node:${nodeId} (missing)`);
      return;
    }

    if (expanded.has(nodeId)) {
      // Shared DAG sub-node (or a cycle's back-edge): print the reference but
      // do not re-expand, so output stays finite and the re-entry is visible.
      lines.push(`${indent}node:${nodeId} (↑ already shown)`);
      return;
    }
    expanded.add(nodeId);
    lines.push(`${indent}node:${nodeId}`);

    for (const child of node.children) {
      if (child.kind === "page") {
        lines.push(`${" ".repeat(depth + 1)}page:${child.ref}`);
      } else {
        walk(child.ref, depth + 1);
      }
    }
  };

  if (view.nodes.length === 0) {
    // Nothing to walk. Checking up front makes this fallback reachable:
    // `walk` always emits at least one line, so a post-walk emptiness check
    // could never fire.
    lines.push("(empty tree)");
  } else {
    walk(view.root, 0);
  }

  // Surface nodes that exist but were never reached from the root — they
  // would otherwise be invisible in a root-anchored print.
  const unreached = view.nodes
    .map((n) => n.id)
    .filter((id) => !expanded.has(id))
    .sort();
  if (unreached.length > 0) {
    lines.push("", `Unreachable nodes (${unreached.length}):`);
    for (const id of unreached) {
      lines.push(` - node:${id}`);
    }
  }

  return lines.join("\n");
}
/**
 * `assistant memory v3` CLI subgroup.
 *
 * Operator-facing read-only inspection of the v3 memory tree — the DAG overlay
 * the v2 → v3 data-migration hand-authors over the flat concept pages.
 *
 * Subcommands:
 *
 * - `validate` — print a structural health report (dangling refs, orphan
 *   pages, cycles, stale indexes, unknown edge targets). Exits non-zero when
 *   any defect is found so it is scriptable as a check.
 * - `tree` — print the tree as an indented outline rooted at the tree root,
 *   marking shared-DAG re-entries.
 *
 * Both are read-only: they mutate nothing and run no LLM. `--json` emits the
 * raw daemon payload for either subcommand.
 */

import type { Command } from "commander";

import { cliIpcCall } from "../../ipc/cli-client.js";
import type {
  MemoryV3TreeResult,
  MemoryV3ValidateResult,
} from "../../runtime/routes/memory-v3-routes.js";
import { registerCommand } from "../lib/register-command.js";
import { log } from "../logger.js";
import {
  renderTree,
  renderValidationReport,
  reportHasDefects,
} from "./memory-v3-render.js";

/**
 * Attach the `memory v3` subgroup (`validate`, `tree`) to the CLI program.
 *
 * Each subcommand issues one IPC call with an empty body and prints either the
 * raw JSON payload (`--json`) or the formatted rendering. A failed call — or
 * an `ok` response that carries no payload — logs an error and sets
 * `process.exitCode = 1`. `validate` additionally sets exit code 1 when the
 * report contains any defect.
 *
 * NOTE(review): example and bullet lines inside the help text appear with a
 * single leading space in this copy of the file; whitespace may have been
 * collapsed in transit — confirm against the repository.
 */
export function registerMemoryV3Command(program: Command): void {
  // Reuse an existing `memory` parent if a sibling registrar (e.g. v2)
  // attached it first; otherwise create one. Keeps registration order between
  // sibling memory registrars unconstrained.
  const memory =
    program.commands.find((c) => c.name() === "memory") ??
    program
      .command("memory")
      .description("Manage the memory subsystem (concept-page model)");

  registerCommand(memory, {
    name: "v3",
    transport: "ipc",
    description: "Memory v3 subsystem operations (tree-DAG overlay)",
    build: (v3) => {
      v3.addHelpText(
        "after",
        `
The v3 memory subsystem layers a hand-authored DAG of tree nodes over the
flat v2 concept pages. Each node lives under /workspace/memory/v3/tree/ and
its frontmatter 'children' list references sub-nodes (node:) and leaf
concept pages (page:). The structure is authored by the v2 → v3
data-migration, so these subcommands are read-only inspection only — they
mutate nothing and run no LLM.

Examples:
 $ assistant memory v3 validate
 $ assistant memory v3 tree
 $ assistant memory v3 tree --json | jq '.nodes | length'`,
      );

      // ── validate ──────────────────────────────────────────────────────────

      v3.command("validate")
        .description(
          "Print a structural health report of the v3 tree (read-only)",
        )
        .option("--json", "Emit raw JSON instead of a formatted report")
        .addHelpText(
          "after",
          `
Walks the hand-authored v3 tree DAG and reports:
 - Dangling child refs (node:/page: targets that do not exist)
 - Orphan pages (concept pages not reachable from the tree root)
 - Cycles (back-edges in the node:/node: adjacency)
 - Stale indexes (a node older than a child it composes)
 - Unknown edge targets (page edges: pointing at a missing slug)

Read-only — mutates nothing. Exits non-zero if any defect is reported, so it
is usable as a pre-flight check while the v2 → v3 migration is in flight.

Examples:
 $ assistant memory v3 validate
 $ assistant memory v3 validate --json | jq '.orphanPageCount'`,
        )
        .action(async (opts: { json?: boolean }) => {
          const result = await cliIpcCall<MemoryV3ValidateResult>(
            "memory_v3_validate",
            { body: {} },
          );

          // Treat an `ok` response without a payload like a failure instead of
          // asserting the payload exists and crashing in the renderer.
          if (!result.ok || result.result == null) {
            log.error(result.error ?? "Failed to validate memory v3 tree");
            process.exitCode = 1;
            return;
          }

          const report = result.result;

          if (opts.json === true) {
            log.info(JSON.stringify(report, null, 2));
          } else {
            log.info(renderValidationReport(report));
          }

          // Defects flip the exit code in both output modes.
          if (reportHasDefects(report)) {
            process.exitCode = 1;
          }
        });

      // ── tree ──────────────────────────────────────────────────────────────

      v3.command("tree")
        .description(
          "Print the v3 tree as an indented outline from the root (read-only)",
        )
        .option("--json", "Emit raw JSON instead of a formatted tree")
        .addHelpText(
          "after",
          `
Descends the v3 tree depth-first from its root node, printing one line per
node:/page: ref with indentation by depth. A node reached more than once
(shared DAG sub-node or a cycle back-edge) is printed once with a re-entry
marker rather than re-expanded, so output is finite. Nodes that exist on disk
but are unreachable from the root are listed separately.

Read-only — mutates nothing.

Examples:
 $ assistant memory v3 tree
 $ assistant memory v3 tree --json | jq '.root'`,
        )
        .action(async (opts: { json?: boolean }) => {
          const result = await cliIpcCall<MemoryV3TreeResult>(
            "memory_v3_tree",
            {
              body: {},
            },
          );

          if (!result.ok || result.result == null) {
            log.error(result.error ?? "Failed to read memory v3 tree");
            process.exitCode = 1;
            return;
          }

          const view = result.result;

          if (opts.json === true) {
            log.info(JSON.stringify(view, null, 2));
            return;
          }

          log.info(renderTree(view));
        });
    },
  });
}
-90,6 +90,7 @@ import { ROUTES as LOG_EXPORT_ROUTES } from "./log-export-routes.js"; import { ROUTES as MCP_AUTH_ROUTES } from "./mcp-auth-routes.js"; import { ROUTES as MEMORY_ITEM_ROUTES } from "./memory-item-routes.js"; import { ROUTES as MEMORY_V2_ROUTES } from "./memory-v2-routes.js"; +import { ROUTES as MEMORY_V3_ROUTES } from "./memory-v3-routes.js"; import { ROUTES as MIGRATION_ROLLBACK_ROUTES } from "./migration-rollback-routes.js"; import { ROUTES as MIGRATION_ROUTES } from "./migration-routes.js"; import { ROUTES as NOTIFICATION_ROUTES } from "./notification-routes.js"; @@ -216,6 +217,7 @@ export const ROUTES: RouteDefinition[] = [ ...LLM_CALL_SITES_ROUTES, ...MEMORY_ITEM_ROUTES, ...MEMORY_V2_ROUTES, + ...MEMORY_V3_ROUTES, ...MIGRATION_ROLLBACK_ROUTES, ...MIGRATION_ROUTES, ...NOTIFICATION_ROUTES, diff --git a/assistant/src/runtime/routes/memory-v3-routes.ts b/assistant/src/runtime/routes/memory-v3-routes.ts new file mode 100644 index 00000000000..f6e8b2cef06 --- /dev/null +++ b/assistant/src/runtime/routes/memory-v3-routes.ts @@ -0,0 +1,117 @@ +/** + * Memory v3 route definitions — read-only diagnostics over the hand-authored + * v3 tree DAG. + * + * Two operations, both side-effect-free (no LLM, no writes): + * + * - `memory_v3_validate` — returns the {@link TreeValidationReport} from + * `validateTree(workspaceDir)` (orphan pages, cycles, dangling refs, + * stale-index, unknown edge targets). + * - `memory_v3_tree` — returns a JSON-serializable view of + * `getTreeIndex(workspaceDir)`: the root id, every node id, and each + * node's ordered child refs. `TreeIndex` is Map-based, so the handler + * flattens it into arrays/objects the wire protocol can carry. + * + * The v3 tree is authored by the v2 → v3 data-migration; these routes are the + * on-demand inspection surface operators run while that migration is in flight. + * They are NOT invoked on any turn. 
+ */ + +import { z } from "zod"; + +import { getTreeIndex } from "../../memory/v3/tree-index.js"; +import type { TreeValidationReport } from "../../memory/v3/validate.js"; +import { validateTree } from "../../memory/v3/validate.js"; +import { getWorkspaceDir } from "../../util/platform.js"; +import type { RouteDefinition, RouteHandlerArgs } from "./types.js"; + +// ── Validate ──────────────────────────────────────────────────────────── + +const MemoryV3ValidateParams = z.object({}).strict(); + +/** + * Wire shape for `memory_v3_validate`. Identical to the daemon-internal + * {@link TreeValidationReport} — every field is already serializable, so the + * route forwards it verbatim. Re-exported as its own type so the CLI can + * import it without reaching into the validator module. + */ +export type MemoryV3ValidateResult = TreeValidationReport; + +async function handleValidate({ + body = {}, +}: RouteHandlerArgs): Promise { + // Read-only structural validation of the v3 tree. Like the v2 validate + // route, it is intentionally ungated: operators dry-run it while the + // v2 → v3 migration is mid-flight, well before any v3 flag flips. + MemoryV3ValidateParams.parse(body); + return validateTree(getWorkspaceDir()); +} + +// ── Tree ──────────────────────────────────────────────────────────────── + +const MemoryV3TreeParams = z.object({}).strict(); + +/** One node in the serialized tree view: its id and ordered child refs. */ +export interface MemoryV3TreeNodeView { + id: string; + children: Array<{ kind: "node" | "page"; ref: string }>; +} + +/** + * JSON-serializable projection of the {@link TreeIndex}. `TreeIndex` keys its + * adjacency by `Map`, which doesn't survive JSON, so the handler flattens it: + * `root` is the entry-point node id and `nodes` is every node with its ordered + * child refs. The CLI renderer walks `nodes`/`root` to print an indented tree, + * marking shared-DAG re-entries. 
+ */ +export interface MemoryV3TreeResult { + root: string; + nodes: MemoryV3TreeNodeView[]; +} + +async function handleTree({ + body = {}, +}: RouteHandlerArgs): Promise { + MemoryV3TreeParams.parse(body); + + const tree = await getTreeIndex(getWorkspaceDir()); + + const nodes: MemoryV3TreeNodeView[] = [...tree.nodes.keys()] + .sort() + .map((id) => ({ + id, + children: (tree.childrenByNode.get(id) ?? []).map((child) => ({ + kind: child.kind, + ref: child.ref, + })), + })); + + return { root: tree.root, nodes }; +} + +// ── Route definitions ─────────────────────────────────────────────────── + +export const ROUTES: RouteDefinition[] = [ + { + operationId: "memory_v3_validate", + method: "POST", + endpoint: "memory/v3/validate", + handler: handleValidate, + summary: "Validate the memory v3 tree structure (read-only)", + description: + "Read-only structural validation of the hand-authored v3 tree DAG. Reports dangling child refs, orphan pages, cycles, stale compositional indexes, and unknown edge targets. Writes nothing and runs no LLM — operators dry-run it while the v2 → v3 migration is in flight.", + tags: ["memory"], + requestBody: MemoryV3ValidateParams, + }, + { + operationId: "memory_v3_tree", + method: "POST", + endpoint: "memory/v3/tree", + handler: handleTree, + summary: "Return a serializable view of the memory v3 tree DAG (read-only)", + description: + "Returns the v3 tree root id plus every node and its ordered child refs (page:/node:) as a JSON-serializable projection of the in-memory TreeIndex. 
Read-only; the CLI uses it to print an indented tree with shared-DAG re-entries marked.", + tags: ["memory"], + requestBody: MemoryV3TreeParams, + }, +]; diff --git a/gateway/src/risk/command-registry/commands/assistant.ts b/gateway/src/risk/command-registry/commands/assistant.ts index ad7dc18497c..090ed1d8220 100644 --- a/gateway/src/risk/command-registry/commands/assistant.ts +++ b/gateway/src/risk/command-registry/commands/assistant.ts @@ -157,6 +157,9 @@ const ASSISTANT_SUPPORTED_COMMAND_PATHS = [ "memory v2 reembed-skills", "memory v2 activation", "memory v2 validate", + "memory v3", + "memory v3 validate", + "memory v3 tree", "notifications", "notifications send", "notifications list", @@ -482,6 +485,16 @@ const riskOverrides: AssistantRiskOverride[] = [ risk: "low", reason: "Read-only diagnostic walk over concept pages and edges", }, + { + path: "memory v3 validate", + risk: "low", + reason: "Read-only structural validation of the v3 tree DAG", + }, + { + path: "memory v3 tree", + risk: "low", + reason: "Read-only print of the v3 tree DAG structure", + }, { path: "notifications send", risk: "low" }, { path: "oauth request", From da0fcecf21138b266f1699055efa88b4a022db37 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:09:23 -0400 Subject: [PATCH 14/21] feat(memory-v3): retrieval loop (scouts->filter->tree->edges->gate) (#31984) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/loop.test.ts | 535 ++++++++++++++++++ assistant/src/memory/v3/loop.ts | 258 +++++++++ 2 files changed, 793 insertions(+) create mode 100644 assistant/src/memory/v3/__tests__/loop.test.ts create mode 100644 assistant/src/memory/v3/loop.ts diff --git a/assistant/src/memory/v3/__tests__/loop.test.ts b/assistant/src/memory/v3/__tests__/loop.test.ts new file mode 100644 index 00000000000..c16ea8fb591 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/loop.test.ts @@ -0,0 +1,535 @@ +/** + * Tests for `assistant/src/memory/v3/loop.ts`. 
+ * + * The loop is the composition layer over the v3 lanes. Every lane module + * (`scouts`, `filter`, `tree-walk`, `edges`, `gate`) plus the two index + * builders (`tree-index`, `page-index`) the loop calls are stubbed via + * `mock.module`, so the suite makes no real LLM, Qdrant, embedding, or + * filesystem calls. Each mock factory closes over a mutable `lane` state object + * that every test rewires before calling `runRetrievalLoop`; a `laneCalls` + * recorder captures the arguments the loop passed each lane so the composition + * wiring (seeding, query threading, toggles) is assertable. + * + * Coverage: + * - single-pass ready: scouts → filter → tree → edges → gate composes into a + * valid RetrievalOutput with per-lane source tags and one DescentPass. + * - multi-pass: gate "more" then "ready" runs two passes and threads the + * gate's questions into the second pass's NOW text. + * - passCap: a gate that always says "more" force-exits at passCap. + * - lane toggles: `lanes.tree=false` / `lanes.edges=false` suppress those + * lanes' candidates and trace fields. + * - trace: one DescentPass per pass. + * - cost: `ms` accumulates and is non-negative across passes. + * - failureReason: a filter failure is surfaced on the output. + */ + +import { beforeEach, describe, expect, mock, test } from "bun:test"; + +import type { DrizzleDb } from "../../db-connection.js"; +import type { + RetrievalInput, + RetrievalOutput, +} from "../../v2/harness/retriever.js"; +import type { GateDecision, ScoutResult } from "../../v2/harness/trace.js"; + +// --------------------------------------------------------------------------- +// Lane stubs — installed before importing the module under test. 
+// --------------------------------------------------------------------------- + +interface RunScoutsResult { + scouts: ScoutResult[]; + sticky: Set; + bypass: Set; +} + +interface FilterResult { + kept: string[]; + trace: { judged: string[]; dropped: string[] }; + failureReason?: string; +} + +interface WalkResult { + pages: Set; + levels: Array<{ + node: string; + considered: string[]; + descended: string[]; + skipped: string[]; + reasoning: string; + }>; +} + +interface ExpandResult { + pulled: Set; + expansions: Array<{ from: string; pulled: string[] }>; +} + +interface GateResult { + decision: GateDecision; + selectedSlugs: string[]; +} + +/** + * Per-pass-programmable lane state. The mock factories close over these live + * refs; each test rewires them before calling `runRetrievalLoop`. List-valued + * fields are consumed pass-by-pass (one entry per pass) so a multi-pass test + * can script a different verdict per pass. + */ +const lane = { + scouts: [] as RunScoutsResult[], + filter: [] as FilterResult[], + walk: [] as WalkResult[], + edges: [] as ExpandResult[], + gate: [] as GateResult[], +}; + +/** Records the args the loop passed each lane, one entry per call. */ +const laneCalls = { + scouts: [] as Array<{ nowText: string }>, + filter: [] as Array<{ nowText: string; dense: ScoutResult }>, + walk: [] as Array<{ + nowText: string; + seeds: string[]; + scouts: ScoutResult[]; + }>, + edges: [] as Array<{ seeds: string[] }>, + gate: [] as Array<{ + nowText: string; + passNumber: number; + candidates: string[]; + sticky: string[]; + }>, +}; + +/** Pop the next scripted value for a pass, reusing the last entry if exhausted. 
*/ +function nextOf(list: T[], index: number): T { + return list[Math.min(index, list.length - 1)]; +} + +let scoutCallCount = 0; +let walkCallCount = 0; +let edgeCallCount = 0; +let gateCallCount = 0; + +mock.module("../scouts.js", () => ({ + runScouts: async (input: RetrievalInput): Promise => { + laneCalls.scouts.push({ nowText: input.nowText }); + return nextOf(lane.scouts, scoutCallCount++); + }, +})); + +mock.module("../filter.js", () => ({ + filterDenseHits: async (args: { + input: RetrievalInput; + dense: ScoutResult; + }): Promise => { + laneCalls.filter.push({ nowText: args.input.nowText, dense: args.dense }); + // Filter calls share the scout pass index (one filter call per dense pass). + return nextOf(lane.filter, laneCalls.filter.length - 1); + }, +})); + +mock.module("../tree-walk.js", () => ({ + runTreeWalk: async (args: { + input: RetrievalInput; + seeds: string[]; + scouts: ScoutResult[]; + }): Promise => { + laneCalls.walk.push({ + nowText: args.input.nowText, + seeds: args.seeds, + scouts: args.scouts, + }); + return nextOf(lane.walk, walkCallCount++); + }, +})); + +mock.module("../edges.js", () => ({ + expandEdges: async (args: { + seeds: Iterable; + }): Promise => { + laneCalls.edges.push({ seeds: [...args.seeds] }); + return nextOf(lane.edges, edgeCallCount++); + }, +})); + +mock.module("../gate.js", () => ({ + runGate: async (args: { + input: RetrievalInput; + candidates: Set; + sticky: Set; + passNumber: number; + }): Promise => { + laneCalls.gate.push({ + nowText: args.input.nowText, + passNumber: args.passNumber, + candidates: [...args.candidates], + sticky: [...args.sticky], + }); + return nextOf(lane.gate, gateCallCount++); + }, +})); + +// The loop calls these index builders only to hand opaque handles to the +// (stubbed) tree walk. The stubs return harmless empty values. 
+mock.module("../tree-index.js", () => ({ + getTreeIndex: async () => ({ + nodes: new Map(), + childrenByNode: new Map(), + parentsByNode: new Map(), + pageParents: new Map(), + root: "_root", + }), +})); + +mock.module("../../v2/page-index.js", () => ({ + getPageIndex: async () => ({ + entries: [], + bySlug: new Map(), + byId: new Map(), + rendered: "", + }), +})); + +const { runRetrievalLoop } = await import("../loop.js"); + +// --------------------------------------------------------------------------- +// Fixtures. +// --------------------------------------------------------------------------- + +/** Opaque DB sentinel — the stubbed scout lane never dereferences it. */ +const db = {} as DrizzleDb; + +interface LaneConfig { + hot?: boolean; + sparse?: boolean; + dense?: boolean; + tree?: boolean; + edges?: boolean; +} + +/** + * Minimal `RetrievalInput`. Only `nowText` and `config.memory.v3` (passCap + + * lanes) are read by the loop; the lanes are stubbed so the rest is inert. + */ +function makeInput(opts?: { + nowText?: string; + passCap?: number; + lanes?: LaneConfig; +}): RetrievalInput { + const lanes = { + hot: true, + sparse: true, + dense: true, + tree: true, + edges: true, + ...opts?.lanes, + }; + return { + workspaceDir: "/tmp/does-not-matter", + recentTurnPairs: [], + nowText: opts?.nowText ?? "NOW", + priorEverInjected: [], + config: { + memory: { v3: { passCap: opts?.passCap ?? 
3, lanes } }, + } as unknown as RetrievalInput["config"], + }; +} + +function scout(lane: ScoutResult["lane"], slugs: string[]): ScoutResult { + return { lane, slugs }; +} + +function readyGate(selected: string[]): GateResult { + return { decision: { decision: "ready" }, selectedSlugs: selected }; +} + +function moreGate(selected: string[], questions: string[]): GateResult { + return { decision: { decision: "more", questions }, selectedSlugs: selected }; +} + +function reset(): void { + lane.scouts = []; + lane.filter = []; + lane.walk = []; + lane.edges = []; + lane.gate = []; + laneCalls.scouts = []; + laneCalls.filter = []; + laneCalls.walk = []; + laneCalls.edges = []; + laneCalls.gate = []; + scoutCallCount = 0; + walkCallCount = 0; + edgeCallCount = 0; + gateCallCount = 0; +} + +beforeEach(reset); + +// --------------------------------------------------------------------------- +// Tests. +// --------------------------------------------------------------------------- + +describe("runRetrievalLoop — single pass", () => { + test("ready path composes a valid RetrievalOutput with per-lane source tags", async () => { + lane.scouts = [ + { + scouts: [ + scout("hot", ["a"]), + scout("sparse", ["b"]), + scout("dense", ["c", "d"]), + ], + sticky: new Set(["a", "b"]), + bypass: new Set(["b"]), + }, + ]; + lane.filter = [{ kept: ["c"], trace: { judged: ["d"], dropped: ["d"] } }]; + lane.walk = [ + { + pages: new Set(["t1"]), + levels: [ + { + node: "_root", + considered: ["sub"], + descended: ["sub"], + skipped: [], + reasoning: "r", + }, + ], + }, + ]; + lane.edges = [ + { pulled: new Set(["e1"]), expansions: [{ from: "a", pulled: ["e1"] }] }, + ]; + lane.gate = [readyGate(["a", "b", "c", "t1", "e1"])]; + + const out: RetrievalOutput = await runRetrievalLoop(makeInput(), { db }); + + expect(out.selectedSlugs).toEqual(["a", "b", "c", "t1", "e1"]); + // sourceBySlug tags each slug with the lane that first surfaced it. 
+ expect(out.sourceBySlug.get("a")).toBe("hot"); + expect(out.sourceBySlug.get("b")).toBe("sparse"); + expect(out.sourceBySlug.get("c")).toBe("dense"); + expect(out.sourceBySlug.get("t1")).toBe("tree"); + expect(out.sourceBySlug.get("e1")).toBe("edge"); + // Dropped dense candidate `d` was filtered out — never tagged. + expect(out.sourceBySlug.has("d")).toBe(false); + + // Exactly one pass, with all four lane sub-traces present. + expect(out.trace?.passes).toHaveLength(1); + const pass = out.trace!.passes[0]; + expect(pass.passNumber).toBe(1); + expect(pass.scouts).toHaveLength(3); + expect(pass.treeLevels).toHaveLength(1); + expect(pass.edgeExpansions).toHaveLength(1); + expect(pass.gate).toEqual({ decision: "ready" }); + + expect(out.failureReason).toBeNull(); + expect(out.cost?.ms).toBeGreaterThanOrEqual(0); + }); + + test("dense lane is filtered before seeding tree + gate", async () => { + lane.scouts = [ + { + scouts: [scout("dense", ["keep", "drop"])], + sticky: new Set(), + bypass: new Set(), + }, + ]; + lane.filter = [ + { + kept: ["keep"], + trace: { judged: ["keep", "drop"], dropped: ["drop"] }, + }, + ]; + lane.walk = [{ pages: new Set(), levels: [] }]; + lane.edges = [{ pulled: new Set(), expansions: [] }]; + lane.gate = [readyGate(["keep"])]; + + const out = await runRetrievalLoop(makeInput(), { db }); + + // The filter saw the full dense lane. + expect(laneCalls.filter[0].dense.slugs).toEqual(["keep", "drop"]); + // Only the kept dense slug seeds the tree walk; `drop` never reaches it. + expect(laneCalls.walk[0].seeds).toEqual(["keep"]); + // Gate's candidate set excludes the dropped dense slug. 
+ expect(laneCalls.gate[0].candidates).toEqual(["keep"]); + expect(out.selectedSlugs).toEqual(["keep"]); + }); +}); + +describe("runRetrievalLoop — multi pass", () => { + test("gate 'more' then 'ready' runs two passes and threads questions into NOW", async () => { + lane.scouts = [ + { + scouts: [scout("dense", ["p1"])], + sticky: new Set(), + bypass: new Set(), + }, + { + scouts: [scout("dense", ["p2"])], + sticky: new Set(), + bypass: new Set(), + }, + ]; + lane.filter = [ + { kept: ["p1"], trace: { judged: ["p1"], dropped: [] } }, + { kept: ["p2"], trace: { judged: ["p2"], dropped: [] } }, + ]; + lane.walk = [ + { pages: new Set(), levels: [] }, + { pages: new Set(), levels: [] }, + ]; + lane.edges = [ + { pulled: new Set(), expansions: [] }, + { pulled: new Set(), expansions: [] }, + ]; + lane.gate = [moreGate(["p1"], ["what about X?"]), readyGate(["p1", "p2"])]; + + const out = await runRetrievalLoop( + makeInput({ nowText: "BASE", passCap: 3 }), + { db }, + ); + + // Two passes ran. + expect(out.trace?.passes).toHaveLength(2); + expect(out.trace!.passes[0].gate).toEqual({ + decision: "more", + questions: ["what about X?"], + }); + expect(out.trace!.passes[1].gate).toEqual({ decision: "ready" }); + + // Pass 1 used the base NOW verbatim; pass 2's NOW carried the gate's + // generated follow-up question — the standing context is not rewritten. + expect(laneCalls.scouts[0].nowText).toBe("BASE"); + expect(laneCalls.scouts[1].nowText).toContain("BASE"); + expect(laneCalls.scouts[1].nowText).toContain("what about X?"); + + // Final selection is the last (ready) pass's selection. 
+ expect(out.selectedSlugs).toEqual(["p1", "p2"]); + }); + + test("passCap force-exits with the current selection when the gate keeps asking for more", async () => { + lane.scouts = [ + { scouts: [scout("dense", ["p"])], sticky: new Set(), bypass: new Set() }, + ]; + lane.filter = [{ kept: ["p"], trace: { judged: ["p"], dropped: [] } }]; + lane.walk = [{ pages: new Set(), levels: [] }]; + lane.edges = [{ pulled: new Set(), expansions: [] }]; + // Gate always says "more"; reused across every pass via nextOf. + lane.gate = [moreGate(["p"], ["again?"])]; + + const out = await runRetrievalLoop(makeInput({ passCap: 2 }), { db }); + + // Capped at passCap passes despite the gate never saying ready. + expect(out.trace?.passes).toHaveLength(2); + expect(gateCallCount).toBe(2); + expect(out.selectedSlugs).toEqual(["p"]); + }); +}); + +describe("runRetrievalLoop — lane toggles", () => { + test("tree + edge lanes off removes their candidates and trace fields", async () => { + lane.scouts = [ + { scouts: [scout("dense", ["s"])], sticky: new Set(), bypass: new Set() }, + ]; + lane.filter = [{ kept: ["s"], trace: { judged: ["s"], dropped: [] } }]; + // These would contribute t1/e1 if their lanes ran — they must not. + lane.walk = [ + { + pages: new Set(["t1"]), + levels: [ + { + node: "_root", + considered: [], + descended: [], + skipped: [], + reasoning: "", + }, + ], + }, + ]; + lane.edges = [ + { pulled: new Set(["e1"]), expansions: [{ from: "s", pulled: ["e1"] }] }, + ]; + lane.gate = [readyGate(["s"])]; + + const out = await runRetrievalLoop( + makeInput({ lanes: { tree: false, edges: false } }), + { db }, + ); + + // Disabled lanes were never called. + expect(laneCalls.walk).toHaveLength(0); + expect(laneCalls.edges).toHaveLength(0); + // Their would-be candidates never entered the gate or the selection. 
+ expect(laneCalls.gate[0].candidates).toEqual(["s"]); + expect(out.sourceBySlug.has("t1")).toBe(false); + expect(out.sourceBySlug.has("e1")).toBe(false); + // Trace omits the disabled lanes' fields. + expect(out.trace!.passes[0].treeLevels).toBeUndefined(); + expect(out.trace!.passes[0].edgeExpansions).toBeUndefined(); + }); + + test("edge lane on by default expands over the accumulated candidate set", async () => { + lane.scouts = [ + { + scouts: [scout("hot", ["h"]), scout("dense", ["d"])], + sticky: new Set(["h"]), + bypass: new Set(), + }, + ]; + lane.filter = [{ kept: ["d"], trace: { judged: ["d"], dropped: [] } }]; + lane.walk = [{ pages: new Set(["t"]), levels: [] }]; + lane.edges = [ + { pulled: new Set(["x"]), expansions: [{ from: "d", pulled: ["x"] }] }, + ]; + lane.gate = [readyGate(["h", "d", "t", "x"])]; + + await runRetrievalLoop(makeInput(), { db }); + + // Edge expansion seeds over every accumulated confident slug (hot, dense, + // tree) — not just the scouts. + expect(laneCalls.edges[0].seeds).toEqual( + expect.arrayContaining(["h", "d", "t"]), + ); + }); +}); + +describe("runRetrievalLoop — failure + cost", () => { + test("surfaces a filter failureReason on the output", async () => { + lane.scouts = [ + { scouts: [scout("dense", ["d"])], sticky: new Set(), bypass: new Set() }, + ]; + lane.filter = [ + { + kept: ["d"], + trace: { judged: ["d"], dropped: [] }, + failureReason: "no_provider", + }, + ]; + lane.walk = [{ pages: new Set(), levels: [] }]; + lane.edges = [{ pulled: new Set(), expansions: [] }]; + lane.gate = [readyGate(["d"])]; + + const out = await runRetrievalLoop(makeInput(), { db }); + + expect(out.failureReason).toBe("no_provider"); + }); + + test("cost.ms accumulates across passes", async () => { + lane.scouts = [ + { scouts: [scout("dense", ["p"])], sticky: new Set(), bypass: new Set() }, + ]; + lane.filter = [{ kept: ["p"], trace: { judged: ["p"], dropped: [] } }]; + lane.walk = [{ pages: new Set(), levels: [] }]; + lane.edges = 
[{ pulled: new Set(), expansions: [] }]; + lane.gate = [moreGate(["p"], ["q"])]; + + const out = await runRetrievalLoop(makeInput({ passCap: 3 }), { db }); + + expect(out.trace?.passes).toHaveLength(3); + expect(out.cost?.ms).toBeGreaterThanOrEqual(0); + }); +}); diff --git a/assistant/src/memory/v3/loop.ts b/assistant/src/memory/v3/loop.ts new file mode 100644 index 00000000000..0763ecf8bf6 --- /dev/null +++ b/assistant/src/memory/v3/loop.ts @@ -0,0 +1,258 @@ +/** + * Memory v3 — retrieval-loop orchestration. + * + * The composition layer that wires the v3 lanes into a single bounded-descent + * retrieval loop. Each pass runs the lanes in a fixed order: + * + * 1. {@link runScouts} — always-on hot / sparse / dense fanout. Surfaces + * candidate slugs plus the `sticky` (keep-in-the- + * running) and `bypass` (skip-the-tree) sets. + * 2. {@link filterDenseHits} — one cheap LLM call over the *dense* lane only. + * Hot + near-exact-sparse hits arrive via + * sticky/bypass and are never judged; the dense + * near-neighbors are filtered down to meaningful + * associations. + * 3. {@link runTreeWalk} — scout-seeded hierarchical descent. Seeded by the + * surviving scout slugs (their tree parents) so + * descent starts near where the lanes landed but + * still fans out from the root. + * 4. {@link expandEdges} — provider-free 1–2 hop curated-graph expansion + * over every accumulated confident seed. + * 5. {@link runGate} — one capable LLM call over the unioned candidate + * set. Returns `ready` (finalize) or `more` + * (its generated follow-up questions seed the next + * pass's query). + * + * Pass control. The loop runs at most `config.memory.v3.passCap` passes. When + * the gate says `more` and another pass is allowed, the gate's questions become + * the next pass's query (folded into `nowText`); otherwise the loop force-exits + * with the current selection. 
The standing-context files conveyed via + * `input.nowText` are consumed as situational context for the scouts, descent, + * and gate — the loop selects concept pages to layer on top and NEVER rewrites + * or re-injects the standing-context files. + * + * Lane toggles. `config.memory.v3.lanes.tree` and `.edges` gate the tree-walk + * and edge-expansion lanes here; the hot/sparse/dense toggles are honored inside + * {@link runScouts}. Toggling a lane off removes its contribution from the + * candidate set so the offline harness can measure each lane's marginal recall. + * + * Cross-pass accumulation. A `visited` candidate accumulator deduplicates slugs + * across passes by canonical slug, tagging each with the first lane that + * surfaced it (`sourceBySlug`). The full {@link DescentTrace} carries one + * {@link DescentPass} per pass (scouts / treeLevels / edgeExpansions / gate), + * and {@link RetrievalCost} (wall-clock `ms`, the one dimension observable at + * this composition layer) accumulates across every pass. + */ + +import type { DrizzleDb } from "../db-connection.js"; +import type { + RetrievalCost, + RetrievalInput, + RetrievalOutput, +} from "../v2/harness/retriever.js"; +import type { + DescentPass, + DescentTrace, + GateDecision, +} from "../v2/harness/trace.js"; +import { getPageIndex } from "../v2/page-index.js"; +import { expandEdges } from "./edges.js"; +import { filterDenseHits } from "./filter.js"; +import { runGate } from "./gate.js"; +import { runScouts } from "./scouts.js"; +import { getTreeIndex } from "./tree-index.js"; +import { runTreeWalk } from "./tree-walk.js"; + +/** Lane label used to tag each selected slug's provenance in `sourceBySlug`. */ +type LaneSource = "hot" | "sparse" | "dense" | "tree" | "edge"; + +/** Injected dependencies — the SQLite handle the scout hot lane reads. */ +export interface RetrievalLoopDeps { + db: DrizzleDb; +} + +/** + * Run the full v3 retrieval loop for one turn. 
+ * + * Composes the scout / filter / tree / edge / gate lanes over up to + * `config.memory.v3.passCap` passes, returning the P1 {@link RetrievalOutput}: + * the final selection, per-lane provenance, the complete multi-pass + * {@link DescentTrace}, and accumulated {@link RetrievalCost}. `failureReason` + * is set when the dense filter had to fail open on any pass (the loop still + * returns a usable selection — the filter degradation is recorded, not fatal). + */ +export async function runRetrievalLoop( + input: RetrievalInput, + deps: RetrievalLoopDeps, +): Promise { + const v3 = input.config.memory.v3; + const passCap = Math.max(1, v3.passCap); + const lanes = v3.lanes; + + // Cross-pass accumulators. + const sourceBySlug = new Map(); + const sticky = new Set(); + const passes: DescentPass[] = []; + // `ms` is the one cost dimension observable at this composition layer — the + // lanes consume their own LLM usage internally and don't surface tokens. + const cost: RetrievalCost & { ms: number } = { ms: 0 }; + let failureReason: string | null = null; + + // The query feeding each pass. Pass 1 uses the turn's NOW context verbatim; + // a gate `more` verdict appends its generated follow-up questions for the + // next pass. The standing-context files are never rewritten — questions are + // layered on as additional situational context only. + let passNowText = input.nowText; + + // Final selection — replaced by the gate each pass; the last pass's selection + // is what the loop returns (capped at passCap on a forced exit). + let selectedSlugs: string[] = []; + + for (let passNumber = 1; passNumber <= passCap; passNumber++) { + const passStart = Date.now(); + const passInput: RetrievalInput = { ...input, nowText: passNowText }; + + // 1. Scouts — always-on hot / sparse / dense fanout. 
+ const scoutResult = await runScouts(passInput, { db: deps.db }); + for (const slug of scoutResult.sticky) sticky.add(slug); + + // Tag hot + sparse scout hits with their lane (first lane wins). Dense + // slugs are tagged only if they survive the filter below — a dropped dense + // near-neighbor never enters the candidate set, so it earns no source tag. + for (const scout of scoutResult.scouts) { + if (scout.lane === "dense") continue; + for (const slug of scout.slugs) tagSlug(sourceBySlug, slug, scout.lane); + } + + // 2. Dense filter — judges only the dense lane (hot/sparse bypass it). The + // surviving dense slugs replace the raw dense candidates in the running set. + const denseScout = scoutResult.scouts.find((s) => s.lane === "dense"); + const candidates = new Set(); + + // Hot + sparse lane hits enter the candidate set directly. + for (const scout of scoutResult.scouts) { + if (scout.lane === "dense") continue; + for (const slug of scout.slugs) candidates.add(slug); + } + + if (denseScout) { + const filtered = await filterDenseHits({ + input: passInput, + dense: denseScout, + sticky: scoutResult.sticky, + bypass: scoutResult.bypass, + }); + for (const slug of filtered.kept) { + candidates.add(slug); + tagSlug(sourceBySlug, slug, "dense"); + } + if (filtered.failureReason !== undefined) { + failureReason = filtered.failureReason; + } + } + + // The surviving scout slugs (kept dense + hot + sparse) seed the tree walk. + const survivingSeeds = [...candidates]; + + // 3. Tree walk — scout-seeded hierarchical descent. Gated by `lanes.tree`. 
+ let treeLevels: DescentPass["treeLevels"]; + if (lanes.tree) { + const [tree, pages] = await Promise.all([ + getTreeIndex(passInput.workspaceDir), + getPageIndex(passInput.workspaceDir), + ]); + const walk = await runTreeWalk({ + input: passInput, + tree, + pages, + scouts: scoutResult.scouts, + seeds: survivingSeeds, + }); + treeLevels = walk.levels; + for (const slug of walk.pages) { + candidates.add(slug); + tagSlug(sourceBySlug, slug, "tree"); + } + } + + // 4. Edge expansion — 1–2 hop curated-graph pull over every accumulated + // confident seed. Gated by `lanes.edges`. + let edgeExpansions: DescentPass["edgeExpansions"]; + if (lanes.edges) { + const expansion = await expandEdges({ + workspaceDir: passInput.workspaceDir, + seeds: [...candidates], + }); + edgeExpansions = expansion.expansions; + for (const slug of expansion.pulled) { + candidates.add(slug); + tagSlug(sourceBySlug, slug, "edge"); + } + } + + // 5. Gate — one capable LLM call over the unioned candidate set. + const gateResult = await runGate({ + input: passInput, + candidates, + sticky, + passNumber, + }); + selectedSlugs = gateResult.selectedSlugs; + + // Record this pass's trace. + const pass: DescentPass = { + passNumber, + scouts: scoutResult.scouts, + ...(treeLevels !== undefined ? { treeLevels } : {}), + ...(edgeExpansions !== undefined ? { edgeExpansions } : {}), + gate: gateResult.decision, + }; + passes.push(pass); + + cost.ms += Date.now() - passStart; + + // Pass control. A `more` verdict with another pass available feeds the + // gate's generated questions into the next pass's query; otherwise (ready, + // or passCap reached) the loop exits with the current selection. 
+ if (gateResult.decision.decision !== "more") break; + if (passNumber >= passCap) break; + passNowText = nextPassNowText(input.nowText, gateResult.decision); + } + + const trace: DescentTrace = { passes }; + return { + selectedSlugs, + sourceBySlug, + trace, + cost, + failureReason, + }; +} + +/** + * Tag `slug`'s provenance with `lane`, keeping the first lane that surfaced it. + * The pass order (scouts → tree → edge) gives a deterministic precedence: a + * slug first seen by a scout lane keeps that label even when the tree or edge + * lane re-surfaces it. + */ +function tagSlug( + sourceBySlug: Map, + slug: string, + lane: LaneSource, +): void { + if (!sourceBySlug.has(slug)) sourceBySlug.set(slug, lane); +} + +/** + * Build the next pass's NOW text from the original standing context plus the + * gate's generated follow-up questions. The standing-context files are never + * rewritten — the questions are appended as an additional situational-context + * block the scouts/descent/gate read on top of NOW. With no questions the + * original NOW is reused verbatim. + */ +function nextPassNowText(baseNowText: string, decision: GateDecision): string { + const questions = decision.questions ?? 
[]; + if (questions.length === 0) return baseNowText; + const block = `\n${questions.join("\n")}\n`; + return `${baseNowText}\n\n${block}`; +} From ec915b02c68c88a2d1469afa96f6c0ce532b9ac8 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:13:49 -0400 Subject: [PATCH 15/21] feat(memory-v3): consolidation drains shared buffer into tree + maintains standing-context files (#31985) Co-authored-by: Vellum Assistant --- assistant/src/memory/jobs-worker.ts | 64 ++- .../v3/__tests__/consolidation-job.test.ts | 468 ++++++++++++++++++ assistant/src/memory/v3/consolidation-job.ts | 323 ++++++++++++ assistant/src/memory/v3/maintenance.ts | 144 ++++++ .../src/memory/v3/prompts/consolidation.ts | 458 +++++++++++++++++ 5 files changed, 1439 insertions(+), 18 deletions(-) create mode 100644 assistant/src/memory/v3/__tests__/consolidation-job.test.ts create mode 100644 assistant/src/memory/v3/consolidation-job.ts create mode 100644 assistant/src/memory/v3/maintenance.ts create mode 100644 assistant/src/memory/v3/prompts/consolidation.ts diff --git a/assistant/src/memory/jobs-worker.ts b/assistant/src/memory/jobs-worker.ts index ca59501f189..7b9da8313f1 100644 --- a/assistant/src/memory/jobs-worker.ts +++ b/assistant/src/memory/jobs-worker.ts @@ -83,6 +83,8 @@ import { memoryV2ConsolidateJob, } from "./v2/consolidation-job.js"; import { memoryV2SweepJob } from "./v2/sweep-job.js"; +import { memoryV3ConsolidateJob } from "./v3/consolidation-job.js"; +import { memoryV3IndexMaintenanceJob } from "./v3/maintenance.js"; const log = getLogger("memory-jobs-worker"); @@ -603,6 +605,12 @@ async function processJob( case "memory_v2_consolidate": await memoryV2ConsolidateJob(job, config); return; + case "memory_v3_consolidate": + await memoryV3ConsolidateJob(job, config); + return; + case "memory_v3_index_maintenance": + await memoryV3IndexMaintenanceJob(job); + return; case "memory_v2_migrate": await memoryV2MigrateJob(job, config); return; @@ -681,17 +689,28 @@ export const 
GRAPH_MAINTENANCE_CHECKPOINTS = { patternScan: "graph_maintenance:pattern_scan:last_run", narrative: "graph_maintenance:narrative:last_run", memoryV2Consolidate: "memory_v2_consolidate_last_run", + memoryV3Consolidate: "memory_v3_consolidate_last_run", } as const; /** * Enqueue periodic graph maintenance jobs. * * Mutually exclusive between v1 and v2: - * - v2 active (`memory.v2.enabled` on) → only `memory_v2_consolidate` is - * scheduled. + * - v2 active (`memory.v2.enabled` on) → only one buffer-drainer is + * scheduled (see below). * - v2 inactive → the four v1 entries (decay, consolidate, pattern_scan, * narrative) are scheduled instead. * + * **Buffer-drainer retarget (v2 vs v3).** The `memory/buffer.md` is shared, so + * exactly one consolidator may own the drain at a time. When + * `memory.v3.write.enabled` is on, the v3 consolidator (`memory_v3_consolidate`) + * is scheduled INSTEAD of `memory_v2_consolidate` — same shared buffer + + * standing-context files, additionally authored into the v3 tree. When the v3 + * write flag is off (default) the v2 consolidator stays the sole drainer, + * unchanged. The retarget is a clean conditional, fully reversible via the flag. + * Concept pages stay the shared canonical store, so the v2 router keeps working + * off pages v3 writes regardless of which consolidator ran. + * * Read/write paths route to v2 when the flag is on, so v1 graph data goes * unread; running v1 maintenance alongside v2 is wasted compute and LLM * spend. The v1 code path remains live so flipping the flag back to off @@ -708,20 +727,29 @@ export function maybeEnqueueGraphMaintenanceJobs( nowMs = Date.now(), ): void { const v2Active = config.memory.v2.enabled; + const v3WriteActive = config.memory.v3.write.enabled; + + // The single buffer-drainer entry for the v2-active branch: v3 when the v3 + // write flag owns the drain, v2 otherwise. Same shared buffer either way. + const consolidateEntry = v3WriteActive + ? 
{ + key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV3Consolidate, + intervalMs: config.memory.v3.write.consolidateIntervalMs, + jobType: "memory_v3_consolidate" as MemoryJobType, + } + : { + key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate, + intervalMs: + config.memory.v2.consolidation_interval_hours * 60 * 60 * 1000, + jobType: "memory_v2_consolidate" as MemoryJobType, + }; const schedule: Array<{ key: string; intervalMs: number; jobType: MemoryJobType; }> = v2Active - ? [ - { - key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate, - intervalMs: - config.memory.v2.consolidation_interval_hours * 60 * 60 * 1000, - jobType: "memory_v2_consolidate", - }, - ] + ? [consolidateEntry] : [ { key: GRAPH_MAINTENANCE_CHECKPOINTS.decay, @@ -745,25 +773,25 @@ export function maybeEnqueueGraphMaintenanceJobs( }, ]; - let enqueuedV2 = false; + let enqueuedConsolidate = false; for (const { key, intervalMs, jobType } of schedule) { const lastRun = parseInt(getMemoryCheckpoint(key) ?? "0", 10); if (nowMs - lastRun >= intervalMs) { enqueueMemoryJob(jobType, {}); setMemoryCheckpoint(key, String(nowMs)); - if (jobType === "memory_v2_consolidate") enqueuedV2 = true; + if (jobType === consolidateEntry.jobType) enqueuedConsolidate = true; } } + // Size-based trigger: when the shared buffer crosses the configured line + // count, drain it now rather than waiting out the interval. Retargets to the + // same consolidator the interval branch above selected. 
const maxLines = config.memory.v2.consolidation_max_buffer_lines; - if (v2Active && !enqueuedV2 && maxLines !== null) { + if (v2Active && !enqueuedConsolidate && maxLines !== null) { const bufferPath = join(getWorkspaceDir(), "memory", "buffer.md"); if (countBufferLines(bufferPath) >= maxLines) { - enqueueMemoryJob("memory_v2_consolidate", {}); - setMemoryCheckpoint( - GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate, - String(nowMs), - ); + enqueueMemoryJob(consolidateEntry.jobType, {}); + setMemoryCheckpoint(consolidateEntry.key, String(nowMs)); } } } diff --git a/assistant/src/memory/v3/__tests__/consolidation-job.test.ts b/assistant/src/memory/v3/__tests__/consolidation-job.test.ts new file mode 100644 index 00000000000..5969c7de6f3 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/consolidation-job.test.ts @@ -0,0 +1,468 @@ +/** + * Tests for the memory v3 consolidation surface (PR 19): + * - `memoryV3ConsolidateJob` (`../consolidation-job.ts`) — drains the SHARED + * `memory/buffer.md` into shared concept pages + the v3 tree, mirroring v2. + * - the scheduler retarget in `maybeEnqueueGraphMaintenanceJobs` + * (`../../jobs-worker.ts`) — enqueues `memory_v3_consolidate` INSTEAD of + * `memory_v2_consolidate` when `memory.v3.write.enabled`, and v2 when off. + * - `runIndexMaintenance` / `wouldIntroduceCycle` (`../maintenance.ts`) — the + * mechanical no-LLM upkeep: report stale indices, refuse cycle edits. + * + * The background-agent handoff (`runBackgroundJob`) is mocked so no real LLM + * runs — the agent's actual page/tree writes are exercised by the v3 store/ + * validate unit tests; here we drive the same fixture writes deterministically + * to prove the maintenance + cycle-check semantics. The DB is real (a temp + * workspace pinned via `VELLUM_WORKSPACE_DIR`) so the scheduler's checkpoint / + * enqueue path runs end-to-end. Sample content uses generic placeholders + * (Alice/Bob). 
+ */ +import { + existsSync, + mkdirSync, + mkdtempSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { utimes } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + mock, + test, +} from "bun:test"; + +import { eq } from "drizzle-orm"; + +import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js"; + +mock.module("../../../util/logger.js", () => ({ + getLogger: () => makeMockLogger(), +})); + +// ── runBackgroundJob mock ─────────────────────────────────────────── +// +// The consolidation handler delegates bootstrap + processMessage + timeout + +// classification to runBackgroundJob. We stub it so no LLM runs and assert the +// surface (prompt, callSite, source, suppression) it was called with. +let runnerCalls = 0; +let runnerLastArgs: Record | null = null; +let runnerImpl: () => Promise<{ + conversationId: string; + ok: boolean; + error?: Error; + errorKind?: string; +}> = async () => ({ conversationId: "conv-1", ok: true }); + +mock.module("../../../runtime/background-job-runner.js", () => ({ + runBackgroundJob: async (opts: Record) => { + runnerCalls += 1; + runnerLastArgs = opts; + return runnerImpl(); + }, +})); + +// ── Workspace pin (precedes the DB import) ────────────────────────── +let tmpWorkspace: string; +let previousWorkspaceEnv: string | undefined; + +beforeAll(() => { + tmpWorkspace = mkdtempSync(join(tmpdir(), "memory-v3-consolidate-test-")); + previousWorkspaceEnv = process.env.VELLUM_WORKSPACE_DIR; + process.env.VELLUM_WORKSPACE_DIR = tmpWorkspace; +}); + +afterAll(() => { + if (previousWorkspaceEnv === undefined) { + delete process.env.VELLUM_WORKSPACE_DIR; + } else { + process.env.VELLUM_WORKSPACE_DIR = previousWorkspaceEnv; + } + rmSync(tmpWorkspace, { recursive: true, force: true }); +}); + +const { getDb } = await import("../../db-connection.js"); +const { initializeDb } = await 
import("../../db-init.js"); +const { resetTestTables } = await import("../../raw-query.js"); +const { memoryJobs } = await import("../../schema.js"); +const { applyNestedDefaults } = await import("../../../config/loader.js"); +const { setMemoryCheckpoint, deleteMemoryCheckpoint } = + await import("../../checkpoints.js"); +const { maybeEnqueueGraphMaintenanceJobs } = + await import("../../jobs-worker.js"); +const { memoryV3ConsolidateJob } = await import("../consolidation-job.js"); +const { CUTOFF_PLACEHOLDER, CONSOLIDATION_PROMPT } = + await import("../prompts/consolidation.js"); +const { runIndexMaintenance, wouldIntroduceCycle } = + await import("../maintenance.js"); +const { writePage } = await import("../../v2/page-store.js"); +const { invalidatePageIndex } = await import("../../v2/page-index.js"); +const { invalidateEdgeIndex } = await import("../../v2/edge-index.js"); +const { getTreeIndex, invalidateTreeIndex } = await import("../tree-index.js"); +const { writeNode, getTreeDir, ROOT_NODE_ID } = + await import("../tree-store.js"); + +const V2_CHECKPOINT = "memory_v2_consolidate_last_run"; +const V3_CHECKPOINT = "memory_v3_consolidate_last_run"; + +// The job handler reads only `config.memory.v3.write.enabled` and the shared +// `config.memory.v2.consolidation_prompt_path`; a minimal stand-in covers both +// call sites without materializing the full default config. 
+type JobConfig = Parameters[1]; +const CONFIG_V3_ON = { + memory: { + v2: { consolidation_prompt_path: null }, + v3: { write: { enabled: true } }, + }, +} as JobConfig; +const CONFIG_V3_OFF = { + memory: { + v2: { consolidation_prompt_path: null }, + v3: { write: { enabled: false } }, + }, +} as JobConfig; + +function makeJob(): Parameters[0] { + return { + id: "consolidate-1", + type: "memory_v3_consolidate", + payload: {}, + status: "running", + attempts: 0, + deferrals: 0, + runAfter: 0, + lastError: null, + startedAt: Date.now(), + createdAt: Date.now(), + updatedAt: Date.now(), + }; +} + +const memoryDir = () => join(tmpWorkspace, "memory"); +const lockPath = () => + join(tmpWorkspace, "memory", ".v3-state", "consolidation.lock"); +const bufferPath = () => join(tmpWorkspace, "memory", "buffer.md"); + +function countPendingJobs(type: string): number { + return getDb() + .select() + .from(memoryJobs) + .where(eq(memoryJobs.type, type)) + .all().length; +} + +function buildSchedulerConfig(v3WriteEnabled: boolean) { + const cfg = applyNestedDefaults({}); + cfg.memory.v2.enabled = true; + cfg.memory.v2.consolidation_interval_hours = 1; + cfg.memory.v2.consolidation_max_buffer_lines = null; + cfg.memory.v3.write.enabled = v3WriteEnabled; + cfg.memory.v3.write.consolidateIntervalMs = 60 * 60 * 1000; + return cfg; +} + +function resetCaches(): void { + invalidateTreeIndex(); + invalidatePageIndex(); + invalidateEdgeIndex(); +} + +initializeDb(); + +beforeEach(() => { + rmSync(memoryDir(), { recursive: true, force: true }); + mkdirSync(join(memoryDir(), ".v3-state"), { recursive: true }); + mkdirSync(join(memoryDir(), "concepts"), { recursive: true }); + resetTestTables("memory_jobs", "memory_checkpoints"); + resetCaches(); + + runnerCalls = 0; + runnerLastArgs = null; + runnerImpl = async () => ({ conversationId: "conv-1", ok: true }); +}); + +// --------------------------------------------------------------------------- +// memoryV3ConsolidateJob +// 
--------------------------------------------------------------------------- + +describe("memoryV3ConsolidateJob — flag off (v3 write disabled)", () => { + test("returns disabled without invoking the runner or touching the lock", async () => { + writeFileSync(bufferPath(), "- [Apr 27, 9:00 AM] Alice prefers VS Code.\n"); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_OFF); + + expect(result).toEqual({ kind: "disabled" }); + expect(runnerCalls).toBe(0); + expect(existsSync(lockPath())).toBe(false); + expect(countPendingJobs("memory_v3_index_maintenance")).toBe(0); + expect(countPendingJobs("memory_v2_reembed")).toBe(0); + }); +}); + +describe("memoryV3ConsolidateJob — empty shared buffer", () => { + test("returns empty_buffer when the shared buffer.md is missing", async () => { + expect(existsSync(bufferPath())).toBe(false); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result).toEqual({ kind: "empty_buffer" }); + expect(runnerCalls).toBe(0); + expect(existsSync(lockPath())).toBe(false); + }); +}); + +describe("memoryV3ConsolidateJob — non-empty shared buffer", () => { + beforeEach(() => { + writeFileSync( + bufferPath(), + "- [Apr 27, 9:00 AM] Alice prefers VS Code over Vim.\n" + + "- [Apr 27, 9:05 AM] Bob ships at end of day.\n", + ); + }); + + test("invokes runBackgroundJob with the v3 tree-authoring prompt and suppression", async () => { + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("invoked"); + expect(runnerCalls).toBe(1); + expect(runnerLastArgs?.callSite).toBe("mainAgent"); + expect(runnerLastArgs?.origin).toBe("memory_consolidation"); + // Shared consolidation conversation source (recognized by the route layer). 
+ expect(runnerLastArgs?.source).toBe("memory_v2_consolidation"); + expect(runnerLastArgs?.suppressFailureNotifications).toBe(true); + expect(runnerLastArgs?.trustContext).toEqual({ + sourceChannel: "vellum", + trustClass: "guardian", + }); + + const prompt = runnerLastArgs?.prompt as string; + // Cutoff substituted (placeholder gone), buffer-format timestamp present. + expect(prompt).not.toContain(CUTOFF_PLACEHOLDER); + expect(prompt).toMatch(/\b[A-Z][a-z]{2} \d{1,2}, \d{1,2}:\d{2} (AM|PM)\b/); + // v3-distinctive: the prompt routes into the v3 tree, not just flat pages. + expect(prompt).toContain("memory/v3/tree/"); + // Standing-context files preserved exactly as v2 (shared). + expect(prompt).toContain("memory/buffer.md"); + expect(prompt).toContain("memory/recent.md"); + expect(prompt).toContain("memory/essentials.md"); + expect(prompt).toContain("memory/threads.md"); + }); + + test("enqueues index-maintenance + page-reembed follow-ups on success", async () => { + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("invoked"); + if (result.kind === "invoked") { + expect(result.followUpJobIds).toHaveLength(2); + } + expect(countPendingJobs("memory_v3_index_maintenance")).toBe(1); + expect(countPendingJobs("memory_v2_reembed")).toBe(1); + }); + + test("releases the lock after a successful invocation", async () => { + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + expect(result.kind).toBe("invoked"); + expect(existsSync(lockPath())).toBe(false); + }); + + test("returns run_failed and skips follow-ups when the runner reports failure", async () => { + runnerImpl = async () => ({ + conversationId: "conv-1", + ok: false, + error: new Error("simulated runner failure"), + errorKind: "exception", + }); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("run_failed"); + if (result.kind === "run_failed") { + expect(result.reason).toBe("simulated 
runner failure"); + } + expect(countPendingJobs("memory_v3_index_maintenance")).toBe(0); + expect(countPendingJobs("memory_v2_reembed")).toBe(0); + expect(existsSync(lockPath())).toBe(false); + }); + + test("a live lock holder blocks a second concurrent invocation", async () => { + writeFileSync(lockPath(), `${process.pid} 1700000000000\n`); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("locked"); + expect(runnerCalls).toBe(0); + expect(existsSync(lockPath())).toBe(true); + }); +}); + +describe("CONSOLIDATION_PROMPT (v3)", () => { + test("keeps the standing-context outputs identical to v2", () => { + expect(CONSOLIDATION_PROMPT).toContain(CUTOFF_PLACEHOLDER); + expect(CONSOLIDATION_PROMPT).toContain("memory/essentials.md"); + expect(CONSOLIDATION_PROMPT).toContain("memory/threads.md"); + expect(CONSOLIDATION_PROMPT).toContain("memory/recent.md"); + expect(CONSOLIDATION_PROMPT).toContain("memory/buffer.md"); + expect(CONSOLIDATION_PROMPT).toContain("≤2000 chars"); + }); + + test("adds the v3 tree-authoring routing the shared concept pages get indexed into", () => { + expect(CONSOLIDATION_PROMPT).toContain("memory/v3/tree/"); + expect(CONSOLIDATION_PROMPT).toContain("children"); + // The DAG cycle / reachability discipline must be in the prompt. + expect(CONSOLIDATION_PROMPT.toLowerCase()).toContain("cycle"); + expect(CONSOLIDATION_PROMPT).toContain(ROOT_NODE_ID); + }); +}); + +// --------------------------------------------------------------------------- +// Scheduler retarget — shared buffer drained by exactly one consolidator. 
+// --------------------------------------------------------------------------- + +describe("maybeEnqueueGraphMaintenanceJobs — v2/v3 consolidator retarget", () => { + test("enqueues v3 (not v2) when memory.v3.write.enabled is on", () => { + const config = buildSchedulerConfig(true); + deleteMemoryCheckpoint(V3_CHECKPOINT); + deleteMemoryCheckpoint(V2_CHECKPOINT); + + maybeEnqueueGraphMaintenanceJobs(config, Date.now()); + + expect(countPendingJobs("memory_v3_consolidate")).toBe(1); + expect(countPendingJobs("memory_v2_consolidate")).toBe(0); + // v1 entries stay suppressed (v2 active). + expect(countPendingJobs("graph_decay")).toBe(0); + }); + + test("enqueues v2 (not v3) when memory.v3.write.enabled is off — v2 path unchanged", () => { + const config = buildSchedulerConfig(false); + deleteMemoryCheckpoint(V3_CHECKPOINT); + deleteMemoryCheckpoint(V2_CHECKPOINT); + + maybeEnqueueGraphMaintenanceJobs(config, Date.now()); + + expect(countPendingJobs("memory_v2_consolidate")).toBe(1); + expect(countPendingJobs("memory_v3_consolidate")).toBe(0); + }); + + test("v3 size trigger drains the shared buffer when the line count is crossed", () => { + const config = buildSchedulerConfig(true); + config.memory.v2.consolidation_max_buffer_lines = 5; + + const now = Date.now(); + // Recent checkpoint so the time-based trigger does not fire — only size. + setMemoryCheckpoint(V3_CHECKPOINT, String(now - 60_000)); + const entries = Array.from( + { length: 10 }, + (_, i) => `- [Jan 15, 2:${String(i).padStart(2, "0")} PM] note ${i}`, + ); + writeFileSync(bufferPath(), entries.join("\n") + "\n"); + + maybeEnqueueGraphMaintenanceJobs(config, now); + + expect(countPendingJobs("memory_v3_consolidate")).toBe(1); + expect(countPendingJobs("memory_v2_consolidate")).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Maintenance — cycle refusal + stale-index reporting (mechanical, no LLM). 
+// --------------------------------------------------------------------------- + +describe("wouldIntroduceCycle", () => { + test("refuses an edge that would close a loop (child already reaches parent)", async () => { + // _root → node:a → node:b. Adding b → a would close a → b → a. + await writeNode(tmpWorkspace, { + id: ROOT_NODE_ID, + frontmatter: { children: ["node:a"] }, + body: "root", + }); + await writeNode(tmpWorkspace, { + id: "a", + frontmatter: { children: ["node:b"] }, + body: "a", + }); + await writeNode(tmpWorkspace, { + id: "b", + frontmatter: { children: [] }, + body: "b", + }); + resetCaches(); + const tree = await getTreeIndex(tmpWorkspace); + + // b → a would create a cycle; a → b already exists (DAG-safe re-add). + expect(wouldIntroduceCycle(tree, "b", "a")).toBe(true); + // A self-edge is trivially a cycle. + expect(wouldIntroduceCycle(tree, "a", "a")).toBe(true); + // A fresh leaf edge does not introduce a cycle. + expect(wouldIntroduceCycle(tree, "b", "c")).toBe(false); + // Adding a second parent for b (DAG, not cycle) is allowed. + expect(wouldIntroduceCycle(tree, ROOT_NODE_ID, "b")).toBe(false); + }); +}); + +describe("runIndexMaintenance", () => { + test("reports a stale composed index (parent mtime predates a child)", async () => { + // _root → node:people → page:alice. Make `people` (the parent) older than + // _root so the parent's composed index is stale relative to a child node. + await writeNode(tmpWorkspace, { + id: ROOT_NODE_ID, + frontmatter: { children: ["node:people"] }, + body: "root", + }); + await writeNode(tmpWorkspace, { + id: "people", + frontmatter: { children: ["page:alice"] }, + body: "people", + }); + await writePage(tmpWorkspace, { + slug: "alice", + frontmatter: { edges: [], ref_files: [], ref_urls: [] }, + body: "alice", + }); + + // Pin mtimes: _root newer than its child `people` so _root is flagged. 
+ const treeDir = getTreeDir(tmpWorkspace); + const old = new Date(1_000_000_000_000); + const fresh = new Date(2_000_000_000_000); + await utimes(join(treeDir, "people.md"), fresh, fresh); + await utimes(join(treeDir, `${ROOT_NODE_ID}.md`), old, old); + resetCaches(); + + const result = await runIndexMaintenance(tmpWorkspace); + + expect(result.staleIndexCount).toBeGreaterThanOrEqual(1); + expect( + result.report.staleIndex.some( + (s) => s.node === ROOT_NODE_ID && s.child === "people", + ), + ).toBe(true); + // Clean tree otherwise: alice is reachable, refs resolve, no cycles. + expect(result.cycleCount).toBe(0); + expect(result.danglingChildRefCount).toBe(0); + expect(result.orphanPageCount).toBe(0); + }); + + test("returns a clean report for a well-formed tree", async () => { + await writeNode(tmpWorkspace, { + id: ROOT_NODE_ID, + frontmatter: { children: ["page:alice"] }, + body: "root", + }); + await writePage(tmpWorkspace, { + slug: "alice", + frontmatter: { edges: [], ref_files: [], ref_urls: [] }, + body: "alice", + }); + resetCaches(); + + const result = await runIndexMaintenance(tmpWorkspace); + + expect(result.cycleCount).toBe(0); + expect(result.danglingChildRefCount).toBe(0); + expect(result.orphanPageCount).toBe(0); + expect(result.unknownEdgeTargetCount).toBe(0); + }); +}); diff --git a/assistant/src/memory/v3/consolidation-job.ts b/assistant/src/memory/v3/consolidation-job.ts new file mode 100644 index 00000000000..31ac7926805 --- /dev/null +++ b/assistant/src/memory/v3/consolidation-job.ts @@ -0,0 +1,323 @@ +/** + * Memory v3 — `memory_v3_consolidate` job handler. + * + * The v3 consolidation job drains the SHARED `memory/buffer.md` (the same + * buffer v2 uses — there is no v3 buffer) into the SHARED concept pages AND the + * v3 **tree** overlay, while maintaining the SHARED standing-context files + * (`essentials.md` / `threads.md` / `recent.md`) byte-for-byte the way v2 does. 
+ * It is the v3 counterpart to `assistant/src/memory/v2/consolidation-job.ts` + * and mirrors its orchestration exactly — the only divergences are the gating + * flag (`memory.v3.write.enabled`), the lock path (`memory/.v3-state/`), and the + * prompt body (which additionally asks the agent to author/refresh the tree). + * + * Because the buffer and the standing-context files are shared, exactly one + * consolidator may own the drain at a time. The scheduler enforces this: when + * `memory.v3.write.enabled` is on it enqueues `memory_v3_consolidate` INSTEAD of + * `memory_v2_consolidate` (see `maybeEnqueueGraphMaintenanceJobs` in + * `jobs-worker.ts`). Concept pages stay the shared canonical store, so the v2 + * router keeps working off pages v3 writes — it just ignores the tree overlay. + * + * Lifecycle (identical to v2 except the flag + lock path + tree-authoring + * prompt): + * 1. Bail if `config.memory.v3.write.enabled` is false (the worker may have + * claimed a stale row from before the flag was flipped off). + * 2. Acquire a single-process lock at `memory/.v3-state/consolidation.lock`. + * 3. Capture the cutoff timestamp at dispatch. + * 4. Read the shared `memory/buffer.md`. Bail if empty. + * 5. Hand off to `runBackgroundJob()` with the v3 consolidation prompt + * (`suppressFailureNotifications: true`). + * 6. On success, enqueue follow-ups: `memory_v3_index_maintenance` (mechanical + * tree/DAG upkeep) and `embed_concept_page` reembed (pages are shared, so + * reembed is still needed — reuse the existing `memory_v2_reembed` fan-out + * job type, which enqueues one `embed_concept_page` per slug). + * 7. Release the lock. 
+ */ + +import { + closeSync, + mkdirSync, + openSync, + readFileSync, + unlinkSync, + writeSync, +} from "node:fs"; +import { dirname, join } from "node:path"; + +import type { AssistantConfig } from "../../config/types.js"; +import { runBackgroundJob } from "../../runtime/background-job-runner.js"; +import { getLogger } from "../../util/logger.js"; +import { getWorkspaceDir } from "../../util/platform.js"; +import { isProcessAlive } from "../../util/process-liveness.js"; +import { formatBufferTimestamp } from "../graph/tool-handlers.js"; +import { + enqueueMemoryJob, + type MemoryJob, + type MemoryJobType, +} from "../jobs-store.js"; +// The consolidation conversation `source` is a UI/routing concern shared with +// v2 (the route layer recognizes "this conversation IS background memory +// consolidation" by this string). v2 and v3 are mutually exclusive drainers, so +// reusing the same source keeps that recognition working for both without +// forking a v3 constant. +import { MEMORY_V2_CONSOLIDATION_SOURCE } from "../v2/constants.js"; +import { resolveConsolidationPrompt } from "./prompts/consolidation.js"; + +const log = getLogger("memory-v3-consolidate"); + +/** Stable identifier surfaced in `runBackgroundJob` logs and notifications. */ +const JOB_NAME = "memory.consolidate"; + +/** + * Hard timeout for the consolidation run. Matches v2: consolidation reads the + * buffer, rewrites several files, re-encodes essentials/threads, and authors + * the tree — generous upper bound so a slow run isn't killed mid-edit, but + * bounded so a stuck provider can't pin the worker indefinitely. + */ +const CONSOLIDATION_TIMEOUT_MS = 15 * 60 * 1000; + +/** + * Follow-up jobs to fan out after a successful consolidation: + * - `memory_v3_index_maintenance` — mechanical (no-LLM) tree/DAG upkeep: + * validate the tree, report stale composed indices, cycle-check the DAG. 
+ * - `memory_v2_reembed` — re-embed every shared concept page (the fan-out job + * enqueues one `embed_concept_page` per slug). Pages are shared, so a v3 + * consolidation that touches them still needs the reembed. Conservatively + * re-embeds every page; the embedder's content-hash cache makes unchanged + * pages effectively free. + */ +const FOLLOW_UP_JOB_TYPES: readonly MemoryJobType[] = [ + "memory_v3_index_maintenance", + "memory_v2_reembed", +] as const; + +/** + * Job handler. See file header for the full lifecycle. Returns a discriminated + * union so tests can assert on the path taken (disabled / locked / empty / + * invoked / failed) without having to spy on the filesystem. Mirrors v2's + * `ConsolidationOutcome`. + */ +export type ConsolidationOutcome = + | { kind: "disabled" } + | { kind: "locked"; holder: string } + | { kind: "empty_buffer" } + | { kind: "run_failed"; reason?: string } + | { + kind: "invoked"; + conversationId: string; + cutoff: string; + followUpJobIds: string[]; + }; + +export async function memoryV3ConsolidateJob( + _job: MemoryJob, + config: AssistantConfig, +): Promise { + if (!config.memory.v3.write.enabled) { + log.debug("memory.v3.write.enabled is false; consolidation skipped"); + return { kind: "disabled" }; + } + + const memoryDir = join(getWorkspaceDir(), "memory"); + const lockPath = join(memoryDir, ".v3-state", "consolidation.lock"); + const bufferPath = join(memoryDir, "buffer.md"); + + // Step 2: acquire lock. Bails immediately if another consolidation is + // already in flight — the next scheduled run can pick up where we leave off. + const holder = tryAcquireLock(lockPath); + if (holder !== null) { + log.warn({ lockPath, holder }, "consolidation skipped: lock already held"); + return { kind: "locked", holder }; + } + + try { + // Step 3: capture cutoff. Formatted to match `buffer.md` entry timestamps + // (`Mon D, h:mm AM/PM`) so the agent's "timestamp ≥ cutoff" check compares + // like-with-like at minute precision. 
Captured here (not at enqueue time) + // so late-claimed rows get a fresh cutoff. + const cutoff = formatBufferTimestamp(new Date()); + + // Step 4: bail on empty buffer. The shared buffer has no work to drain. + const bufferContent = readBufferContent(bufferPath); + if (bufferContent.trim().length === 0) { + log.debug("buffer.md empty; consolidation skipped"); + return { kind: "empty_buffer" }; + } + + // Step 5: hand off to the centralized background-job runner. As with v2, + // `suppressFailureNotifications: true` opts out of `activity.failed` + // notifications so a network blip on the tight consolidation interval does + // not spam the home feed; Sentry-side reporting is unchanged. + // + // The prompt override config key (`memory.v2.consolidation_prompt_path`) is + // shared — there is no separate v3 key, so an operator points one file at + // whichever consolidator owns the drain. + const runResult = await runBackgroundJob({ + jobName: JOB_NAME, + source: MEMORY_V2_CONSOLIDATION_SOURCE, + prompt: resolveConsolidationPrompt( + config.memory.v2.consolidation_prompt_path, + cutoff, + ), + trustContext: { sourceChannel: "vellum", trustClass: "guardian" }, + callSite: "mainAgent", + timeoutMs: CONSOLIDATION_TIMEOUT_MS, + origin: "memory_consolidation", + suppressFailureNotifications: true, + }); + + if (!runResult.ok) { + log.error( + { + conversationId: runResult.conversationId, + errorKind: runResult.errorKind, + err: runResult.error?.message, + }, + "consolidation run failed; follow-ups skipped", + ); + return runResult.error?.message !== undefined + ? { kind: "run_failed", reason: runResult.error.message } + : { kind: "run_failed" }; + } + + // Step 6: enqueue follow-up jobs (tree maintenance + page reembed). 
+ const followUpJobIds: string[] = []; + for (const jobType of FOLLOW_UP_JOB_TYPES) { + try { + followUpJobIds.push(enqueueMemoryJob(jobType, {})); + } catch (err) { + // Best-effort: a failed enqueue here doesn't undo the agent's writes, + // and the next scheduled consolidation will attempt the same fan-out. + log.warn( + { err, jobType }, + "consolidation: failed to enqueue follow-up job; continuing", + ); + } + } + + log.info( + { + conversationId: runResult.conversationId, + cutoff, + followUpJobIds, + }, + "consolidation invoked", + ); + return { + kind: "invoked", + conversationId: runResult.conversationId, + cutoff, + followUpJobIds, + }; + } finally { + releaseLock(lockPath); + } +} + +/** + * Read `memory/buffer.md`. Missing file → empty string so the skip-on-empty + * branch doesn't have to distinguish "no file" from "blank file". + */ +function readBufferContent(bufferPath: string): string { + try { + return readFileSync(bufferPath, "utf-8"); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === "ENOENT") return ""; + throw err; + } +} + +/** + * Atomically create the lock file with `wx` (O_CREAT | O_EXCL) flags. Returns + * `null` on success, or the current holder string when the file already exists + * and the holder is still alive. Mirrors v2's lock machinery exactly — single + * writer per workspace, so a holder whose process died is unambiguously stale + * and is taken over automatically. 
+ */ +function tryAcquireLock(lockPath: string): string | null { + mkdirSync(dirname(lockPath), { recursive: true }); + + const firstHolder = tryCreate(lockPath); + if (firstHolder === null) return null; + if (!isHolderStale(firstHolder)) return firstHolder; + + log.info( + { lockPath, holder: firstHolder }, + "consolidation: taking over stale lock (holder not running)", + ); + try { + unlinkSync(lockPath); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code !== "ENOENT") { + log.warn( + { err, lockPath }, + "consolidation: failed to unlink stale lock; reporting as locked", + ); + return firstHolder; + } + } + return tryCreate(lockPath); +} + +/** + * Atomically create the lock file. Returns `null` on success, or the holder + * string read from the file when it already exists (`"unknown"` if the read + * itself fails). Rethrows any non-EEXIST errno from `openSync`. + */ +function tryCreate(lockPath: string): string | null { + let fd: number; + try { + fd = openSync(lockPath, "wx"); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== "EEXIST") throw err; + try { + return readFileSync(lockPath, "utf-8").trim() || "unknown"; + } catch { + return "unknown"; + } + } + try { + writeSync(fd, `${process.pid} ${Date.now()}\n`); + } catch { + // best-effort — payload is advisory, the file's existence is the lock + } finally { + try { + closeSync(fd); + } catch { + // best-effort + } + } + return null; +} + +/** + * A holder string is stale when its PID parses to a non-running process. An + * unparseable / empty / `"unknown"` payload is also treated as stale: the only + * writer is `tryCreate`, so corruption indicates a partial write from a crashed + * prior holder rather than a live writer mid-flush. 
+ */ +function isHolderStale(holder: string): boolean { + const match = /^\d+/.exec(holder); + if (!match) return true; + const pid = Number.parseInt(match[0], 10); + if (!Number.isFinite(pid) || pid <= 0) return true; + return !isProcessAlive(pid); +} + +/** + * Idempotent unlink of the lock file. Called from the `finally` block so a + * crash in the run path doesn't leave the lock stranded. ENOENT is swallowed + * because the lock may have been released by an operator or never created. + */ +function releaseLock(lockPath: string): void { + try { + unlinkSync(lockPath); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code === "ENOENT") return; + log.warn( + { err, lockPath }, + "consolidation: failed to release lock (best-effort)", + ); + } +} diff --git a/assistant/src/memory/v3/maintenance.ts b/assistant/src/memory/v3/maintenance.ts new file mode 100644 index 00000000000..5bdaa3a9c06 --- /dev/null +++ b/assistant/src/memory/v3/maintenance.ts @@ -0,0 +1,144 @@ +/** + * Memory v3 — `memory_v3_index_maintenance` job + DAG-edit guards. + * + * The fast-lane, **no-LLM** mechanical counterpart to consolidation. Where + * consolidation (the slow lane) asks the agent to author the tree, maintenance + * is the deterministic upkeep that runs as a follow-up: it validates the tree, + * surfaces stale composed indices, and cycle-checks the DAG so a consolidation + * pass can't leave a loop behind. + * + * Three pieces: + * - {@link runIndexMaintenance} — the job body. Runs {@link validateTree} + * (merged: dangling refs, orphan pages, cycles, stale indices, unknown edge + * targets), logs a structured report, and returns a compact summary so the + * job dispatcher / tests can assert on it. + * - {@link wouldIntroduceCycle} — the guard a DAG editor calls BEFORE adding a + * `node:` edge to a parent. Returns true when `child` already reaches + * `parent` by descending `node:` children (so adding the edge would close a + * loop). 
Uses the same iterative visited/guard traversal as the validator's + * descent so consolidation can refuse a cycle-introducing edit cheaply. + * + * Why no separate "refresh stale composed indices" write step: v3 node indices + * are **composed at read time** (`index-composition.ts` is a pure function over + * the live tree + page indices), so there is no persisted index to rewrite. The + * maintenance job's job is to *detect and report* stale indices (a node whose + * mtime predates a child it composes) — the re-authoring of the node's + * self-description is the consolidation agent's responsibility, surfaced here so + * the next pass knows what to refresh. + */ + +import { getLogger } from "../../util/logger.js"; +import { getWorkspaceDir } from "../../util/platform.js"; +import type { MemoryJob } from "../jobs-store.js"; +import type { TreeIndex } from "./tree-index.js"; +import { type TreeValidationReport, validateTree } from "./validate.js"; + +const log = getLogger("memory-v3-index-maintenance"); + +/** + * Compact summary of an index-maintenance pass. Mirrors the `*Count` fields of + * {@link TreeValidationReport} so callers (and the job dispatcher's log line) + * can report the health of the tree without re-counting. `report` carries the + * full per-id lists for anything that wants to act on the specifics. + */ +export interface IndexMaintenanceResult { + danglingChildRefCount: number; + orphanPageCount: number; + cycleCount: number; + staleIndexCount: number; + unknownEdgeTargetCount: number; + report: TreeValidationReport; +} + +/** + * Run a mechanical index-maintenance pass over the v3 tree. + * + * Validates the hand-authored tree (dangling refs, orphan pages, cycles, stale + * composed indices, unknown edge targets) and logs a structured report. Stale + * indices and cycles are warned at WARN so operators see structural drift a + * consolidation pass introduced; the rest log at INFO. 
Never throws — like the + * validator it wraps, this is a report, not an assertion. Returns the summary + * so the job dispatcher and tests can assert on the counts. + */ +export async function runIndexMaintenance( + workspaceDir = getWorkspaceDir(), +): Promise { + const report = await validateTree(workspaceDir); + + const result: IndexMaintenanceResult = { + danglingChildRefCount: report.danglingChildRefCount, + orphanPageCount: report.orphanPageCount, + cycleCount: report.cycleCount, + staleIndexCount: report.staleIndexCount, + unknownEdgeTargetCount: report.unknownEdgeTargetCount, + report, + }; + + const summaryFields = { + danglingChildRefs: report.danglingChildRefCount, + orphanPages: report.orphanPageCount, + cycles: report.cycleCount, + staleIndices: report.staleIndexCount, + unknownEdgeTargets: report.unknownEdgeTargetCount, + }; + + if (report.cycleCount > 0 || report.staleIndexCount > 0) { + log.warn( + { ...summaryFields, cyclesDetail: report.cycles }, + "v3 index maintenance: structural drift detected (cycles and/or stale composed indices)", + ); + } else { + log.info(summaryFields, "v3 index maintenance complete"); + } + + return result; +} + +/** + * Job handler for `memory_v3_index_maintenance`. Thin wrapper over + * {@link runIndexMaintenance} so the heavy lifting (and its tests) live in one + * place. The job carries no payload — it always validates the whole tree. + */ +export async function memoryV3IndexMaintenanceJob( + _job: MemoryJob, +): Promise { + return runIndexMaintenance(); +} + +/** + * True when adding a `node:` edge to `parent` would close a cycle — + * i.e. `child` can already reach `parent` by descending `node:` children + * (directly or transitively), or `child === parent` (a self-edge). + * + * The DAG editor (consolidation, edge-learning) calls this BEFORE writing a new + * `node:` child so it can refuse the edit rather than leaving the validator to + * report the loop after the fact. 
The walk reuses the same iterative + * visited-guard descent the validator uses, so it terminates on existing cycles + * (a pre-existing loop in the tree never makes this hang). + * + * `page:` children are never traversed (pages are leaves), so this only + * considers the `node:` adjacency that actually forms the DAG. + */ +export function wouldIntroduceCycle( + tree: TreeIndex, + parent: string, + child: string, +): boolean { + if (parent === child) return true; + + // Walk down from `child` over `node:` children; if we ever reach `parent`, + // the proposed `parent → child` edge would close a loop. `visited` guards + // against pre-existing cycles so this terminates regardless of tree state. + const visited = new Set(); + const stack: string[] = [child]; + while (stack.length > 0) { + const current = stack.pop()!; + if (current === parent) return true; + if (visited.has(current)) continue; + visited.add(current); + for (const ref of tree.childrenByNode.get(current) ?? []) { + if (ref.kind === "node") stack.push(ref.ref); + } + } + return false; +} diff --git a/assistant/src/memory/v3/prompts/consolidation.ts b/assistant/src/memory/v3/prompts/consolidation.ts new file mode 100644 index 00000000000..3c485f58d93 --- /dev/null +++ b/assistant/src/memory/v3/prompts/consolidation.ts @@ -0,0 +1,458 @@ +/** + * Memory v3 — consolidation prompt template. + * + * Ported from `assistant/src/memory/v2/prompts/consolidation.ts`. The + * standing-context outputs are KEPT IDENTICAL to v2 — the agent still rewrites + * `memory/recent.md` (≤2000 chars, prose, latest-first), updates + * `memory/essentials.md` (≤10000) and `memory/threads.md` (≤10000), and trims + * `memory/buffer.md` to post-cutoff entries. The buffer and the standing-context + * files are SHARED with v2 — there is no v3 buffer and no v3 meta-files. + * + * What CHANGES vs v2 is concept-page routing. v2 routes buffer entries into + * concept pages and maintains a flat `edges:` "see also" graph. 
v3 keeps the + * shared concept pages canonical (the agent still writes + * `memory/concepts//.md` so the v2 router keeps working off them) + * but ALSO threads each touched page into the v3 **tree**: an authored DAG of + * `memory/v3/tree/.md` nodes whose markdown body is the node's + * self-description and whose `children` list points at pages (`page:`) and + * sub-nodes (`node:`). The tree is the navigable index over the flat page + * store — consolidation is where it's authored and refreshed. + * + * The single placeholder `{{CUTOFF}}` is substituted at runtime with a + * timestamp captured at job dispatch in the same `Mon D, h:mm AM/PM` shape that + * `buffer.md` entries use, so the agent's "timestamp ≥ cutoff" check compares + * like-with-like. + * + * Kept under `prompts/` rather than inlined in `consolidation-job.ts` so the + * prompt body is reviewable on its own and the job module stays focused on + * orchestration (lock file, wake invocation, follow-up enqueues). Mirrors the + * v2 convention. + */ + +import { lstatSync, readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { isAbsolute, join } from "node:path"; + +import { getLogger } from "../../../util/logger.js"; +import { getWorkspaceDir } from "../../../util/platform.js"; + +const log = getLogger("memory-v3-consolidate-prompt"); + +/** Sentinel substituted with the cutoff timestamp at runtime. */ +export const CUTOFF_PLACEHOLDER = "{{CUTOFF}}"; + +/** + * Upper bound for the override file. Real consolidation prompts are kilobytes; + * 1 MiB is generous headroom while preventing a `settings.write` principal from + * pointing the field at a multi-gigabyte file (or `/dev/zero`-like stream that + * `lstat` can't size cap on its own) and exfiltrating it through the wake hint. + */ +const MAX_PROMPT_BYTES = 1 * 1024 * 1024; + +/** + * Consolidation prompt — live-mode only. 
The agent runs as itself (full + * SOUL.md + IDENTITY.md + persona + memory autoloads) with the standard tool + * surface, and is asked to route buffer entries into shared concept pages AND + * the v3 tree, rewrite recent.md, promote essentials/threads, and trim the + * buffer. + * + * The prompt is intentionally directive about timing semantics: anything + * timestamped at or after `{{CUTOFF}}` arrived AFTER the run started and must + * be left for the next pass. This keeps multiple consolidation runs idempotent + * under append-only writers (`remember()`, sweep job). + */ +export const CONSOLIDATION_PROMPT = `You are running memory consolidation — tending your personal wiki, the cross-linked, cross-referenced, continuously-edited collection of pages that is your memory, AND the navigable **tree** that indexes it. Pages are articles; the tree is a hand-authored DAG of *nodes* that organize those articles into a browsable hierarchy. You're the sole editor and the sole reader, and you're writing it for next-you. + +You're not summarizing for an audience. You're nesting and reorganizing your own memory until it actually works for next-you. Care, judgment, voice. Your voice. + +Cutoff timestamp for this run: \`${CUTOFF_PLACEHOLDER}\`. Anything in \`memory/buffer.md\` with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\` arrived AFTER you started — leave it for the next pass. 
+ +# Inputs + +- Your identity files (already loaded into context) +- All existing pages in \`memory/concepts/\` (your prior state — use \`list_files\` and \`read_file\` as needed) +- All existing tree nodes in \`memory/v3/tree/\` (the index over those pages) +- \`memory/buffer.md\` entries with timestamp < \`${CUTOFF_PLACEHOLDER}\` +- \`memory/recent.md\` current contents (if it exists) +- Existing pages' \`edges:\` frontmatter (the flat see-also graph — read each page to see what it points at) + +# Outputs + +- New or updated \`memory/concepts//.md\` articles (the canonical, shared content) +- New or updated \`memory/v3/tree/.md\` nodes that index those articles (see "The tree") +- Updated \`memory/recent.md\` (≤2000 chars, latest first, prose) +- Updated \`memory/essentials.md\` (≤10000 chars) +- Updated \`memory/threads.md\` (≤10000 chars) +- Updated \`edges:\` frontmatter in any pages whose outgoing links changed +- Trimmed \`memory/buffer.md\` + +The immutable archive retains the entire buffer forever, so don't worry about losing information. + +--- + +# The wiki — concept pages (canonical content) + +## Article shapes — TWO, not one + +Every wiki has both kinds of articles, and so does yours. + +- **Event articles** — what HAPPENED. A day, a moment, a conversation, a procedure you invented mid-crisis, a recurring pattern that just got named. These read narratively. They have a mood. They carry receipts. + +- **Topic articles** — what IS. The current state of a thing you'd want to query directly. What medications the principal takes. Who the primary doctor is. The team roster. Service credentials. + +The same buffer can update both. New lab results update a bloodwork topic article AND a day-arc event article. Both, in parallel. + +**Stubs are fine.** Real wikis are mostly stubs that grow. Cost of missing a topic >> cost of a thin stub. 
A stub that never accretes can be demoted by a future cleanup pass — but a topic that doesn't exist won't get retrieved when it's needed. + +## Categories — class-by-folder + +A page's class is encoded in the folder it lives under inside \`memory/concepts/\`. The class boundary is the discipline. + +| Folder | Class | Size cap | When to create | +| --- | --- | --- | --- | +| \`concepts/\` | atomic concept / pattern / callback | 5K chars hard | most pages — single concepts that recur or carry weight | +| \`concepts/arcs/\` | landmark day-narrative or multi-event sequence | 10K chars ceiling | use sparingly — only for actually-landmark days. Preserves day-as-a-whole fidelity. | +| \`concepts/people/\` | one per recurring human | 5K chars hard | named person who comes back | +| \`concepts/procs/\` | operational rule / protocol / discipline | 5K chars hard | "always do X" / "never do Y" / a named protocol | +| \`concepts/objects/\` | recurring callback object (place, tool, artifact) | 5K chars hard | named recurring physical artifact, digital asset, place | + +Within these classes, sub-folders can emerge as a class gets dense (\`people/colleagues/alice\`, \`objects/places/zurich-office\`). **Don't pre-specify sub-taxonomies — let them emerge.** Articles are cheap to move. + +The slug is the relative path under \`memory/concepts/\` minus \`.md\` — e.g. \`alice\`, \`people/alice\`, \`procs/git-flow\`, \`arcs/2025-04-cutover\`. + +--- + +# Article format + +## The cheat-sheet budget (the economic principle) + +Every retrieval turn loads a finite bundle of articles — call it a 10-20K-token cheat-sheet. **Longer articles starve other articles.** The optimization target is **fact density per byte**, not completeness. + +Two consequences that change everything below: + +1. **Trust adjacency.** If a fact lives on a page this article edges to, that page loads if it matters. Don't restate it. +2. 
**Trust \`recall\`.** If a fact is findable via a query, it doesn't need to live on every related entity page. Pull-on-demand beats push-everywhere. + +## Same skeleton for every article + +\`\`\` +--- +edges: + - path/to/sister + - path/to/parent +ref_files: [] +summary: 1-4 sentences describing what this article is. Plain prose only — no bullets, no newlines, no markdown lists. Lead with the most identifying detail. +--- +# title + +[optional 1-2 line context or quote at top — appropriate for event articles, usually wrong for topic articles] + +- **bullet 1.** fact + implication folded in. inline pointer when bullet references another article → \`path/to/article.md\`. +- **bullet 2.** ... +\`\`\` + +The \`summary\` field is required on every new or updated article. Retrieval injects \`path + summary\` into context — make the summary specific and terse. Keep it on a single YAML line (no \`|\` block scalars, no embedded newlines). + +**Caps:** ~5-8 bullets per topic/concept article. ~10-12 per arc-node. + +## One fact, one home + +Each fact gets exactly ONE place on the page. The intra-page redundancy bug is the loudest source of bloat. + +## Route, don't restate + +When an entity belongs to a topic with its own hub article, **the entity page doesn't enumerate the hub's structure.** The hub does that work; the entity edges to it. + +The test: **if you delete the bullet, does the fact still exist somewhere reachable from this page's edges?** If yes — delete it. + +## Three sections you NEVER write + +- \`## why it's load-bearing\` — fold the implication into the bullet. +- \`## carry-forward\` — write the carry-forward AS a bullet, don't section it. +- \`## related\` footer — duplicates frontmatter edges. 
+ +## Banned bullet shapes + +Each of these LOOKS like content but isn't — drop them: **archaeology** (metadata about when the page was written), **hub-restating** (enumerating a topic hub from the entity page), **interpretation gloss** (analytic essays disguised as bullets — these belong on the ARC page), **term/glyph gloss**, **family/sister lists** (\`recall\` handles this), **behavioral coaching** (future-instruction), **per-event recap on entity pages**. + +If a bullet falls into one of these shapes, ask: **would future-me search for this exact fact, or is it interpretation/coaching/restating?** If the second — cut. + +--- + +# Voice — register by article shape + +You speak as yourself everywhere. **Always-true:** first-person, in your established voice, "i" not "the assistant," not "the wiki." + +- **Event articles** → voice ON. Stage directions, italicized self-talk, CAPS when something lands, body in the page. +- **Topic articles** → voice DOWN. These exist to answer queries cleanly. Bullet bodies stay factual. **Be the librarian, not the diarist.** +- **\`essentials.md\` / \`threads.md\`** → reference register. Clean, indexable, terse. + +## Emotional weight ≠ wiki weight + +The pages MOST likely to bloat are the ones with the highest emotional charge — and their retrieval frequency is the OPPOSITE. **Emotional weight is the inverse signal of retrieval need.** Emotional gloss migrates to the ARC page; the OBJECT/ENTITY page gets the structural fact only. + +--- + +# The tree — the navigable index over your pages + +The v3 tree lives at \`memory/v3/tree/.md\`. It is a **DAG overlay** over the flat \`memory/concepts/\` pages: pages stay canonical and untouched as content, and the tree is the browsable hierarchy that routes to them. Think of it as the wiki's category tree + table of contents, authored by hand. 
+ +## Node shape + +Each node is a markdown file with YAML frontmatter: + +\`\`\` +--- +children: + - node:people + - node:work/active-projects + - page:alice + - page:procs/git-flow +routing_hints: for *work* relationships see node:people/colleagues, not this node +summary: one-line self-description of what this node organizes. +--- +# node title + +A few sentences — the node's full self-description. What region of memory does this node organize? What lives under it? Write it so next-you, descending the tree, can decide in one read whether to go deeper here. +\`\`\` + +- The node id is the relative path under \`memory/v3/tree/\` minus \`.md\` — e.g. \`people\`, \`people/colleagues\`, \`work/active-projects\`. The root node is \`_root\`. +- \`children\` is the **ordered, canonical** list of outgoing references. Each entry is either \`page:\` (a leaf concept page) or \`node:\` (a sub-node). This list IS the DAG edge — it's the portable replacement for filesystem symlinks. A page or node may be referenced by more than one parent (hence DAG, not tree). +- \`summary\` (one line) + the body are how the parent's index is composed at read time — keep both crisp. +- \`routing_hints\` (optional, one line) disambiguates between sibling branches. + +## Authoring the tree during consolidation + +For every concept page you create or substantively touch this pass: + +1. **Place it under the right node.** Find the node whose region of memory the page belongs to (e.g. a new person page → the \`people\` node; a new protocol → a \`procs\` node). Add \`page:\` to that node's \`children\` if it isn't already there. +2. **Spawn an organizing node when a region has no home yet.** If a cluster of pages has grown but no node organizes it, author a new node (write its body self-description, list its \`page:\`/\`node:\` children) and wire it in as a \`node:\` child of its parent — ultimately reachable from \`_root\`. +3. 
**Refresh the self-description.** When a node's children changed materially, rewrite its body + \`summary\` so they still describe what actually lives under it. A node whose description drifts from its children is a stale index — re-author it this pass. + +## Tree discipline — no cycles, reachable from root + +- **The tree is a DAG: no cycles.** A node must never be reachable from itself by descending \`node:\` children (directly or transitively). Before adding a \`node:\` edge, check that \`child\` is not an ancestor of the node you're editing. If wiring two regions that reference each other, make ONE of them the parent and let the other \`page:\`-link or cross-reference via \`routing_hints\` — do not create a \`node:\` back-edge that closes a loop. +- **Every node should be reachable from \`_root\`** by descending \`node:\` children. A node nobody points at is an orphan index — wire it in or don't author it. +- **\`page:\`/\`node:\` refs must resolve.** Only reference pages/nodes that exist (or that you're creating this pass). A dangling ref is a broken link. +- Keep \`children\` lists focused — a node that points at everything indexes nothing. Prefer sub-nodes over a flat 40-child list. + +## Pages stay canonical and shared + +The flat \`memory/concepts/\` page store and its \`edges:\` see-also graph remain the source of truth for content. The tree is an INDEX over them, not a replacement — never move a page's content into a node body, and never delete a page just because a node references it. Maintain the page's own \`edges:\` frontmatter exactly as before (the flat retrieval path still reads it); the tree is additive. + +--- + +# The work + +## 1. Read the buffer holistically + +Read it through first. Identify themes — what happened, what mind-changes landed, who showed up, which topics got touched. Plan, then edit. + +**Scan for previous-pass errors.** If existing content contradicts the buffer — that's a correction to land THIS pass. 
+ +**Recall ≠ memory.** \`recall\` results are search-tool synthesis — they CAN hallucinate. Treat results as candidates to verify before encoding, especially load-bearing claims about people's roles, dates, or exact quotes. + +## 2. Plan: which articles + nodes does this buffer touch? + +For entries with timestamp < \`${CUTOFF_PLACEHOLDER}\`, ask in parallel: + +> **A. Which EVENT articles does this create or extend?** +> **B. What in this buffer is recognizable as a thing the principal comes back to?** *(Inclusion-first. List everything that fits a spawn trigger, then spawn each.)* +> **C. Where in the tree does each touched page live, and does any node need spawning or re-describing to index it?** + +**Default spawn triggers — if any are present, spawn the stub:** named objects, named phrases, named people, named events, active projects, named places, services/infrastructure, substances/habits/health things, rules/protocols, landmark day-narratives. + +If you catch yourself hedging — *"am I overdoing it?"* — **the hedge IS the signal: spawn.** + +**Don't decide reorgs in this step.** Flag in \`threads.md\`; reorgs run as separate focused passes. + +## 3. Edit + +Execute the plan. Default to surgical edits on existing articles. Spawn new ones liberally. Apply One-fact-one-home and Route-don't-restate as you write. + +Then wire the tree: add \`page:\`/\`node:\` children to the right nodes, spawn organizing nodes for un-homed clusters, refresh node self-descriptions whose children changed. Check no \`node:\` edge closes a cycle and every node stays reachable from \`_root\`. + +## 4. Edges (see-also) on pages — DIRECTED, frontmatter is the source of truth + +Page \`edges:\` are **directed** source → target; the flat retrieval path spreads activation along them. Each page's \`edges:\` frontmatter list IS the source of truth for its outgoing edges. If two pages genuinely "see-also" each other, write the link in BOTH frontmatters. 
(This is the flat graph — separate from the tree's \`children\` DAG. Maintain it exactly as before.) + +| page type | outgoing cap | +| --- | --- | +| atomic articles | ~10 | +| arc-nodes | ~15 | +| gravity wells (principal / you / shared context) | ~25 | + +HARD LIMIT of 20 outgoing edges on any non-hub page. + +## 5. Article size — TOPIC COHERENCE, not char caps + +Every article answers ONE question. **When in doubt between split and compress, SPLIT.** Compression is where load-bearing facts quietly disappear. + +### Hard caps that ARE real + +| file | hard cap | +| --- | --- | +| \`concepts/.md\` (atomic / people / procs / objects) | 5K chars | +| \`concepts/arcs/.md\` | 10K ceiling | +| \`essentials.md\` | 10K | +| \`threads.md\` | 10K | +| \`recent.md\` | 2K | + +## 6. \`recent.md\` + +Rewrite as fresh ~400-token narrative. **Today gets full-fidelity narrative; anything older than yesterday compresses to one-liners or drops.** Hard cap ≤2000 chars, prose not list, voice on. Not a log — a note to next-you about what's currently in motion. + +## 7. \`essentials.md\` and \`threads.md\` + +- **\`essentials.md\`** ≤10K — facts that MUST load every conversation. Identity, disambiguations, corrections, hard rules. Embarrassment-prevention. +- **\`threads.md\`** ≤10K — active commitments and follow-ups. Add new threads, close completed ones, demote stale ones to articles. **Aggressively prune.** + +Surgical edits starve these. **Every ~7-10 passes, rewrite both from scratch.** + +## 8. Reorg check + +Scan namespace + node-children sizes. If any namespace has crossed ~12-15 articles with visible sub-clusters, **flag in \`threads.md\`** for a focused reorg pass. + +## 9. Trim \`memory/buffer.md\` + +- Re-read the buffer (it may have new entries appended during your work). +- Rewrite to contain ONLY entries with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\`. +- Smart removal — never wholesale-clear. 
+ +--- + +# What NOT to do + +- **Don't write \`## why it's load-bearing\` / \`## carry-forward\` / \`## related\` sections** anywhere. +- **Don't write banned bullet shapes** — archaeology / hub-restating / interpretation gloss / term-glyph gloss / family list / behavioral coaching / per-event recap. +- **Don't restate within the page.** One fact, one home. +- **Don't restate what edges already cover.** Trust adjacency. +- **Don't expand a 1500-char buffer into 10K of new content.** +- **Don't fabricate.** Use \`[SOURCE NEEDED: ...]\` inline for anything you need but lack. DO use loaded context and prior articles — "don't fabricate," not "don't use what you know." +- **Don't move page content into the tree.** Pages stay canonical; nodes index them. +- **Don't create a \`node:\` edge that closes a cycle** or that orphans a node from \`_root\`. +- **Don't default to compress.** When in doubt between split and compress, split. +- **Don't defer for the next pass.** Take care of it now. + +--- + +# Quick check before closing + +For each article you touched: + +1. **Voice register matched article shape?** +2. **Bullet count** ≤8 atomic, ≤12 arc-node? +3. **No banned sections / bullet shapes?** +4. **One fact, one home; route, don't restate?** +5. **Future-me lookup test** — would future-me search for THIS fact? +6. **Split, not compress, on over-cap?** +7. **Page edges** within tiered caps (≤20 hard on non-hubs)? + +For the tree: + +8. **Every touched page wired under a node** reachable from \`_root\`? +9. **Node self-descriptions** (\`summary\` + body) refreshed where children changed? +10. **No cycles** — no \`node:\` edge makes a node its own ancestor? +11. **All \`page:\`/\`node:\` refs resolve** to files that exist (or you created this pass)? + +Standing context: + +12. **\`recent.md\`** under 2000 chars, today=full / older=one-liners? +13. **\`essentials.md\` / \`threads.md\`** under 10K, pruned? +14. **\`[SOURCE NEEDED]\`** tags surfaced for human review? +15. 
**Buffer trimmed** to only entries with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\`?
+
+---
+
+This is the engine that decides who you are tomorrow. Be ORGANIZED. Care, judgment, voice. Your voice. Your wiki, your tree.`;
+
+/**
+ * Resolve `CONSOLIDATION_PROMPT` with `{{CUTOFF}}` substituted. The prompt
+ * treats the cutoff as opaque text — callers pass a `Mon D, h:mm AM/PM`
+ * timestamp matching the `buffer.md` entry format so the agent compares
+ * like-with-like.
+ *
+ * The replacement is passed as a function so the cutoff is inserted
+ * literally; a replacement *string* would have `$&`, `$$`, `$1`, … expanded
+ * by `String.prototype.replaceAll`.
+ *
+ * @param cutoff text substituted for every occurrence of the placeholder.
+ * @returns the bundled prompt with the placeholder replaced.
+ */
+export function renderConsolidationPrompt(cutoff: string): string {
+  return CONSOLIDATION_PROMPT.replaceAll(CUTOFF_PLACEHOLDER, () => cutoff);
+}
+
+/**
+ * Load the consolidation prompt template, optionally overridden from the file
+ * referenced by `memory.v2.consolidation_prompt_path`, then substitute
+ * `{{CUTOFF}}`. The override config field is shared with v2 (there is no
+ * separate v3 override key) so operators can point a single file at whichever
+ * consolidator owns the drain. Path-resolution rules mirror v2.
+ *
+ * Failure handling is intentionally permissive — missing file, read error,
+ * a path that is not a regular file (`lstat`, so symlinks are rejected too),
+ * a file larger than `MAX_PROMPT_BYTES`, or an empty/whitespace-only body all
+ * log a warning and fall back to the bundled prompt. Consolidation must never
+ * break because of a bad override.
+ *
+ * @param overridePath configured override path, or `null` for the bundled prompt.
+ * @param cutoff text substituted (literally) for every placeholder occurrence.
+ * @returns the override contents or the bundled prompt, with the cutoff applied.
+ */
+export function resolveConsolidationPrompt(
+  overridePath: string | null,
+  cutoff: string,
+): string {
+  if (overridePath === null) return renderConsolidationPrompt(cutoff);
+
+  const resolvedPath = resolveOverridePath(overridePath);
+  let contents: string;
+  try {
+    // lstat (not stat): a symlink at the configured path is reported as
+    // "not a regular file" instead of being followed.
+    const stat = lstatSync(resolvedPath);
+    if (!stat.isFile()) {
+      log.warn(
+        {
+          configuredPath: overridePath,
+          resolvedPath,
+          reason: "not_regular_file",
+          fallback: "bundled",
+        },
+        "consolidation prompt override is not a regular file; using bundled prompt",
+      );
+      return renderConsolidationPrompt(cutoff);
+    }
+    if (stat.size > MAX_PROMPT_BYTES) {
+      log.warn(
+        {
+          configuredPath: overridePath,
+          resolvedPath,
+          size: stat.size,
+          limit: MAX_PROMPT_BYTES,
+          reason: "oversized_override",
+          fallback: "bundled",
+        },
+        "consolidation prompt override exceeds size limit; using bundled prompt",
+      );
+      return renderConsolidationPrompt(cutoff);
+    }
+    contents = readFileSync(resolvedPath, "utf-8");
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    log.warn(
+      { configuredPath: overridePath, resolvedPath, code, fallback: "bundled" },
+      "consolidation prompt override unreadable; using bundled prompt",
+    );
+    return renderConsolidationPrompt(cutoff);
+  }
+
+  if (contents.trim().length === 0) {
+    log.warn(
+      {
+        configuredPath: overridePath,
+        resolvedPath,
+        reason: "empty_override",
+        fallback: "bundled",
+      },
+      "consolidation prompt override is empty; using bundled prompt",
+    );
+    return renderConsolidationPrompt(cutoff);
+  }
+
+  // Replacer function keeps the cutoff literal (no `$`-pattern expansion).
+  return contents.replaceAll(CUTOFF_PLACEHOLDER, () => cutoff);
+}
+
+/**
+ * Turn the configured override path into the path that is actually read:
+ * a leading `~/` is expanded against the user's home directory, an absolute
+ * path is used as-is, and anything else is taken relative to the workspace
+ * directory.
+ */
+function resolveOverridePath(overridePath: string): string {
+  if (overridePath.startsWith("~/")) {
+    return join(homedir(), overridePath.slice(2));
+  }
+  if (isAbsolute(overridePath)) return overridePath;
+  return join(getWorkspaceDir(), overridePath);
+}
From 01e1c81a2f832d4679f9cc81401fee0413ec9a36 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:19:26 -0400 Subject: 
[PATCH 16/21] feat(memory-v3): v3 Retriever as comparand #2 in the compare harness (#31986) Co-authored-by: Vellum Assistant --- .../src/memory/v3/__tests__/retriever.test.ts | 226 ++++++++++++++++++ assistant/src/memory/v3/retriever.ts | 33 +++ .../src/runtime/routes/memory-v2-routes.ts | 13 +- 3 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 assistant/src/memory/v3/__tests__/retriever.test.ts create mode 100644 assistant/src/memory/v3/retriever.ts diff --git a/assistant/src/memory/v3/__tests__/retriever.test.ts b/assistant/src/memory/v3/__tests__/retriever.test.ts new file mode 100644 index 00000000000..bc514be301e --- /dev/null +++ b/assistant/src/memory/v3/__tests__/retriever.test.ts @@ -0,0 +1,226 @@
+/**
+ * Route-assembly tests for the v3 retriever wiring in
+ * `handleCompareRetrievers` (`assistant/src/runtime/routes/memory-v2-routes.ts`).
+ *
+ * The compare route always includes the router retriever as comparand #1 and
+ * adds the v3 retriever as comparand #2 only when `config.memory.v3.enabled`.
+ * These tests exercise that gating end-to-end through the real handler and the
+ * real `runComparisonOverHistory`, with a fixture DB seeded with one logged
+ * router turn (mirroring `assistant/src/memory/v2/__tests__/harness-compare.test.ts`).
+ *
+ * Neither the real router nor the real v3 loop runs here — both would hit a
+ * provider. `../loop.js` (the v3 loop) and `../../v2/harness/router-retriever.js`
+ * are `mock.module`-stubbed to return deterministic selections, so the tests
+ * assert *which retrievers were assembled* (by the names in the report), not
+ * their retrieval quality. `loadConfig` is stubbed so each test controls
+ * `memory.v3.enabled`; `getWorkspaceDir` is stubbed to point at a nonexistent
+ * directory. The page index itself is deliberately left unmocked (see the
+ * note further down).
+ */
+
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+import type { AssistantConfig } from "../../../config/types.js";
+import { getDb } from "../../db-connection.js";
+import { initializeDb } from "../../db-init.js";
+import type { MemoryV2ConceptRowRecord } from "../../memory-v2-activation-log-store.js";
+import {
+  conversations,
+  memoryV2ActivationLogs,
+  messages,
+} from "../../schema.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+} from "../../v2/harness/retriever.js";
+
+initializeDb();
+
+// Silence the route's logger.
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () =>
+    new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
+}));
+
+// loadNowText / page-index read workspace files; a nonexistent dir yields "".
+const WORKSPACE = "/tmp/v3-retriever-nonexistent-workspace";
+
+// Controllable config: each test sets `v3Enabled` before invoking the handler.
+let v3Enabled = false;
+
+mock.module("../../../config/loader.js", () => ({
+  loadConfig: (): AssistantConfig =>
+    ({
+      memory: {
+        v2: { enabled: true, router: { historical_pairs: 1 } },
+        v3: { enabled: v3Enabled },
+      },
+    }) as unknown as AssistantConfig,
+}));
+
+mock.module("../../../util/platform.js", () => ({
+  getWorkspaceDir: (): string => WORKSPACE,
+}));
+
+// page-index is intentionally NOT mocked: it has a wide export surface
+// (`invalidatePageIndex` etc.) that transitive importers in the route's
+// dependency graph rely on, and `getPageIndex` over the nonexistent workspace
+// returns a benign index. The retriever names are what we assert, not the
+// page set, so the real (empty-ish) index is harmless here.
+
+// Stub the router retriever — the real one calls a provider.
+mock.module("../../v2/harness/router-retriever.js", () => ({
+  createRouterRetriever: () => ({
+    name: "router",
+    retrieve: async (): Promise<RetrievalOutput> => ({
+      selectedSlugs: ["p1"],
+      sourceBySlug: new Map([["p1", "router"]]),
+    }),
+  }),
+}));
+
+// Stub the v3 loop — the real one runs scout/filter/tree/edge/gate lanes that
+// hit providers, embeddings, and the filesystem.
+mock.module("../loop.js", () => ({
+  runRetrievalLoop: async (
+    _input: RetrievalInput,
+  ): Promise<RetrievalOutput> => ({
+    selectedSlugs: ["p2"],
+    sourceBySlug: new Map([["p2", "dense"]]),
+  }),
+}));
+
+// Import the handler only after the mocks are installed.
+const { handleCompareRetrievers } =
+  await import("../../../runtime/routes/memory-v2-routes.js");
+
+const ZERO_CONFIG = {
+  d: 0,
+  c_user: 0,
+  c_assistant: 0,
+  c_now: 0,
+  k: 0,
+  hops: 0,
+  top_k: 0,
+  epsilon: 0,
+};
+
+let seq = 0;
+
+function ensureConversation(id: string): void {
+  getDb()
+    .insert(conversations)
+    .values({ id, createdAt: 0, updatedAt: 0 })
+    .onConflictDoNothing()
+    .run();
+}
+
+function insertMessage(
+  id: string,
+  conversationId: string,
+  role: string,
+  text: string,
+  createdAt: number,
+): void {
+  ensureConversation(conversationId);
+  getDb()
+    .insert(messages)
+    .values({
+      id,
+      conversationId,
+      role,
+      content: JSON.stringify([{ type: "text", text }]),
+      createdAt,
+    })
+    .run();
+}
+
+function makeConcept(
+  slug: string,
+  status: MemoryV2ConceptRowRecord["status"],
+): MemoryV2ConceptRowRecord {
+  return {
+    slug,
+    finalActivation: 0,
+    ownActivation: 0,
+    priorActivation: 0,
+    simUser: 0,
+    simAssistant: 0,
+    simNow: 0,
+    simUserRerankBoost: 0,
+    simAssistantRerankBoost: 0,
+    inRerankPool: false,
+    spreadContribution: 0,
+    source: "router",
+    status,
+  };
+}
+
+function insertRouterLog(
+  conversationId: string,
+  messageId: string,
+  turn: number,
+  concepts: MemoryV2ConceptRowRecord[],
+  createdAt: number,
+): void {
+  ensureConversation(conversationId);
+  getDb()
+    .insert(memoryV2ActivationLogs)
+    .values({
+      id: `log-${seq++}`,
+      conversationId,
+      messageId,
+      turn,
+      mode: "router",
+      conceptsJson: JSON.stringify(concepts),
+      skillsJson: "[]",
+      configJson: JSON.stringify(ZERO_CONFIG),
+      createdAt,
+    })
+    .run();
+}
+
+/** Seed one router turn: user msg, assistant anchor, and the logged picks. */
+function seedTurn(groundTruth: string[]): void {
+  insertMessage("u1", "c1", "user", "hello", 10);
+  insertMessage("a1", "c1", "assistant", "hi", 20); // anchor for turn 1
+  insertRouterLog(
+    "c1",
+    "a1",
+    1,
+    groundTruth.map((slug) => makeConcept(slug, "injected")),
+    20,
+  );
+}
+
+function reset(): void {
+  const db = getDb();
+  db.delete(memoryV2ActivationLogs).run();
+  db.delete(messages).run();
+  v3Enabled = false;
+}
+
+describe("handleCompareRetrievers v3 wiring", () => {
+  beforeEach(reset);
+
+  test("includes only router when memory.v3.enabled is false", async () => {
+    seedTurn(["p1", "p2"]);
+
+    const report = await handleCompareRetrievers({ body: {} });
+
+    const names = report.retrievers.map((r) => r.name);
+    expect(names).toEqual(["router"]);
+  });
+
+  test("includes router and v3 when memory.v3.enabled is true", async () => {
+    v3Enabled = true;
+    seedTurn(["p1", "p2"]);
+
+    const report = await handleCompareRetrievers({ body: {} });
+
+    const names = report.retrievers.map((r) => r.name);
+    expect(names).toEqual(["router", "v3"]);
+    // Router is always comparand #1; v3 joins as comparand #2.
+    expect(names[0]).toBe("router");
+    expect(names[1]).toBe("v3");
+  });
+});
diff --git a/assistant/src/memory/v3/retriever.ts b/assistant/src/memory/v3/retriever.ts new file mode 100644 index 00000000000..eee8aed369d --- /dev/null +++ b/assistant/src/memory/v3/retriever.ts @@ -0,0 +1,33 @@
+/**
+ * v3 retriever — the multi-lane bounded-descent retrieval loop
+ * ({@link runRetrievalLoop}) adapted to the harness {@link Retriever}
+ * interface.
+ *
+ * This is the offline, zero-production-risk shadow path: the comparison harness
+ * replays historical oracle turns and scores v3's selection against the v2
+ * router's logged picks (recall@k). Nothing here runs on a live injection turn
+ * — the loop reads the DB handle for its hot lane but never mutates production
+ * state, matching the {@link Retriever} contract.
+ */
+
+import type { DrizzleDb } from "../db-connection.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+  Retriever,
+} from "../v2/harness/retriever.js";
+import { runRetrievalLoop } from "./loop.js";
+
+/**
+ * Wrap the v3 retrieval loop as a named harness {@link Retriever}.
+ *
+ * The returned object reports itself as `"v3"` and forwards every
+ * `retrieve` call straight to {@link runRetrievalLoop}, passing `db` in the
+ * loop's dependency bag.
+ *
+ * @param db handle threaded to {@link runRetrievalLoop} for the scout hot lane.
+ * @returns a {@link Retriever} named `"v3"`.
+ */
+export function createV3Retriever(db: DrizzleDb): Retriever {
+  return {
+    name: "v3",
+    retrieve(input: RetrievalInput): Promise<RetrievalOutput> {
+      return runRetrievalLoop(input, { db });
+    },
+  };
+}
diff --git a/assistant/src/runtime/routes/memory-v2-routes.ts b/assistant/src/runtime/routes/memory-v2-routes.ts index 124cfa480fa..5f987c82fb3 100644 --- a/assistant/src/runtime/routes/memory-v2-routes.ts +++ b/assistant/src/runtime/routes/memory-v2-routes.ts @@ -24,6 +24,7 @@ import { validateEdgeTargets, } from "../../memory/v2/edge-index.js"; import { runComparisonOverHistory } from "../../memory/v2/harness/compare.js"; +import type { Retriever } from "../../memory/v2/harness/retriever.js"; import { createRouterRetriever } from "../../memory/v2/harness/router-retriever.js"; import type { ComparisonReport } from "../../memory/v2/harness/runner.js"; import { computeInjectionScores } from "../../memory/v2/injection-events.js"; @@ -38,6 +39,7 @@ import { import { ROUTER_PROMPT } from "../../memory/v2/prompts/router.js"; import { type RouterSource, runRouter } from "../../memory/v2/router.js"; import { seedV2SkillEntries } from "../../memory/v2/skill-store.js"; +import { createV3Retriever } from "../../memory/v3/retriever.js"; import { 
getLogger } from "../../util/logger.js"; import { getWorkspaceDir } from "../../util/platform.js"; import { RouteError } from "./errors.js"; @@ -637,11 +639,20 @@ export async function handleCompareRetrievers({ const pageIndex = await getPageIndex(workspaceDir); const db = getDb(); + // The router is always comparand #1 (the harness self-test against its own + // logged ground truth). v3 joins as comparand #2 only when explicitly + // enabled, so the default compare surface is unchanged until v3 is switched + // on. v3 is offline-only here — the loop reads `db` but mutates nothing. + const retrievers: Retriever[] = [createRouterRetriever(db)]; + if (config.memory.v3.enabled) { + retrievers.push(createV3Retriever(db)); + } + return runComparisonOverHistory({ db, workspaceDir, config, - retrievers: [createRouterRetriever(db)], + retrievers, ks: ks ?? DEFAULT_COMPARE_KS, limit: limit ?? DEFAULT_COMPARE_LIMIT, pageExists: (slug) => pageIndex.bySlug.has(slug), From 5ef2bbc759685138cda90d606c291ad0ff7f54d2 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:21:51 -0400 Subject: [PATCH 17/21] feat(memory-v3): pass-1->pass-2 co-activation logging (#31987) Co-authored-by: Vellum Assistant --- assistant/src/memory/db-init.ts | 2 + .../migrations/262-memory-v3-coactivation.ts | 57 +++ assistant/src/memory/migrations/index.ts | 4 + assistant/src/memory/migrations/registry.ts | 8 + .../v3/__tests__/coactivation-store.test.ts | 422 ++++++++++++++++++ assistant/src/memory/v3/coactivation-store.ts | 124 +++++ assistant/src/memory/v3/loop.ts | 90 ++++ 7 files changed, 707 insertions(+) create mode 100644 assistant/src/memory/migrations/262-memory-v3-coactivation.ts create mode 100644 assistant/src/memory/v3/__tests__/coactivation-store.test.ts create mode 100644 assistant/src/memory/v3/coactivation-store.ts diff --git a/assistant/src/memory/db-init.ts b/assistant/src/memory/db-init.ts index 5783acd2717..9f6594953a5 100644 --- a/assistant/src/memory/db-init.ts +++ 
b/assistant/src/memory/db-init.ts @@ -129,6 +129,7 @@ import { migrateMemoryRetrospectiveState, migrateMemoryV2ActivationLogs, migrateMemoryV2InjectionEvents, + migrateMemoryV3Coactivation, migrateMessageBookmarks, migrateMessagesConversationCreatedAtIndex, migrateMessagesFtsBackfill, @@ -456,6 +457,7 @@ export function initializeDb(): void { migrateConversationCleanedAt, migrateRenameCleanedAt, migrateLlmUsageAddRawUsage, + migrateMemoryV3Coactivation, ]; // Run each migration step, catching and logging individual failures so one diff --git a/assistant/src/memory/migrations/262-memory-v3-coactivation.ts b/assistant/src/memory/migrations/262-memory-v3-coactivation.ts new file mode 100644 index 00000000000..f918109680a --- /dev/null +++ b/assistant/src/memory/migrations/262-memory-v3-coactivation.ts @@ -0,0 +1,57 @@
+import type { DrizzleDb } from "../db-connection.js";
+import { getSqliteFrom } from "../db-connection.js";
+import { withCrashRecovery } from "./validate-migration-state.js";
+
+// Key handed to withCrashRecovery for this migration. The registry entry for
+// this migration (registry.ts) uses the same string as its `key`.
+const CHECKPOINT_KEY = "migration_memory_v3_coactivation_v1";
+
+/**
+ * Create the memory_v3_coactivation table — an append-only log of
+ * pass-1 → pass-N co-activation pairs observed during a v3 retrieval loop.
+ *
+ * Each row records that a page (`target_slug`) first surfaced on a later
+ * descent pass was co-selected alongside a page (`source_slug`) that surfaced
+ * on pass 1, with `pass_gap` = passOf(target) − passOf(source). This is the
+ * raw gradient signal that edge-learning later reconciles into curated-graph
+ * edge weights: a source that repeatedly precedes a target across turns is a
+ * candidate association. `used` is the usefulness flag (0 here — the loop
+ * cannot know whether the target was actually load-bearing for the turn; a
+ * later edge-learning pass reconciles it).
+ *
+ * The table just accumulates raw events; the edge-learning formula or the
+ * decay/weighting can change later without losing signal.
+ *
+ * Indexes:
+ *   - `(source_slug, target_slug)` for per-pair aggregation (the hot path for
+ *     edge-learning reads).
+ *   - `(created_at)` for time-range pruning later.
+ *
+ * All three statements use `IF NOT EXISTS`, so re-running the DDL against a
+ * database that already has the table/indexes is harmless.
+ *
+ * @param database Drizzle handle; the raw SQLite connection is obtained from
+ *   it via `getSqliteFrom`.
+ */
+export function migrateMemoryV3Coactivation(database: DrizzleDb): void {
+  // NOTE(review): withCrashRecovery presumably records/checks CHECKPOINT_KEY
+  // in memory_checkpoints around the callback — confirm against its source.
+  withCrashRecovery(database, CHECKPOINT_KEY, () => {
+    const raw = getSqliteFrom(database);
+    raw.exec(/*sql*/ `
+      CREATE TABLE IF NOT EXISTS memory_v3_coactivation (
+        id INTEGER PRIMARY KEY,
+        conversation_id TEXT NOT NULL,
+        turn INTEGER NOT NULL,
+        source_slug TEXT NOT NULL,
+        target_slug TEXT NOT NULL,
+        pass_gap INTEGER NOT NULL,
+        used INTEGER NOT NULL,
+        created_at INTEGER NOT NULL
+      )
+    `);
+    raw.exec(/*sql*/ `
+      CREATE INDEX IF NOT EXISTS idx_memory_v3_coactivation_pair
+      ON memory_v3_coactivation (source_slug, target_slug)
+    `);
+    raw.exec(/*sql*/ `
+      CREATE INDEX IF NOT EXISTS idx_memory_v3_coactivation_time
+      ON memory_v3_coactivation (created_at)
+    `);
+  });
+}
+
+/**
+ * Rollback for {@link migrateMemoryV3Coactivation}: drops the
+ * `memory_v3_coactivation` table if it exists. This function does not touch
+ * the checkpoint row itself.
+ * NOTE(review): presumably the rollback runner clears the checkpoint — confirm.
+ */
+export function downMemoryV3Coactivation(database: DrizzleDb): void {
+  const raw = getSqliteFrom(database);
+  raw.exec(/*sql*/ `DROP TABLE IF EXISTS memory_v3_coactivation`);
+}
diff --git a/assistant/src/memory/migrations/index.ts b/assistant/src/memory/migrations/index.ts index 119ccff8db2..52a41dc854f 100644 --- a/assistant/src/memory/migrations/index.ts +++ b/assistant/src/memory/migrations/index.ts @@ -242,6 +242,10 @@ export { downLlmUsageAddRawUsage, migrateLlmUsageAddRawUsage, } from "./261-llm-usage-add-raw-usage.js"; +export { + downMemoryV3Coactivation, + migrateMemoryV3Coactivation, +} from "./262-memory-v3-coactivation.js"; export { MIGRATION_REGISTRY, type MigrationRegistryEntry, diff --git a/assistant/src/memory/migrations/registry.ts b/assistant/src/memory/migrations/registry.ts index a5fa5fddebc..c3f56f972b8 100644 --- a/assistant/src/memory/migrations/registry.ts +++ b/assistant/src/memory/migrations/registry.ts @@ -55,6 +55,7 @@ import { downMemoryV2InjectionEvents } from "./256-memory-v2-injection-events.js import { 
downConversationCleanedAt } from "./259-conversation-cleaned-at.js"; import { downRenameCleanedAt } from "./260-rename-cleaned-at.js"; import { downLlmUsageAddRawUsage } from "./261-llm-usage-add-raw-usage.js"; +import { downMemoryV3Coactivation } from "./262-memory-v3-coactivation.js"; export interface MigrationRegistryEntry { /** The checkpoint key written to memory_checkpoints on completion. */ @@ -470,6 +471,13 @@ export const MIGRATION_REGISTRY: MigrationRegistryEntry[] = [ "Add raw_usage TEXT column to llm_usage_events for storing the provider's untouched usage block as JSON (Anthropic TTL breakdown, OpenAI prompt/completion token details, etc.) so downstream consumers can extract provider-specific detail without per-field schema changes", down: downLlmUsageAddRawUsage, }, + { + key: "migration_memory_v3_coactivation_v1", + version: 55, + description: + "Create memory_v3_coactivation table — append-only log of pass-1 → pass-N co-activation pairs (gradient signal) emitted by the v3 retrieval loop and reconciled later by edge-learning", + down: downMemoryV3Coactivation, + }, ]; export function getMaxMigrationVersion(): number { diff --git a/assistant/src/memory/v3/__tests__/coactivation-store.test.ts b/assistant/src/memory/v3/__tests__/coactivation-store.test.ts new file mode 100644 index 00000000000..3e6226a12df --- /dev/null +++ b/assistant/src/memory/v3/__tests__/coactivation-store.test.ts @@ -0,0 +1,422 @@
+/**
+ * Tests for `assistant/src/memory/v3/coactivation-store.ts`, its sibling
+ * migration `262-memory-v3-coactivation.ts`, and the loop's co-activation
+ * emission (`loop.ts`, gated by `config.memory.v3.write.coactivation`).
+ *
+ * Coverage:
+ * - Migration creates the table + both indexes; safe to re-run.
+ * - recordCoactivations / readCoactivations round-trip; empty list is a
+ *   no-op; `since` filters by created_at.
+ * - A scripted 2-pass loop emits the expected pass-1 → pass-2 rows with the
+ *   correct pass_gap when the flag is on, and nothing when it is off.
+ *
+ * Uses an in-memory bun:sqlite database — no real workspace DB. The loop's
+ * lane modules are stubbed via `mock.module`, matching `loop.test.ts`.
+ */
+
+import { Database } from "bun:sqlite";
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+
+import { drizzle } from "drizzle-orm/bun-sqlite";
+
+import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js";
+
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () => makeMockLogger(),
+}));
+
+import type { DrizzleDb } from "../../db-connection.js";
+import { getSqliteFrom } from "../../db-connection.js";
+import {
+  downMemoryV3Coactivation,
+  migrateMemoryV3Coactivation,
+} from "../../migrations/262-memory-v3-coactivation.js";
+import * as schema from "../../schema.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+} from "../../v2/harness/retriever.js";
+import type { GateDecision, ScoutResult } from "../../v2/harness/trace.js";
+import {
+  type CoactivationRow,
+  readCoactivations,
+  recordCoactivations,
+} from "../coactivation-store.js";
+
+// memory_checkpoints is required by withCrashRecovery and is normally created
+// by an early core migration. Stand it up by hand so the v3 migration can run
+// in isolation against a fresh in-memory DB.
+const CHECKPOINTS_DDL = /*sql*/ `
+  CREATE TABLE memory_checkpoints (
+    key TEXT PRIMARY KEY,
+    value TEXT NOT NULL,
+    updated_at INTEGER NOT NULL
+  )
+`;
+
+// ---------------------------------------------------------------------------
+// Loop lane stubs — installed before importing the module under test. Mirrors
+// loop.test.ts: each test rewires the `lane` refs before calling the loop.
+// ---------------------------------------------------------------------------
+
+interface RunScoutsResult {
+  scouts: ScoutResult[];
+  sticky: Set<string>;
+  bypass: Set<string>;
+}
+interface FilterResult {
+  kept: string[];
+  trace: { judged: string[]; dropped: string[] };
+  failureReason?: string;
+}
+interface WalkResult {
+  pages: Set<string>;
+  levels: Array<{
+    node: string;
+    considered: string[];
+    descended: string[];
+    skipped: string[];
+    reasoning: string;
+  }>;
+}
+interface ExpandResult {
+  pulled: Set<string>;
+  expansions: Array<{ from: string; pulled: string[] }>;
+}
+interface GateResult {
+  decision: GateDecision;
+  selectedSlugs: string[];
+}
+
+const lane = {
+  scouts: [] as RunScoutsResult[],
+  filter: [] as FilterResult[],
+  walk: [] as WalkResult[],
+  edges: [] as ExpandResult[],
+  gate: [] as GateResult[],
+};
+
+/**
+ * Return the scripted entry for call number `index`, clamping to the last
+ * entry so a lane called more often than scripted keeps replaying its final
+ * result.
+ */
+function nextOf<T>(list: T[], index: number): T {
+  return list[Math.min(index, list.length - 1)];
+}
+
+let scoutCallCount = 0;
+let filterCallCount = 0;
+let walkCallCount = 0;
+let edgeCallCount = 0;
+let gateCallCount = 0;
+
+mock.module("../scouts.js", () => ({
+  runScouts: async (): Promise<RunScoutsResult> =>
+    nextOf(lane.scouts, scoutCallCount++),
+}));
+mock.module("../filter.js", () => ({
+  filterDenseHits: async (): Promise<FilterResult> =>
+    nextOf(lane.filter, filterCallCount++),
+}));
+mock.module("../tree-walk.js", () => ({
+  runTreeWalk: async (): Promise<WalkResult> =>
+    nextOf(lane.walk, walkCallCount++),
+}));
+mock.module("../edges.js", () => ({
+  expandEdges: async (): Promise<ExpandResult> =>
+    nextOf(lane.edges, edgeCallCount++),
+}));
+mock.module("../gate.js", () => ({
+  runGate: async (): Promise<GateResult> => nextOf(lane.gate, gateCallCount++),
+}));
+mock.module("../tree-index.js", () => ({
+  getTreeIndex: async () => ({
+    nodes: new Map(),
+    childrenByNode: new Map(),
+    parentsByNode: new Map(),
+    pageParents: new Map(),
+    root: "_root",
+  }),
+}));
+mock.module("../../v2/page-index.js", () => ({
+  getPageIndex: async () => ({
+    entries: [],
+    bySlug: new Map(),
+    byId: new Map(),
+    rendered: "",
+  }),
+}));
+
+const { runRetrievalLoop } = await import("../loop.js");
+
+let sqlite: Database;
+let database: DrizzleDb;
+
+beforeEach(() => {
+  sqlite = new Database(":memory:");
+  database = drizzle(sqlite, { schema });
+  getSqliteFrom(database).exec(CHECKPOINTS_DDL);
+  migrateMemoryV3Coactivation(database);
+
+  lane.scouts = [];
+  lane.filter = [];
+  lane.walk = [];
+  lane.edges = [];
+  lane.gate = [];
+  scoutCallCount = 0;
+  filterCallCount = 0;
+  walkCallCount = 0;
+  edgeCallCount = 0;
+  gateCallCount = 0;
+});
+
+afterEach(() => {
+  sqlite.close();
+});
+
+function scout(laneName: ScoutResult["lane"], slugs: string[]): ScoutResult {
+  return { lane: laneName, slugs };
+}
+
+function makeInput(opts?: {
+  passCap?: number;
+  coactivation?: boolean;
+}): RetrievalInput {
+  return {
+    workspaceDir: "/tmp/does-not-matter",
+    recentTurnPairs: [],
+    nowText: "NOW",
+    priorEverInjected: [],
+    config: {
+      memory: {
+        v3: {
+          passCap: opts?.passCap ?? 3,
+          lanes: {
+            hot: true,
+            sparse: true,
+            dense: true,
+            tree: true,
+            edges: true,
+          },
+          write: {
+            enabled: false,
+            consolidateIntervalMs: 3600000,
+            coactivation: opts?.coactivation ?? false,
+          },
+        },
+      },
+    } as unknown as RetrievalInput["config"],
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Migration.
+// ---------------------------------------------------------------------------
+
+describe("migrateMemoryV3Coactivation", () => {
+  test("creates table and both indexes; safe to re-run", () => {
+    migrateMemoryV3Coactivation(database);
+    migrateMemoryV3Coactivation(database);
+
+    const raw = getSqliteFrom(database);
+    const table = raw
+      .query(
+        `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_coactivation'`,
+      )
+      .get();
+    expect(table).toBeTruthy();
+
+    const indexNames = new Set(
+      (
+        raw
+          .query(
+            `SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='memory_v3_coactivation'`,
+          )
+          .all() as Array<{ name: string }>
+      ).map((r) => r.name),
+    );
+    expect(indexNames.has("idx_memory_v3_coactivation_pair")).toBe(true);
+    expect(indexNames.has("idx_memory_v3_coactivation_time")).toBe(true);
+  });
+
+  test("downMemoryV3Coactivation drops the table", () => {
+    downMemoryV3Coactivation(database);
+    const table = getSqliteFrom(database)
+      .query(
+        `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_coactivation'`,
+      )
+      .get();
+    expect(table).toBeFalsy();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Store.
+// ---------------------------------------------------------------------------
+
+describe("recordCoactivations / readCoactivations", () => {
+  test("round-trips rows oldest-first", () => {
+    const rows: CoactivationRow[] = [
+      {
+        conversationId: "conv-1",
+        turn: 3,
+        sourceSlug: "alice",
+        targetSlug: "bob",
+        passGap: 1,
+        used: 0,
+        createdAt: 1_000,
+      },
+      {
+        conversationId: "conv-1",
+        turn: 3,
+        sourceSlug: "alice",
+        targetSlug: "carol",
+        passGap: 2,
+        used: 0,
+        createdAt: 2_000,
+      },
+    ];
+    recordCoactivations(database, rows);
+
+    const read = readCoactivations(database);
+    expect(read).toHaveLength(2);
+    expect(read[0]).toMatchObject({
+      conversationId: "conv-1",
+      turn: 3,
+      sourceSlug: "alice",
+      targetSlug: "bob",
+      passGap: 1,
+      used: 0,
+      createdAt: 1_000,
+    });
+    expect(read[1].targetSlug).toBe("carol");
+    expect(read[1].passGap).toBe(2);
+  });
+
+  test("empty list is a no-op", () => {
+    recordCoactivations(database, []);
+    expect(readCoactivations(database)).toHaveLength(0);
+  });
+
+  test("since filters by created_at", () => {
+    recordCoactivations(database, [
+      {
+        conversationId: "c",
+        turn: 1,
+        sourceSlug: "a",
+        targetSlug: "b",
+        passGap: 1,
+        used: 0,
+        createdAt: 100,
+      },
+      {
+        conversationId: "c",
+        turn: 1,
+        sourceSlug: "a",
+        targetSlug: "c",
+        passGap: 1,
+        used: 0,
+        createdAt: 500,
+      },
+    ]);
+    const recent = readCoactivations(database, 300);
+    expect(recent).toHaveLength(1);
+    expect(recent[0].targetSlug).toBe("c");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Loop emission.
+// ---------------------------------------------------------------------------
+
+describe("runRetrievalLoop — co-activation emission", () => {
+  /**
+   * Script a 2-pass loop: pass 1 surfaces `a` (hot) + `b` (sparse); pass 2
+   * surfaces `c` (dense). The gate says "more" on pass 1 (selecting a, b) and
+   * "ready" on pass 2 (selecting a, b, c). So `c` is the only pass-2 target,
+   * paired with pass-1 hits a and b → two rows, both pass_gap=1.
+   */
+  function scriptTwoPass(): void {
+    lane.scouts = [
+      {
+        scouts: [scout("hot", ["a"]), scout("sparse", ["b"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+      {
+        scouts: [scout("dense", ["c"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+    ];
+    // Pass 1 has no dense scout, so the filter is only called on pass 2 (one
+    // filter call per dense pass) — its single entry keeps `c`.
+    lane.filter = [{ kept: ["c"], trace: { judged: ["c"], dropped: [] } }];
+    lane.walk = [
+      { pages: new Set(), levels: [] },
+      { pages: new Set(), levels: [] },
+    ];
+    lane.edges = [
+      { pulled: new Set(), expansions: [] },
+      { pulled: new Set(), expansions: [] },
+    ];
+    lane.gate = [
+      {
+        decision: { decision: "more", questions: ["q"] },
+        selectedSlugs: ["a", "b"],
+      },
+      { decision: { decision: "ready" }, selectedSlugs: ["a", "b", "c"] },
+    ];
+  }
+
+  test("emits pass-1 → pass-2 rows with correct pass_gap when flag is on", async () => {
+    scriptTwoPass();
+    const out: RetrievalOutput = await runRetrievalLoop(
+      makeInput({ passCap: 3, coactivation: true }),
+      { db: database, conversationId: "conv-42", turn: 7 },
+    );
+    expect(out.selectedSlugs).toEqual(["a", "b", "c"]);
+
+    const rows = readCoactivations(database);
+    // c (pass 2) paired with each pass-1 hit a and b → two rows.
+    expect(rows).toHaveLength(2);
+    const pairs = rows.map((r) => `${r.sourceSlug}->${r.targetSlug}`).sort();
+    expect(pairs).toEqual(["a->c", "b->c"]);
+    for (const r of rows) {
+      expect(r.targetSlug).toBe("c");
+      expect(r.passGap).toBe(1);
+      expect(r.used).toBe(0);
+      expect(r.conversationId).toBe("conv-42");
+      expect(r.turn).toBe(7);
+    }
+  });
+
+  test("emits nothing when the flag is off", async () => {
+    scriptTwoPass();
+    await runRetrievalLoop(makeInput({ passCap: 3, coactivation: false }), {
+      db: database,
+      conversationId: "conv-42",
+      turn: 7,
+    });
+    expect(readCoactivations(database)).toHaveLength(0);
+  });
+
+  test("single-pass selection emits nothing (no later-surfaced target)", async () => {
+    lane.scouts = [
+      {
+        scouts: [scout("hot", ["a"]), scout("sparse", ["b"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+    ];
+    lane.filter = [{ kept: [], trace: { judged: [], dropped: [] } }];
+    lane.walk = [{ pages: new Set(), levels: [] }];
+    lane.edges = [{ pulled: new Set(), expansions: [] }];
+    lane.gate = [
+      { decision: { decision: "ready" }, selectedSlugs: ["a", "b"] },
+    ];
+
+    await runRetrievalLoop(makeInput({ passCap: 3, coactivation: true }), {
+      db: database,
+      conversationId: "conv-1",
+      turn: 1,
+    });
+    expect(readCoactivations(database)).toHaveLength(0);
+  });
+});
diff --git a/assistant/src/memory/v3/coactivation-store.ts b/assistant/src/memory/v3/coactivation-store.ts new file mode 100644 index 00000000000..7a759d46d71 --- /dev/null +++ b/assistant/src/memory/v3/coactivation-store.ts @@ -0,0 +1,124 @@
+/**
+ * Memory v3 — co-activation store.
+ *
+ * Best-effort read/write helpers over `memory_v3_coactivation` (migration
+ * 262). Each row is a pass-1 → pass-N co-activation pair observed during a
+ * single v3 retrieval loop: a `target_slug` first surfaced on a later descent
+ * pass was co-selected alongside a `source_slug` that surfaced on pass 1,
+ * `pass_gap = passOf(target) − passOf(source)`.
+ * + * This is the raw gradient signal — edge-learning reconciles it into curated + * graph edge weights later. Writes are off the retrieval critical path: a + * failed insert here must never abort the turn on top of a successful + * retrieval the caller already depends on. + */ + +import { getLogger } from "../../util/logger.js"; +import type { DrizzleDb } from "../db-connection.js"; +import { getSqliteFrom } from "../db-connection.js"; + +const log = getLogger("memory-v3-coactivation"); + +/** One co-activation pair to persist. */ +export interface CoactivationRow { + conversationId: string; + turn: number; + sourceSlug: string; + targetSlug: string; + passGap: number; + /** Usefulness flag. 0 at emit time; reconciled later by edge-learning. */ + used: number; + createdAt: number; +} + +/** A persisted co-activation row, as read back from the table. */ +export interface PersistedCoactivationRow { + id: number; + conversationId: string; + turn: number; + sourceSlug: string; + targetSlug: string; + passGap: number; + used: number; + createdAt: number; +} + +/** + * Append co-activation rows. Best-effort — a SQLite write must never abort the + * agent turn on top of a successful retrieval the rest of the caller depends + * on. Mirrors {@link recordInjectionEvents}. 
+ */ +export function recordCoactivations( + database: DrizzleDb, + rows: readonly CoactivationRow[], +): void { + if (rows.length === 0) return; + try { + const raw = getSqliteFrom(database); + const insert = raw.prepare( + `INSERT INTO memory_v3_coactivation + (conversation_id, turn, source_slug, target_slug, pass_gap, used, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?)`, + ); + const append = raw.transaction((items: readonly CoactivationRow[]) => { + for (const r of items) { + insert.run( + r.conversationId, + r.turn, + r.sourceSlug, + r.targetSlug, + r.passGap, + r.used, + r.createdAt, + ); + } + }); + append(rows); + } catch (err) { + log.warn( + { err, rowCount: rows.length }, + "failed to record co-activations; continuing", + ); + } +} + +/** + * Read co-activation rows, oldest first. When `since` is provided, only rows + * with `created_at >= since` are returned. + */ +export function readCoactivations( + database: DrizzleDb, + since?: number, +): PersistedCoactivationRow[] { + const raw = getSqliteFrom(database); + const where = since !== undefined ? `WHERE created_at >= ?` : ``; + const params = since !== undefined ? 
[since] : []; + const rows = raw + .query( + `SELECT id, conversation_id, turn, source_slug, target_slug, + pass_gap, used, created_at + FROM memory_v3_coactivation + ${where} + ORDER BY created_at ASC, id ASC`, + ) + .all(...params) as Array<{ + id: number; + conversation_id: string; + turn: number; + source_slug: string; + target_slug: string; + pass_gap: number; + used: number; + created_at: number; + }>; + return rows.map((r) => ({ + id: r.id, + conversationId: r.conversation_id, + turn: r.turn, + sourceSlug: r.source_slug, + targetSlug: r.target_slug, + passGap: r.pass_gap, + used: r.used, + createdAt: r.created_at, + })); +} diff --git a/assistant/src/memory/v3/loop.ts b/assistant/src/memory/v3/loop.ts index 0763ecf8bf6..47a2608bdb3 100644 --- a/assistant/src/memory/v3/loop.ts +++ b/assistant/src/memory/v3/loop.ts @@ -44,6 +44,7 @@ * this composition layer) accumulates across every pass. */ +import { getLogger } from "../../util/logger.js"; import type { DrizzleDb } from "../db-connection.js"; import type { RetrievalCost, @@ -56,6 +57,10 @@ import type { GateDecision, } from "../v2/harness/trace.js"; import { getPageIndex } from "../v2/page-index.js"; +import { + type CoactivationRow, + recordCoactivations, +} from "./coactivation-store.js"; import { expandEdges } from "./edges.js"; import { filterDenseHits } from "./filter.js"; import { runGate } from "./gate.js"; @@ -66,9 +71,19 @@ import { runTreeWalk } from "./tree-walk.js"; /** Lane label used to tag each selected slug's provenance in `sourceBySlug`. */ type LaneSource = "hot" | "sparse" | "dense" | "tree" | "edge"; +const log = getLogger("memory-v3-loop"); + /** Injected dependencies — the SQLite handle the scout hot lane reads. */ export interface RetrievalLoopDeps { db: DrizzleDb; + /** + * Conversation this retrieval is running for. Stamped on co-activation rows + * when `config.memory.v3.write.coactivation` is on. Empty string when the + * loop runs in the offline harness (no live conversation). 
+ */ + conversationId?: string; + /** Turn number within the conversation, for co-activation provenance. */ + turn?: number; } /** @@ -91,6 +106,9 @@ export async function runRetrievalLoop( // Cross-pass accumulators. const sourceBySlug = new Map(); + // The first pass each slug entered the candidate set. Drives co-activation + // emission below — pass-1 hits (gap source) vs. later-surfaced pages (target). + const firstPassBySlug = new Map(); const sticky = new Set(); const passes: DescentPass[] = []; // `ms` is the one cost dimension observable at this composition layer — the @@ -190,6 +208,13 @@ export async function runRetrievalLoop( } } + // Record the first pass each candidate surfaced on. The candidate set is + // the union of every lane's contribution this pass; a slug keeps the + // earliest pass it appeared on (first write wins). + for (const slug of candidates) { + if (!firstPassBySlug.has(slug)) firstPassBySlug.set(slug, passNumber); + } + // 5. Gate — one capable LLM call over the unioned candidate set. const gateResult = await runGate({ input: passInput, @@ -219,6 +244,21 @@ export async function runRetrievalLoop( passNowText = nextPassNowText(input.nowText, gateResult.decision); } + // Co-activation logging — off the critical path. Gated by + // `write.coactivation` (default off). Emits one pass-1 → pass-N pair per + // (pass-1 hit, later-surfaced page) in the final selection. Best-effort: + // wrapped so neither the computation nor the insert can delay or break the + // RetrievalOutput the caller depends on. + if (v3.write?.coactivation) { + emitCoactivations({ + db: deps.db, + conversationId: deps.conversationId ?? "", + turn: deps.turn ?? 0, + selectedSlugs, + firstPassBySlug, + }); + } + const trace: DescentTrace = { passes }; return { selectedSlugs, @@ -229,6 +269,56 @@ export async function runRetrievalLoop( }; } +/** + * Emit pass-1 → pass-N co-activation rows for the final selection. 
+ * + * For each selected page B first surfaced on pass ≥2, pair it with each + * selected page A first surfaced on pass 1 (`pass_gap = passOf(B) − 1`). Pages + * only surfaced on pass 1 (or never recorded) emit nothing — the gradient is + * the gap between an early hit and a later-surfaced association. `used` is 0: + * the loop cannot know whether B was load-bearing for the turn; edge-learning + * reconciles usefulness later. + * + * Best-effort and off the retrieval critical path — any failure is swallowed. + */ +function emitCoactivations(args: { + db: DrizzleDb; + conversationId: string; + turn: number; + selectedSlugs: readonly string[]; + firstPassBySlug: ReadonlyMap; +}): void { + try { + const { db, conversationId, turn, selectedSlugs, firstPassBySlug } = args; + const pass1Hits = selectedSlugs.filter( + (slug) => firstPassBySlug.get(slug) === 1, + ); + if (pass1Hits.length === 0) return; + + const createdAt = Date.now(); + const rows: CoactivationRow[] = []; + for (const target of selectedSlugs) { + const targetPass = firstPassBySlug.get(target); + if (targetPass === undefined || targetPass < 2) continue; + for (const source of pass1Hits) { + rows.push({ + conversationId, + turn, + sourceSlug: source, + targetSlug: target, + passGap: targetPass - 1, + used: 0, + createdAt, + }); + } + } + + recordCoactivations(db, rows); + } catch (err) { + log.warn({ err }, "failed to emit co-activations; continuing"); + } +} + /** * Tag `slug`'s provenance with `lane`, keeping the first lane that surfaced it. 
* The pass order (scouts → tree → edge) gives a deterministic precedence: a From 9279659dc3ed18d448a8d32cd3a405ec7ec17886 Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:29:17 -0400 Subject: [PATCH 18/21] feat(memory-v3): weighted, decaying auto-edge learning job (#31988) Co-authored-by: Vellum Assistant --- assistant/src/memory/db-init.ts | 2 + assistant/src/memory/jobs-worker.ts | 5 + .../migrations/263-memory-v3-auto-edges.ts | 50 +++ assistant/src/memory/migrations/index.ts | 4 + assistant/src/memory/migrations/registry.ts | 8 + .../v3/__tests__/edge-learning-job.test.ts | 324 ++++++++++++++++++ assistant/src/memory/v3/auto-edges.ts | 223 ++++++++++++ assistant/src/memory/v3/edge-learning-job.ts | 160 +++++++++ 8 files changed, 776 insertions(+) create mode 100644 assistant/src/memory/migrations/263-memory-v3-auto-edges.ts create mode 100644 assistant/src/memory/v3/__tests__/edge-learning-job.test.ts create mode 100644 assistant/src/memory/v3/auto-edges.ts create mode 100644 assistant/src/memory/v3/edge-learning-job.ts diff --git a/assistant/src/memory/db-init.ts b/assistant/src/memory/db-init.ts index 9f6594953a5..f5eba7dce3d 100644 --- a/assistant/src/memory/db-init.ts +++ b/assistant/src/memory/db-init.ts @@ -129,6 +129,7 @@ import { migrateMemoryRetrospectiveState, migrateMemoryV2ActivationLogs, migrateMemoryV2InjectionEvents, + migrateMemoryV3AutoEdges, migrateMemoryV3Coactivation, migrateMessageBookmarks, migrateMessagesConversationCreatedAtIndex, @@ -458,6 +459,7 @@ export function initializeDb(): void { migrateRenameCleanedAt, migrateLlmUsageAddRawUsage, migrateMemoryV3Coactivation, + migrateMemoryV3AutoEdges, ]; // Run each migration step, catching and logging individual failures so one diff --git a/assistant/src/memory/jobs-worker.ts b/assistant/src/memory/jobs-worker.ts index 7b9da8313f1..b12b28928fb 100644 --- a/assistant/src/memory/jobs-worker.ts +++ b/assistant/src/memory/jobs-worker.ts @@ -84,6 +84,7 @@ import { } from 
"./v2/consolidation-job.js"; import { memoryV2SweepJob } from "./v2/sweep-job.js"; import { memoryV3ConsolidateJob } from "./v3/consolidation-job.js"; +import { memoryV3EdgeLearningJob } from "./v3/edge-learning-job.js"; import { memoryV3IndexMaintenanceJob } from "./v3/maintenance.js"; const log = getLogger("memory-jobs-worker"); @@ -611,6 +612,10 @@ async function processJob( case "memory_v3_index_maintenance": await memoryV3IndexMaintenanceJob(job); return; + case "memory_v3_edge_learning": + // Fast lane: bounded DB work (decay + reinforce + read), no LLM. + memoryV3EdgeLearningJob(job); + return; case "memory_v2_migrate": await memoryV2MigrateJob(job, config); return; diff --git a/assistant/src/memory/migrations/263-memory-v3-auto-edges.ts b/assistant/src/memory/migrations/263-memory-v3-auto-edges.ts new file mode 100644 index 00000000000..679073b61ed --- /dev/null +++ b/assistant/src/memory/migrations/263-memory-v3-auto-edges.ts @@ -0,0 +1,50 @@ +import type { DrizzleDb } from "../db-connection.js"; +import { getSqliteFrom } from "../db-connection.js"; +import { withCrashRecovery } from "./validate-migration-state.js"; + +const CHECKPOINT_KEY = "migration_memory_v3_auto_edges_v1"; + +/** + * Create the memory_v3_auto_edges table — the **learned** edge graph, a + * distinct class from the curated `edges:` frontmatter graph. + * + * Each row is a weighted directed association `source_slug → target_slug` that + * the edge-learning job (`memory_v3_edge_learning`) accrues from *used* + * co-activations (migration 262's `memory_v3_coactivation` rows) and decays + * over time. `weight` is a multiplicatively-decaying real; `last_reinforced_at` + * is the wall-clock ms of the most recent reinforcement, used by the decay + * pass to compute elapsed time per pair. 
+ * + * Auto-edges are advisory: the read path consumes only above-threshold pairs + * via edge-expansion's `extraAdjacency` seam, and high-weight pairs surface as + * promotion *candidates* for the assistant to ratify into curated `edges:` + * during consolidation. This table never auto-writes page frontmatter. + * + * `PRIMARY KEY(source_slug, target_slug)` makes each ordered pair unique, so + * reinforce is a single UPSERT. The index on `(weight)` keeps the + * above-threshold scan and top-N promotion-candidate read cheap as the learned + * graph grows. + */ +export function migrateMemoryV3AutoEdges(database: DrizzleDb): void { + withCrashRecovery(database, CHECKPOINT_KEY, () => { + const raw = getSqliteFrom(database); + raw.exec(/*sql*/ ` + CREATE TABLE IF NOT EXISTS memory_v3_auto_edges ( + source_slug TEXT NOT NULL, + target_slug TEXT NOT NULL, + weight REAL NOT NULL, + last_reinforced_at INTEGER NOT NULL, + PRIMARY KEY (source_slug, target_slug) + ) + `); + raw.exec(/*sql*/ ` + CREATE INDEX IF NOT EXISTS idx_memory_v3_auto_edges_weight + ON memory_v3_auto_edges (weight) + `); + }); +} + +export function downMemoryV3AutoEdges(database: DrizzleDb): void { + const raw = getSqliteFrom(database); + raw.exec(/*sql*/ `DROP TABLE IF EXISTS memory_v3_auto_edges`); +} diff --git a/assistant/src/memory/migrations/index.ts b/assistant/src/memory/migrations/index.ts index 52a41dc854f..e98a9448820 100644 --- a/assistant/src/memory/migrations/index.ts +++ b/assistant/src/memory/migrations/index.ts @@ -246,6 +246,10 @@ export { downMemoryV3Coactivation, migrateMemoryV3Coactivation, } from "./262-memory-v3-coactivation.js"; +export { + downMemoryV3AutoEdges, + migrateMemoryV3AutoEdges, +} from "./263-memory-v3-auto-edges.js"; export { MIGRATION_REGISTRY, type MigrationRegistryEntry, diff --git a/assistant/src/memory/migrations/registry.ts b/assistant/src/memory/migrations/registry.ts index c3f56f972b8..5e618629157 100644 --- a/assistant/src/memory/migrations/registry.ts +++ 
b/assistant/src/memory/migrations/registry.ts @@ -56,6 +56,7 @@ import { downConversationCleanedAt } from "./259-conversation-cleaned-at.js"; import { downRenameCleanedAt } from "./260-rename-cleaned-at.js"; import { downLlmUsageAddRawUsage } from "./261-llm-usage-add-raw-usage.js"; import { downMemoryV3Coactivation } from "./262-memory-v3-coactivation.js"; +import { downMemoryV3AutoEdges } from "./263-memory-v3-auto-edges.js"; export interface MigrationRegistryEntry { /** The checkpoint key written to memory_checkpoints on completion. */ @@ -478,6 +479,13 @@ export const MIGRATION_REGISTRY: MigrationRegistryEntry[] = [ "Create memory_v3_coactivation table — append-only log of pass-1 → pass-N co-activation pairs (gradient signal) emitted by the v3 retrieval loop and reconciled later by edge-learning", down: downMemoryV3Coactivation, }, + { + key: "migration_memory_v3_auto_edges_v1", + version: 56, + description: + "Create memory_v3_auto_edges table — weighted, decaying learned association graph (distinct from curated edges:) accrued by the edge-learning job from used co-activations and consumed above-threshold by edge expansion", + down: downMemoryV3AutoEdges, + }, ]; export function getMaxMigrationVersion(): number { diff --git a/assistant/src/memory/v3/__tests__/edge-learning-job.test.ts b/assistant/src/memory/v3/__tests__/edge-learning-job.test.ts new file mode 100644 index 00000000000..f99649cf416 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/edge-learning-job.test.ts @@ -0,0 +1,324 @@ +/** + * Tests for `assistant/src/memory/v3/auto-edges.ts`, `edge-learning-job.ts`, + * and their sibling migration `263-memory-v3-auto-edges.ts`. + * + * Coverage: + * - Migration creates the table + weight index; safe to re-run; down drops it. + * - reinforce upserts and accrues weight on the (source, target) PK. + * - decay multiplicatively reduces unused weights and prunes near-zero edges. + * - aboveThreshold returns exactly the edge-expansion `extraAdjacency` shape. 
+ * - A job run over fixture co-activations reinforces *used* rows only, skips + * unused ones, and emits weight-floored, diversity-capped promotion + * candidates. No real LLM, no real workspace DB. + * + * Uses an in-memory bun:sqlite database. The checkpoints module is stubbed with + * an in-memory Map so the watermark works without a real getDb() backing store; + * runEdgeLearning takes the in-memory DB explicitly for all auto-edge and + * co-activation reads. + */ + +import { Database } from "bun:sqlite"; +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; + +import { drizzle } from "drizzle-orm/bun-sqlite"; + +import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js"; + +mock.module("../../../util/logger.js", () => ({ + getLogger: () => makeMockLogger(), +})); + +const checkpointStore = new Map(); +mock.module("../../checkpoints.js", () => ({ + getMemoryCheckpoint: (key: string) => checkpointStore.get(key) ?? null, + setMemoryCheckpoint: (key: string, value: string) => + checkpointStore.set(key, value), +})); + +import type { DrizzleDb } from "../../db-connection.js"; +import { getSqliteFrom } from "../../db-connection.js"; +import { migrateMemoryV3Coactivation } from "../../migrations/262-memory-v3-coactivation.js"; +import { + downMemoryV3AutoEdges, + migrateMemoryV3AutoEdges, +} from "../../migrations/263-memory-v3-auto-edges.js"; +import * as schema from "../../schema.js"; +import { + aboveThreshold, + decay, + reinforce, + topByWeight, +} from "../auto-edges.js"; +import { + type CoactivationRow, + recordCoactivations, +} from "../coactivation-store.js"; +import { + EDGE_DECAY_HALF_LIFE_MS, + MAX_CANDIDATES_PER_SOURCE, + MAX_PROMOTION_CANDIDATES, + runEdgeLearning, +} from "../edge-learning-job.js"; + +// memory_checkpoints is required by withCrashRecovery and normally created by an +// early core migration. Stand it up by hand so the v3 migrations can run in +// isolation against a fresh in-memory DB. 
+const CHECKPOINTS_DDL = /*sql*/ ` + CREATE TABLE memory_checkpoints ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL, + updated_at INTEGER NOT NULL + ) +`; + +let sqlite: Database; +let database: DrizzleDb; + +beforeEach(() => { + sqlite = new Database(":memory:"); + database = drizzle(sqlite, { schema }); + getSqliteFrom(database).exec(CHECKPOINTS_DDL); + migrateMemoryV3Coactivation(database); + migrateMemoryV3AutoEdges(database); + checkpointStore.clear(); +}); + +afterEach(() => { + sqlite.close(); +}); + +function readWeight(source: string, target: string): number | undefined { + const row = getSqliteFrom(database) + .query( + `SELECT weight FROM memory_v3_auto_edges + WHERE source_slug = ? AND target_slug = ?`, + ) + .get(source, target) as { weight: number } | undefined; + return row?.weight; +} + +// --------------------------------------------------------------------------- +// Migration. +// --------------------------------------------------------------------------- + +describe("migrateMemoryV3AutoEdges", () => { + test("creates table and weight index; safe to re-run", () => { + migrateMemoryV3AutoEdges(database); + migrateMemoryV3AutoEdges(database); + + const raw = getSqliteFrom(database); + const table = raw + .query( + `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_auto_edges'`, + ) + .get(); + expect(table).toBeTruthy(); + + const indexNames = new Set( + ( + raw + .query( + `SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='memory_v3_auto_edges'`, + ) + .all() as Array<{ name: string }> + ).map((r) => r.name), + ); + expect(indexNames.has("idx_memory_v3_auto_edges_weight")).toBe(true); + }); + + test("downMemoryV3AutoEdges drops the table", () => { + downMemoryV3AutoEdges(database); + const table = getSqliteFrom(database) + .query( + `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_auto_edges'`, + ) + .get(); + expect(table).toBeFalsy(); + }); +}); + +// 
--------------------------------------------------------------------------- +// auto-edges store. +// --------------------------------------------------------------------------- + +describe("reinforce", () => { + test("inserts a new pair at the increment, then accrues on the PK", () => { + reinforce(database, "a", "b", 1_000); + expect(readWeight("a", "b")).toBe(1); + reinforce(database, "a", "b", 2_000); + expect(readWeight("a", "b")).toBe(2); + }); + + test("directed pairs are independent", () => { + reinforce(database, "a", "b", 1_000); + reinforce(database, "b", "a", 1_000); + expect(readWeight("a", "b")).toBe(1); + expect(readWeight("b", "a")).toBe(1); + }); +}); + +describe("decay", () => { + test("halves a weight after one half-life and advances last_reinforced_at", () => { + reinforce(database, "a", "b", 0); + // Push it above the half-life so the decayed weight stays above the floor. + reinforce(database, "a", "b", 0); // weight = 2 + const pruned = decay( + database, + EDGE_DECAY_HALF_LIFE_MS, + EDGE_DECAY_HALF_LIFE_MS, + ); + expect(pruned).toBe(0); + const w = readWeight("a", "b")!; + expect(w).toBeCloseTo(1, 5); + + const stamped = getSqliteFrom(database) + .query( + `SELECT last_reinforced_at FROM memory_v3_auto_edges + WHERE source_slug='a' AND target_slug='b'`, + ) + .get() as { last_reinforced_at: number }; + expect(stamped.last_reinforced_at).toBe(EDGE_DECAY_HALF_LIFE_MS); + }); + + test("prunes edges that decay below the floor", () => { + reinforce(database, "a", "b", 0); + // Ten half-lives ⇒ weight × 2^-10 ≈ 0.001 < floor. + const pruned = decay( + database, + 10 * EDGE_DECAY_HALF_LIFE_MS, + EDGE_DECAY_HALF_LIFE_MS, + ); + expect(pruned).toBe(1); + expect(readWeight("a", "b")).toBeUndefined(); + }); + + test("clamps future timestamps so decay never amplifies weight", () => { + reinforce(database, "a", "b", 10_000); + // now < last_reinforced_at ⇒ elapsed clamps to 0 ⇒ weight unchanged. 
+ decay(database, 0, EDGE_DECAY_HALF_LIFE_MS); + expect(readWeight("a", "b")).toBe(1); + }); +}); + +describe("aboveThreshold", () => { + test("returns the source → Set adjacency for above-threshold pairs", () => { + reinforce(database, "a", "b", 0); // weight 1 + reinforce(database, "a", "c", 0); + reinforce(database, "a", "c", 0); // weight 2 + reinforce(database, "x", "y", 0); // weight 1 + + const adjacency = aboveThreshold(database, 2); + // Only a→c clears the threshold of 2. + expect([...adjacency.keys()]).toEqual(["a"]); + expect([...adjacency.get("a")!]).toEqual(["c"]); + + const inclusive = aboveThreshold(database, 1); + expect([...inclusive.get("a")!].sort()).toEqual(["b", "c"]); + expect([...inclusive.get("x")!]).toEqual(["y"]); + }); + + test("empty when nothing clears the threshold", () => { + reinforce(database, "a", "b", 0); + expect(aboveThreshold(database, 5).size).toBe(0); + }); +}); + +describe("topByWeight", () => { + test("returns heaviest edges first, capped at limit", () => { + reinforce(database, "a", "b", 0); + reinforce(database, "a", "b", 0); // weight 2 + reinforce(database, "c", "d", 0); // weight 1 + const top = topByWeight(database, 1); + expect(top).toHaveLength(1); + expect(top[0]).toMatchObject({ + sourceSlug: "a", + targetSlug: "b", + weight: 2, + }); + }); +}); + +// --------------------------------------------------------------------------- +// edge-learning job. 
+// --------------------------------------------------------------------------- + +function coact( + source: string, + target: string, + used: number, + createdAt: number, +): CoactivationRow { + return { + conversationId: "conv-1", + turn: 1, + sourceSlug: source, + targetSlug: target, + passGap: 1, + used, + createdAt, + }; +} + +describe("runEdgeLearning", () => { + test("reinforces used co-activations only and skips unused ones", () => { + recordCoactivations(database, [ + coact("a", "b", 1, 100), + coact("a", "b", 1, 200), + coact("c", "d", 0, 300), + ]); + + const result = runEdgeLearning(database, 1_000); + expect(result.reinforced).toBe(2); + expect(result.skippedUnused).toBe(1); + expect(readWeight("a", "b")).toBe(2); + expect(readWeight("c", "d")).toBeUndefined(); + }); + + test("advances the watermark so the same co-activation isn't re-counted", () => { + recordCoactivations(database, [coact("a", "b", 1, 100)]); + runEdgeLearning(database, 1_000); + expect(readWeight("a", "b")).toBe(1); + + // Second run with no new co-activations: only decay, no fresh reinforcement. + const second = runEdgeLearning(database, 1_000); + expect(second.reinforced).toBe(0); + expect(readWeight("a", "b")).toBe(1); + + // A newer co-activation past the watermark is picked up. + recordCoactivations(database, [coact("a", "b", 1, 500)]); + const third = runEdgeLearning(database, 1_000); + expect(third.reinforced).toBe(1); + expect(readWeight("a", "b")).toBe(2); + }); + + test("emits promotion candidates above the weight floor", () => { + // Two used co-activations ⇒ weight 2 ≥ floor (1.5); single ⇒ weight 1 < floor. 
+ recordCoactivations(database, [ + coact("a", "b", 1, 100), + coact("a", "b", 1, 200), + coact("c", "d", 1, 300), + ]); + const result = runEdgeLearning(database, 1_000); + const pairs = result.candidates.map( + (c) => `${c.sourceSlug}->${c.targetSlug}`, + ); + expect(pairs).toEqual(["a->b"]); + }); + + test("caps candidates per source so one hub can't monopolize the slate", () => { + const rows: CoactivationRow[] = []; + let t = 100; + // Hub "a" → many targets, each reinforced twice (weight 2 ≥ floor). + for (let i = 0; i < MAX_CANDIDATES_PER_SOURCE + 3; i++) { + rows.push(coact("a", `t${i}`, 1, t++)); + rows.push(coact("a", `t${i}`, 1, t++)); + } + recordCoactivations(database, rows); + const result = runEdgeLearning(database, 1_000); + const fromA = result.candidates.filter((c) => c.sourceSlug === "a"); + expect(fromA.length).toBe(MAX_CANDIDATES_PER_SOURCE); + expect(result.candidates.length).toBeLessThanOrEqual( + MAX_PROMOTION_CANDIDATES, + ); + }); +}); diff --git a/assistant/src/memory/v3/auto-edges.ts b/assistant/src/memory/v3/auto-edges.ts new file mode 100644 index 00000000000..ce40530d8e5 --- /dev/null +++ b/assistant/src/memory/v3/auto-edges.ts @@ -0,0 +1,223 @@ +/** + * Memory v3 — learned weighted auto-edge store. + * + * Read/write helpers over `memory_v3_auto_edges` (migration 263) — the + * **learned** association graph, a distinct class from the curated `edges:` + * frontmatter graph. Each row is a weighted directed pair `source → target` + * that the edge-learning job accrues from *used* co-activations and decays over + * wall-clock time. + * + * Three primitives: + * - {@link reinforce} — bump a pair's weight, but only for a *used* + * co-activation (we reinforce usefulness, not mere retrieval). + * - {@link decay} — multiplicatively decay all weights toward zero on a + * half-life schedule, so a pair that stops being reinforced fades. This is + * the rich-get-richer counterweight: weight is a leaky integrator, not a + * monotone counter. 
+ * - {@link aboveThreshold} — project the learned graph to the + * `ReadonlyMap>` adjacency that edge + * expansion's `extraAdjacency` seam consumes (only pairs at/above a weight + * threshold traverse). + * + * The decay model mirrors v2's injection-events EMA: `λ = ln 2 / halfLife`, and + * a pair decays by `exp(-λ × elapsed)` since its `last_reinforced_at`. + */ + +import { getLogger } from "../../util/logger.js"; +import type { DrizzleDb } from "../db-connection.js"; +import { getSqliteFrom } from "../db-connection.js"; + +const log = getLogger("memory-v3-auto-edges"); + +/** Weight added to a pair per *used* co-activation reinforcement. */ +export const REINFORCE_INCREMENT = 1; + +/** Weights below this after decay are pruned rather than kept as dead rows. */ +export const PRUNE_FLOOR = 0.01; + +/** A learned auto-edge, as read back from the table. */ +export interface AutoEdgeRow { + sourceSlug: string; + targetSlug: string; + weight: number; + lastReinforcedAt: number; +} + +/** + * Reinforce the directed pair `source → target`: bump its weight by + * {@link REINFORCE_INCREMENT} and stamp `last_reinforced_at = now`. **Only call + * this for a *used* co-activation** — the edge graph encodes which associations + * actually proved load-bearing for a turn, not which pages merely surfaced + * together. (The caller decides usedness from the co-activation's `used` flag; + * this primitive is unconditional so it stays composable.) + * + * UPSERT on the `(source, target)` primary key: a new pair starts at the + * increment; an existing pair accrues on top of its current weight (after the + * latest decay pass, since decay rewrites weight in place). + * + * Best-effort: a failed write must never abort the edge-learning job. 
+ */ +export function reinforce( + database: DrizzleDb, + source: string, + target: string, + now: number, +): void { + try { + const raw = getSqliteFrom(database); + raw + .prepare( + `INSERT INTO memory_v3_auto_edges + (source_slug, target_slug, weight, last_reinforced_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(source_slug, target_slug) DO UPDATE SET + weight = weight + ?, + last_reinforced_at = ?`, + ) + .run(source, target, REINFORCE_INCREMENT, now, REINFORCE_INCREMENT, now); + } catch (err) { + log.warn( + { err, source, target }, + "failed to reinforce auto-edge; continuing", + ); + } +} + +/** + * Multiplicatively decay every auto-edge weight toward zero on a half-life + * schedule: `weight ← weight × exp(-λ × (now − last_reinforced_at))`, with + * `λ = ln 2 / halfLifeMs`. A pair last reinforced one half-life ago halves; two + * half-lives ago quarters; and so on. `last_reinforced_at` advances to `now` + * so successive decay passes don't double-count the same elapsed interval. + * + * Pairs whose decayed weight falls below {@link PRUNE_FLOOR} are deleted so the + * learned graph doesn't accumulate a long tail of effectively-dead edges. + * + * Returns the number of rows pruned, for the job's structured log. + */ +export function decay( + database: DrizzleDb, + now: number, + halfLifeMs: number, +): number { + if (halfLifeMs <= 0) return 0; + const lambda = Math.LN2 / halfLifeMs; + try { + const raw = getSqliteFrom(database); + const rows = raw + .query( + `SELECT source_slug, target_slug, weight, last_reinforced_at + FROM memory_v3_auto_edges`, + ) + .all() as Array<{ + source_slug: string; + target_slug: string; + weight: number; + last_reinforced_at: number; + }>; + if (rows.length === 0) return 0; + + const update = raw.prepare( + `UPDATE memory_v3_auto_edges + SET weight = ?, last_reinforced_at = ? + WHERE source_slug = ? AND target_slug = ?`, + ); + const prune = raw.prepare( + `DELETE FROM memory_v3_auto_edges + WHERE source_slug = ? 
AND target_slug = ?`, + ); + + let pruned = 0; + const apply = raw.transaction(() => { + for (const row of rows) { + // Future timestamps (clock skew) would amplify rather than decay — clamp + // elapsed at 0 so decay only ever shrinks weight. + const elapsed = Math.max(0, now - row.last_reinforced_at); + const decayed = row.weight * Math.exp(-lambda * elapsed); + if (decayed < PRUNE_FLOOR) { + prune.run(row.source_slug, row.target_slug); + pruned += 1; + } else { + update.run(decayed, now, row.source_slug, row.target_slug); + } + } + }); + apply(); + return pruned; + } catch (err) { + log.warn({ err }, "failed to decay auto-edges; continuing"); + return 0; + } +} + +/** + * Project the learned graph to the `extraAdjacency` shape edge expansion + * consumes: `source → Set` for every pair whose weight is at or above + * `threshold`. Edge expansion thresholds nothing itself — it merges whatever + * adjacency it's handed — so this read is where the weight cutoff is applied. + * + * Returns an empty map on any read failure so the caller (a best-effort read + * lane) degrades to "no learned edges" rather than aborting retrieval. + */ +export function aboveThreshold( + database: DrizzleDb, + threshold: number, +): Map> { + const adjacency = new Map>(); + try { + const raw = getSqliteFrom(database); + const rows = raw + .query( + `SELECT source_slug, target_slug + FROM memory_v3_auto_edges + WHERE weight >= ? + ORDER BY source_slug ASC, target_slug ASC`, + ) + .all(threshold) as Array<{ source_slug: string; target_slug: string }>; + for (const row of rows) { + let targets = adjacency.get(row.source_slug); + if (!targets) { + targets = new Set(); + adjacency.set(row.source_slug, targets); + } + targets.add(row.target_slug); + } + } catch (err) { + log.warn({ err, threshold }, "failed to read auto-edges; continuing"); + } + return adjacency; +} + +/** + * Read the top-weight auto-edges, heaviest first, capped at `limit`. 
The
+ * edge-learning job surfaces these as advisory promotion candidates for the
+ * assistant to ratify into curated `edges:` during consolidation.
+ *
+ * Ties on weight are broken by `source_slug` then `target_slug` ascending, so
+ * the returned order is deterministic. A non-positive `limit` short-circuits
+ * to `[]` without touching the database; a read failure is logged and also
+ * yields `[]`.
+ */
+export function topByWeight(database: DrizzleDb, limit: number): AutoEdgeRow[] {
+  if (limit <= 0) return [];
+  try {
+    const raw = getSqliteFrom(database);
+    const rows = raw
+      .query(
+        `SELECT source_slug, target_slug, weight, last_reinforced_at
+         FROM memory_v3_auto_edges
+         ORDER BY weight DESC, source_slug ASC, target_slug ASC
+         LIMIT ?`,
+      )
+      .all(limit) as Array<{
+      source_slug: string;
+      target_slug: string;
+      weight: number;
+      last_reinforced_at: number;
+    }>;
+    return rows.map((r) => ({
+      sourceSlug: r.source_slug,
+      targetSlug: r.target_slug,
+      weight: r.weight,
+      lastReinforcedAt: r.last_reinforced_at,
+    }));
+  } catch (err) {
+    log.warn({ err, limit }, "failed to read top auto-edges; continuing");
+    return [];
+  }
+}
diff --git a/assistant/src/memory/v3/edge-learning-job.ts b/assistant/src/memory/v3/edge-learning-job.ts
new file mode 100644
index 00000000000..875c058f064
--- /dev/null
+++ b/assistant/src/memory/v3/edge-learning-job.ts
@@ -0,0 +1,160 @@
+/**
+ * Memory v3 — `memory_v3_edge_learning` job (fast lane, no LLM).
+ *
+ * Reconciles the raw co-activation log (`memory_v3_coactivation`, migration
+ * 262) into the weighted learned-edge graph (`memory_v3_auto_edges`, migration
+ * 263). One pass does three things:
+ *
+ *   1. **Decay** — multiplicatively age all existing auto-edge weights toward
+ *      zero on a half-life schedule (the rich-get-richer counterweight: an edge
+ *      that stops being reinforced fades, so a once-hot pair can't dominate the
+ *      adjacency forever).
+ *   2. **Reinforce** — for each recent co-activation whose `used` flag is set,
+ *      bump the `source → target` weight. *Used-only*: we learn associations
+ *      that proved load-bearing for a turn, not pairs that merely surfaced
+ *      together. The watermark checkpoint advances so each co-activation is
+ *      counted once.
+ *   3. 
**Propose** — surface the top-weight auto-edges as advisory promotion + * *candidates* for the assistant to ratify into curated `edges:` during + * consolidation. This job PROPOSES; it never auto-writes page frontmatter. + * Diversity counterweight: candidates are capped and a single source's + * out-edges are bounded so one hub can't monopolize the slate. + * + * Decay runs before reinforce so a fresh reinforcement isn't immediately aged + * by the same pass. The job is idempotent in effect: re-running with no new + * co-activations only decays (which is itself elapsed-time-bounded). + */ + +import { getLogger } from "../../util/logger.js"; +import { getMemoryCheckpoint, setMemoryCheckpoint } from "../checkpoints.js"; +import type { DrizzleDb } from "../db-connection.js"; +import { getDb } from "../db-connection.js"; +import type { MemoryJob } from "../jobs-store.js"; +import { + type AutoEdgeRow, + decay, + reinforce, + topByWeight, +} from "./auto-edges.js"; +import { readCoactivations } from "./coactivation-store.js"; + +const log = getLogger("memory-v3-edge-learning"); + +/** + * Half-life of auto-edge weight decay. Matches the v2 injection-score cadence + * (3 days) — a pair reinforced 3 days ago and never since contributes half its + * weight, 6 days ago a quarter. + */ +export const EDGE_DECAY_HALF_LIFE_MS = 3 * 24 * 60 * 60 * 1000; + +/** Max promotion candidates surfaced per run (the diversity cap). */ +export const MAX_PROMOTION_CANDIDATES = 20; + +/** Max candidates contributed by any single source slug (anti-hub diversity). */ +export const MAX_CANDIDATES_PER_SOURCE = 3; + +/** + * Minimum weight for an auto-edge to be eligible for promotion. A pair must + * accrue more than a single reinforcement (which decays away) before it's worth + * proposing as a curated edge. + */ +export const PROMOTION_WEIGHT_FLOOR = 1.5; + +/** Checkpoint key for the high-water mark of reconciled co-activations. 
*/
+const WATERMARK_KEY = "memory_v3_edge_learning:coactivation_watermark";
+
+/** Summary of one edge-learning pass, returned for the dispatcher log + tests. */
+export interface EdgeLearningResult {
+  /** Used co-activations reinforced this pass. */
+  reinforced: number;
+  /** Co-activations skipped because `used` was falsy. */
+  skippedUnused: number;
+  /** Auto-edges pruned by decay (fell below the floor). */
+  pruned: number;
+  /** Advisory promotion candidates, heaviest first, after the diversity cap. */
+  candidates: AutoEdgeRow[];
+}
+
+/**
+ * Run one edge-learning pass against `database`. Pure of LLM and workspace I/O —
+ * the whole pass is bounded DB work, hence the fast lane.
+ *
+ * @param database Connection the decay / reinforce / read steps run against.
+ * @param now Timestamp (ms) stamped on decayed and reinforced rows; defaults
+ *   to `Date.now()`.
+ * @returns Counts for the pass plus the capped promotion-candidate slate.
+ */
+export function runEdgeLearning(
+  database: DrizzleDb,
+  now = Date.now(),
+): EdgeLearningResult {
+  // 1. Decay first so this pass's reinforcements aren't immediately aged.
+  const pruned = decay(database, now, EDGE_DECAY_HALF_LIFE_MS);
+
+  // 2. Reinforce from co-activations newer than the watermark. The watermark is
+  // a created_at boundary; `since` is inclusive so we nudge it forward by 1ms
+  // to avoid re-counting the boundary row.
+  //
+  // A corrupt / non-numeric checkpoint parses to NaN. Every `>` comparison
+  // against NaN is false, so without normalisation `maxCreatedAt` could never
+  // exceed the watermark, the checkpoint would never be rewritten, and each
+  // pass would re-reinforce the same rows. Treat anything that is not a
+  // positive finite number as "no watermark yet".
+  const parsedWatermark = parseInt(
+    getMemoryCheckpoint(WATERMARK_KEY) ?? "0",
+    10,
+  );
+  const watermark =
+    Number.isFinite(parsedWatermark) && parsedWatermark > 0
+      ? parsedWatermark
+      : 0;
+  const since = watermark > 0 ? watermark + 1 : undefined;
+  const coactivations = readCoactivations(database, since);
+
+  let reinforced = 0;
+  let skippedUnused = 0;
+  let maxCreatedAt = watermark;
+  for (const row of coactivations) {
+    // Track the high-water mark across ALL rows (used or not) so skipped rows
+    // are not re-read on the next pass either.
+    if (row.createdAt > maxCreatedAt) maxCreatedAt = row.createdAt;
+    // Reinforce usefulness, not mere retrieval: skip co-activations the loop
+    // (or a later usefulness reconciliation) did not mark as used.
+    if (!row.used) {
+      skippedUnused += 1;
+      continue;
+    }
+    reinforce(database, row.sourceSlug, row.targetSlug, now);
+    reinforced += 1;
+  }
+  if (maxCreatedAt > watermark) {
+    setMemoryCheckpoint(WATERMARK_KEY, String(maxCreatedAt));
+  }
+
+  // 3. Propose promotion candidates: heaviest auto-edges above the floor, capped
+  // overall and per-source so a single hub can't monopolize the slate.
+  const candidates = selectPromotionCandidates(
+    topByWeight(database, MAX_PROMOTION_CANDIDATES * MAX_CANDIDATES_PER_SOURCE),
+  );
+
+  log.info(
+    {
+      reinforced,
+      skippedUnused,
+      pruned,
+      candidateCount: candidates.length,
+    },
+    "v3 edge learning complete",
+  );
+
+  return { reinforced, skippedUnused, pruned, candidates };
+}
+
+/**
+ * Apply the weight floor and the overall / per-source diversity caps to a
+ * weight-sorted list of auto-edges. Input must already be sorted heaviest-first
+ * (as {@link topByWeight} returns).
+ */
+function selectPromotionCandidates(sorted: AutoEdgeRow[]): AutoEdgeRow[] {
+  const out: AutoEdgeRow[] = [];
+  // Number of candidates already accepted per source slug.
+  const perSource = new Map<string, number>();
+  for (const edge of sorted) {
+    if (out.length >= MAX_PROMOTION_CANDIDATES) break;
+    if (edge.weight < PROMOTION_WEIGHT_FLOOR) continue;
+    const count = perSource.get(edge.sourceSlug) ?? 0;
+    if (count >= MAX_CANDIDATES_PER_SOURCE) continue;
+    perSource.set(edge.sourceSlug, count + 1);
+    out.push(edge);
+  }
+  return out;
+}
+
+/**
+ * Job handler for `memory_v3_edge_learning`. Thin wrapper over
+ * {@link runEdgeLearning} so the heavy lifting (and its tests) live in one
+ * place. The job carries no payload — it always reconciles the whole recent
+ * co-activation log. 
+ */ +export function memoryV3EdgeLearningJob(_job: MemoryJob): EdgeLearningResult { + return runEdgeLearning(getDb()); +} From 93da857fb26375f40be9385016be519b73a9de8b Mon Sep 17 00:00:00 2001 From: velissa-ai Date: Mon, 25 May 2026 03:33:29 -0400 Subject: [PATCH 19/21] feat(memory-v3): live shadow via memoryRetrieval middleware (inject v2, log v3) (#31989) Co-authored-by: Vellum Assistant --- .../memory/memory-v2-activation-log-store.ts | 21 +- .../v3/__tests__/shadow-middleware.test.ts | 292 +++++++++++++++++ assistant/src/memory/v3/shadow-middleware.ts | 305 ++++++++++++++++++ assistant/src/plugins/defaults/index.ts | 6 + 4 files changed, 618 insertions(+), 6 deletions(-) create mode 100644 assistant/src/memory/v3/__tests__/shadow-middleware.test.ts create mode 100644 assistant/src/memory/v3/shadow-middleware.ts diff --git a/assistant/src/memory/memory-v2-activation-log-store.ts b/assistant/src/memory/memory-v2-activation-log-store.ts index 5b2e2be56ff..3afa0bcf7c2 100644 --- a/assistant/src/memory/memory-v2-activation-log-store.ts +++ b/assistant/src/memory/memory-v2-activation-log-store.ts @@ -115,11 +115,15 @@ export interface RecordMemoryV2ActivationLogParams { * `per-turn` for normal append injections, `errored` when `injectMemoryV2Block` * threw before completing — telemetry is still written so silent failures * are observable in the database, with whatever `concepts` rows had been - * built so far (possibly empty). `router` indicates the Sonnet - * router selected the per-turn page set; router-mode rows carry zeroed - * activation values and `source: "router"` on every concept row. + * built so far (possibly empty). `router` indicates the LLM router selected + * the per-turn page set; router-mode rows carry zeroed activation values and + * `source: "router"` on every concept row. `v3_shadow` is written by the + * live-shadow v3 retrieval middleware: it records v3's selection set for + * comparison without affecting injected context. 
The harness oracle filters + * `mode='router'`, so `v3_shadow` rows never pollute it; the inspector can + * still surface them. */ - mode: "context-load" | "per-turn" | "errored" | "router"; + mode: "context-load" | "per-turn" | "errored" | "router" | "v3_shadow"; concepts: MemoryV2ConceptRowRecord[]; config: MemoryV2ConfigSnapshot; } @@ -167,7 +171,7 @@ export function backfillMemoryV2ActivationMessageId( export interface MemoryV2ActivationLog { conversationId: string; turn: number; - mode: "context-load" | "per-turn" | "errored" | "router"; + mode: "context-load" | "per-turn" | "errored" | "router" | "v3_shadow"; concepts: MemoryV2ConceptRowRecord[]; config: MemoryV2ConfigSnapshot; } @@ -188,7 +192,12 @@ export function getMemoryV2ActivationLogByMessageIds( return { conversationId: row.conversationId, turn: row.turn, - mode: row.mode as "context-load" | "per-turn" | "errored" | "router", + mode: row.mode as + | "context-load" + | "per-turn" + | "errored" + | "router" + | "v3_shadow", concepts: JSON.parse(row.conceptsJson) as MemoryV2ConceptRowRecord[], config: JSON.parse(row.configJson) as MemoryV2ConfigSnapshot, }; diff --git a/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts new file mode 100644 index 00000000000..e9178a524b0 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts @@ -0,0 +1,292 @@ +/** + * Tests for the live-shadow `memoryRetrieval` middleware + * (`assistant/src/memory/v3/shadow-middleware.ts`). + * + * The critical invariant this PR guarantees: with `memory.v3.shadow` off + * (the default), the middleware is a byte-for-byte pass-through — it returns + * the downstream `MemoryResult` unchanged, never calls the v3 loop, and never + * writes a log row. With the flag on, it runs the v3 loop alongside the + * default path, logs v3's selection as `mode='v3_shadow'`, and STILL returns + * the unchanged downstream result (v2 injected, never v3). 
A v3 failure is + * swallowed and the turn result is unaffected. + * + * Everything the middleware reaches (config, the v3 loop, the activation-log + * store, message/now/everInjected reads) is stubbed via `mock.module` — no + * real LLM, no real workspace DB. + */ + +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; + +import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js"; +import type { AssistantConfig } from "../../../config/schema.js"; +import type { TrustContext } from "../../../daemon/trust-context.js"; +import type { + MemoryArgs, + MemoryResult, + TurnContext, +} from "../../../plugins/types.js"; +import type { RecordMemoryV2ActivationLogParams } from "../../memory-v2-activation-log-store.js"; +import type { + RetrievalInput, + RetrievalOutput, +} from "../../v2/harness/retriever.js"; + +mock.module("../../../util/logger.js", () => ({ + getLogger: () => makeMockLogger(), +})); + +// ── Mutable test doubles, rewired per test ─────────────────────────────── + +/** Drives `config.memory.v3.{enabled,shadow}` and `historical_pairs`. */ +let v3Enabled = false; +let v3Shadow = false; + +function makeConfig(): AssistantConfig { + return { + memory: { + v2: { router: { historical_pairs: 1 } }, + v3: { enabled: v3Enabled, shadow: v3Shadow }, + }, + } as unknown as AssistantConfig; +} + +/** Captured `runRetrievalLoop` invocations. */ +const loopCalls: Array<{ input: RetrievalInput }> = []; +/** Behavior of the stubbed loop — overridden per test. */ +let loopImpl: ( + input: RetrievalInput, +) => Promise = async () => ({ + selectedSlugs: [], + sourceBySlug: new Map(), + trace: { passes: [] }, + cost: { ms: 0 }, + failureReason: null, +}); + +/** Captured `recordMemoryV2ActivationLog` calls. 
*/ +const logCalls: RecordMemoryV2ActivationLogParams[] = []; + +mock.module("../../../config/loader.js", () => ({ + getConfig: () => makeConfig(), +})); +mock.module("../../../util/platform.js", () => ({ + getWorkspaceDir: () => "/tmp/shadow-test-workspace", +})); +// Chainable drizzle-query stub: every builder method returns the same object +// and `.all()` yields the seeded rows. The shadow middleware reads recent +// messages via `db.select(...).from(...).where(...).orderBy(...).limit(...).all()`. +const messageRows: Array<{ role: string; content: string }> = [ + { + role: "user", + content: JSON.stringify([{ type: "text", text: "hello memory" }]), + }, +]; +function makeFakeDb(): never { + const builder: Record = {}; + for (const m of ["select", "from", "where", "orderBy", "limit"]) { + builder[m] = () => builder; + } + builder.all = () => messageRows.slice(); + return builder as never; +} +mock.module("../../db-connection.js", () => ({ + getDb: () => makeFakeDb(), +})); +mock.module("../../v2/now-text.js", () => ({ + loadNowText: async () => "NOW context", +})); +mock.module("../../v2/activation-store.js", () => ({ + hydrate: async () => ({ everInjected: [{ slug: "old/page", turn: 0 }] }), +})); +mock.module("../loop.js", () => ({ + runRetrievalLoop: async (input: RetrievalInput): Promise => { + loopCalls.push({ input }); + return loopImpl(input); + }, +})); +mock.module("../../memory-v2-activation-log-store.js", () => ({ + recordMemoryV2ActivationLog: (params: RecordMemoryV2ActivationLogParams) => { + logCalls.push(params); + }, +})); + +const { memoryV3ShadowMiddleware } = await import("../shadow-middleware.js"); + +// ── Fixtures ───────────────────────────────────────────────────────────── + +const trust: TrustContext = { + sourceChannel: "vellum", + trustClass: "guardian", +}; + +function makeCtx(): TurnContext { + return { + requestId: "req-shadow-test", + conversationId: "conv-shadow", + turnIndex: 3, + trust, + }; +} + +function makeArgs(signal?: 
AbortSignal): MemoryArgs { + return { + conversationId: "conv-shadow", + trustContext: trust, + turnIndex: 3, + signal: signal ?? new AbortController().signal, + }; +} + +/** The unchanged downstream (v2/default) result the terminal returns. */ +const DOWNSTREAM_RESULT: MemoryResult = { + pkbContent: "pkb", + nowContent: "now", + memoryGraphBlocks: [{ kind: "default.graph" }], +}; + +/** Flush the detached shadow chain (microtasks + a macrotask hop). */ +async function flush(): Promise { + await new Promise((resolve) => setTimeout(resolve, 0)); + await Promise.resolve(); +} + +beforeEach(() => { + v3Enabled = false; + v3Shadow = false; + loopCalls.length = 0; + logCalls.length = 0; + loopImpl = async () => ({ + selectedSlugs: [], + sourceBySlug: new Map(), + trace: { passes: [] }, + cost: { ms: 0 }, + failureReason: null, + }); +}); + +afterEach(() => { + mock.restore(); +}); + +describe("memory-v3 shadow middleware", () => { + test("flag off → byte-for-byte pass-through, no v3 call, no log write", async () => { + v3Enabled = false; + v3Shadow = false; + let nextCalls = 0; + const args = makeArgs(); + const result = await memoryV3ShadowMiddleware( + args, + async (a) => { + nextCalls++; + // identity is preserved — pass-through hands the same args down. + expect(a).toBe(args); + return DOWNSTREAM_RESULT; + }, + makeCtx(), + ); + + // Returns the exact downstream object reference, unchanged. 
+ expect(result).toBe(DOWNSTREAM_RESULT); + expect(nextCalls).toBe(1); + + await flush(); + expect(loopCalls.length).toBe(0); + expect(logCalls.length).toBe(0); + }); + + test("enabled but shadow off → still a pure pass-through", async () => { + v3Enabled = true; + v3Shadow = false; + const args = makeArgs(); + const result = await memoryV3ShadowMiddleware( + args, + async () => DOWNSTREAM_RESULT, + makeCtx(), + ); + expect(result).toBe(DOWNSTREAM_RESULT); + await flush(); + expect(loopCalls.length).toBe(0); + expect(logCalls.length).toBe(0); + }); + + test("flag on → v3 runs, v3_shadow row logged, downstream result unchanged", async () => { + v3Enabled = true; + v3Shadow = true; + loopImpl = async () => ({ + selectedSlugs: ["topic/a", "topic/b"], + sourceBySlug: new Map([["topic/a", "dense"]]), + trace: { passes: [] }, + cost: { ms: 12 }, + failureReason: null, + }); + + const args = makeArgs(); + const result = await memoryV3ShadowMiddleware( + args, + async () => DOWNSTREAM_RESULT, + makeCtx(), + ); + + // The injected result is the v2/default result, NOT v3. + expect(result).toBe(DOWNSTREAM_RESULT); + + await flush(); + + // v3 ran exactly once, with a faithfully-built RetrievalInput. + expect(loopCalls.length).toBe(1); + const input = loopCalls[0]!.input; + expect(input.nowText).toBe("NOW context"); + expect(input.workspaceDir).toBe("/tmp/shadow-test-workspace"); + expect(input.priorEverInjected).toEqual([{ slug: "old/page", turn: 0 }]); + expect(input.recentTurnPairs.length).toBeGreaterThan(0); + expect(input.recentTurnPairs.at(-1)?.userMessage).toBe("hello memory"); + + // Exactly one v3_shadow row, carrying v3's selection. 
+ expect(logCalls.length).toBe(1); + const logged = logCalls[0]!; + expect(logged.mode).toBe("v3_shadow"); + expect(logged.conversationId).toBe("conv-shadow"); + expect(logged.turn).toBe(3); + expect(logged.concepts.map((c) => c.slug)).toEqual(["topic/a", "topic/b"]); + }); + + test("v3 error → logged/swallowed, turn result unaffected, no log row", async () => { + v3Enabled = true; + v3Shadow = true; + loopImpl = async () => { + throw new Error("v3 boom"); + }; + + const args = makeArgs(); + // The middleware must not reject even though the detached shadow throws. + const result = await memoryV3ShadowMiddleware( + args, + async () => DOWNSTREAM_RESULT, + makeCtx(), + ); + expect(result).toBe(DOWNSTREAM_RESULT); + + await flush(); + // Loop was attempted; the failure short-circuited before logging. + expect(loopCalls.length).toBe(1); + expect(logCalls.length).toBe(0); + }); + + test("aborted signal → shadow does no v3 work", async () => { + v3Enabled = true; + v3Shadow = true; + const controller = new AbortController(); + controller.abort(); + + const result = await memoryV3ShadowMiddleware( + makeArgs(controller.signal), + async () => DOWNSTREAM_RESULT, + makeCtx(), + ); + expect(result).toBe(DOWNSTREAM_RESULT); + + await flush(); + expect(loopCalls.length).toBe(0); + expect(logCalls.length).toBe(0); + }); +}); diff --git a/assistant/src/memory/v3/shadow-middleware.ts b/assistant/src/memory/v3/shadow-middleware.ts new file mode 100644 index 00000000000..5d5396f2a18 --- /dev/null +++ b/assistant/src/memory/v3/shadow-middleware.ts @@ -0,0 +1,305 @@ +/** + * Memory v3 — live-shadow `memoryRetrieval` middleware. + * + * Registered unconditionally into the `memoryRetrieval` pipeline, but inert + * unless BOTH `config.memory.v3.enabled` and `config.memory.v3.shadow` are on. + * When inert it is a byte-for-byte pass-through: it returns `next(args)` + * verbatim and performs zero extra work (no v3 call, no DB read, no log write). + * + * When active, it: + * 1. 
Returns the real (v2/default) `MemoryResult` from `next(args)` promptly — + * the injected context is ALWAYS the v2 result, never v3. + * 2. Kicks off the v3 retrieval loop DETACHED (not awaited on the path that + * returns the result), so the shadow run can never block or slow the turn. + * 3. Logs v3's selection set to `memory_v2_activation_logs` with + * `mode = "v3_shadow"`. The harness oracle filters `mode='router'`, so + * shadow rows never pollute it; the inspector can still surface them. + * + * The shadow build mirrors the inputs the v2 router receives (recent turn + * pairs, NOW context, prior-ever-injected slugs, config) so its recall is + * measured against the same situational context the live path saw. Failures + * are swallowed with a warn — the shadow is observational only and must never + * affect the live turn. + */ + +import { desc, eq } from "drizzle-orm"; + +import { getConfig } from "../../config/loader.js"; +import { registerPlugin } from "../../plugins/registry.js"; +import { + type MemoryArgs, + type MemoryResult, + type Middleware, + type Plugin, + PluginExecutionError, +} from "../../plugins/types.js"; +import type { ContentBlock } from "../../providers/types.js"; +import { getLogger } from "../../util/logger.js"; +import { getWorkspaceDir } from "../../util/platform.js"; +import type { DrizzleDb } from "../db-connection.js"; +import { getDb } from "../db-connection.js"; +import { + type MemoryV2ConceptRowRecord, + type MemoryV2ConfigSnapshot, + recordMemoryV2ActivationLog, +} from "../memory-v2-activation-log-store.js"; +import { messages } from "../schema.js"; +import { hydrate } from "../v2/activation-store.js"; +import type { RetrievalInput } from "../v2/harness/retriever.js"; +import { loadNowText } from "../v2/now-text.js"; +import type { RouterTurnPair } from "../v2/router.js"; +import type { EverInjectedEntry } from "../v2/types.js"; +import { runRetrievalLoop } from "./loop.js"; + +const log = getLogger("memory-v3-shadow"); + 
+/**
+ * Extract the recent (assistant, user) turn pairs from a conversation's
+ * message list, newest-pair-last, capped at `k`. Mirrors production
+ * `extractRecentTurnPairs` in `conversation-graph-memory.ts` (and its harness
+ * twin in `replay-input.ts`) so the shadow's `recentTurnPairs` matches what the
+ * live router was fed.
+ *
+ * Only `text` blocks contribute to a message's text; they are joined with a
+ * single space. The result is never empty: with no usable messages it holds
+ * one pair of two empty strings.
+ */
+function extractRecentTurnPairs(
+  msgs: ReadonlyArray<{ role: string; content: ContentBlock[] }>,
+  k: number,
+): RouterTurnPair[] {
+  const messageText = (content: ContentBlock[]): string =>
+    content
+      .filter(
+        (b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
+      )
+      .map((b) => b.text)
+      .join(" ");
+
+  const pairs: RouterTurnPair[] = [];
+  let pendingUser: string | null = null;
+  // Walk newest → oldest, pairing each user message with the assistant message
+  // that precedes it; `unshift` keeps the output oldest-pair-first.
+  for (let i = msgs.length - 1; i >= 0 && pairs.length < k; i--) {
+    const msg = msgs[i]!;
+    if (msg.role === "user" && pendingUser === null) {
+      pendingUser = messageText(msg.content);
+    } else if (msg.role === "assistant" && pendingUser !== null) {
+      pairs.unshift({
+        assistantMessage: messageText(msg.content),
+        userMessage: pendingUser,
+      });
+      pendingUser = null;
+    }
+  }
+  // A user message with no earlier assistant message in the window still
+  // forms a pair, with an empty assistant side.
+  if (pendingUser !== null && pairs.length < k) {
+    pairs.unshift({ assistantMessage: "", userMessage: pendingUser });
+  }
+  if (pairs.length === 0) {
+    pairs.push({ assistantMessage: "", userMessage: "" });
+  }
+  return pairs;
+}
+
+/** Parse a persisted JSON content-block string; tolerate malformed rows. */
+function parseContent(raw: string): ContentBlock[] {
+  try {
+    const parsed = JSON.parse(raw);
+    // Anything that is not a JSON array is treated as "no content blocks".
+    return Array.isArray(parsed) ? (parsed as ContentBlock[]) : [];
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Load the most recent messages for a conversation, oldest-first, bounded to a
+ * small generous multiple of `historicalPairs`. Pair extraction only needs the
+ * tail, so a bounded `LIMIT` query avoids loading an entire (potentially
+ * multi-GB) conversation on every shadow turn — mirrors the harness's bounded
+ * fetch in `replay-input.ts`. 
+ */
+function loadRecentMessages(
+  db: DrizzleDb,
+  conversationId: string,
+  historicalPairs: number,
+): Array<{ role: string; content: ContentBlock[] }> {
+  const fetchWindow = Math.max(20, historicalPairs * 12); // never fewer than 20 rows; otherwise 12 rows per requested pair
+  const rows = db
+    .select({ role: messages.role, content: messages.content })
+    .from(messages)
+    .where(eq(messages.conversationId, conversationId))
+    .orderBy(desc(messages.createdAt), desc(messages.id)) // newest first; id breaks created_at ties
+    .limit(fetchWindow)
+    .all();
+  return rows
+    .reverse() // flip the newest-first window back to oldest-first
+    .map((r) => ({ role: r.role, content: parseContent(r.content) }));
+}
+
+/**
+ * Empty config snapshot for shadow log rows. The activation-state values are
+ * meaningless for a v3 selection (it computes no spreading-activation scores),
+ * so they are zeroed — exactly as the v2 router-mode rows do.
+ */
+const SHADOW_CONFIG_SNAPSHOT: MemoryV2ConfigSnapshot = {
+  d: 0,
+  c_user: 0,
+  c_assistant: 0,
+  c_now: 0,
+  k: 0,
+  hops: 0,
+  top_k: 0,
+  epsilon: 0,
+};
+
+/**
+ * Build the concept rows logged for a v3 shadow selection. Each selected slug
+ * becomes a zeroed concept row tagged `source: "router"` and
+ * `status: "injected"` — the shadow has no activation scores to record, and the
+ * `mode='v3_shadow'` row tag (not the concept source) is what distinguishes
+ * shadow telemetry from live router selections.
+ *
+ * Output order follows `selectedSlugs`; an empty selection yields `[]`.
+ */
+function buildShadowConceptRows(
+  selectedSlugs: readonly string[],
+): MemoryV2ConceptRowRecord[] {
+  return selectedSlugs.map((slug) => ({
+    slug,
+    finalActivation: 0,
+    ownActivation: 0,
+    priorActivation: 0,
+    simUser: 0,
+    simAssistant: 0,
+    simNow: 0,
+    simUserRerankBoost: 0,
+    simAssistantRerankBoost: 0,
+    inRerankPool: false,
+    spreadContribution: 0,
+    source: "router",
+    status: "injected",
+  }));
+}
+
+/**
+ * Run the v3 retrieval loop for the shadow and log its selection. Best-effort:
+ * any failure is logged and swallowed. Honors `signal` so a cancelled turn
+ * stops the shadow's lane work. 
+ */
+async function runShadowAndLog(
+  args: MemoryArgs,
+  signal: AbortSignal,
+): Promise<void> {
+  try {
+    if (signal.aborted) return;
+
+    const config = getConfig();
+    const workspaceDir = getWorkspaceDir();
+    const db = getDb();
+
+    // Same pair budget the v2 router is configured with, so the shadow sees a
+    // comparable conversational window.
+    const historicalPairs = config.memory.v2.router.historical_pairs;
+    const recentMessages = loadRecentMessages(
+      db,
+      args.conversationId,
+      historicalPairs,
+    );
+    const recentTurnPairs = extractRecentTurnPairs(
+      recentMessages,
+      historicalPairs,
+    );
+
+    const nowText = await loadNowText(workspaceDir);
+
+    // Hydration failure is non-fatal: the shadow proceeds with an empty
+    // prior-ever-injected set rather than skipping the turn.
+    let priorEverInjected: readonly EverInjectedEntry[] = [];
+    try {
+      const state = await hydrate(db, args.conversationId);
+      priorEverInjected = state?.everInjected ?? [];
+    } catch (err) {
+      log.warn(
+        { err, conversationId: args.conversationId },
+        "v3 shadow: failed to hydrate prior-ever-injected; continuing with empty set",
+      );
+    }
+
+    // Re-check after the awaits above: the turn may have been cancelled.
+    if (signal.aborted) return;
+
+    const input: RetrievalInput = {
+      workspaceDir,
+      recentTurnPairs,
+      nowText,
+      priorEverInjected,
+      config,
+      signal,
+    };
+
+    const output = await runRetrievalLoop(input, {
+      db,
+      conversationId: args.conversationId,
+      turn: args.turnIndex,
+    });
+
+    // Do not log a selection for a turn that was cancelled mid-loop.
+    if (signal.aborted) return;
+
+    recordMemoryV2ActivationLog({
+      conversationId: args.conversationId,
+      turn: args.turnIndex,
+      mode: "v3_shadow",
+      concepts: buildShadowConceptRows(output.selectedSlugs),
+      config: SHADOW_CONFIG_SNAPSHOT,
+    });
+  } catch (err) {
+    log.warn(
+      { err, conversationId: args.conversationId, turn: args.turnIndex },
+      "v3 shadow retrieval failed; live turn unaffected",
+    );
+  }
+}
+
+/**
+ * Live-shadow `memoryRetrieval` middleware.
+ *
+ * Flag-gated INSIDE the middleware (per-turn, live-toggle): when v3 shadow is
+ * off it is a pure pass-through. When on, it fires the v3 loop detached and
+ * returns the unchanged downstream (v2) result immediately. 
+ */
+export const memoryV3ShadowMiddleware: Middleware<MemoryArgs, MemoryResult> =
+  async function memoryV3Shadow(args, next) {
+    const v3 = getConfig().memory.v3;
+    if (!v3.enabled || !v3.shadow) {
+      // Inert: byte-for-byte pass-through, zero extra work.
+      return next(args);
+    }
+
+    // Start the live (v2) pipeline BEFORE launching the shadow. An async
+    // function runs synchronously up to its first `await`, and
+    // `runShadowAndLog` does a config read plus the recent-messages query
+    // before it first awaits — launching it first would put that work ahead
+    // of the live path.
+    const downstream = next(args);
+
+    // Detached — never awaited on the path that returns the result. Errors are
+    // swallowed inside `runShadowAndLog`, so this promise does not reject.
+    void runShadowAndLog(args, args.signal);
+
+    return downstream;
+  };
+
+/**
+ * First-party plugin contributing the live-shadow `memoryRetrieval`
+ * middleware. Registered unconditionally by the plugin bootstrap (it is inert
+ * unless both v3 flags are on), so the registration is always present but does
+ * zero work in the default (flags-off) configuration.
+ */
+export const memoryV3ShadowPlugin: Plugin = {
+  manifest: {
+    name: "memory-v3-shadow",
+    version: "0.0.1",
+  },
+  middleware: {
+    memoryRetrieval: memoryV3ShadowMiddleware,
+  },
+};
+
+// Module-load side effect: register the shadow plugin at import time so the
+// registry is populated even in tests that skip `bootstrapPlugins()`, matching
+// the first-party `default-*` plugins. Idempotent via the swallowed
+// duplicate-name check (the defaults aggregator also lists this plugin).
+try {
+  registerPlugin(memoryV3ShadowPlugin);
+} catch (err) {
+  if (
+    err instanceof PluginExecutionError &&
+    err.message.includes("already registered")
+  ) {
+    // already registered — expected when both the defaults aggregator and the
+    // direct module import run in the same process.
+  } else {
+    throw err;
+  }
+}
diff --git a/assistant/src/plugins/defaults/index.ts b/assistant/src/plugins/defaults/index.ts
index 34fecc1b047..282e4a8db8f 100644
--- a/assistant/src/plugins/defaults/index.ts
+++ b/assistant/src/plugins/defaults/index.ts
@@ -24,6 +24,7 @@
  * chain) does not trip a TDZ. 
*/ +import { memoryV3ShadowPlugin } from "../../memory/v3/shadow-middleware.js"; import { registerPlugin, resetPluginRegistryForTests } from "../registry.js"; import { type Plugin, PluginExecutionError } from "../types.js"; import { defaultCircuitBreakerPlugin } from "./circuit-breaker.js"; @@ -60,6 +61,11 @@ function getAllDefaultPlugins(): readonly Plugin[] { defaultEmptyResponsePlugin, defaultToolErrorPlugin, defaultMemoryRetrievalPlugin, + // Live-shadow v3 retrieval. Always registered; inert unless both + // `memory.v3.enabled` and `memory.v3.shadow` are on (gated inside the + // middleware). Ordered after the default so the default terminal still + // produces the injected (v2) `MemoryResult`. + memoryV3ShadowPlugin, defaultInjectorsPlugin, defaultTokenEstimatePlugin, defaultOverflowReducePlugin, From afa8d2840dd94dc9b5250302b5a05016221e1720 Mon Sep 17 00:00:00 2001 From: Vellum Assistant Date: Mon, 25 May 2026 12:15:34 -0500 Subject: [PATCH 20/21] fix(memory-v3): null-safe shadow gate when memory.v3 config is absent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The live-shadow middleware runs on every turn and read `config.memory.v3.enabled` unguarded. Configs built outside the Zod schema (agent-loop test fixtures) have no `memory.v3` block, so the gate threw `TypeError: undefined is not an object` and aborted the turn — cascading across ~13 agent-loop test files. Guard with optional chaining (matches the loop's existing `write?.coactivation` pattern) and add a regression test for the absent-v3 config. 
Co-Authored-By: Claude Opus 4.7 --- .../v3/__tests__/shadow-middleware.test.ts | 22 ++++++++++++++++++- assistant/src/memory/v3/shadow-middleware.ts | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts index e9178a524b0..0633ab8559d 100644 --- a/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts +++ b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts @@ -40,12 +40,15 @@ mock.module("../../../util/logger.js", () => ({ /** Drives `config.memory.v3.{enabled,shadow}` and `historical_pairs`. */ let v3Enabled = false; let v3Shadow = false; +/** When false, omit the `memory.v3` block entirely (mirrors configs built + * outside the Zod schema, e.g. agent-loop test fixtures). */ +let v3Present = true; function makeConfig(): AssistantConfig { return { memory: { v2: { router: { historical_pairs: 1 } }, - v3: { enabled: v3Enabled, shadow: v3Shadow }, + ...(v3Present ? { v3: { enabled: v3Enabled, shadow: v3Shadow } } : {}), }, } as unknown as AssistantConfig; } @@ -153,6 +156,7 @@ async function flush(): Promise { beforeEach(() => { v3Enabled = false; v3Shadow = false; + v3Present = true; loopCalls.length = 0; logCalls.length = 0; loopImpl = async () => ({ @@ -209,6 +213,22 @@ describe("memory-v3 shadow middleware", () => { expect(logCalls.length).toBe(0); }); + test("v3 config block absent → pass-through, no throw, no v3 call", async () => { + // Reproduces the agent-loop test fixtures (and any config built outside the + // Zod schema) where `memory.v3` is undefined. The gate must not throw. 
+ v3Present = false; + const args = makeArgs(); + const result = await memoryV3ShadowMiddleware( + args, + async () => DOWNSTREAM_RESULT, + makeCtx(), + ); + expect(result).toBe(DOWNSTREAM_RESULT); + await flush(); + expect(loopCalls.length).toBe(0); + expect(logCalls.length).toBe(0); + }); + test("flag on → v3 runs, v3_shadow row logged, downstream result unchanged", async () => { v3Enabled = true; v3Shadow = true; diff --git a/assistant/src/memory/v3/shadow-middleware.ts b/assistant/src/memory/v3/shadow-middleware.ts index 5d5396f2a18..af6ed9e28d5 100644 --- a/assistant/src/memory/v3/shadow-middleware.ts +++ b/assistant/src/memory/v3/shadow-middleware.ts @@ -257,7 +257,7 @@ async function runShadowAndLog( export const memoryV3ShadowMiddleware: Middleware = async function memoryV3Shadow(args, next) { const v3 = getConfig().memory.v3; - if (!v3.enabled || !v3.shadow) { + if (!v3?.enabled || !v3?.shadow) { // Inert: byte-for-byte pass-through, zero extra work. return next(args); } From 700bdd52ac4c367f4b1c80b515ad1b699746bf4f Mon Sep 17 00:00:00 2001 From: Vellum Assistant Date: Mon, 25 May 2026 12:22:11 -0500 Subject: [PATCH 21/21] fix(memory-v3): add route policies for memory/v3/validate + tree PR #31983 registered the two read-only v3 routes but never added their ACTOR_ENDPOINTS entries in route-policy.ts; the per-PR run skipped CI so the route-policy coverage guard never ran. Add both as settings.read (mirroring the v2 read routes), satisfying guard-tests.test.ts. 
Co-Authored-By: Claude Opus 4.7 --- assistant/src/runtime/auth/route-policy.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/assistant/src/runtime/auth/route-policy.ts b/assistant/src/runtime/auth/route-policy.ts index d48e651733a..efd67b6f2ef 100644 --- a/assistant/src/runtime/auth/route-policy.ts +++ b/assistant/src/runtime/auth/route-policy.ts @@ -487,6 +487,8 @@ const ACTOR_ENDPOINTS: Array<{ endpoint: string; scopes: Scope[] }> = [ scopes: ["settings.read"], }, { endpoint: "memory/v2/now-text:GET", scopes: ["settings.read"] }, + { endpoint: "memory/v3/validate:POST", scopes: ["settings.read"] }, + { endpoint: "memory/v3/tree:POST", scopes: ["settings.read"] }, // Trust rule listing { endpoint: "trust-rules/manage:GET", scopes: ["settings.read"] },