From 61380fa19c50fa6b526ad9e1c3ae8b281c28e11b Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:39:35 -0400
Subject: [PATCH 01/21] feat(memory-v3): tree-node on-disk format + node store
 (#31971)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../memory/v3/__tests__/tree-store.test.ts    | 529 ++++++++++++++++++
 assistant/src/memory/v3/tree-store.ts         | 370 ++++++++++++
 assistant/src/memory/v3/types.ts              |  65 +++
 3 files changed, 964 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/tree-store.test.ts
 create mode 100644 assistant/src/memory/v3/tree-store.ts
 create mode 100644 assistant/src/memory/v3/types.ts
diff --git a/assistant/src/memory/v3/__tests__/tree-store.test.ts b/assistant/src/memory/v3/__tests__/tree-store.test.ts
new file mode 100644
index 00000000000..3d582cd10d7
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/tree-store.test.ts
@@ -0,0 +1,529 @@
+/**
+ * Tests for `assistant/src/memory/v3/tree-store.ts`.
+ *
+ * Coverage matrix:
+ *   - slugify: lowercase / kebab-case / ascii / 80-char cap / empty fallback.
+ *   - validateNodeId: accept set, reject set (path-traversal, malformed shapes),
+ *     reserved `_root` accepted.
+ *   - readNode / writeNode round-trip: frontmatter survives, body preserved.
+ *   - children refs parse for both `page:` and `node:` forms.
+ *   - malformed YAML / unknown frontmatter keys throw.
+ *   - readNode on missing file: returns null.
+ *   - writeNode atomicity: no orphan tmp on success, parent dirs created.
+ *   - listNodes: walks subdirectories, returns nested ids in `/`-form, excludes
+ *     hidden dirs / non-.md / temp files, missing dir → [].
+ *   - deleteNode: nested-id round-trip, idempotent on missing.
+ *   - renderNodeContent: frontmatter + body shape.
+ *   - No change to memory/concepts/ (v3 lives under memory/v3/tree/).
+ *
+ * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`.
+ */
+
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readdirSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+
+import {
+  deleteNode,
+  getTreeDir,
+  listNodes,
+  readNode,
+  renderNodeContent,
+  ROOT_NODE_ID,
+  slugify,
+  validateNodeId,
+  writeNode,
+} from "../tree-store.js";
+import type { TreeNode } from "../types.js";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+let workspaceDir: string; // per-test temp workspace, assigned in beforeEach
+
+beforeEach(() => {
+  workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-store-test-"));
+  // Pre-create tree/ like the workspace migration, so tests can write raw files.
+  mkdirSync(getTreeDir(workspaceDir), { recursive: true });
+});
+
+afterEach(() => {
+  if (existsSync(workspaceDir)) {
+    rmSync(workspaceDir, { recursive: true, force: true });
+  }
+});
+
+function makeNode(overrides: Partial<TreeNode> = {}): TreeNode {
+  const base: TreeNode = {
+    id: "people",
+    frontmatter: {
+      children: ["page:people/alice", "node:people/colleagues"],
+      routing_hints: "for work relationships see people/colleagues",
+      summary: "People I know.",
+    },
+    body: "The people branch of the memory tree.\n",
+  };
+  return { ...base, ...overrides }; // shallow: an override replaces the field
+}
+
+// ---------------------------------------------------------------------------
+// slugify
+// ---------------------------------------------------------------------------
+
+describe("slugify", () => {
+  test("lowercases ASCII letters", () => {
+    expect(slugify("AliceBob")).toBe("alicebob");
+  });
+
+  test("converts spaces and punctuation to single hyphens", () => {
+    expect(slugify("Alice's Preferred IDE!")).toBe("alice-s-preferred-ide");
+  });
+
+  test("collapses runs of separators to one hyphen", () => {
+    expect(slugify("foo   ___ bar")).toBe("foo-bar");
+  });
+
+  test("trims leading and trailing hyphens", () => {
+    expect(slugify("---hello world---")).toBe("hello-world");
+  });
+
+  test("collapses '/' to hyphen — slugify produces a single segment", () => {
+    expect(slugify("People/Colleagues")).toBe("people-colleagues");
+  });
+
+  test("caps slug length at 80 chars and re-trims trailing hyphen", () => {
+    const long = "a".repeat(120);
+    const slug = slugify(long);
+    expect(slug.length).toBe(80); // the store's per-segment cap
+    expect(slug.endsWith("-")).toBe(false); // NOTE(review): vacuous, input has no "-"
+  });
+
+  test("falls back to a unique placeholder for empty inputs", () => {
+    const a = slugify("");
+    const b = slugify("!!!");
+    expect(a).toMatch(/^node-[a-f0-9]{8}$/); // "node-" + 8 chars of a random UUID
+    expect(b).toMatch(/^node-[a-f0-9]{8}$/);
+    expect(a).not.toBe(b);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// validateNodeId
+// ---------------------------------------------------------------------------
+
+describe("validateNodeId", () => {
+  test.each([
+    ["people"],
+    ["a"],
+    ["people-colleagues"],
+    ["people/alice"],
+    ["people/colleagues/alice"],
+    ["a/b/c/d/e"],
+    [ROOT_NODE_ID], // reserved "_root": accepted only as the whole id
+  ])("accepts %p", (id) => {
+    expect(() => validateNodeId(id)).not.toThrow();
+  });
+
+  test.each([
+    ["empty string", ""],
+    ["leading slash", "/people"],
+    ["trailing slash", "people/"],
+    ["double slash", "people//alice"],
+    ["dot-dot segment", "people/../alice"],
+    ["pure dot-dot", ".."],
+    ["leading dot segment", ".hidden/alice"],
+    ["backslash", "people\\alice"],
+    ["null byte", "people\0evil"],
+    ["whitespace", "people alice"],
+    ["uppercase", "People"],
+    ["non-ascii", "café"],
+    ["leading hyphen", "-people"],
+    ["non-alphanumeric", "people!"],
+    ["leading underscore (only _root reserved)", "_other"],
+  ])("rejects %s (%p)", (_label, id) => {
+    expect(() => validateNodeId(id)).toThrow(/Invalid tree-node id/);
+  });
+
+  test("rejects ids longer than 200 chars", () => {
+    expect(() => validateNodeId("a".repeat(201))).toThrow(
+      /Invalid tree-node id/,
+    );
+  });
+
+  test("rejects segments longer than 80 chars even if total is under 200", () => {
+    expect(() => validateNodeId("a".repeat(81))).toThrow(
+      /Invalid tree-node id/,
+    );
+  });
+});
+
+// ---------------------------------------------------------------------------
+// readNode / writeNode round-trip
+// ---------------------------------------------------------------------------
+
+describe("writeNode + readNode round-trip", () => {
+  test("round-trips frontmatter and body verbatim", async () => {
+    const node = makeNode();
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, node.id);
+    expect(read).not.toBeNull();
+    expect(read!.id).toBe(node.id);
+    expect(read!.frontmatter.children).toEqual(node.frontmatter.children);
+    expect(read!.frontmatter.routing_hints).toBe(
+      node.frontmatter.routing_hints,
+    );
+    expect(read!.frontmatter.summary).toBe(node.frontmatter.summary);
+    expect(read!.body).toBe(node.body);
+  });
+
+  test("children parse for both page: and node: reference forms", async () => {
+    const node = makeNode({
+      id: "mixed",
+      frontmatter: {
+        children: ["page:procs/git-flow", "node:procs", "page:alice"],
+      },
+      body: "mixed refs\n",
+    });
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, "mixed");
+    expect(read!.frontmatter.children).toEqual([
+      "page:procs/git-flow",
+      "node:procs",
+      "page:alice",
+    ]);
+  });
+
+  test("the children list IS the DAG edge — a page may be referenced by multiple parents", async () => {
+    await writeNode(
+      workspaceDir,
+      makeNode({
+        id: "team-a",
+        frontmatter: { children: ["page:people/alice"] },
+        body: "team a\n",
+      }),
+    );
+    await writeNode(
+      workspaceDir,
+      makeNode({
+        id: "team-b",
+        frontmatter: { children: ["page:people/alice"] },
+        body: "team b\n",
+      }),
+    );
+
+    const a = await readNode(workspaceDir, "team-a");
+    const b = await readNode(workspaceDir, "team-b");
+    expect(a!.frontmatter.children).toContain("page:people/alice");
+    expect(b!.frontmatter.children).toContain("page:people/alice");
+  });
+
+  test("renders frontmatter at the top with --- delimiters", async () => {
+    const node = makeNode();
+    await writeNode(workspaceDir, node);
+
+    const raw = readFileSync(
+      join(getTreeDir(workspaceDir), `${node.id}.md`),
+      "utf-8",
+    );
+    expect(raw.startsWith("---\n")).toBe(true);
+    expect(raw.split("---").length).toBeGreaterThanOrEqual(3); // two delimiters
+    expect(raw).toContain("The people branch");
+  });
+
+  test("preserves an empty body", async () => {
+    const node = makeNode({ body: "" });
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, node.id);
+    expect(read!.body).toBe("");
+  });
+
+  test("preserves multiline body with embedded YAML-looking lines", async () => {
+    const tricky = "key: value\n---\nnot-frontmatter\n"; // body has its own "---"
+    const node = makeNode({ id: "tricky", body: tricky });
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, node.id);
+    expect(read!.body).toBe(tricky);
+  });
+
+  test("defaults children to [] for a node with empty frontmatter", async () => {
+    const node = makeNode({
+      id: "bare",
+      frontmatter: { children: [] },
+      body: "bare\n",
+    });
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, "bare");
+    expect(read!.frontmatter.children).toEqual([]);
+    expect(read!.frontmatter.routing_hints).toBeUndefined();
+    expect(read!.frontmatter.summary).toBeUndefined();
+  });
+
+  test("readNode returns null for an id that does not exist", async () => {
+    const result = await readNode(workspaceDir, "nonexistent");
+    expect(result).toBeNull();
+  });
+
+  test("readNode parses a hand-written node with no frontmatter as empty frontmatter + full body", async () => {
+    const id = "no-frontmatter";
+    const body = "Just some prose, no YAML.\n";
+    writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), body, "utf-8");
+
+    const read = await readNode(workspaceDir, id);
+    expect(read).not.toBeNull();
+    expect(read!.frontmatter.children).toEqual([]);
+    expect(read!.body).toBe(body);
+  });
+
+  test("readNode throws on malformed YAML frontmatter", async () => {
+    const id = "bad-yaml";
+    // Unclosed bracket inside the frontmatter block — invalid YAML.
+    const raw = "---\nchildren: [unterminated\n---\nbody\n";
+    writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), raw, "utf-8");
+
+    await expect(readNode(workspaceDir, id)).rejects.toThrow();
+  });
+
+  test("readNode throws on unknown frontmatter keys instead of silently dropping them", async () => {
+    const id = "extra-keys";
+    const raw = "---\nchildren: []\nunknown_field: oops\n---\nbody\n";
+    writeFileSync(join(getTreeDir(workspaceDir), `${id}.md`), raw, "utf-8");
+
+    await expect(readNode(workspaceDir, id)).rejects.toThrow();
+  });
+
+  test("writeNode overwrites an existing node", async () => {
+    await writeNode(workspaceDir, makeNode({ body: "first\n" }));
+    await writeNode(workspaceDir, makeNode({ body: "second\n" }));
+
+    const read = await readNode(workspaceDir, "people");
+    expect(read!.body).toBe("second\n");
+  });
+
+  test("writeNode creates parent directories for nested ids", async () => {
+    const node = makeNode({ id: "people/colleagues" });
+    await writeNode(workspaceDir, node);
+
+    const filePath = join(getTreeDir(workspaceDir), "people", "colleagues.md");
+    expect(existsSync(filePath)).toBe(true);
+
+    const read = await readNode(workspaceDir, "people/colleagues");
+    expect(read!.id).toBe("people/colleagues");
+    expect(read!.body).toBe(node.body);
+  });
+
+  test("writeNode round-trips deeply nested ids", async () => {
+    const node = makeNode({ id: "people/colleagues/alice" });
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, "people/colleagues/alice");
+    expect(read!.id).toBe("people/colleagues/alice");
+    expect(read!.frontmatter.children).toEqual(node.frontmatter.children);
+    expect(read!.body).toBe(node.body);
+  });
+
+  test("writeNode + readNode round-trip the reserved _root id", async () => {
+    const node = makeNode({
+      id: ROOT_NODE_ID,
+      frontmatter: { children: ["node:people"] },
+      body: "root of the tree\n",
+    });
+    await writeNode(workspaceDir, node);
+
+    const read = await readNode(workspaceDir, ROOT_NODE_ID);
+    expect(read!.id).toBe(ROOT_NODE_ID);
+    expect(read!.frontmatter.children).toEqual(["node:people"]);
+  });
+
+  test("writeNode rejects malicious ids and writes nothing at the escape target", async () => {
+    await expect(
+      writeNode(workspaceDir, makeNode({ id: "../escape" })),
+    ).rejects.toThrow(/Invalid tree-node id/);
+
+    // `../escape` would resolve to `<workspace>/memory/v3/escape.md`. Confirm
+    // the validation throw fired before any I/O — no file at that target.
+    expect(existsSync(join(workspaceDir, "memory", "v3", "escape.md"))).toBe(
+      false,
+    );
+  });
+
+  test("readNode rejects malicious ids", async () => {
+    await expect(readNode(workspaceDir, "../escape")).rejects.toThrow(
+      /Invalid tree-node id/,
+    );
+  });
+
+  test("successful write produces no orphan tmp files", async () => {
+    await writeNode(workspaceDir, makeNode());
+
+    const remaining = readdirSync(getTreeDir(workspaceDir));
+    const orphanTmps = remaining.filter((name) => name.includes(".tmp."));
+    expect(orphanTmps).toEqual([]);
+  });
+
+  test("does not touch memory/concepts/", async () => {
+    await writeNode(workspaceDir, makeNode({ id: "people/colleagues" }));
+
+    expect(existsSync(join(workspaceDir, "memory", "concepts"))).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// renderNodeContent
+// ---------------------------------------------------------------------------
+
+describe("renderNodeContent", () => {
+  test("emits frontmatter block followed by body", () => {
+    const rendered = renderNodeContent(makeNode());
+    expect(rendered.startsWith("---\n")).toBe(true);
+    expect(rendered).toContain("children:");
+    expect(rendered).toContain("page:people/alice");
+    expect(rendered.endsWith("The people branch of the memory tree.\n")).toBe(
+      true,
+    );
+  });
+
+  test("keeps the explicit children key even when empty", () => {
+    const rendered = renderNodeContent(
+      makeNode({ frontmatter: { children: [] }, body: "x\n" }),
+    );
+    expect(rendered).toContain("children: []"); // empty list is kept, not omitted
+  });
+});
+
+// ---------------------------------------------------------------------------
+// listNodes
+// ---------------------------------------------------------------------------
+
+describe("listNodes", () => {
+  test("returns ids (filename minus .md) for every node on disk", async () => {
+    await writeNode(workspaceDir, makeNode({ id: "alice" }));
+    await writeNode(workspaceDir, makeNode({ id: "bob" }));
+    await writeNode(workspaceDir, makeNode({ id: "carol" }));
+
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual(["alice", "bob", "carol"]); // listNodes sorts its result
+  });
+
+  test("excludes non-.md files in the tree directory", async () => {
+    await writeNode(workspaceDir, makeNode({ id: "alice" }));
+
+    const treeDir = getTreeDir(workspaceDir);
+    writeFileSync(join(treeDir, "README.txt"), "ignore me", "utf-8");
+    writeFileSync(join(treeDir, "image.png"), "fake", "utf-8");
+    writeFileSync(join(treeDir, ".hidden"), "fake", "utf-8");
+
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual(["alice"]);
+  });
+
+  test("walks subdirectories and returns nested ids in '/'-form", async () => {
+    await writeNode(workspaceDir, makeNode({ id: "alice" }));
+    await writeNode(workspaceDir, makeNode({ id: "people/bob" }));
+    await writeNode(workspaceDir, makeNode({ id: "people/carol" }));
+    await writeNode(workspaceDir, makeNode({ id: "arcs/2025-04/cutover" }));
+
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual([
+      "alice",
+      "arcs/2025-04/cutover",
+      "people/bob",
+      "people/carol",
+    ]);
+  });
+
+  test("skips hidden subdirectories and non-.md files inside nested dirs", async () => {
+    await writeNode(workspaceDir, makeNode({ id: "people/alice" }));
+
+    const treeDir = getTreeDir(workspaceDir);
+    mkdirSync(join(treeDir, ".git"), { recursive: true });
+    // A .md file inside a hidden directory must still be ignored.
+    writeFileSync(join(treeDir, ".git", "config.md"), "fake", "utf-8");
+    writeFileSync(join(treeDir, "people", "notes.txt"), "ignore", "utf-8");
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual(["people/alice"]);
+  });
+
+  test("skips orphaned .tmp.* files at any depth", async () => {
+    const treeDir = getTreeDir(workspaceDir);
+    await writeNode(workspaceDir, makeNode({ id: "people/alice" }));
+
+    writeFileSync(
+      join(treeDir, "alice.md.tmp.123.abc-def"),
+      "stranded",
+      "utf-8",
+    );
+    writeFileSync(
+      join(treeDir, "people", "bob.md.tmp.123.abc-def"),
+      "stranded",
+      "utf-8",
+    );
+
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual(["people/alice"]);
+  });
+
+  test("returns [] when the tree directory does not exist", async () => {
+    rmSync(getTreeDir(workspaceDir), { recursive: true, force: true });
+
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual([]);
+  });
+
+  test("returns [] when the tree directory is empty", async () => {
+    const ids = await listNodes(workspaceDir);
+    expect(ids).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// deleteNode
+// ---------------------------------------------------------------------------
+
+describe("deleteNode", () => {
+  test("removes the node from disk", async () => {
+    const node = makeNode();
+    await writeNode(workspaceDir, node);
+    expect(await readNode(workspaceDir, node.id)).not.toBeNull();
+
+    await deleteNode(workspaceDir, node.id);
+    expect(await readNode(workspaceDir, node.id)).toBeNull();
+  });
+
+  test("removes nested nodes", async () => {
+    const node = makeNode({ id: "people/colleagues" });
+    await writeNode(workspaceDir, node);
+
+    await deleteNode(workspaceDir, "people/colleagues");
+    expect(await readNode(workspaceDir, "people/colleagues")).toBeNull();
+  });
+
+  test("is idempotent — deleting a missing node does not throw", async () => {
+    // No expect needed: a rejected promise from either call fails the test.
+    await deleteNode(workspaceDir, "never-existed");
+    await deleteNode(workspaceDir, "never-existed");
+  });
+  test("does not affect other nodes", async () => {
+    await writeNode(workspaceDir, makeNode({ id: "alice" }));
+    await writeNode(workspaceDir, makeNode({ id: "bob" }));
+
+    await deleteNode(workspaceDir, "alice");
+
+    expect(await readNode(workspaceDir, "alice")).toBeNull();
+    expect(await readNode(workspaceDir, "bob")).not.toBeNull();
+  });
+});
diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts
new file mode 100644
index 00000000000..be13e489f8e
--- /dev/null
+++ b/assistant/src/memory/v3/tree-store.ts
@@ -0,0 +1,370 @@
+/**
+ * Memory v3 — Tree node store.
+ *
+ * Owns the on-disk read/write contract for `memory/v3/tree/<id>.md`. Nodes may
+ * live directly under `memory/v3/tree/` or nested in subdirectories (e.g.
+ * `memory/v3/tree/people/colleagues.md`); the id encodes the relative path from
+ * `tree/` minus the `.md` extension, using forward slashes as separators (so
+ * `people/colleagues` is a valid id).
+ *
+ * The v3 tree is a DAG *overlay* over the existing flat `memory/concepts/`
+ * pages — this module never touches `memory/concepts/`. Pages stay canonical
+ * and shared; nodes reference pages and sub-nodes by `children` refs
+ * (`page:<slug>` / `node:<id>`), which are the portable replacement for
+ * filesystem symlinks.
+ *
+ * Each node is a YAML-frontmatter Markdown file: a `---`-delimited block
+ * (`children`, optional `routing_hints` / `summary`) followed by the prose body
+ * that is the node's full self-description. This module is the only v3
+ * component that knows how to parse or render that format — every other v3
+ * module routes through `readNode` / `writeNode` so the on-disk shape can
+ * evolve without touching downstream callers.
+ *
+ * Writes are atomic (temp + rename) so a crash mid-write leaves either the old
+ * file or the new file in place — never a half-written node. The id machinery
+ * mirrors v2's page-store `slugify` / `validateSlug` so node ids and page slugs
+ * share the same filesystem-safe shape.
+ */
+
+import { randomUUID } from "node:crypto";
+import {
+  mkdir,
+  readdir,
+  readFile,
+  rename,
+  rm,
+  writeFile,
+} from "node:fs/promises";
+import { dirname, join, relative, sep } from "node:path";
+
+import { parse as parseYaml, stringify as stringifyYaml } from "yaml";
+
+import { FRONTMATTER_REGEX } from "../../skills/frontmatter.js";
+import { type TreeNode, TreeNodeFrontmatterSchema } from "./types.js";
+
+/** Suffix appended to a node id to form its filename under the tree root. */
+const NODE_EXTENSION = ".md";
+
+/** Cap individual id-segment length so we stay well under filesystem limits. */
+const MAX_ID_SEGMENT_LENGTH = 80;
+
+/** Cap the full id (including any folder separators) to a sane bound. */
+const MAX_ID_TOTAL_LENGTH = 200;
+
+/** One id segment: a leading [a-z0-9], then any run of [a-z0-9-]. */
+const ID_SEGMENT_REGEX = /^[a-z0-9](?:[a-z0-9-]*)$/;
+
+/**
+ * Reserved id for the root of the v3 tree. The root node is the entry point a
+ * future migration authors first; reserving the id keeps the well-known handle
+ * stable across the codebase.
+ */
+export const ROOT_NODE_ID = "_root";
+
+/**
+ * Convert an arbitrary input string into a filesystem-safe id **segment**.
+ *
+ * Returns a single path segment (no `/`). Path-shaped ids are constructed by
+ * the authoring migration writing files at full paths; this helper is for
+ * turning free-form text (e.g. a node label) into one clean segment.
+ *
+ * Rules:
+ *   - Output holds lowercase ASCII letters, digits, and hyphens only.
+ *   - Accents fold to the base letter (NFKD, then combining marks dropped).
+ *   - Every other run of characters (including `/`) becomes one hyphen;
+ *     leading/trailing hyphens are trimmed.
+ *   - Truncated to {@link MAX_ID_SEGMENT_LENGTH} characters (with trailing
+ *     hyphen re-trimmed after truncation).
+ *   - Empty results (e.g. emoji-only input) fall back to `node-<random>`.
+ */
+export function slugify(input: string): string {
+  let slug = input
+    .normalize("NFKD") // "é" becomes "e" + U+0301
+    .replace(/[\u0300-\u036f]+/g, "") // drop marks rather than hyphenate them
+    .toLowerCase() // after NFKD, which can expand symbols into capitals
+    .replace(/[^a-z0-9]+/g, "-") // hyphens included, so runs collapse to one
+    .replace(/^-+|-+$/g, "");
+
+  if (slug.length > MAX_ID_SEGMENT_LENGTH) {
+    slug = slug.slice(0, MAX_ID_SEGMENT_LENGTH).replace(/-+$/, "");
+  }
+
+  if (!slug) {
+    slug = `node-${randomUUID().slice(0, 8)}`;
+  }
+
+  return slug;
+}
+
+/**
+ * Gatekeeper for node ids at the storage boundary: returns for a well-formed
+ * id (plain or path-shaped), otherwise throws `Invalid tree-node id: …`.
+ *
+ * Ids become relative paths under `memory/v3/tree/`, so a bad one (`..`, a
+ * leading `/`, an embedded null byte) could climb out of that root once it
+ * reaches `path.join`. `readNode`, `writeNode` and `deleteNode` therefore call
+ * this first instead of trusting their callers.
+ *
+ * {@link ROOT_NODE_ID} (`_root`) is let through as a whole-id special case; its
+ * underscore would not pass {@link ID_SEGMENT_REGEX}.
+ *
+ * Accepted shape:
+ *   - Non-empty and at most {@link MAX_ID_TOTAL_LENGTH} characters overall.
+ *   - `/`-separated segments, each matching {@link ID_SEGMENT_REGEX} and at
+ *     most {@link MAX_ID_SEGMENT_LENGTH} characters long.
+ *   - No `..` or empty segment (`a//b`), no leading or trailing `/`.
+ *   - No `\`, null byte, or whitespace anywhere.
+ */
+export function validateNodeId(id: string): void {
+  if (typeof id !== "string" || id.length === 0) {
+    throw new Error(`Invalid tree-node id: empty`);
+  }
+  if (id === ROOT_NODE_ID) return; // reserved handle skips the shape rules
+
+  const totalLength = id.length;
+  if (totalLength > MAX_ID_TOTAL_LENGTH) {
+    throw new Error(
+      `Invalid tree-node id: length ${totalLength} exceeds max ${MAX_ID_TOTAL_LENGTH}: ${id}`,
+    );
+  }
+  // Whole-id screens run before splitting so each keeps its own message.
+  if (id.indexOf("\\") >= 0) {
+    throw new Error(`Invalid tree-node id: backslash not allowed: ${id}`);
+  }
+  if (id.indexOf("\0") >= 0) {
+    throw new Error(`Invalid tree-node id: null byte not allowed`);
+  }
+  if (/\s/.test(id)) {
+    throw new Error(`Invalid tree-node id: whitespace not allowed: ${id}`);
+  }
+  if (id[0] === "/" || id[id.length - 1] === "/") {
+    throw new Error(
+      `Invalid tree-node id: leading or trailing '/' not allowed: ${id}`,
+    );
+  }
+  id.split("/").forEach((part) => {
+    if (part === "") {
+      throw new Error(`Invalid tree-node id: empty path segment: ${id}`);
+    }
+    if (part === "..") {
+      throw new Error(`Invalid tree-node id: '..' segment not allowed: ${id}`);
+    }
+    if (part.length > MAX_ID_SEGMENT_LENGTH) {
+      throw new Error(
+        `Invalid tree-node id: segment '${part}' exceeds max ${MAX_ID_SEGMENT_LENGTH} chars: ${id}`,
+      );
+    }
+    if (!ID_SEGMENT_REGEX.test(part)) {
+      throw new Error(
+        `Invalid tree-node id: segment '${part}' must match [a-z0-9][a-z0-9-]*: ${id}`,
+      );
+    }
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Path helpers
+// ---------------------------------------------------------------------------
+/** Directory holding every v3 tree node: `<workspaceDir>/memory/v3/tree`. */
+export function getTreeDir(workspaceDir: string): string {
+  return join(workspaceDir, "memory", "v3", "tree");
+}
+
+/**
+ * Map a node id to its file under the tree root by appending `.md`. `/` inside
+ * the id turns into folder nesting when `path.join` resolves it. Performs no
+ * validation itself: public entry points call `validateNodeId` beforehand.
+ */
+function getNodePath(workspaceDir: string, id: string): string {
+  const fileName = `${id}${NODE_EXTENSION}`;
+  return join(getTreeDir(workspaceDir), fileName);
+}
+
+/**
+ * Inverse of the id → path mapping: take `filePath` relative to `treeRoot`,
+ * drop a trailing `.md` when present (other names pass through unchanged), and
+ * rewrite platform separators to `/` so the id looks the same on every OS.
+ */
+function idFromNodePath(treeRoot: string, filePath: string): string {
+  let id = relative(treeRoot, filePath);
+  if (id.endsWith(NODE_EXTENSION)) {
+    id = id.slice(0, id.length - NODE_EXTENSION.length);
+  }
+  if (sep !== "/") id = id.split(sep).join("/");
+  return id;
+}
+
+// ---------------------------------------------------------------------------
+// Frontmatter parse / render
+// ---------------------------------------------------------------------------
+
+/**
+ * Split raw file text into validated frontmatter and the prose body.
+ *
+ * Text that does not match `FRONTMATTER_REGEX` is all body; its frontmatter is
+ * whatever `TreeNodeFrontmatterSchema` produces for `{}`. Otherwise capture
+ * group 1 is parsed as YAML (an empty block counts as `{}`) and the body is
+ * the text after the match.
+ *
+ * Throws if the YAML is malformed or the schema rejects the parsed value.
+ */
+function parseNodeContent(raw: string): {
+  frontmatter: TreeNode["frontmatter"];
+  body: string;
+} {
+  const found = raw.match(FRONTMATTER_REGEX);
+
+  // No frontmatter block: validate an empty object so schema defaults apply
+  // and hand the whole file back as the body.
+  let yamlData: unknown = {};
+  let body = raw;
+
+  if (found) {
+    // `found[0]` is everything the regex consumed; group 1 is parsed as YAML.
+    yamlData = parseYaml(found[1]) ?? {};
+    body = raw.slice(found[0].length);
+  }
+
+  return { frontmatter: TreeNodeFrontmatterSchema.parse(yamlData), body };
+}
+
+/**
+ * Serialize a node to its on-disk text: `---`, the schema-validated
+ * frontmatter as YAML, `---`, then the body verbatim. Throws if the
+ * frontmatter fails `TreeNodeFrontmatterSchema`; `node.id` is not used.
+ */
+export function renderNodeContent(node: TreeNode): string {
+  const validated = TreeNodeFrontmatterSchema.parse(node.frontmatter);
+  const yamlText = stringifyYaml(validated, { indent: 2 }).trimEnd();
+  return "---\n" + yamlText + "\n---\n" + node.body;
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Load a single tree node from disk.
+ *
+ * The id is validated before any I/O. A file that does not exist (`ENOENT`)
+ * yields `null`; the returned node otherwise carries the id as given plus the
+ * parsed frontmatter and body.
+ *
+ * Every other failure rejects: an invalid id, a read error such as permission
+ * denied, malformed YAML, or frontmatter that fails schema validation.
+ */
+export async function readNode(
+  workspaceDir: string,
+  id: string,
+): Promise<TreeNode | null> {
+  validateNodeId(id);
+  let raw: string;
+  try {
+    raw = await readFile(getNodePath(workspaceDir, id), "utf-8");
+  } catch (err) {
+    const missing = (err as NodeJS.ErrnoException).code === "ENOENT";
+    if (missing) return null;
+    throw err;
+  }
+  return { id, ...parseNodeContent(raw) };
+}
+
+/**
+ * Write a tree node atomically: the rendered text goes to a sibling
+ * `<file>.tmp.<pid>.<uuid>` which is then renamed over the final path. A crash
+ * before the rename leaves the prior file intact; after it, the new file.
+ * Parent directories are created on demand (`mkdir -p`), so nested ids such as
+ * `people/colleagues` need no setup. Rejects on an invalid id, frontmatter
+ * that fails schema validation, or any filesystem error.
+ */
+export async function writeNode(
+  workspaceDir: string,
+  node: TreeNode,
+): Promise<void> {
+  validateNodeId(node.id);
+  const path = getNodePath(workspaceDir, node.id);
+  const tmpPath = `${path}.tmp.${process.pid}.${randomUUID()}`;
+  const content = renderNodeContent(node);
+  try {
+    await mkdir(dirname(path), { recursive: true });
+    await writeFile(tmpPath, content, "utf-8");
+    await rename(tmpPath, path);
+  } catch (err) {
+    // Best-effort cleanup: whichever step failed, remove the temp file (if it
+    // was created) so no `.tmp.` debris is left beside the node files. Cleanup
+    // errors are swallowed so the original failure is the one rethrown.
+    await rm(tmpPath, { force: true }).catch(() => {});
+    throw err;
+  }
+}
+
+/**
+ * Enumerate the ids of all node files under the tree root, recursing into
+ * subdirectories. Ids come back sorted, `/`-separated and without the `.md`
+ * suffix (e.g. `people/colleagues`). They are derived from filenames and not
+ * run through `validateNodeId`, so a stray `Notes.md` would be listed too.
+ *
+ * Skipped: names starting with `.` (hidden files and directories), entries
+ * that are neither directory nor regular file, names not ending in `.md`, and
+ * names containing `.tmp.` (temp files from an interrupted write). A missing
+ * tree root yields `[]`; any other readdir error is rethrown.
+ */
+export async function listNodes(workspaceDir: string): Promise<string[]> {
+  const treeRoot = getTreeDir(workspaceDir);
+  const found: string[] = [];
+  // Work list walked in order; for...of also visits directories pushed mid-loop.
+  const pending: string[] = [treeRoot];
+
+  for (const currentDir of pending) {
+    let entries;
+    try {
+      entries = await readdir(currentDir, { withFileTypes: true });
+    } catch (err) {
+      const missing = (err as NodeJS.ErrnoException).code === "ENOENT";
+      if (!missing) throw err;
+      // A missing root means "no tree yet" → []. A nested directory that
+      // disappears between being queued and being read is simply skipped.
+      if (currentDir === treeRoot) return [];
+      continue;
+    }
+
+    for (const entry of entries) {
+      const { name } = entry;
+      if (name.startsWith(".")) continue;
+      const entryPath = join(currentDir, name);
+      if (entry.isDirectory()) {
+        pending.push(entryPath);
+      } else if (
+        entry.isFile() &&
+        name.endsWith(NODE_EXTENSION) &&
+        !name.includes(".tmp.")
+      ) {
+        found.push(idFromNodePath(treeRoot, entryPath));
+      }
+    }
+  }
+
+  found.sort();
+  return found;
+}
+
+/**
+ * Remove a node's file from disk. Deleting an id whose file is absent is a
+ * no-op, so the call is safe to repeat.
+ *
+ * Rejects on an invalid id and on any removal error other than `ENOENT`.
+ */
+export async function deleteNode(
+  workspaceDir: string,
+  id: string,
+): Promise<void> {
+  validateNodeId(id);
+  const target = getNodePath(workspaceDir, id);
+  try {
+    await rm(target);
+  } catch (err) {
+    // Already gone counts as success; anything else goes back to the caller.
+    const alreadyGone = (err as NodeJS.ErrnoException).code === "ENOENT";
+    if (!alreadyGone) throw err;
+  }
+}
diff --git a/assistant/src/memory/v3/types.ts b/assistant/src/memory/v3/types.ts
new file mode 100644
index 00000000000..c6ae766ec53
--- /dev/null
+++ b/assistant/src/memory/v3/types.ts
@@ -0,0 +1,65 @@
+// ---------------------------------------------------------------------------
+// Memory v3 — Shared types
+// ---------------------------------------------------------------------------
+//
+// Types shared across the v3 memory subsystem. Like v2, values that cross a
+// serialization boundary — YAML frontmatter on disk — ship as a Zod schema
+// with an inferred TypeScript type, so runtime validation can run wherever a
+// node is read.
+//
+// This file must not import from any other `memory/v3/*` module — it is the
+// leaf of the v3 dependency graph.
+
+import { z } from "zod";
+
+// ---------------------------------------------------------------------------
+// Tree nodes
+// ---------------------------------------------------------------------------
+
+/**
+ * YAML frontmatter at the top of a v3 tree node (`memory/v3/tree/<id>.md`).
+ * Unknown keys are rejected (`.strict()`).
+ *
+ * The v3 tree is a DAG *overlay* over the existing flat `memory/concepts/`
+ * pages. A node organizes a region of the graph: its markdown body is the
+ * node's full self-description and `children` lists its outgoing edges.
+ *
+ * `children` (default `[]`) is the canonical, ordered list of child
+ * *references*. Each entry is expected to be `"page:<page-slug>"` — a leaf
+ * concept page (canonical content stays in `memory/concepts/<page-slug>.md`,
+ * shared and untouched by v3) — or `"node:<node-id>"`, a sub-node in the v3
+ * tree. This schema only checks that entries are strings, not their prefix.
+ *
+ * The reference list IS the set of DAG edges — the portable replacement for
+ * the filesystem symlinks an earlier design would have used. A page or node
+ * may be referenced by more than one parent (hence DAG, not tree).
+ *
+ * `routing_hints` is a thin, hand-written line of cross-branch disambiguation
+ * — e.g. "for *work* relationships see people/colleagues, not this node".
+ * Kept deliberately small so it stays cheap to inject during routing.
+ *
+ * `summary` is the node's one-line headline; the body is the full description.
+ * Optional so a freshly authored node with only a body still parses.
+ */
+export const TreeNodeFrontmatterSchema = z
+  .object({
+    children: z.array(z.string()).default([]),
+    routing_hints: z.string().optional(),
+    summary: z.string().optional(),
+  })
+  .strict();
+
+export type TreeNodeFrontmatter = z.infer<typeof TreeNodeFrontmatterSchema>;
+
+/**
+ * A single tree node on disk. The id is the relative path from
+ * `memory/v3/tree/` minus `.md`, using forward slashes — so `people` and
+ * `people/colleagues` are both valid ids. The id is the stable identity used
+ * in `children` references (`node:<id>`) and is the portable node handle a
+ * future data-migration authors by hand.
+ */
+export type TreeNode = {
+  id: string;
+  frontmatter: TreeNodeFrontmatter;
+  body: string;
+};

From be5eb0f067f77de930f7fe5cd69766e44218f7a0 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:40:19 -0400
Subject: [PATCH 02/21] feat(memory-v3): config schema + cheap/capable LLM call
 sites (#31972)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 assistant/src/__tests__/llm-resolver.test.ts  |  86 +++++++++++--
 assistant/src/config/call-site-defaults.ts    |   4 +
 .../schemas/__tests__/memory-v2.test.ts       | 109 +++++++++++++++-
 .../src/config/schemas/call-site-catalog.ts   |  21 ++++
 assistant/src/config/schemas/llm.ts           |   3 +
 assistant/src/config/schemas/memory-v2.ts     | 119 ++++++++++++++++++
 assistant/src/config/schemas/memory.ts        |   3 +-
 7 files changed, 334 insertions(+), 11 deletions(-)

diff --git a/assistant/src/__tests__/llm-resolver.test.ts b/assistant/src/__tests__/llm-resolver.test.ts
index 9b0e9ad626e..86feaf8c2b6 100644
--- a/assistant/src/__tests__/llm-resolver.test.ts
+++ b/assistant/src/__tests__/llm-resolver.test.ts
@@ -2,7 +2,10 @@ import { describe, expect, test } from "bun:test";
 
 import { z } from "zod";
 
-import { resolveCallSiteConfig, resolveDefaultProfileKey } from "../config/llm-resolver.js";
+import {
+  resolveCallSiteConfig,
+  resolveDefaultProfileKey,
+} from "../config/llm-resolver.js";
 import { type LLMCallSite, LLMSchema } from "../config/schemas/llm.js";
 
 const fullDefault = {
@@ -690,13 +693,28 @@ describe("resolveCallSiteConfig", () => {
     });
 
     const callSites: LLMCallSite[] = [
-      "mainAgent", "subagentSpawn", "heartbeatAgent", "filingAgent",
-      "compactionAgent", "analyzeConversation", "callAgent",
-      "memoryExtraction", "memoryConsolidation", "memoryRetrieval",
-      "memoryRouter", "recall", "conversationSummarization",
-      "commitMessage", "conversationStarters", "replySuggestion",
-      "conversationTitle", "identityIntro", "emptyStateGreeting",
-      "notificationDecision", "interactionClassifier", "inference",
+      "mainAgent",
+      "subagentSpawn",
+      "heartbeatAgent",
+      "filingAgent",
+      "compactionAgent",
+      "analyzeConversation",
+      "callAgent",
+      "memoryExtraction",
+      "memoryConsolidation",
+      "memoryRetrieval",
+      "memoryRouter",
+      "recall",
+      "conversationSummarization",
+      "commitMessage",
+      "conversationStarters",
+      "replySuggestion",
+      "conversationTitle",
+      "identityIntro",
+      "emptyStateGreeting",
+      "notificationDecision",
+      "interactionClassifier",
+      "inference",
     ];
 
     for (const cs of callSites) {
@@ -778,7 +796,10 @@ describe("resolveCallSiteConfig", () => {
         provider_connection: "anthropic-managed",
       },
       profiles: {
-        fireworks: { provider: "fireworks", model: "accounts/fireworks/models/kimi-k2p5" },
+        fireworks: {
+          provider: "fireworks",
+          model: "accounts/fireworks/models/kimi-k2p5",
+        },
       },
       activeProfile: "fireworks",
     });
@@ -874,3 +895,50 @@ describe("resolveDefaultProfileKey", () => {
     );
   });
 });
+
+describe("memory v3 call sites resolve through the standard resolver", () => {
+  const llm = LLMSchema.parse({
+    default: fullDefault,
+    profiles: {
+      balanced: { provider: "anthropic", model: "claude-sonnet-4-7" },
+      "cost-optimized": {
+        provider: "anthropic",
+        model: "claude-haiku-4-5-20251001",
+      },
+    },
+  });
+
+  test("memoryV3Filter and memoryV3Descent resolve to the cost-optimized profile", () => {
+    expect(resolveDefaultProfileKey("memoryV3Filter", llm)).toBe(
+      "cost-optimized",
+    );
+    expect(resolveDefaultProfileKey("memoryV3Descent", llm)).toBe(
+      "cost-optimized",
+    );
+    expect(resolveCallSiteConfig("memoryV3Filter", llm).model).toBe(
+      "claude-haiku-4-5-20251001",
+    );
+    expect(resolveCallSiteConfig("memoryV3Descent", llm).model).toBe(
+      "claude-haiku-4-5-20251001",
+    );
+  });
+
+  test("memoryV3Gate resolves to the balanced (capable) profile", () => {
+    expect(resolveDefaultProfileKey("memoryV3Gate", llm)).toBe("balanced");
+    expect(resolveCallSiteConfig("memoryV3Gate", llm).model).toBe(
+      "claude-sonnet-4-7",
+    );
+  });
+
+  test("v3 call sites are addressable as call-site override keys", () => {
+    const overridden = LLMSchema.parse({
+      default: fullDefault,
+      callSites: {
+        memoryV3Gate: { model: "claude-opus-4-7" },
+      },
+    });
+    expect(resolveCallSiteConfig("memoryV3Gate", overridden).model).toBe(
+      "claude-opus-4-7",
+    );
+  });
+});
diff --git a/assistant/src/config/call-site-defaults.ts b/assistant/src/config/call-site-defaults.ts
index 36fbe925750..e988dbe2e9a 100644
--- a/assistant/src/config/call-site-defaults.ts
+++ b/assistant/src/config/call-site-defaults.ts
@@ -47,6 +47,10 @@ export const CALL_SITE_DEFAULTS: Record<LLMCallSite, CallSiteDefaultConfig> = {
   memoryV2Migration: { profile: "cost-optimized" },
   memoryV2Sweep: { profile: "cost-optimized" },
   memoryV2Consolidation: { profile: "balanced" },
+  // memory v3: cheap filter + descent, capable gate.
+  memoryV3Filter: { profile: "cost-optimized" },
+  memoryV3Descent: { profile: "cost-optimized" },
+  memoryV3Gate: { profile: "balanced" },
   conversationSummarization: { profile: "cost-optimized" },
   conversationTitle: { profile: "cost-optimized" },
   approvalCopy: { profile: "cost-optimized" },
diff --git a/assistant/src/config/schemas/__tests__/memory-v2.test.ts b/assistant/src/config/schemas/__tests__/memory-v2.test.ts
index ef55cca2c9c..5ca74e76bba 100644
--- a/assistant/src/config/schemas/__tests__/memory-v2.test.ts
+++ b/assistant/src/config/schemas/__tests__/memory-v2.test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, test } from "bun:test";
 
 import { MemoryConfigSchema } from "../memory.js";
-import { MemoryV2ConfigSchema } from "../memory-v2.js";
+import { MemoryV2ConfigSchema, MemoryV3ConfigSchema } from "../memory-v2.js";
 
 describe("MemoryV2ConfigSchema", () => {
   test("parses an empty object to documented defaults", () => {
@@ -212,6 +212,113 @@ describe("MemoryV2ConfigSchema", () => {
   });
 });
 
+describe("MemoryV3ConfigSchema", () => {
+  test("parses an empty object to documented defaults", () => {
+    const parsed = MemoryV3ConfigSchema.parse({});
+    expect(parsed).toEqual({
+      enabled: false,
+      shadow: false,
+      passCap: 3,
+      breadthBudget: 6,
+      maxDepth: 6,
+      denseQuota: { activeDomain: 30, offDomain: 8 },
+      lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true },
+      ks: [5, 10, 25, 50],
+    });
+  });
+
+  test("parses undefined to the same defaults (top-level .default)", () => {
+    expect(MemoryV3ConfigSchema.parse(undefined)).toEqual(
+      MemoryV3ConfigSchema.parse({}),
+    );
+  });
+
+  test("defaults to disabled for backwards compatibility", () => {
+    expect(MemoryV3ConfigSchema.parse({}).enabled).toBe(false);
+    expect(MemoryV3ConfigSchema.parse({}).shadow).toBe(false);
+  });
+
+  test("accepts explicit scalar overrides", () => {
+    const parsed = MemoryV3ConfigSchema.parse({
+      enabled: true,
+      shadow: true,
+      passCap: 5,
+      breadthBudget: 10,
+      maxDepth: 8,
+    });
+    expect(parsed.enabled).toBe(true);
+    expect(parsed.shadow).toBe(true);
+    expect(parsed.passCap).toBe(5);
+    expect(parsed.breadthBudget).toBe(10);
+    expect(parsed.maxDepth).toBe(8);
+  });
+
+  test("accepts explicit denseQuota override", () => {
+    const parsed = MemoryV3ConfigSchema.parse({
+      denseQuota: { activeDomain: 50, offDomain: 12 },
+    });
+    expect(parsed.denseQuota).toEqual({ activeDomain: 50, offDomain: 12 });
+  });
+
+  test("accepts a partial lanes override and defaults the rest", () => {
+    const parsed = MemoryV3ConfigSchema.parse({ lanes: { dense: false } });
+    expect(parsed.lanes).toEqual({
+      hot: true,
+      sparse: true,
+      dense: false,
+      tree: true,
+      edges: true,
+    });
+  });
+
+  test("accepts an explicit ks override", () => {
+    const parsed = MemoryV3ConfigSchema.parse({ ks: [1, 3, 7] });
+    expect(parsed.ks).toEqual([1, 3, 7]);
+  });
+
+  test("rejects a non-boolean enabled", () => {
+    expect(() => MemoryV3ConfigSchema.parse({ enabled: "yes" })).toThrow();
+  });
+
+  test("rejects a non-integer passCap", () => {
+    expect(() => MemoryV3ConfigSchema.parse({ passCap: 2.5 })).toThrow();
+  });
+
+  test("rejects non-number ks entries", () => {
+    expect(() => MemoryV3ConfigSchema.parse({ ks: ["a"] })).toThrow();
+  });
+});
+
+describe("MemoryConfigSchema integration with v3 block", () => {
+  test("includes a v3 block defaulting to disabled when v3 is omitted", () => {
+    const parsed = MemoryConfigSchema.parse({});
+    expect(parsed.v3).toBeDefined();
+    expect(parsed.v3.enabled).toBe(false);
+    expect(parsed.v3.shadow).toBe(false);
+    expect(parsed.v3.passCap).toBe(3);
+    expect(parsed.v3.lanes.dense).toBe(true);
+    expect(parsed.v3.ks).toEqual([5, 10, 25, 50]);
+  });
+
+  test("leaves pre-existing configs (no v3 key) otherwise unchanged", () => {
+    // A config authored before v3 existed parses fine and its v2 block is
+    // untouched; the v3 block is purely additive.
+    const parsed = MemoryConfigSchema.parse({ v2: { top_k: 50 } });
+    expect(parsed.v2.top_k).toBe(50);
+    expect(parsed.v3.enabled).toBe(false);
+  });
+
+  test("propagates v3 overrides through MemoryConfigSchema", () => {
+    const parsed = MemoryConfigSchema.parse({
+      v3: { enabled: true, passCap: 4 },
+    });
+    expect(parsed.v3.enabled).toBe(true);
+    expect(parsed.v3.passCap).toBe(4);
+    // Non-overridden v3 fields keep their defaults.
+    expect(parsed.v3.maxDepth).toBe(6);
+  });
+});
+
 describe("MemoryConfigSchema integration with v2 block", () => {
   test("parses an empty memory config and includes a v2 block with defaults", () => {
     const parsed = MemoryConfigSchema.parse({});
diff --git a/assistant/src/config/schemas/call-site-catalog.ts b/assistant/src/config/schemas/call-site-catalog.ts
index 5552889d7cb..7d0417b4f1f 100644
--- a/assistant/src/config/schemas/call-site-catalog.ts
+++ b/assistant/src/config/schemas/call-site-catalog.ts
@@ -121,6 +121,27 @@ const CATALOG_RECORD: CatalogRecord = {
       "Selects which concept pages to inject for the next agent turn by routing over a cached page index.",
     domain: "memory",
   },
+  memoryV3Filter: {
+    id: "memoryV3Filter",
+    displayName: "Memory V3 Filter",
+    description:
+      "Cheaply filters the V3 multi-lane candidate set before descent.",
+    domain: "memory",
+  },
+  memoryV3Descent: {
+    id: "memoryV3Descent",
+    displayName: "Memory V3 Descent",
+    description:
+      "Drives the V3 bounded-descent traversal through the memory tree.",
+    domain: "memory",
+  },
+  memoryV3Gate: {
+    id: "memoryV3Gate",
+    displayName: "Memory V3 Gate",
+    description:
+      "Final capable gate that decides which V3 candidates are injected for the next turn.",
+    domain: "memory",
+  },
   memoryV2Consolidation: {
     id: "memoryV2Consolidation",
     displayName: "Memory V2 Consolidation",
diff --git a/assistant/src/config/schemas/llm.ts b/assistant/src/config/schemas/llm.ts
index 10103b86b1d..e6a53c85fae 100644
--- a/assistant/src/config/schemas/llm.ts
+++ b/assistant/src/config/schemas/llm.ts
@@ -49,6 +49,9 @@ export const LLMCallSiteEnum = z.enum([
   "memoryV2Migration",
   "memoryV2Sweep",
   "memoryRouter",
+  "memoryV3Filter",
+  "memoryV3Descent",
+  "memoryV3Gate",
   "memoryV2Consolidation",
   "memoryRetrospective",
   "recall",
diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts
index 45a076a778e..11360a89e31 100644
--- a/assistant/src/config/schemas/memory-v2.ts
+++ b/assistant/src/config/schemas/memory-v2.ts
@@ -388,3 +388,139 @@ export const MemoryV2ConfigSchema = z
   });
 
 export type MemoryV2Config = z.infer<typeof MemoryV2ConfigSchema>;
+
+/**
+ * Memory v3 (multi-lane, bounded-descent retrieval) configuration.
+ *
+ * Additive scaffolding only — defaults to `enabled: false` so existing
+ * configs are untouched and the v3 retrieval loop stays inert until later
+ * PRs wire it up. Every field carries a default and the whole block is
+ * `.default(...)`-wrapped so a config that omits `memory.v3` entirely still
+ * parses to these documented defaults.
+ *
+ * Counts, budgets and quotas must be non-negative integers and every `ks`
+ * cutoff an integer >= 1, so nonsensical values are rejected at parse time.
+ */
+export const MemoryV3ConfigSchema = z
+  .object({
+    enabled: z
+      .boolean({ error: "memory.v3.enabled must be a boolean" })
+      .default(false)
+      .describe(
+        "Whether the v3 memory subsystem (multi-lane bounded-descent retrieval) is enabled. Off by default until the v3 loop is wired up.",
+      ),
+    shadow: z
+      .boolean({ error: "memory.v3.shadow must be a boolean" })
+      .default(false)
+      .describe(
+        "Live-shadow toggle: when on, the v3 retrieval loop runs alongside the active path for comparison without affecting injected context. Consumed by a later PR.",
+      ),
+    passCap: z
+      .number({ error: "memory.v3.passCap must be a number" })
+      .int("memory.v3.passCap must be an integer")
+      .min(0, "memory.v3.passCap must be >= 0")
+      .default(3)
+      .describe(
+        "Maximum number of retrieval passes (router → descent rounds) the v3 loop may run per turn.",
+      ),
+    breadthBudget: z
+      .number({ error: "memory.v3.breadthBudget must be a number" })
+      .int("memory.v3.breadthBudget must be an integer")
+      .min(0, "memory.v3.breadthBudget must be >= 0")
+      .default(6)
+      .describe(
+        "Per-pass breadth budget — the number of frontier candidates the v3 loop may expand at each step.",
+      ),
+    maxDepth: z
+      .number({ error: "memory.v3.maxDepth must be a number" })
+      .int("memory.v3.maxDepth must be an integer")
+      .min(0, "memory.v3.maxDepth must be >= 0")
+      .default(6)
+      .describe(
+        "Maximum descent depth the v3 loop traverses through the memory tree before stopping.",
+      ),
+    denseQuota: z
+      .object({
+        activeDomain: z
+          .number({
+            error: "memory.v3.denseQuota.activeDomain must be a number",
+          })
+          .int("memory.v3.denseQuota.activeDomain must be an integer")
+          .min(0, "memory.v3.denseQuota.activeDomain must be >= 0")
+          .default(30)
+          .describe(
+            "Dense-lane candidate quota allocated to the conversation's active domain.",
+          ),
+        offDomain: z
+          .number({ error: "memory.v3.denseQuota.offDomain must be a number" })
+          .int("memory.v3.denseQuota.offDomain must be an integer")
+          .min(0, "memory.v3.denseQuota.offDomain must be >= 0")
+          .default(8)
+          .describe(
+            "Dense-lane candidate quota allocated to off-domain (exploratory) retrieval.",
+          ),
+      })
+      .default({ activeDomain: 30, offDomain: 8 })
+      .describe(
+        "Dense-lane candidate quotas split between the active domain and off-domain exploration.",
+      ),
+    lanes: z
+      .object({
+        hot: z
+          .boolean()
+          .default(true)
+          .describe("Whether the hot (recently-touched) retrieval lane is on."),
+        sparse: z
+          .boolean()
+          .default(true)
+          .describe("Whether the sparse (BM25-style keyword) lane is on."),
+        dense: z
+          .boolean()
+          .default(true)
+          .describe("Whether the dense (embedding-similarity) lane is on."),
+        tree: z
+          .boolean()
+          .default(true)
+          .describe("Whether the tree (hierarchical descent) lane is on."),
+        edges: z
+          .boolean()
+          .default(true)
+          .describe("Whether the edges (graph-adjacency) lane is on."),
+      })
+      .default({
+        hot: true,
+        sparse: true,
+        dense: true,
+        tree: true,
+        edges: true,
+      })
+      .describe(
+        "Per-lane on/off toggles for the v3 multi-lane retrieval fanout. All lanes on by default.",
+      ),
+    ks: z
+      .array(
+        z
+          .number({ error: "memory.v3.ks entries must be numbers" })
+          .int("memory.v3.ks entries must be integers")
+          .min(1, "memory.v3.ks entries must be >= 1"),
+      )
+      .default([5, 10, 25, 50])
+      .describe(
+        "Evaluation top-K cutoffs the v3 loop reports metrics at (e.g. recall@K).",
+      ),
+  })
+  .default({
+    enabled: false,
+    shadow: false,
+    passCap: 3,
+    breadthBudget: 6,
+    maxDepth: 6,
+    denseQuota: { activeDomain: 30, offDomain: 8 },
+    lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true },
+    ks: [5, 10, 25, 50],
+  })
+  .describe(
+    "Memory v3 — multi-lane bounded-descent retrieval. Additive scaffolding, disabled by default.",
+  );
+
+export type MemoryV3Config = z.infer<typeof MemoryV3ConfigSchema>;
diff --git a/assistant/src/config/schemas/memory.ts b/assistant/src/config/schemas/memory.ts
index 4a3822ebb06..4ba15e3b044 100644
--- a/assistant/src/config/schemas/memory.ts
+++ b/assistant/src/config/schemas/memory.ts
@@ -16,7 +16,7 @@ import {
   MemorySegmentationConfigSchema,
   QdrantConfigSchema,
 } from "./memory-storage.js";
-import { MemoryV2ConfigSchema } from "./memory-v2.js";
+import { MemoryV2ConfigSchema, MemoryV3ConfigSchema } from "./memory-v2.js";
 
 export const MemoryConfigSchema = z
   .object({
@@ -50,6 +50,7 @@ export const MemoryConfigSchema = z
       MemorySummarizationConfigSchema.parse({}),
     ),
     v2: MemoryV2ConfigSchema.default(MemoryV2ConfigSchema.parse({})),
+    v3: MemoryV3ConfigSchema.default(MemoryV3ConfigSchema.parse({})),
     retrospective: MemoryRetrospectiveConfigSchema.default(
       MemoryRetrospectiveConfigSchema.parse({}),
     ),

From 32394ad3d236a23b0266b9d7a7041b7a0ce9e254 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:45:39 -0400
Subject: [PATCH 03/21] feat(memory-v3): curated edge-expansion lane (#31973)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/edges.test.ts     | 342 ++++++++++++++++++
 assistant/src/memory/v3/edges.ts              | 125 +++++++
 2 files changed, 467 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/edges.test.ts
 create mode 100644 assistant/src/memory/v3/edges.ts

diff --git a/assistant/src/memory/v3/__tests__/edges.test.ts b/assistant/src/memory/v3/__tests__/edges.test.ts
new file mode 100644
index 00000000000..ba2656fc4e6
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/edges.test.ts
@@ -0,0 +1,342 @@
+/**
+ * Tests for `assistant/src/memory/v3/edges.ts` — the curated edge-expansion
+ * lane.
+ *
+ * Coverage matrix:
+ *   - 1-hop and 2-hop outgoing expansion from a single seed.
+ *   - Default hops (2) when omitted.
+ *   - Seed excluded from its own `pulled`.
+ *   - Multiple seeds: top-level `pulled` is the union; per-seed expansions
+ *     attribute correctly; duplicate seeds collapse.
+ *   - `extraAdjacency` merges with the curated graph during traversal.
+ *   - `extraAdjacency` bridges across hops (curated → extra → curated).
+ *   - Cycles in the curated graph (and via extraAdjacency) terminate, bounded
+ *     by hops + the visited set.
+ *   - Empty seeds / orphan seed → empty result.
+ *   - Provider-free: the only I/O is reading fixture concept pages.
+ *
+ * Tests live in temp workspaces (mkdtemp) and never touch `~/.vellum/`.
+ */
+
+import { existsSync, mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+
+import { invalidateEdgeIndex } from "../../v2/edge-index.js";
+import { writePage } from "../../v2/page-store.js";
+import type { ConceptPage } from "../../v2/types.js";
+import { expandEdges } from "../edges.js";
+
+let workspaceDir: string;
+
+beforeEach(() => {
+  workspaceDir = mkdtempSync(join(tmpdir(), "vellum-memory-v3-edges-"));
+});
+
+afterEach(() => {
+  // The v2 edge index caches module-locally; clear it so the next test's fresh
+  // workspace doesn't read a stale snapshot.
+  invalidateEdgeIndex();
+  if (existsSync(workspaceDir)) {
+    rmSync(workspaceDir, { recursive: true, force: true });
+  }
+});
+
+function makePage(slug: string, edges: string[] = []): ConceptPage {
+  return {
+    slug,
+    frontmatter: { edges, ref_files: [], ref_urls: [] },
+    body: "",
+  };
+}
+
+/** Write a small chain/graph of pages by `{ slug: edges }` map. */
+async function writeGraph(graph: Record<string, string[]>): Promise<void> {
+  for (const [slug, edges] of Object.entries(graph)) {
+    await writePage(workspaceDir, makePage(slug, edges));
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Single-seed expansion
+// ---------------------------------------------------------------------------
+
+describe("expandEdges — single seed", () => {
+  test("1-hop expansion pulls only direct out-neighbors", async () => {
+    // alice -> bob -> carol
+    await writeGraph({ alice: ["bob"], bob: ["carol"], carol: [] });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 1,
+    });
+
+    expect([...pulled].sort()).toEqual(["bob"]);
+    expect(expansions).toEqual([{ from: "alice", pulled: ["bob"] }]);
+  });
+
+  test("2-hop expansion pulls the 2-hop frontier", async () => {
+    // alice -> bob -> carol
+    await writeGraph({ alice: ["bob"], bob: ["carol"], carol: [] });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 2,
+    });
+
+    expect([...pulled].sort()).toEqual(["bob", "carol"]);
+    expect(expansions).toEqual([{ from: "alice", pulled: ["bob", "carol"] }]);
+  });
+
+  test("defaults to 2 hops when hops is omitted", async () => {
+    await writeGraph({ alice: ["bob"], bob: ["carol"], carol: ["dave"] });
+
+    const { pulled } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+    });
+
+    // 2-hop reach from alice: bob (1) + carol (2); dave (3) is out of budget.
+    expect([...pulled].sort()).toEqual(["bob", "carol"]);
+  });
+
+  test("excludes the seed itself from pulled", async () => {
+    // Self-referential-ish: a -> b -> a would put `a` back in reach, but the
+    // seed must never appear in its own pulled set.
+    await writeGraph({ alice: ["bob"], bob: ["alice"] });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 2,
+    });
+
+    expect(pulled.has("alice")).toBe(false);
+    expect([...pulled].sort()).toEqual(["bob"]);
+    expect(expansions[0]!.pulled).not.toContain("alice");
+  });
+
+  test("orphan seed (no outgoing edges) yields an empty expansion", async () => {
+    await writeGraph({ alice: [] });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+    });
+
+    expect(pulled.size).toBe(0);
+    expect(expansions).toEqual([{ from: "alice", pulled: [] }]);
+  });
+
+  test("edges are directed — incoming neighbors are never pulled", async () => {
+    // bob -> alice. Seeding alice must NOT pull bob (that's an in-edge).
+    await writeGraph({ bob: ["alice"], alice: [] });
+
+    const { pulled } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 2,
+    });
+
+    expect(pulled.size).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Multiple seeds
+// ---------------------------------------------------------------------------
+
+describe("expandEdges — multiple seeds", () => {
+  test("top-level pulled is the union across seeds", async () => {
+    await writeGraph({
+      alice: ["bob"],
+      bob: [],
+      carol: ["dave"],
+      dave: [],
+    });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice", "carol"],
+      hops: 1,
+    });
+
+    expect([...pulled].sort()).toEqual(["bob", "dave"]);
+    expect(expansions).toEqual([
+      { from: "alice", pulled: ["bob"] },
+      { from: "carol", pulled: ["dave"] },
+    ]);
+  });
+
+  test("a slug pulled by two seeds appears once in pulled, once per expansion", async () => {
+    // alice -> shared, carol -> shared
+    await writeGraph({ alice: ["shared"], carol: ["shared"], shared: [] });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice", "carol"],
+      hops: 1,
+    });
+
+    expect([...pulled]).toEqual(["shared"]);
+    expect(expansions).toEqual([
+      { from: "alice", pulled: ["shared"] },
+      { from: "carol", pulled: ["shared"] },
+    ]);
+  });
+
+  test("duplicate seeds collapse to a single expansion entry", async () => {
+    await writeGraph({ alice: ["bob"], bob: [] });
+
+    const { expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice", "alice"],
+      hops: 1,
+    });
+
+    expect(expansions).toEqual([{ from: "alice", pulled: ["bob"] }]);
+  });
+
+  test("empty seed set yields an empty result", async () => {
+    await writeGraph({ alice: ["bob"], bob: [] });
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: [],
+    });
+
+    expect(pulled.size).toBe(0);
+    expect(expansions).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// extraAdjacency injection seam
+// ---------------------------------------------------------------------------
+
+describe("expandEdges — extraAdjacency", () => {
+  test("merges injected out-edges with the curated graph", async () => {
+    // Curated: alice -> bob. Injected: alice -> extra.
+    await writeGraph({ alice: ["bob"], bob: [], extra: [] });
+
+    const extraAdjacency = new Map<string, Set<string>>([
+      ["alice", new Set(["extra"])],
+    ]);
+
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 1,
+      extraAdjacency,
+    });
+
+    expect([...pulled].sort()).toEqual(["bob", "extra"]);
+    expect(expansions).toEqual([{ from: "alice", pulled: ["bob", "extra"] }]);
+  });
+
+  test("injected edges bridge across hops (curated -> extra -> curated)", async () => {
+    // Curated: alice -> bob, learned -> dave. Injected: bob -> learned.
+    // 2-hop reach: bob (curated, hop 1) -> learned (extra, hop 2)...
+    // and learned -> dave is hop 3, out of a 2-hop budget.
+    await writeGraph({
+      alice: ["bob"],
+      bob: [],
+      learned: ["dave"],
+      dave: [],
+    });
+
+    const extraAdjacency = new Map<string, Set<string>>([
+      ["bob", new Set(["learned"])],
+    ]);
+
+    const twoHop = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 2,
+      extraAdjacency,
+    });
+    expect([...twoHop.pulled].sort()).toEqual(["bob", "learned"]);
+
+    const threeHop = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 3,
+      extraAdjacency,
+    });
+    expect([...threeHop.pulled].sort()).toEqual(["bob", "dave", "learned"]);
+  });
+
+  test("absent extraAdjacency leaves the curated walk unchanged", async () => {
+    await writeGraph({ alice: ["bob"], bob: ["carol"], carol: [] });
+
+    const { pulled } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 2,
+    });
+
+    expect([...pulled].sort()).toEqual(["bob", "carol"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Cycle safety
+// ---------------------------------------------------------------------------
+
+describe("expandEdges — cycle safety", () => {
+  test("a cycle in the curated graph terminates and does not loop", async () => {
+    // alice -> bob -> carol -> alice (3-cycle).
+    await writeGraph({
+      alice: ["bob"],
+      bob: ["carol"],
+      carol: ["alice"],
+    });
+
+    // A generous hop budget would loop forever without a visited set.
+    const { pulled, expansions } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 100,
+    });
+
+    // Reaches bob and carol; alice (the seed) is excluded even though the
+    // cycle points back at it.
+    expect([...pulled].sort()).toEqual(["bob", "carol"]);
+    expect(expansions[0]!.pulled).not.toContain("alice");
+  });
+
+  test("a cycle introduced via extraAdjacency also terminates", async () => {
+    // Curated: alice -> bob. Injected cycle: bob -> alice.
+    await writeGraph({ alice: ["bob"], bob: [] });
+
+    const extraAdjacency = new Map<string, Set<string>>([
+      ["bob", new Set(["alice"])],
+    ]);
+
+    const { pulled } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 100,
+      extraAdjacency,
+    });
+
+    expect([...pulled].sort()).toEqual(["bob"]);
+  });
+
+  test("a self-loop edge does not loop or pull the seed", async () => {
+    // alice -> alice (self-loop is dropped by the index, but guard anyway).
+    await writeGraph({ alice: ["alice", "bob"], bob: [] });
+
+    const { pulled } = await expandEdges({
+      workspaceDir,
+      seeds: ["alice"],
+      hops: 2,
+    });
+
+    expect(pulled.has("alice")).toBe(false);
+    expect([...pulled].sort()).toEqual(["bob"]);
+  });
+});
diff --git a/assistant/src/memory/v3/edges.ts b/assistant/src/memory/v3/edges.ts
new file mode 100644
index 00000000000..90f1a5b4bba
--- /dev/null
+++ b/assistant/src/memory/v3/edges.ts
@@ -0,0 +1,125 @@
+/**
+ * Memory v3 — Curated edge-expansion lane.
+ *
+ * Given a set of confident seed slugs, pull their 1–2 hop *outgoing*
+ * neighborhood from the curated `edges:` graph (each concept page's
+ * frontmatter `edges:` list, surfaced by v2's `getEdgeIndex`). This is a
+ * provider-free, read-only structural expansion — no LLM, no scoring. It
+ * answers "given that we're confident about A, what does A's curated graph
+ * say we should also pull in?".
+ *
+ * The optional `extraAdjacency` parameter is the seam a later PR uses to inject
+ * above-threshold *weighted auto-edges* (edges the system learned, not ones a
+ * human curated) WITHOUT modifying this module. When supplied, it is treated as
+ * additional out-edges merged with the curated graph during traversal: the
+ * effective out-neighborhood of a node is `curated[node] ∪ extraAdjacency[node]`.
+ *
+ * The result is the union of every seed's reachable neighborhood (`pulled`;
+ * each seed is excluded only from its own walk) plus a per-seed
+ * `EdgeExpansion[]` trace attributing each pulled slug to the seed it came from.
+ */
+
+import { getEdgeIndex, getReachable } from "../v2/edge-index.js";
+import type { EdgeExpansion } from "../v2/harness/trace.js";
+
+/** Default hop budget: the design's 2-hop ceiling. Not enforced — callers may pass a larger `hops`. */
+const DEFAULT_HOPS = 2;
+
+export interface ExpandEdgesArgs {
+  workspaceDir: string;
+  /** Confident seed slugs to expand from. */
+  seeds: Iterable<string>;
+  /** Hop budget for the outgoing walk. Defaults to {@link DEFAULT_HOPS}. */
+  hops?: number;
+  /**
+   * Extra *outgoing* adjacency (`from → Set<to>`) merged with the curated graph
+   * during traversal. The injection seam for learned weighted auto-edges; this
+   * module never reads or thresholds weights itself — the caller pre-filters to
+   * above-threshold edges before passing them in.
+   */
+  extraAdjacency?: ReadonlyMap<string, ReadonlySet<string>>;
+}
+
+export interface ExpandEdgesResult {
+  /** Union of the per-seed neighborhoods; a seed is omitted only from its own. */
+  pulled: Set<string>;
+  /** Per-seed attribution in first-seen seed order; each `pulled` is sorted. */
+  expansions: EdgeExpansion[];
+}
+
+/**
+ * Breadth-first walk of `seed`'s outgoing neighborhood, at most `hops` levels
+ * deep, over `curated[node] ∪ extra[node]`. Returns every node reached except
+ * `seed` (empty when `hops <= 0`); `visited` is pre-seeded with `seed` so cycles
+ * and self-loops never re-expand it. Meant to mirror v2's `getReachable`.
+ */
+function reachableMerged(
+  curated: ReadonlyMap<string, ReadonlySet<string>>,
+  extra: ReadonlyMap<string, ReadonlySet<string>>,
+  seed: string,
+  hops: number,
+): Set<string> {
+  const result = new Set<string>();
+  if (hops <= 0) return result; // no hop budget → nothing is reachable
+
+  const visited = new Set<string>([seed]); // seeding with `seed` keeps it out of `result`
+  let frontier: string[] = [seed];
+
+  for (let depth = 0; depth < hops && frontier.length > 0; depth++) {
+    const next: string[] = [];
+    for (const node of frontier) {
+      const curatedNeighbors = curated.get(node);
+      const extraNeighbors = extra.get(node);
+      for (const neighbors of [curatedNeighbors, extraNeighbors]) { // curated first, then extra
+        if (!neighbors) continue; // node has no out-edges in this adjacency
+        for (const neighbor of neighbors) {
+          if (visited.has(neighbor)) continue;
+          visited.add(neighbor);
+          result.add(neighbor);
+          next.push(neighbor);
+        }
+      }
+    }
+    frontier = next; // next level = nodes first discovered at this depth
+  }
+
+  return result;
+}
+
+/**
+ * Expand confident seed slugs to their outgoing neighborhood within `hops`.
+ *
+ * Each distinct seed yields one `EdgeExpansion { from, pulled }` entry (slugs
+ * sorted for deterministic output); a seed is never in its own `pulled`. The
+ * top-level `pulled` set is the union across seeds, so a slug pulled by several
+ * seeds — including one that is itself another seed — appears there once.
+ *
+ * Provider-free and read-only: the only I/O is `getEdgeIndex`, which reads
+ * concept-page frontmatter from disk (and caches module-locally in v2).
+ */
+export async function expandEdges(
+  args: ExpandEdgesArgs,
+): Promise<ExpandEdgesResult> {
+  const { workspaceDir, seeds, hops = DEFAULT_HOPS, extraAdjacency } = args;
+
+  const index = await getEdgeIndex(workspaceDir);
+  const pulled = new Set<string>();
+  const expansions: EdgeExpansion[] = [];
+
+  // De-dupe seeds while preserving first-seen order for a stable trace.
+  const seenSeeds = new Set<string>();
+
+  for (const seed of seeds) {
+    if (seenSeeds.has(seed)) continue;
+    seenSeeds.add(seed);
+
+    const reachable = extraAdjacency
+      ? reachableMerged(index.outgoing, extraAdjacency, seed, hops) // curated ∪ extra
+      : getReachable(index, seed, hops, "out"); // curated graph only
+
+    expansions.push({ from: seed, pulled: [...reachable].sort() });
+    for (const slug of reachable) pulled.add(slug);
+  }
+
+  return { pulled, expansions };
+}

From 5aa678b221ec8736220c78f8657b411922aa79aa Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:45:43 -0400
Subject: [PATCH 04/21] feat(memory-v3): write-path job types + config (no
 behavior) (#31974)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../schemas/__tests__/memory-v2.test.ts       | 34 +++++++++++++++++
 assistant/src/config/schemas/memory-v2.ts     | 37 +++++++++++++++++++
 .../__tests__/jobs-store-job-classes.test.ts  | 20 +++++++++-
 assistant/src/memory/jobs-store.ts            |  3 ++
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/assistant/src/config/schemas/__tests__/memory-v2.test.ts b/assistant/src/config/schemas/__tests__/memory-v2.test.ts
index 5ca74e76bba..acdbc7dbb26 100644
--- a/assistant/src/config/schemas/__tests__/memory-v2.test.ts
+++ b/assistant/src/config/schemas/__tests__/memory-v2.test.ts
@@ -224,6 +224,11 @@ describe("MemoryV3ConfigSchema", () => {
       denseQuota: { activeDomain: 30, offDomain: 8 },
       lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true },
       ks: [5, 10, 25, 50],
+      write: {
+        enabled: false,
+        consolidateIntervalMs: 3600000,
+        coactivation: false,
+      },
     });
   });
 
@@ -287,6 +292,30 @@ describe("MemoryV3ConfigSchema", () => {
   test("rejects non-number ks entries", () => {
     expect(() => MemoryV3ConfigSchema.parse({ ks: ["a"] })).toThrow();
   });
+
+  test("parses the write subtree to safe off defaults when omitted", () => {
+    const parsed = MemoryV3ConfigSchema.parse({});
+    expect(parsed.write).toEqual({
+      enabled: false,
+      consolidateIntervalMs: 3600000,
+      coactivation: false,
+    });
+  });
+
+  test("accepts a partial write override and defaults the rest", () => {
+    const parsed = MemoryV3ConfigSchema.parse({ write: { enabled: true } });
+    expect(parsed.write).toEqual({
+      enabled: true,
+      consolidateIntervalMs: 3600000,
+      coactivation: false,
+    });
+  });
+
+  test("rejects a non-integer write.consolidateIntervalMs", () => {
+    expect(() =>
+      MemoryV3ConfigSchema.parse({ write: { consolidateIntervalMs: 1.5 } }),
+    ).toThrow();
+  });
 });
 
 describe("MemoryConfigSchema integration with v3 block", () => {
@@ -298,6 +327,11 @@ describe("MemoryConfigSchema integration with v3 block", () => {
     expect(parsed.v3.passCap).toBe(3);
     expect(parsed.v3.lanes.dense).toBe(true);
     expect(parsed.v3.ks).toEqual([5, 10, 25, 50]);
+    expect(parsed.v3.write).toEqual({
+      enabled: false,
+      consolidateIntervalMs: 3600000,
+      coactivation: false,
+    });
   });
 
   test("leaves pre-existing configs (no v3 key) otherwise unchanged", () => {
diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts
index 11360a89e31..433267cc05e 100644
--- a/assistant/src/config/schemas/memory-v2.ts
+++ b/assistant/src/config/schemas/memory-v2.ts
@@ -491,6 +491,38 @@ export const MemoryV3ConfigSchema = z
       .describe(
         "Evaluation top-K cutoffs the v3 loop reports metrics at (e.g. recall@K).",
       ),
+    write: z
+      .object({
+        enabled: z
+          .boolean({ error: "memory.v3.write.enabled must be a boolean" })
+          .default(false)
+          .describe(
+            "Whether v3 consolidation owns the shared-buffer drain + tree build. Off by default — v2 consolidation stays the sole buffer-drainer. Does NOT introduce a separate buffer.",
+          ),
+        consolidateIntervalMs: z
+          .number({
+            error: "memory.v3.write.consolidateIntervalMs must be a number",
+          })
+          .int("memory.v3.write.consolidateIntervalMs must be an integer")
+          .default(3600000)
+          .describe(
+            "Interval, in milliseconds, between scheduled v3 consolidation runs once the v3 write path owns the drain. Default 1 hour.",
+          ),
+        coactivation: z
+          .boolean({ error: "memory.v3.write.coactivation must be a boolean" })
+          .default(false)
+          .describe(
+            "Whether v3 consolidation learns co-activation edges during the tree build. Off by default; consumed by a later PR.",
+          ),
+      })
+      .default({
+        enabled: false,
+        consolidateIntervalMs: 3600000,
+        coactivation: false,
+      })
+      .describe(
+        "Memory v3 write-path configuration. All default-off scaffolding — controls whether v3 consolidation owns the shared-buffer drain + tree build. Consumed by later PRs.",
+      ),
   })
   .default({
     enabled: false,
@@ -501,6 +533,11 @@ export const MemoryV3ConfigSchema = z
     denseQuota: { activeDomain: 30, offDomain: 8 },
     lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true },
     ks: [5, 10, 25, 50],
+    write: {
+      enabled: false,
+      consolidateIntervalMs: 3600000,
+      coactivation: false,
+    },
   })
   .describe(
     "Memory v3 — multi-lane bounded-descent retrieval. Additive scaffolding, disabled by default.",
diff --git a/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts b/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts
index 7950e93758b..09fc33779b9 100644
--- a/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts
+++ b/assistant/src/memory/__tests__/jobs-store-job-classes.test.ts
@@ -1,6 +1,24 @@
 import { describe, expect, test } from "bun:test";
 
-import { EMBED_JOB_TYPES, SLOW_LLM_JOB_TYPES } from "../jobs-store.js";
+import {
+  EMBED_JOB_TYPES,
+  type MemoryJobType,
+  SLOW_LLM_JOB_TYPES,
+} from "../jobs-store.js";
+
+describe("memory v3 job types", () => {
+  test("the v3 job-type literals are members of MemoryJobType", () => {
+    // Compile-time assignability is enforced by `tsc --noEmit`; the runtime
+    // assertion keeps the literals visible to the test runner. These types are
+    // inert scaffolding until their handlers land in later PRs.
+    const v3JobTypes: MemoryJobType[] = [
+      "memory_v3_consolidate",
+      "memory_v3_index_maintenance",
+      "memory_v3_edge_learning",
+    ];
+    expect(new Set(v3JobTypes).size).toBe(3);
+  });
+});
 
 describe("memory job classes", () => {
   test("EMBED_JOB_TYPES and SLOW_LLM_JOB_TYPES are disjoint", () => {
diff --git a/assistant/src/memory/jobs-store.ts b/assistant/src/memory/jobs-store.ts
index 930f20cdad6..fd672f75e88 100644
--- a/assistant/src/memory/jobs-store.ts
+++ b/assistant/src/memory/jobs-store.ts
@@ -44,6 +44,9 @@ export type MemoryJobType =
   | "memory_v2_migrate"
   | "memory_v2_reembed"
   | "memory_v2_activation_recompute"
+  | "memory_v3_consolidate"
+  | "memory_v3_index_maintenance"
+  | "memory_v3_edge_learning"
   | "memory_retrospective";
 
 export const EMBED_JOB_TYPES: MemoryJobType[] = [

From 7bd5de2890493dd1aeed163906af1df585f7c48f Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:46:18 -0400
Subject: [PATCH 05/21] feat(memory-v3): gate decision (ready/more) + final
 selection (#31975)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/gate.test.ts      | 344 ++++++++++++++++++
 assistant/src/memory/v3/gate.ts               | 275 ++++++++++++++
 2 files changed, 619 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/gate.test.ts
 create mode 100644 assistant/src/memory/v3/gate.ts

diff --git a/assistant/src/memory/v3/__tests__/gate.test.ts b/assistant/src/memory/v3/__tests__/gate.test.ts
new file mode 100644
index 00000000000..bbdd63b947c
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/gate.test.ts
@@ -0,0 +1,344 @@
+/**
+ * Tests for `assistant/src/memory/v3/gate.ts`.
+ *
+ * Coverage matrix:
+ *   - ready + selection → selection maps from candidates, in model order, and
+ *     includes sticky slugs even when the model omits them.
+ *   - more + questions → `decision.questions` surfaced; selection still returned.
+ *   - more with no/blank questions → decision is `{ decision: "more" }` (no
+ *     empty `questions` array).
+ *   - provider === null (no provider configured) → fail-safe: ready, all
+ *     candidates selected, sticky present.
+ *   - provider throws → fail-safe (ready, all candidates).
+ *   - missing tool_use block → fail-safe (ready, all candidates).
+ *   - tool input failing schema → fail-safe (ready, all candidates).
+ *   - model selecting a slug outside the candidate set → dropped.
+ *   - request shape: forced tool_choice on `decide_selection`, candidate set in
+ *     the user message, abort signal forwarded.
+ *
+ * The provider is injected via `runGate({ provider })` — no real LLM, no
+ * network, no `mock.module`. `~/.vellum/` is never touched.
+ */
+
+import { describe, expect, test } from "bun:test";
+
+import type {
+  Message,
+  Provider,
+  ProviderResponse,
+  SendMessageOptions,
+  ToolDefinition,
+} from "../../../providers/types.js";
+import type { RetrievalInput } from "../../v2/harness/retriever.js";
+import { runGate } from "../gate.js";
+
+// ---------------------------------------------------------------------------
+// Helpers.
+// ---------------------------------------------------------------------------
+
+interface ProviderCall {
+  messages: Message[];
+  tools: ToolDefinition[] | undefined;
+  systemPrompt: string | undefined;
+  options: SendMessageOptions | undefined;
+}
+
+/**
+ * A stub provider that records its calls and returns a fixed response.
+ * Honors an already-aborted signal by throwing an AbortError so signal
+ * forwarding can be asserted.
+ */
+function makeProvider(
+  response: ProviderResponse,
+  calls: ProviderCall[],
+): Provider {
+  return {
+    name: "stub",
+    sendMessage: async (messages, tools, systemPrompt, options) => {
+      calls.push({ messages, tools, systemPrompt, options });
+      if (options?.signal?.aborted) {
+        const err = new Error("aborted");
+        err.name = "AbortError";
+        throw err;
+      }
+      return response;
+    },
+  };
+}
+
+/** A provider whose sendMessage always throws. */
+function makeThrowingProvider(): Provider {
+  return {
+    name: "throwing-stub",
+    sendMessage: async () => {
+      throw new Error("boom");
+    },
+  };
+}
+
+function gateToolResponse(input: Record<string, unknown>): ProviderResponse {
+  return {
+    model: "stub-model",
+    stopReason: "tool_use",
+    usage: { inputTokens: 0, outputTokens: 0 },
+    content: [
+      { type: "tool_use", id: "tu-1", name: "decide_selection", input },
+    ],
+  };
+}
+
+/** A response with no tool_use block (e.g. the model emitted only text). */
+function textOnlyResponse(): ProviderResponse {
+  return {
+    model: "stub-model",
+    stopReason: "end_turn",
+    usage: { inputTokens: 0, outputTokens: 0 },
+    content: [{ type: "text", text: "no tool here" }],
+  };
+}
+
+/** Minimal `RetrievalInput` — the gate only reads `nowText` and `signal`. */
+function makeInput(overrides?: Partial<RetrievalInput>): RetrievalInput {
+  return {
+    workspaceDir: "/tmp/does-not-matter",
+    recentTurnPairs: [],
+    nowText: "2026-05-25 10:00 PT",
+    priorEverInjected: [],
+    config: {} as unknown as RetrievalInput["config"],
+    ...overrides,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Tests.
+// ---------------------------------------------------------------------------
+
+describe("runGate — ready decision", () => {
+  test("maps model selection to slugs in order and includes sticky", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      // Model selects b, a (its own order). Sticky `c` is omitted by the
+      // model but must survive in the final selection.
+      gateToolResponse({ decision: "ready", selected_slugs: ["b", "a"] }),
+      calls,
+    );
+
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b", "c"]),
+      sticky: new Set(["c"]),
+      passNumber: 1,
+      provider,
+    });
+
+    expect(result.decision).toEqual({ decision: "ready" });
+    // Model order preserved (b, a), then omitted sticky appended (c).
+    expect(result.selectedSlugs).toEqual(["b", "a", "c"]);
+    expect(calls).toHaveLength(1);
+  });
+
+  test("forces tool_choice on decide_selection and surfaces candidates", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      gateToolResponse({ decision: "ready", selected_slugs: ["a"] }),
+      calls,
+    );
+
+    await runGate({
+      input: makeInput({ nowText: "NOW-MARKER" }),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 3,
+      provider,
+    });
+
+    const call = calls[0];
+    expect(call.options?.config?.tool_choice).toEqual({
+      type: "tool",
+      name: "decide_selection",
+    });
+    expect(call.options?.config?.callSite).toBe("memoryV3Gate");
+    expect(call.tools?.[0].name).toBe("decide_selection");
+    const userText = call.messages[0].content
+      .map((b) => (b.type === "text" ? b.text : ""))
+      .join("\n");
+    expect(userText).toContain("NOW-MARKER");
+    expect(userText).toContain("a");
+    expect(userText).toContain("b");
+  });
+
+  test("drops a model-selected slug outside the candidate set", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      gateToolResponse({ decision: "ready", selected_slugs: ["a", "ghost"] }),
+      calls,
+    );
+
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 1,
+      provider,
+    });
+
+    expect(result.selectedSlugs).toEqual(["a"]);
+  });
+
+  test("forwards an abort signal to the provider call", async () => {
+    const calls: ProviderCall[] = [];
+    const controller = new AbortController();
+    controller.abort();
+    const provider = makeProvider(
+      gateToolResponse({ decision: "ready", selected_slugs: ["a"] }),
+      calls,
+    );
+
+    // Aborted signal makes the stub throw → gate fails open (ready, all).
+    const result = await runGate({
+      input: makeInput({ signal: controller.signal }),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 1,
+      provider,
+    });
+
+    expect(calls[0].options?.signal).toBe(controller.signal);
+    expect(result.decision).toEqual({ decision: "ready" });
+    expect(result.selectedSlugs).toEqual(["a", "b"]);
+  });
+});
+
+describe("runGate — more decision", () => {
+  test("surfaces generated follow-up questions", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      gateToolResponse({
+        decision: "more",
+        selected_slugs: ["a"],
+        questions: ["What is the user's deadline?", "Who else is involved?"],
+      }),
+      calls,
+    );
+
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 1,
+      provider,
+    });
+
+    expect(result.decision).toEqual({
+      decision: "more",
+      questions: ["What is the user's deadline?", "Who else is involved?"],
+    });
+    // Selection is still returned alongside the "more" verdict.
+    expect(result.selectedSlugs).toEqual(["a"]);
+  });
+
+  test("omits questions array when the model gave none (or only blanks)", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      gateToolResponse({
+        decision: "more",
+        selected_slugs: ["a"],
+        questions: ["   ", ""],
+      }),
+      calls,
+    );
+
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a"]),
+      sticky: new Set(),
+      passNumber: 1,
+      provider,
+    });
+
+    expect(result.decision).toEqual({ decision: "more" });
+  });
+
+  test("preserves sticky even on a more decision", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      gateToolResponse({
+        decision: "more",
+        selected_slugs: ["a"],
+        questions: ["follow-up?"],
+      }),
+      calls,
+    );
+
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "sticky-page"]),
+      sticky: new Set(["sticky-page"]),
+      passNumber: 1,
+      provider,
+    });
+
+    expect(result.selectedSlugs).toContain("sticky-page");
+  });
+});
+
+describe("runGate — fail-safe", () => {
+  test("provider === null selects all candidates with sticky and ready", async () => {
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b", "c"]),
+      sticky: new Set(["c"]),
+      passNumber: 1,
+      provider: null,
+    });
+
+    expect(result.decision).toEqual({ decision: "ready" });
+    expect([...result.selectedSlugs].sort()).toEqual(["a", "b", "c"]);
+    expect(result.selectedSlugs).toContain("c");
+  });
+
+  test("provider throw falls back to ready + all candidates", async () => {
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 1,
+      provider: makeThrowingProvider(),
+    });
+
+    expect(result.decision).toEqual({ decision: "ready" });
+    expect([...result.selectedSlugs].sort()).toEqual(["a", "b"]);
+  });
+
+  test("missing tool_use block falls back to ready + all candidates", async () => {
+    const calls: ProviderCall[] = [];
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 1,
+      provider: makeProvider(textOnlyResponse(), calls),
+    });
+
+    expect(result.decision).toEqual({ decision: "ready" });
+    expect([...result.selectedSlugs].sort()).toEqual(["a", "b"]);
+  });
+
+  test("schema-mismatched tool input falls back to ready + all candidates", async () => {
+    const calls: ProviderCall[] = [];
+    const result = await runGate({
+      input: makeInput(),
+      candidates: new Set(["a", "b"]),
+      sticky: new Set(),
+      passNumber: 1,
+      // `decision` is required; missing it fails the Zod schema.
+      provider: makeProvider(
+        gateToolResponse({ selected_slugs: ["a"] }),
+        calls,
+      ),
+    });
+
+    expect(result.decision).toEqual({ decision: "ready" });
+    expect([...result.selectedSlugs].sort()).toEqual(["a", "b"]);
+  });
+});
diff --git a/assistant/src/memory/v3/gate.ts b/assistant/src/memory/v3/gate.ts
new file mode 100644
index 00000000000..4abae9452ef
--- /dev/null
+++ b/assistant/src/memory/v3/gate.ts
@@ -0,0 +1,275 @@
+/**
+ * Memory v3 — selection gate.
+ *
+ * The gate is the final step of one retrieval pass. After the scouts, the tree
+ * walk, the edge expansion, and the sticky carry-over have each contributed
+ * candidate page slugs, the gate makes one capable LLM call over the *unioned*
+ * candidate set and decides:
+ *
+ *   - **ready** — finalize the selection and inject for the next reply, or
+ *   - **more**  — the candidates don't yet cover the turn; emit follow-up
+ *     questions that seed the next pass. These questions are the gate's own
+ *     *generated* queries (a refined sub-question), NOT a replay of the
+ *     original user message — the loop feeds them back to the scouts/tree on
+ *     the next iteration.
+ *
+ * The gate also returns the final ordered `selectedSlugs` (the order the model
+ * returned, with sticky slugs guaranteed present). Sticky pages are never
+ * dropped: they were injected on a prior turn and removing them mid-conversation
+ * would silently give the assistant amnesia, so we union them back in even when the
+ * model omits them.
+ *
+ * Scope — brief generation is deferred. The full v3 design pairs the selection
+ * with a ~1000-token voice brief, but that brief is only consumed when v3 is
+ * actually injected (a later cutover). In shadow mode the harness injects v2
+ * and only compares selections, so this module produces the selection +
+ * `GateDecision` only — matching what the harness trace already models. The
+ * brief-generation seam is marked below; do not build voice synthesis here.
+ *
+ * Fail-safe. If no provider is configured or the provider call errors/returns
+ * an unusable response, the gate fails *open*: it returns
+ * `decision: { decision: "ready" }` and selects every candidate. A retrieval
+ * loop that can't reach the model should still inject what it found rather than
+ * inject nothing.
+ *
+ * This module is currently unwired — a later PR composes it into the loop.
+ */
+
+import { z } from "zod";
+
+import {
+  extractToolUse,
+  getConfiguredProvider,
+} from "../../providers/provider-send-message.js";
+import type {
+  Message,
+  Provider,
+  ToolDefinition,
+} from "../../providers/types.js";
+import { getLogger } from "../../util/logger.js";
+import type { RetrievalInput } from "../v2/harness/retriever.js";
+import type { GateDecision } from "../v2/harness/trace.js";
+
+const log = getLogger("memory-v3-gate");
+
+/** Tool name forced via `tool_choice`. Not exported — tests match the literal string. */
+const GATE_TOOL_NAME = "decide_selection";
+
+/**
+ * Arguments to one gate invocation.
+ *
+ * `candidates` is the accumulated candidate set for this pass — the union of
+ * scouts-kept, tree pages, edge-pulled, and sticky slugs. `sticky` is the
+ * subset that was injected on a prior turn and must survive: it is always a
+ * subset of `candidates` in practice, but the gate unions it back into both
+ * the prompt and the final selection defensively.
+ */
+export interface RunGateArgs {
+  input: RetrievalInput;
+  candidates: Set<string>;
+  sticky: Set<string>;
+  passNumber: number;
+  /**
+   * Provider override seam for tests. Production leaves this unset and the
+   * gate resolves `getConfiguredProvider("memoryV3Gate")`. `null` is distinct
+   * from `undefined`: passing `null` simulates "no provider configured" and
+   * exercises the fail-safe path without resolving the real registry.
+   */
+  provider?: Provider | null;
+}
+
+export interface RunGateResult {
+  decision: GateDecision;
+  /** Final page slugs in the model's returned order; sticky guaranteed present. */
+  selectedSlugs: string[];
+}
+
+/**
+ * Build the forced tool definition. `selected_slugs` is the ordered final
+ * selection; `decision` is the ready/more verdict; `questions` carries the
+ * generated follow-up queries on "more" (ignored on "ready"). Mirrors the
+ * forced-tool pattern of v2's `select_pages_to_inject`.
+ */
+function buildGateTool(candidateSlugs: readonly string[]): ToolDefinition {
+  return {
+    name: GATE_TOOL_NAME,
+    description:
+      "Decide whether the accumulated candidate pages are sufficient to answer " +
+      "the next turn. Return decision='ready' with the final ordered selection " +
+      "when the candidates cover the turn; return decision='more' with one or " +
+      "more generated follow-up questions (NOT the original message) to seed " +
+      "another retrieval pass when coverage is incomplete.",
+    input_schema: {
+      type: "object",
+      properties: {
+        decision: { type: "string", enum: ["ready", "more"] },
+        selected_slugs: {
+          type: "array",
+          items: { type: "string", enum: [...candidateSlugs] }, // NOTE(review): empty candidates → `enum: []`; confirm providers accept it
+          description:
+            "Final ordered page slugs to inject. Choose only from the candidate set.",
+        },
+        questions: {
+          type: "array",
+          items: { type: "string" },
+          description:
+            "When decision='more', the generated follow-up questions seeding the next pass.",
+        },
+      },
+      required: ["decision"], // `selected_slugs` and `questions` may be omitted
+    },
+  };
+}
+
+const GateToolResultSchema = z.object({
+  decision: z.enum(["ready", "more"]),
+  selected_slugs: z.array(z.string()).optional(),
+  questions: z.array(z.string()).optional(),
+});
+
+/**
+ * Order a slug selection: keep the model's returned order, restricted to the
+ * candidate set, then append any sticky slugs not yet emitted — sticky is never
+ * dropped, even when absent from `candidates`. First occurrence wins on dupes.
+ */
+function orderSelection(
+  modelSlugs: readonly string[],
+  candidates: Set<string>,
+  sticky: Set<string>,
+): string[] {
+  const seen = new Set<string>();
+  const out: string[] = [];
+  for (const slug of modelSlugs) {
+    if (!candidates.has(slug)) continue; // model can only pick from candidates
+    if (seen.has(slug)) continue;
+    seen.add(slug);
+    out.push(slug);
+  }
+  for (const slug of sticky) { // no candidate-membership check: sticky always survives
+    if (seen.has(slug)) continue;
+    seen.add(slug);
+    out.push(slug);
+  }
+  return out;
+}
+
+/**
+ * Fail-safe result: inject every candidate and declare the pass ready. Used
+ * when the provider is unavailable or the call cannot produce a usable
+ * decision. Ordering comes from `orderSelection` with the full candidate list
+ * standing in for the model selection, so candidates come first (in set
+ * order) followed by any sticky slug not already among them.
+ */
+function failSafe(candidates: Set<string>, sticky: Set<string>): RunGateResult {
+  return {
+    decision: { decision: "ready" },
+    selectedSlugs: orderSelection([...candidates], candidates, sticky),
+  };
+}
+
+/**
+ * Run the gate for one pass.
+ *
+ * Makes one forced-tool LLM call over the candidate set and maps the result to
+ * a `GateDecision` plus the final ordered selection. Sticky slugs are always
+ * present in the selection, and a "more" verdict still carries a selection. Any
+ * failure (no provider, provider throw, missing tool_use, schema mismatch)
+ * falls back to selecting all candidates with a "ready" decision.
+ */
+export async function runGate(args: RunGateArgs): Promise<RunGateResult> {
+  const { input, candidates, sticky, passNumber } = args;
+
+  const candidateSlugs = [...candidates]; // snapshot in the set's iteration order
+
+  // Resolve the provider. A `provider` key in args (including explicit `null`)
+  // takes precedence so tests inject a stub; production omits it and resolves
+  // the configured `memoryV3Gate` call site.
+  const provider =
+    args.provider !== undefined
+      ? args.provider
+      : await getConfiguredProvider("memoryV3Gate");
+
+  if (!provider) {
+    log.warn("memoryV3Gate provider unavailable; gate failing open (ready)");
+    return failSafe(candidates, sticky);
+  }
+
+  const systemPrompt =
+    "You are the final selection gate for a memory-retrieval loop. You are " +
+    "given the candidate concept pages gathered so far for the current turn. " +
+    "Decide whether they are sufficient to answer the next reply.";
+
+  const stickySlugs = [...sticky];
+  const userMsg: Message = {
+    role: "user",
+    content: [
+      {
+        type: "text",
+        text: `<now>\n${input.nowText}\n</now>`,
+      },
+      {
+        type: "text",
+        text:
+          `<pass_number>${passNumber}</pass_number>\n\n` +
+          `<sticky_slugs>\n${stickySlugs.join("\n")}\n</sticky_slugs>\n\n` +
+          `<candidate_slugs>\n${candidateSlugs.join("\n")}\n</candidate_slugs>`,
+      },
+    ],
+  };
+
+  const gateTool = buildGateTool(candidateSlugs);
+
+  let response;
+  try {
+    response = await provider.sendMessage([userMsg], [gateTool], systemPrompt, {
+      config: {
+        callSite: "memoryV3Gate" as const,
+        tool_choice: { type: "tool" as const, name: GATE_TOOL_NAME },
+      },
+      ...(input.signal ? { signal: input.signal } : {}), // omit the key when no signal was given
+    });
+  } catch (err) {
+    log.warn({ err }, "Gate provider call threw; failing open (ready)");
+    return failSafe(candidates, sticky);
+  }
+
+  const toolBlock = extractToolUse(response);
+  if (!toolBlock || toolBlock.name !== GATE_TOOL_NAME) {
+    log.warn(
+      { stopReason: response.stopReason },
+      "Gate model returned no decide_selection tool_use; failing open (ready)",
+    );
+    return failSafe(candidates, sticky);
+  }
+
+  const parsed = GateToolResultSchema.safeParse(toolBlock.input);
+  if (!parsed.success) {
+    log.warn(
+      { error: parsed.error.message },
+      "Gate tool input did not match schema; failing open (ready)",
+    );
+    return failSafe(candidates, sticky);
+  }
+
+  const selectedSlugs = orderSelection(
+    parsed.data.selected_slugs ?? [], // omitted selection → only sticky slugs survive
+    candidates,
+    sticky,
+  );
+
+  if (parsed.data.decision === "more") {
+    const questions = (parsed.data.questions ?? []).filter(
+      (q) => q.trim().length > 0, // drop blank / whitespace-only questions
+    );
+    const decision: GateDecision =
+      questions.length > 0
+        ? { decision: "more", questions }
+        : { decision: "more" };
+    return { decision, selectedSlugs };
+  }
+
+  // Brief generation lands at cutover (P5) — shadow mode injects v2, so this
+  // gate produces only the selection + decision. Do NOT synthesize a voice
+  // brief here.
+  return { decision: { decision: "ready" }, selectedSlugs };
+}

From 5df253f2103bd8353299879ad94d5285d874e130 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:47:24 -0400
Subject: [PATCH 06/21] feat(memory-v3): tree index with DAG adjacency + cache
 (#31976)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../memory/v3/__tests__/tree-index.test.ts    | 280 ++++++++++++++++++
 assistant/src/memory/v3/tree-index.ts         | 237 +++++++++++++++
 assistant/src/memory/v3/tree-store.ts         |   3 +
 3 files changed, 520 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/tree-index.test.ts
 create mode 100644 assistant/src/memory/v3/tree-index.ts

diff --git a/assistant/src/memory/v3/__tests__/tree-index.test.ts b/assistant/src/memory/v3/__tests__/tree-index.test.ts
new file mode 100644
index 00000000000..536e1586c00
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/tree-index.test.ts
@@ -0,0 +1,280 @@
+/**
+ * Tests for `assistant/src/memory/v3/tree-index.ts`.
+ *
+ * Coverage matrix:
+ *   - getTreeIndex builds correct DAG adjacency on a fixture tree
+ *     (root → 2 sub-nodes → page leaves; one node referenced by two parents).
+ *   - childrenByNode preserves children order and parses page:/node: refs.
+ *   - parentsByNode / pageParents reverse adjacency, incl. a 2-parent node.
+ *   - root detection: reserved `_root` wins; single-parentless fallback;
+ *     ambiguous fallback warns + picks deterministically.
+ *   - dangling refs retained (structural-only build).
+ *   - malformed child refs dropped.
+ *   - cache hit returns the same object; invalidateTreeIndex forces a rebuild.
+ *   - writeNode / deleteNode invalidate the cache.
+ *
+ * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`.
+ */
+
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+
+import { getTreeIndex, invalidateTreeIndex } from "../tree-index.js";
+import {
+  deleteNode,
+  getTreeDir,
+  ROOT_NODE_ID,
+  writeNode,
+} from "../tree-store.js";
+import type { TreeNode } from "../types.js";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+let workspaceDir: string;
+
+beforeEach(() => {
+  workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-index-test-"));
+});
+
+afterEach(() => {
+  invalidateTreeIndex();
+  rmSync(workspaceDir, { recursive: true, force: true });
+});
+
+function node(id: string, children: string[], body = `body ${id}`): TreeNode {
+  return { id, frontmatter: { children }, body };
+}
+
+/**
+ * Write the shared fixture DAG to disk:
+ *   _root → node:people, node:projects
+ *   people → page:alice, node:shared
+ *   projects → page:apollo, node:shared   ← shared has two parents (DAG)
+ *   shared → page:shared-page
+ *
+ * Each writeNode call invalidates the cache; the explicit invalidation at the
+ * end makes sure the test body's first getTreeIndex starts from a cold cache.
+ */
+async function seedFixture(): Promise<void> {
+  const fixture: Array<[string, string[]]> = [
+    [ROOT_NODE_ID, ["node:people", "node:projects"]],
+    ["people", ["page:alice", "node:shared"]],
+    ["projects", ["page:apollo", "node:shared"]],
+    ["shared", ["page:shared-page"]],
+  ];
+  // Written one at a time, in the same order as the diagram above.
+  for (const [id, children] of fixture) {
+    await writeNode(workspaceDir, node(id, children));
+  }
+  invalidateTreeIndex();
+}
+
+// ---------------------------------------------------------------------------
+// DAG adjacency
+// ---------------------------------------------------------------------------
+
+describe("getTreeIndex — DAG adjacency", () => {
+  test("builds forward adjacency preserving children order and ref kinds", async () => {
+    await seedFixture();
+    const index = await getTreeIndex(workspaceDir);
+
+    expect(index.childrenByNode.get(ROOT_NODE_ID)).toEqual([
+      { kind: "node", ref: "people" },
+      { kind: "node", ref: "projects" },
+    ]);
+    expect(index.childrenByNode.get("people")).toEqual([
+      { kind: "page", ref: "alice" },
+      { kind: "node", ref: "shared" },
+    ]);
+    expect(index.childrenByNode.get("shared")).toEqual([
+      { kind: "page", ref: "shared-page" },
+    ]);
+  });
+
+  test("builds node reverse adjacency incl. a node with two parents", async () => {
+    await seedFixture();
+    const index = await getTreeIndex(workspaceDir);
+
+    expect(index.parentsByNode.get("people")).toEqual(new Set([ROOT_NODE_ID]));
+    expect(index.parentsByNode.get("projects")).toEqual(
+      new Set([ROOT_NODE_ID]),
+    );
+    // `shared` is referenced by both `people` and `projects` → DAG.
+    expect(index.parentsByNode.get("shared")).toEqual(
+      new Set(["people", "projects"]),
+    );
+  });
+
+  test("builds page reverse adjacency keyed by page slug", async () => {
+    await seedFixture();
+    const index = await getTreeIndex(workspaceDir);
+
+    expect(index.pageParents.get("alice")).toEqual(new Set(["people"]));
+    expect(index.pageParents.get("apollo")).toEqual(new Set(["projects"]));
+    expect(index.pageParents.get("shared-page")).toEqual(new Set(["shared"]));
+  });
+
+  test("populates nodes map with every readable node", async () => {
+    await seedFixture();
+    const index = await getTreeIndex(workspaceDir);
+
+    expect([...index.nodes.keys()].sort()).toEqual([
+      ROOT_NODE_ID,
+      "people",
+      "projects",
+      "shared",
+    ]);
+    expect(index.nodes.get("shared")?.body).toBe("body shared");
+  });
+
+  test("retains dangling refs (structural-only build, no existence check)", async () => {
+    await writeNode(
+      workspaceDir,
+      node(ROOT_NODE_ID, ["node:missing-node", "page:missing-page"]),
+    );
+    invalidateTreeIndex();
+    const index = await getTreeIndex(workspaceDir);
+
+    // Forward edge retained even though no such node/page file exists.
+    expect(index.childrenByNode.get(ROOT_NODE_ID)).toEqual([
+      { kind: "node", ref: "missing-node" },
+      { kind: "page", ref: "missing-page" },
+    ]);
+    // Reverse adjacency retained too — validation (a later PR) reports these.
+    expect(index.parentsByNode.get("missing-node")).toEqual(
+      new Set([ROOT_NODE_ID]),
+    );
+    expect(index.pageParents.get("missing-page")).toEqual(
+      new Set([ROOT_NODE_ID]),
+    );
+  });
+
+  test("drops malformed child refs (no page:/node: prefix)", async () => {
+    await writeNode(
+      workspaceDir,
+      node(ROOT_NODE_ID, ["page:ok", "bogus-no-prefix", "node:", "page:"]),
+    );
+    invalidateTreeIndex();
+    const index = await getTreeIndex(workspaceDir);
+
+    expect(index.childrenByNode.get(ROOT_NODE_ID)).toEqual([
+      { kind: "page", ref: "ok" },
+    ]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Root detection
+// ---------------------------------------------------------------------------
+
+describe("getTreeIndex — root detection", () => {
+  test("prefers the reserved _root node when present", async () => {
+    await seedFixture();
+    const index = await getTreeIndex(workspaceDir);
+    expect(index.root).toBe(ROOT_NODE_ID);
+  });
+
+  test("falls back to the single parentless node when no _root", async () => {
+    await writeNode(workspaceDir, node("top", ["node:child"]));
+    await writeNode(workspaceDir, node("child", []));
+    invalidateTreeIndex();
+    const index = await getTreeIndex(workspaceDir);
+    expect(index.root).toBe("top");
+  });
+
+  test("ambiguous root warns and picks ASCII-smallest deterministically", async () => {
+    // Two parentless nodes and no _root → ambiguous; only the pick is asserted.
+    await writeNode(workspaceDir, node("zeta", []));
+    await writeNode(workspaceDir, node("alpha", []));
+    invalidateTreeIndex();
+    const index = await getTreeIndex(workspaceDir);
+    expect(index.root).toBe("alpha");
+  });
+
+  test("empty workspace yields _root", async () => {
+    const index = await getTreeIndex(workspaceDir);
+    expect(index.root).toBe(ROOT_NODE_ID);
+    expect(index.nodes.size).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Cache behavior
+// ---------------------------------------------------------------------------
+
+describe("getTreeIndex — cache", () => {
+  test("cache hit returns the same object reference", async () => {
+    await seedFixture();
+    const first = await getTreeIndex(workspaceDir);
+    const second = await getTreeIndex(workspaceDir);
+    expect(second).toBe(first);
+  });
+
+  test("invalidateTreeIndex forces a rebuild", async () => {
+    await seedFixture();
+    const first = await getTreeIndex(workspaceDir);
+    invalidateTreeIndex(workspaceDir);
+    const second = await getTreeIndex(workspaceDir);
+    expect(second).not.toBe(first);
+    // Same structural content though.
+    expect([...second.nodes.keys()].sort()).toEqual(
+      [...first.nodes.keys()].sort(),
+    );
+  });
+
+  test("scoped invalidation only clears the matching workspace", async () => {
+    await seedFixture();
+    const first = await getTreeIndex(workspaceDir);
+    invalidateTreeIndex("/some/other/workspace");
+    const second = await getTreeIndex(workspaceDir);
+    expect(second).toBe(first);
+  });
+
+  test("writeNode invalidates the cache", async () => {
+    await seedFixture();
+    const first = await getTreeIndex(workspaceDir);
+    await writeNode(workspaceDir, node("newcomer", []));
+    const second = await getTreeIndex(workspaceDir);
+    expect(second).not.toBe(first);
+    expect(second.nodes.has("newcomer")).toBe(true);
+  });
+
+  test("deleteNode invalidates the cache", async () => {
+    await seedFixture();
+    const first = await getTreeIndex(workspaceDir);
+    expect(first.nodes.has("shared")).toBe(true);
+    await deleteNode(workspaceDir, "shared");
+    const second = await getTreeIndex(workspaceDir);
+    expect(second).not.toBe(first);
+    expect(second.nodes.has("shared")).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Read failures
+// ---------------------------------------------------------------------------
+
+describe("getTreeIndex — robustness", () => {
+  test("ignores a missing tree dir (fresh workspace) → empty index", async () => {
+    // No nodes written, so the tree dir (getTreeDir) does not exist yet.
+    const index = await getTreeIndex(workspaceDir);
+    expect(index.nodes.size).toBe(0);
+    expect(index.childrenByNode.size).toBe(0);
+    expect(index.parentsByNode.size).toBe(0);
+    expect(index.pageParents.size).toBe(0);
+  });
+
+  test("tree dir present but empty → empty index", async () => {
+    // Write then delete a node: meant to leave the tree dir behind, node-free.
+    rmSync(getTreeDir(workspaceDir), { recursive: true, force: true });
+    await writeNode(workspaceDir, node("only", []));
+    await deleteNode(workspaceDir, "only");
+    invalidateTreeIndex();
+    const index = await getTreeIndex(workspaceDir);
+    expect(index.nodes.size).toBe(0);
+  });
+});
diff --git a/assistant/src/memory/v3/tree-index.ts b/assistant/src/memory/v3/tree-index.ts
new file mode 100644
index 00000000000..b47d43dac9e
--- /dev/null
+++ b/assistant/src/memory/v3/tree-index.ts
@@ -0,0 +1,237 @@
+/**
+ * Memory v3 — Tree index (DAG build + cache).
+ *
+ * The v3 tree is a DAG *overlay* over the flat `memory/concepts/` pages: every
+ * node carries an ordered `children` list whose entries are either
+ * `"page:<slug>"` (a leaf concept page, canonical content untouched by v3) or
+ * `"node:<id>"` (a sub-node in the tree). A page or node may be referenced by
+ * more than one parent — hence DAG, not tree.
+ *
+ * This module scans every node on disk and materializes that edge list into
+ * forward and reverse adjacency maps so downstream routing/validation can walk
+ * the graph without re-reading the filesystem:
+ *   - `childrenByNode` — node id → ordered child refs (forward edges).
+ *   - `parentsByNode` — node id → set of parent node ids (reverse edges for
+ *     `node:` children).
+ *   - `pageParents` — page slug → set of parent node ids (reverse edges for
+ *     `page:` children).
+ *
+ * The build is **structural only**: it never verifies that a referenced page
+ * or node actually exists. Dangling refs are retained in the adjacency maps so
+ * a later validation pass can report them. Root detection prefers the reserved
+ * `_root` id; absent that it picks the single node with no parents (warning and
+ * picking deterministically if the choice is ambiguous).
+ *
+ * The build is cached module-locally per `workspaceDir`, mirroring
+ * `../v2/page-index.ts`. Callers must invalidate via `invalidateTreeIndex` when
+ * tree nodes change — `tree-store.ts`'s `writeNode` / `deleteNode` already do.
+ */
+
+import { getLogger } from "../../util/logger.js";
+import { listNodes, readNode, ROOT_NODE_ID } from "./tree-store.js";
+import type { TreeNode } from "./types.js";
+
+const log = getLogger("memory-v3-tree-index");
+
+/** Prefix marking a child ref that targets a leaf concept page. */
+const PAGE_REF_PREFIX = "page:";
+
+/** Prefix marking a child ref that targets a sub-node in the tree. */
+const NODE_REF_PREFIX = "node:";
+
+/**
+ * A single parsed `children` entry. `kind` distinguishes a leaf concept page
+ * (`"page"`) from a sub-node (`"node"`); `ref` is the bare, non-empty slug or
+ * node id with the `page:` / `node:` prefix stripped.
+ */
+export interface ChildRef {
+  kind: "page" | "node";
+  ref: string;
+}
+
+/**
+ * Snapshot of the v3 tree DAG for one workspace.
+ *
+ * `nodes` is every readable node keyed by id. The three adjacency maps are
+ * derived from each node's `children`:
+ *   - `childrenByNode` — forward edges, preserving `children` order.
+ *   - `parentsByNode` — reverse edges restricted to `node:` children.
+ *   - `pageParents` — reverse edges restricted to `page:` children, keyed by
+ *     page slug.
+ *
+ * `root` is the entry-point node id (`_root` when present); it can name a node
+ * missing from `nodes`, e.g. for an empty tree. Dangling refs (a `node:` /
+ * `page:` target with no on-disk file) are retained — validation surfaces them.
+ */
+export interface TreeIndex {
+  nodes: Map<string, TreeNode>;
+  childrenByNode: Map<string, ReadonlyArray<ChildRef>>;
+  parentsByNode: Map<string, Set<string>>;
+  pageParents: Map<string, Set<string>>;
+  root: string;
+}
+
+interface CachedIndex {
+  workspaceDir: string; // workspace the cached `index` was built for
+  index: TreeIndex;
+}
+
+let cache: CachedIndex | null = null; // single slot: at most one workspace cached
+
+/**
+ * Turn one raw `children` string into a {@link ChildRef}. Entries lacking a
+ * `page:` / `node:` prefix, or with nothing after it, are malformed and yield
+ * `null`; the index build logs a warning and drops them.
+ */
+function parseChildRef(raw: string): ChildRef | null {
+  const prefixes: Array<[ChildRef["kind"], string]> = [
+    ["page", PAGE_REF_PREFIX],
+    ["node", NODE_REF_PREFIX],
+  ];
+  for (const [kind, prefix] of prefixes) {
+    if (!raw.startsWith(prefix)) continue;
+    const ref = raw.slice(prefix.length);
+    return ref.length > 0 ? { kind, ref } : null;
+  }
+  return null;
+}
+
+/** Record `parent` as a parent of `key`, allocating the set on first use. */
+function addParent(
+  map: Map<string, Set<string>>,
+  key: string,
+  parent: string,
+): void {
+  const existing = map.get(key);
+  if (existing) {
+    existing.add(parent);
+    return;
+  }
+  map.set(key, new Set([parent]));
+}
+
+/**
+ * Choose the entry-point node id. A node carrying the reserved
+ * {@link ROOT_NODE_ID} always wins. Failing that, the root is whichever node
+ * has no parents; when more than one qualifies the pick is ambiguous, so a
+ * warning is logged and the ASCII-smallest id is used for determinism. When
+ * no node is parentless (no nodes at all, or every node has a parent) the
+ * result is `_root`, even though `nodes` holds no such entry.
+ */
+function pickRoot(
+  nodes: Map<string, TreeNode>,
+  parentsByNode: Map<string, Set<string>>,
+): string {
+  if (nodes.has(ROOT_NODE_ID)) {
+    return ROOT_NODE_ID;
+  }
+
+  const orphans: string[] = [];
+  for (const id of nodes.keys()) {
+    if ((parentsByNode.get(id)?.size ?? 0) === 0) orphans.push(id);
+  }
+  // Default string sort, so the ambiguous-case pick below is deterministic.
+  orphans.sort();
+
+  const [first] = orphans;
+  if (first === undefined) return ROOT_NODE_ID;
+  if (orphans.length > 1) {
+    log.warn(
+      { parentless: orphans },
+      "Ambiguous tree root — no '_root' node and multiple parentless nodes; picking ASCII-smallest deterministically",
+    );
+  }
+  return first;
+}
+
+/** Bumped on every invalidation so in-flight builds can detect staleness. */
+let invalidationEpoch = 0;
+
+/**
+ * Return a `TreeIndex` for `workspaceDir`. Cached module-locally; the cache is
+ * invalidated by `invalidateTreeIndex` (called by `tree-store.ts` hooks when
+ * nodes change).
+ *
+ * Cold builds list every node and read them in parallel, dropping any whose
+ * read rejects with a warn so one broken node never blocks the rest of the
+ * index. Each readable node's `children` is parsed into {@link ChildRef}s and
+ * threaded into forward/reverse adjacency. The build is structural only —
+ * dangling refs are retained for a later validation pass.
+ *
+ * An invalidation that lands while the build is awaiting I/O keeps the result
+ * out of the cache, so the next call rebuilds from disk.
+ */
+export async function getTreeIndex(workspaceDir: string): Promise<TreeIndex> {
+  if (cache && cache.workspaceDir === workspaceDir) {
+    return cache.index;
+  }
+  const epochAtStart = invalidationEpoch;
+
+  const ids = await listNodes(workspaceDir);
+  // Per-node read failures are tolerated: allSettled keeps the other reads.
+  const settled = await Promise.allSettled(
+    ids.map((id) => readNode(workspaceDir, id)),
+  );
+
+  const nodes = new Map<string, TreeNode>();
+  const childrenByNode = new Map<string, ReadonlyArray<ChildRef>>();
+  const parentsByNode = new Map<string, Set<string>>();
+  const pageParents = new Map<string, Set<string>>();
+
+  for (let i = 0; i < settled.length; i++) {
+    const result = settled[i];
+    const id = ids[i];
+    if (result.status === "rejected") {
+      log.warn(
+        { id, err: result.reason },
+        "Dropping tree node from index — read failed",
+      );
+      continue;
+    }
+    // A null value means the node vanished between list and read — a benign
+    // race, so it is dropped silently.
+    if (result.value) nodes.set(id, result.value);
+  }
+
+  // Second pass: iterate `nodes` in insertion (list) order, not read order.
+  for (const node of nodes.values()) {
+    const childRefs: ChildRef[] = [];
+    for (const raw of node.frontmatter.children) {
+      const parsed = parseChildRef(raw);
+      if (!parsed) {
+        log.warn(
+          { id: node.id, raw },
+          "Dropping malformed child ref — expected 'page:<slug>' or 'node:<id>'",
+        );
+        continue;
+      }
+      childRefs.push(parsed);
+      const reverse = parsed.kind === "node" ? parentsByNode : pageParents;
+      addParent(reverse, parsed.ref, node.id);
+    }
+    childrenByNode.set(node.id, childRefs);
+  }
+
+  const index: TreeIndex = {
+    nodes,
+    childrenByNode,
+    parentsByNode,
+    pageParents,
+    root: pickRoot(nodes, parentsByNode),
+  };
+  // Publish only if nothing was invalidated while the build was awaiting I/O.
+  if (epochAtStart === invalidationEpoch) cache = { workspaceDir, index };
+  return index;
+}
+
+/**
+ * Clear the cached index. Pass `workspaceDir` to scope invalidation to a
+ * specific cache entry; omit it to clear unconditionally. Always bumps the
+ * invalidation epoch so a build already in flight is not cached afterwards.
+ */
+export function invalidateTreeIndex(workspaceDir?: string): void {
+  invalidationEpoch += 1;
+  if (workspaceDir === undefined || cache?.workspaceDir === workspaceDir) {
+    cache = null;
+  }
+}
diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts
index be13e489f8e..55dc023f2fd 100644
--- a/assistant/src/memory/v3/tree-store.ts
+++ b/assistant/src/memory/v3/tree-store.ts
@@ -40,6 +40,7 @@ import { dirname, join, relative, sep } from "node:path";
 import { parse as parseYaml, stringify as stringifyYaml } from "yaml";
 
 import { FRONTMATTER_REGEX } from "../../skills/frontmatter.js";
+import { invalidateTreeIndex } from "./tree-index.js";
 import { type TreeNode, TreeNodeFrontmatterSchema } from "./types.js";
 
 /** Filename suffix for tree nodes. */
@@ -296,6 +297,7 @@ export async function writeNode(
     await rm(tmpPath, { force: true }).catch(() => {});
     throw err;
   }
+  invalidateTreeIndex(workspaceDir);
 }
 
 /**
@@ -367,4 +369,5 @@ export async function deleteNode(
     }
     throw err;
   }
+  invalidateTreeIndex(workspaceDir);
 }

From 00d7812a76e2893e311db79529fc433c59961598 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:49:31 -0400
Subject: [PATCH 07/21] feat(memory-v3): always-on scouts over the v2 substrate
 (#31977)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/scouts.test.ts    | 390 +++++++++++++++++
 assistant/src/memory/v3/scouts.ts             | 392 ++++++++++++++++++
 2 files changed, 782 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/scouts.test.ts
 create mode 100644 assistant/src/memory/v3/scouts.ts

diff --git a/assistant/src/memory/v3/__tests__/scouts.test.ts b/assistant/src/memory/v3/__tests__/scouts.test.ts
new file mode 100644
index 00000000000..1cc34678ba7
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/scouts.test.ts
@@ -0,0 +1,390 @@
+/**
+ * Tests for `assistant/src/memory/v3/scouts.ts`.
+ *
+ * The scout lanes read the v2 substrate (page index, injection-event EMA,
+ * Qdrant hybrid query, BM25, dense embed + calibration). Every one of those is
+ * stubbed via `mock.module` so the suite needs no real Qdrant, embedding
+ * backend, or LLM — and the SQLite-backed EMA is replaced by a hand-fed score
+ * map, so the injected `db` is an opaque sentinel the lane never dereferences.
+ *
+ * Coverage:
+ *   - hot lane: ranks the EMA score map desc, marks every hit sticky.
+ *   - sparse lane: reads sparseScore, ranks desc, flags near-exact hits
+ *     sticky + tree-bypass.
+ *   - dense lane: per-subtree quota caps off-domain hits; MMR diversifies.
+ *   - lane toggles: each disabled lane is fully suppressed (no ScoutResult).
+ *   - empty query / empty corpus short-circuits.
+ *   - honors AbortSignal.
+ */
+
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+import type { PageIndex } from "../../v2/page-index.js";
+import type { ConceptPageQueryResult } from "../../v2/qdrant.js";
+
+// ---------------------------------------------------------------------------
+// Substrate stubs — installed before importing the module under test.
+// ---------------------------------------------------------------------------
+
+// Per-call programmable substrate state. Each test rewires these before
+// calling runScouts; the mock factories below close over the live refs.
+let injectionScores = new Map<string, number>();
+let pageSlugs: string[] = [];
+let hybridHits: ConceptPageQueryResult[] = [];
+let embedCalls = 0;
+
+mock.module("../../v2/injection-events.js", () => ({
+  computeInjectionScores: () => injectionScores,
+}));
+
+mock.module("../../v2/page-index.js", () => ({
+  getPageIndex: async (): Promise<PageIndex> => ({
+    entries: pageSlugs.map((slug, i) => ({
+      id: i + 1,
+      slug,
+      summary: "",
+      edges: [],
+      modifiedAt: 0,
+    })),
+    bySlug: new Map(),
+    byId: new Map(),
+    rendered: "",
+  }),
+}));
+
+mock.module("../../v2/qdrant.js", () => ({
+  hybridQueryConceptPages: async (): Promise<ConceptPageQueryResult[]> =>
+    hybridHits,
+}));
+
+mock.module("../../v2/sparse-bm25.js", () => ({
+  // Non-empty indices so the sparse/dense lanes don't short-circuit on an
+  // "empty query embedding". The values are irrelevant — the stubbed Qdrant
+  // query ignores them and returns `hybridHits` directly.
+  generateBm25QueryEmbedding: (text: string) =>
+    text.trim().length > 0
+      ? { indices: [1], values: [1] }
+      : { indices: [], values: [] },
+}));
+
+mock.module("../../embedding-backend.js", () => ({
+  embedWithBackend: async () => {
+    embedCalls += 1;
+    return { provider: "local", model: "stub", vectors: [[0.1, 0.2, 0.3]] };
+  },
+}));
+
+mock.module("../../anisotropy.js", () => ({
+  applyCorrectionIfCalibrated: async (vec: number[]) => vec,
+}));
+
+const { runScouts } = await import("../scouts.js");
+import type { RetrievalInput } from "../../v2/harness/retriever.js";
+import type { ScoutDeps } from "../scouts.js";
+
+// ---------------------------------------------------------------------------
+// Fixtures
+// ---------------------------------------------------------------------------
+
+const DB_SENTINEL = { __opaque: true } as unknown as ScoutDeps["db"];
+const DEPS: ScoutDeps = { db: DB_SENTINEL };
+
+type Lanes = { hot: boolean; sparse: boolean; dense: boolean };
+
+/** Build a RetrievalInput; every lane is on unless `opts.lanes` overrides it. */
+function makeInput(opts?: {
+  userMessage?: string;
+  nowText?: string;
+  lanes?: Partial<Lanes>;
+  denseQuota?: { activeDomain: number; offDomain: number };
+  signal?: AbortSignal;
+}): RetrievalInput {
+  // Only the `memory.v3` slice is populated, hence the double cast.
+  const config = {
+    memory: {
+      v3: {
+        lanes: {
+          hot: true,
+          sparse: true,
+          dense: true,
+          tree: true,
+          edges: true,
+          ...opts?.lanes,
+        },
+        denseQuota: opts?.denseQuota ?? { activeDomain: 30, offDomain: 8 },
+      },
+    },
+  } as unknown as RetrievalInput["config"];
+  const userMessage = opts?.userMessage ?? "tell me";
+  return {
+    workspaceDir: "/tmp/ws",
+    recentTurnPairs: [{ assistantMessage: "", userMessage }],
+    nowText: opts?.nowText ?? "now context",
+    priorEverInjected: [],
+    config,
+    signal: opts?.signal,
+  };
+}
+
+function hit(
+  slug: string,
+  scores: Partial<ConceptPageQueryResult>,
+): ConceptPageQueryResult {
+  return { slug, ...scores }; // spread last: a `slug` in `scores` would win
+}
+
+beforeEach(() => {
+  injectionScores = new Map();
+  pageSlugs = [];
+  hybridHits = [];
+  embedCalls = 0;
+});
+
+// ---------------------------------------------------------------------------
+// Hot lane
+// ---------------------------------------------------------------------------
+
+describe("runScouts — hot lane", () => {
+  test("ranks EMA scores desc and marks every hit sticky", async () => {
+    pageSlugs = ["people/alice", "work/proj", "essentials"];
+    injectionScores = new Map([
+      ["work/proj", 0.2],
+      ["people/alice", 0.9],
+    ]);
+
+    const { scouts, sticky } = await runScouts(
+      makeInput({ lanes: { sparse: false, dense: false } }),
+      DEPS,
+    );
+
+    const hot = scouts.find((s) => s.lane === "hot");
+    expect(hot?.slugs).toEqual(["people/alice", "work/proj"]);
+    expect(hot?.scoreBySlug).toEqual({ "people/alice": 0.9, "work/proj": 0.2 });
+    expect([...sticky].sort()).toEqual(["people/alice", "work/proj"]);
+  });
+
+  test("empty corpus yields no hot ScoutResult", async () => {
+    pageSlugs = [];
+    const { scouts } = await runScouts(
+      makeInput({ lanes: { sparse: false, dense: false } }),
+      DEPS,
+    );
+    expect(scouts.find((s) => s.lane === "hot")).toBeUndefined();
+  });
+
+  test("no EMA events yields no hot ScoutResult", async () => {
+    pageSlugs = ["a", "b"];
+    injectionScores = new Map();
+    const { scouts, sticky } = await runScouts(
+      makeInput({ lanes: { sparse: false, dense: false } }),
+      DEPS,
+    );
+    expect(scouts.find((s) => s.lane === "hot")).toBeUndefined();
+    expect(sticky.size).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Sparse lane
+// ---------------------------------------------------------------------------
+
+describe("runScouts — sparse lane", () => {
+  test("reads sparseScore, ranks desc, flags near-exact sticky + bypass", async () => {
+    hybridHits = [
+      hit("docs/readme", { sparseScore: 4.0 }),
+      hit("docs/api", { sparseScore: 3.9 }), // within 90% of top -> near-exact
+      hit("misc/note", { sparseScore: 1.0 }), // below threshold
+      hit("dense/only", { denseScore: 0.8 }), // no sparseScore -> dropped
+    ];
+
+    const { scouts, sticky, bypass } = await runScouts(
+      makeInput({ lanes: { hot: false, dense: false } }),
+      DEPS,
+    );
+
+    const sparse = scouts.find((s) => s.lane === "sparse");
+    expect(sparse?.slugs).toEqual(["docs/readme", "docs/api", "misc/note"]);
+    // Near-exact: readme (top) and api (>= 90% of top). Not misc/note.
+    expect([...sticky].sort()).toEqual(["docs/api", "docs/readme"]);
+    expect([...bypass].sort()).toEqual(["docs/api", "docs/readme"]);
+  });
+
+  test("no sparse hits yields no sparse ScoutResult", async () => {
+    hybridHits = [hit("dense/only", { denseScore: 0.5 })];
+    const { scouts, sticky, bypass } = await runScouts(
+      makeInput({ lanes: { hot: false, dense: false } }),
+      DEPS,
+    );
+    expect(scouts.find((s) => s.lane === "sparse")).toBeUndefined();
+    expect(sticky.size).toBe(0);
+    expect(bypass.size).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Dense lane
+// ---------------------------------------------------------------------------
+
+describe("runScouts — dense lane", () => {
+  test("embeds the query and emits dense hits ranked by denseScore", async () => {
+    hybridHits = [
+      hit("work/a", { denseScore: 0.9 }),
+      hit("work/b", { denseScore: 0.7 }),
+    ];
+    const { scouts } = await runScouts(
+      makeInput({ lanes: { hot: false, sparse: false } }),
+      DEPS,
+    );
+    expect(embedCalls).toBe(1);
+    const dense = scouts.find((s) => s.lane === "dense");
+    expect(dense?.slugs[0]).toBe("work/a");
+    expect(dense?.scoreBySlug).toEqual({ "work/a": 0.9, "work/b": 0.7 });
+  });
+
+  test("per-subtree quota caps off-domain hits", async () => {
+    // Active domain = top hit's domain = "work". Off-domain quota = 1.
+    hybridHits = [
+      hit("work/a", { denseScore: 0.99 }),
+      hit("work/b", { denseScore: 0.98 }),
+      hit("work/c", { denseScore: 0.97 }),
+      hit("people/x", { denseScore: 0.5 }), // off-domain, claims the 1 slot
+      hit("notes/y", { denseScore: 0.4 }), // off-domain, over quota -> dropped
+      hit("misc/z", { denseScore: 0.3 }), // off-domain, over quota -> dropped
+    ];
+    const { scouts } = await runScouts(
+      makeInput({
+        lanes: { hot: false, sparse: false },
+        denseQuota: { activeDomain: 30, offDomain: 1 },
+      }),
+      DEPS,
+    );
+    const dense = scouts.find((s) => s.lane === "dense");
+    const slugs = dense?.slugs ?? [];
+    // All three work/* survive (active quota 30); exactly one off-domain hit.
+    expect(slugs.filter((s) => s.startsWith("work/")).length).toBe(3);
+    const offDomain = slugs.filter((s) => !s.startsWith("work/"));
+    expect(offDomain).toEqual(["people/x"]);
+  });
+
+  test("active-domain quota caps same-subtree hits", async () => {
+    hybridHits = [
+      hit("work/a", { denseScore: 0.99 }),
+      hit("work/b", { denseScore: 0.98 }),
+      hit("work/c", { denseScore: 0.97 }), // over active quota 2 -> dropped
+      hit("people/x", { denseScore: 0.5 }),
+    ];
+    const { scouts } = await runScouts(
+      makeInput({
+        lanes: { hot: false, sparse: false },
+        denseQuota: { activeDomain: 2, offDomain: 8 },
+      }),
+      DEPS,
+    );
+    const slugs = scouts.find((s) => s.lane === "dense")?.slugs ?? [];
+    expect(slugs.filter((s) => s.startsWith("work/")).length).toBe(2);
+    expect(slugs).toContain("people/x");
+  });
+
+  test("MMR interleaves subtrees rather than emitting a same-subtree run", async () => {
+    // Four work/* then one people/* of comparable relevance. Pure score order
+    // would bury people/x last; MMR should pull it forward once work/ is
+    // over-represented.
+    hybridHits = [
+      hit("work/a", { denseScore: 0.95 }),
+      hit("work/b", { denseScore: 0.94 }),
+      hit("work/c", { denseScore: 0.93 }),
+      hit("work/d", { denseScore: 0.92 }),
+      hit("people/x", { denseScore: 0.9 }),
+    ];
+    const { scouts } = await runScouts(
+      makeInput({
+        lanes: { hot: false, sparse: false },
+        denseQuota: { activeDomain: 30, offDomain: 8 },
+      }),
+      DEPS,
+    );
+    const slugs = scouts.find((s) => s.lane === "dense")?.slugs ?? [];
+    // people/x is not stranded at the very end despite the lowest raw score.
+    expect(slugs.indexOf("people/x")).toBeLessThan(slugs.length - 1);
+  });
+
+  test("no dense hits yields no dense ScoutResult", async () => {
+    hybridHits = [hit("sparse/only", { sparseScore: 2.0 })];
+    const { scouts } = await runScouts(
+      makeInput({ lanes: { hot: false, sparse: false } }),
+      DEPS,
+    );
+    expect(scouts.find((s) => s.lane === "dense")).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Lane toggles
+// ---------------------------------------------------------------------------
+
+describe("runScouts — lane toggles", () => {
+  test("disabling a lane suppresses its ScoutResult", async () => {
+    pageSlugs = ["a"];
+    injectionScores = new Map([["a", 1]]);
+    hybridHits = [hit("docs/a", { sparseScore: 2.0, denseScore: 0.5 })];
+
+    const all = await runScouts(makeInput(), DEPS);
+    expect(all.scouts.map((s) => s.lane).sort()).toEqual([
+      "dense",
+      "hot",
+      "sparse",
+    ]);
+
+    const hotOnly = await runScouts(
+      makeInput({ lanes: { sparse: false, dense: false } }),
+      DEPS,
+    );
+    expect(hotOnly.scouts.map((s) => s.lane)).toEqual(["hot"]);
+    // Dense embed must not run when the dense lane is off.
+    embedCalls = 0;
+    await runScouts(makeInput({ lanes: { dense: false } }), DEPS);
+    expect(embedCalls).toBe(0);
+  });
+
+  test("all lanes off yields empty result", async () => {
+    pageSlugs = ["a"];
+    injectionScores = new Map([["a", 1]]);
+    hybridHits = [hit("docs/a", { sparseScore: 2.0, denseScore: 0.5 })];
+    const { scouts, sticky, bypass } = await runScouts(
+      makeInput({ lanes: { hot: false, sparse: false, dense: false } }),
+      DEPS,
+    );
+    expect(scouts).toEqual([]);
+    expect(sticky.size).toBe(0);
+    expect(bypass.size).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Misc
+// ---------------------------------------------------------------------------
+
+describe("runScouts — misc", () => {
+  test("empty query text skips sparse and dense lanes", async () => {
+    pageSlugs = ["a"];
+    injectionScores = new Map([["a", 1]]);
+    hybridHits = [hit("docs/a", { sparseScore: 2.0, denseScore: 0.5 })];
+    const { scouts } = await runScouts(
+      makeInput({ userMessage: "   ", nowText: "  " }),
+      DEPS,
+    );
+    // Hot lane is query-independent and still fires; sparse/dense are gated off.
+    expect(scouts.map((s) => s.lane)).toEqual(["hot"]);
+    expect(embedCalls).toBe(0);
+  });
+
+  test("honors an already-aborted signal", async () => {
+    const controller = new AbortController();
+    controller.abort();
+    pageSlugs = ["a"];
+    injectionScores = new Map([["a", 1]]);
+    await expect(
+      runScouts(makeInput({ signal: controller.signal }), DEPS),
+    ).rejects.toThrow();
+  });
+});
diff --git a/assistant/src/memory/v3/scouts.ts b/assistant/src/memory/v3/scouts.ts
new file mode 100644
index 00000000000..03e99ff529f
--- /dev/null
+++ b/assistant/src/memory/v3/scouts.ts
@@ -0,0 +1,392 @@
+// ---------------------------------------------------------------------------
+// Memory v3 — Always-on scout lanes (hot / sparse / dense)
+// ---------------------------------------------------------------------------
+//
+// The v3 retrieval loop opens each pass by fanning out a small set of cheap,
+// always-on "scout" lanes over the v2 read-substrate. Scouts surface candidate
+// concept-page slugs from three complementary signals before any LLM judging
+// (the dense judge lives in a later PR) or tree descent runs:
+//
+//   - hot:    corpus-global access-frequency EMA via `computeInjectionScores`.
+//             Retriever-agnostic — v2 keeps writing `memory_v2_injection_events`,
+//             so a page the user has been touching is "hot" regardless of which
+//             retriever surfaced it. Hits are marked **sticky** so the downstream
+//             gate keeps them in the running.
+//   - sparse: BM25 keyword match. Near-exact (high-score) hits are both
+//             **sticky** and **tree-bypass** — a literal keyword hit is a strong
+//             enough signal that we shouldn't make the slug earn its place by
+//             walking the tree.
+//   - dense:  embedding-similarity match, then an asymmetric per-subtree quota
+//             (generous active-domain slice, thin off-domain slice) plus MMR for
+//             diversity so a single dominant subtree can't crowd out the slate.
+//
+// Each lane is individually toggleable via `config.memory.v3.lanes`. This module
+// performs **no** LLM calls and writes nothing — it is a pure read over the v2
+// substrate. A later PR composes `runScouts` into the full descent loop.
+
+import type { AssistantConfig } from "../../config/types.js";
+import { applyCorrectionIfCalibrated } from "../anisotropy.js";
+import type { DrizzleDb } from "../db-connection.js";
+import { embedWithBackend } from "../embedding-backend.js";
+import type { RetrievalInput } from "../v2/harness/retriever.js";
+import type { ScoutResult } from "../v2/harness/trace.js";
+import { computeInjectionScores } from "../v2/injection-events.js";
+import { getPageIndex } from "../v2/page-index.js";
+import { hybridQueryConceptPages } from "../v2/qdrant.js";
+import { generateBm25QueryEmbedding } from "../v2/sparse-bm25.js";
+
+/** Result of running the always-on scout fanout for one pass. */
+export interface RunScoutsResult {
+  /** Per-lane contributions, one entry per *enabled* lane that produced hits. */
+  scouts: ScoutResult[];
+  /**
+   * Slugs the downstream gate should keep in the running regardless of later
+   * scoring — hot hits and near-exact sparse hits.
+   */
+  sticky: Set<string>;
+  /**
+   * Slugs strong enough (near-exact sparse) to skip the tree-descent gate
+   * entirely. A subset of `sticky`.
+   */
+  bypass: Set<string>;
+}
+
+/** Substrate dependencies injected for testability. */
+export interface ScoutDeps {
+  db: DrizzleDb;
+}
+
+// ---------------------------------------------------------------------------
+// Tunables
+// ---------------------------------------------------------------------------
+
+/**
+ * Hit cap for the sparse and dense Qdrant queries, before quota/diversity
+ * post-processing (the hot lane does not use it). Both lanes run every pass,
+ * so a generous-but-bounded cap keeps each round-trip and the bookkeeping
+ * cheap while still giving the quota/MMR step enough raw candidates.
+ */
+const LANE_QUERY_LIMIT = 100;
+
+/**
+ * Sparse score at or above which a hit is treated as **near-exact** — sticky
+ * and tree-bypass. BM25 scores are unbounded above and corpus-relative, so the
+ * threshold is taken relative to the top sparse hit in the same pass rather
+ * than as a fixed magnitude: a hit within this fraction of the best sparse
+ * score for the query is "near-exact". A lone strong hit (it is its own max)
+ * always qualifies.
+ */
+const SPARSE_NEAR_EXACT_FRACTION = 0.9;
+
+/**
+ * MMR trade-off: `λ · relevance − (1 − λ) · redundancy`. Closer to 1 favors
+ * raw dense relevance; lower values push harder for subtree diversity. 0.7
+ * keeps relevance in the driver's seat while still breaking up runs of
+ * same-subtree hits.
+ */
+const DENSE_MMR_LAMBDA = 0.7;
+
+// ---------------------------------------------------------------------------
+// Public entry point
+// ---------------------------------------------------------------------------
+
+/**
+ * Run the always-on scout lanes for one retrieval pass.
+ *
+ * `queryText` is the last user turn in `input.recentTurnPairs` joined with
+ * `input.nowText` — the same shape the v2 router/activation path embeds.
+ * Disabled lanes (per `config.memory.v3.lanes`) make no substrate call and add
+ * no `ScoutResult` entry; an empty `queryText` also skips sparse and dense.
+ *
+ * Honors `input.signal` — aborts between lanes and around the dense embed.
+ */
+export async function runScouts(
+  input: RetrievalInput,
+  deps: ScoutDeps,
+): Promise<RunScoutsResult> {
+  const { config, signal } = input;
+  const lanes = config.memory.v3.lanes;
+  const queryText = deriveQueryText(input);
+
+  const scouts: ScoutResult[] = [];
+  const sticky = new Set<string>();
+  const bypass = new Set<string>();
+
+  // Hot lane — corpus-global EMA over the full slug universe. Cheap (single
+  // SQL pass) so it runs first and seeds sticky.
+  if (lanes.hot) {
+    signal?.throwIfAborted();
+    const hot = await runHotLane(input, deps);
+    if (hot) {
+      scouts.push(hot);
+      for (const slug of hot.slugs) sticky.add(slug);
+    }
+  }
+
+  // Sparse lane — BM25 keyword match. Near-exact hits seed sticky + bypass.
+  if (lanes.sparse && queryText.length > 0) {
+    signal?.throwIfAborted();
+    const sparse = await runSparseLane(queryText, signal);
+    if (sparse) {
+      scouts.push(sparse.result);
+      for (const slug of sparse.nearExact) {
+        sticky.add(slug);
+        bypass.add(slug);
+      }
+    }
+  }
+
+  // Dense lane — embedding similarity, then per-subtree quota + MMR.
+  if (lanes.dense && queryText.length > 0) {
+    signal?.throwIfAborted();
+    const dense = await runDenseLane(queryText, config, signal);
+    if (dense) scouts.push(dense);
+  }
+
+  return { scouts, sticky, bypass };
+}
+
+// ---------------------------------------------------------------------------
+// Query-text derivation
+// ---------------------------------------------------------------------------
+
+/**
+ * Build the scout query text from the just-arrived user turn plus the NOW
+ * context. Mirrors the v2 activation path (`selectCandidates`): join the
+ * non-empty channels with a newline. The last `recentTurnPairs` entry's
+ * `userMessage` is the turn being routed.
+ */
+function deriveQueryText(input: RetrievalInput): string {
+  const lastPair = input.recentTurnPairs[input.recentTurnPairs.length - 1];
+  const userText = lastPair?.userMessage ?? "";
+  return [userText, input.nowText]
+    .filter((s) => s.trim().length > 0)
+    .join("\n")
+    .trim();
+}
+
+// ---------------------------------------------------------------------------
+// Hot lane
+// ---------------------------------------------------------------------------
+
+async function runHotLane(
+  input: RetrievalInput,
+  deps: ScoutDeps,
+): Promise<ScoutResult | null> {
+  const index = await getPageIndex(input.workspaceDir);
+  const allSlugs = index.entries.map((e) => e.slug);
+  if (allSlugs.length === 0) return null;
+
+  const now = Date.now();
+  const scores = computeInjectionScores(deps.db, allSlugs, now);
+  if (scores.size === 0) return null;
+
+  // Slugs with no events in the read window are omitted by
+  // `computeInjectionScores`, so every entry here has score > 0.
+  const ranked = [...scores.entries()].sort((a, b) => sortByScoreDesc(a, b));
+  const slugs = ranked.map(([slug]) => slug);
+  const scoreBySlug = Object.fromEntries(ranked);
+  return { lane: "hot", slugs, scoreBySlug };
+}
+
+// ---------------------------------------------------------------------------
+// Sparse lane
+// ---------------------------------------------------------------------------
+
+async function runSparseLane(
+  queryText: string,
+  signal: AbortSignal | undefined,
+): Promise<{ result: ScoutResult; nearExact: string[] } | null> {
+  const sparse = generateBm25QueryEmbedding(queryText);
+  if (sparse.indices.length === 0) return null;
+
+  // Dense channel intentionally empty — this lane is BM25-only. `skipSparse:
+  // false` keeps the sparse round-trip on; we read `sparseScore` and ignore
+  // any dense scores the query happens to surface.
+  const hits = await hybridQueryConceptPages(
+    [],
+    sparse,
+    LANE_QUERY_LIMIT,
+    undefined,
+    {
+      skipSparse: false,
+    },
+  );
+  signal?.throwIfAborted();
+
+  const scored = hits
+    .map((hit) => ({ slug: hit.slug, score: hit.sparseScore }))
+    .filter((h): h is { slug: string; score: number } => h.score !== undefined)
+    .sort((a, b) => b.score - a.score);
+  if (scored.length === 0) return null;
+
+  const slugs = scored.map((h) => h.slug);
+  const scoreBySlug = Object.fromEntries(scored.map((h) => [h.slug, h.score]));
+
+  // Near-exact: within SPARSE_NEAR_EXACT_FRACTION of the top sparse score.
+  const topScore = scored[0].score;
+  const threshold = topScore * SPARSE_NEAR_EXACT_FRACTION;
+  const nearExact = scored
+    .filter((h) => topScore > 0 && h.score >= threshold)
+    .map((h) => h.slug);
+
+  return { result: { lane: "sparse", slugs, scoreBySlug }, nearExact };
+}
+
+// ---------------------------------------------------------------------------
+// Dense lane
+// ---------------------------------------------------------------------------
+
+async function runDenseLane(
+  queryText: string,
+  config: AssistantConfig,
+  signal: AbortSignal | undefined,
+): Promise<ScoutResult | null> {
+  // Embed + apply anisotropy correction, mirroring v2 activation's read path.
+  const embedded = await embedWithBackend(config, [queryText], { signal });
+  const dense = await applyCorrectionIfCalibrated(
+    embedded.vectors[0],
+    embedded.provider,
+    embedded.model,
+  );
+  signal?.throwIfAborted();
+
+  const sparse = generateBm25QueryEmbedding(queryText);
+  const hits = await hybridQueryConceptPages(dense, sparse, LANE_QUERY_LIMIT);
+  signal?.throwIfAborted();
+
+  const scored = hits
+    .map((hit) => ({ slug: hit.slug, score: hit.denseScore }))
+    .filter((h): h is { slug: string; score: number } => h.score !== undefined)
+    .sort((a, b) => b.score - a.score);
+  if (scored.length === 0) return null;
+
+  const selected = applyQuotaAndMmr(scored, config.memory.v3);
+  if (selected.length === 0) return null;
+
+  const slugs = selected.map((h) => h.slug);
+  const scoreBySlug = Object.fromEntries(
+    selected.map((h) => [h.slug, h.score]),
+  );
+  return { lane: "dense", slugs, scoreBySlug };
+}
+
+interface ScoredSlug {
+  slug: string;
+  score: number;
+}
+
+/**
+ * Apply the asymmetric per-subtree quota then MMR re-ranking to the dense hits.
+ *
+ * Quota: the conversation's **active domain** is the top-path segment of the
+ * single highest-scoring dense hit. That domain gets a generous slice
+ * (`denseQuota.activeDomain`); every other (off-)domain shares a thin slice
+ * (`denseQuota.offDomain`) so exploratory hits aren't fully starved but can't
+ * dominate either. Both caps are claimed in score-descending order; `scored`
+ * must arrive sorted best-first, since `scored[0]` picks the active domain.
+ *
+ * MMR: re-rank the quota-passing pool by `λ · relevance − (1 − λ) · redundancy`
+ * where redundancy is how represented the candidate's subtree already is in the
+ * selected slate. Without per-page embeddings we use subtree co-membership as
+ * the diversity signal — same subtree ⇒ maximally redundant. This breaks up
+ * runs of same-subtree hits without an extra Qdrant round-trip.
+ */
+function applyQuotaAndMmr(
+  scored: readonly ScoredSlug[],
+  v3: AssistantConfig["memory"]["v3"],
+): ScoredSlug[] {
+  if (scored.length === 0) return [];
+
+  const activeDomain = domainOf(scored[0].slug);
+  const { activeDomain: activeQuota, offDomain: offQuota } = v3.denseQuota;
+
+  // Quota pass: the active domain gets activeQuota slots (perDomainCount only
+  // ever keys that one domain); all off-domain hits share one offQuota pool.
+  // Walk in score-desc order so the strongest hits claim each quota first.
+  const perDomainCount = new Map<string, number>();
+  let offDomainCount = 0;
+  const quotaPassing: ScoredSlug[] = [];
+  for (const hit of scored) {
+    const domain = domainOf(hit.slug);
+    if (domain === activeDomain) {
+      const used = perDomainCount.get(domain) ?? 0;
+      if (used >= activeQuota) continue;
+      perDomainCount.set(domain, used + 1);
+    } else {
+      if (offDomainCount >= offQuota) continue;
+      offDomainCount += 1;
+    }
+    quotaPassing.push(hit);
+  }
+
+  return mmrReorder(quotaPassing, DENSE_MMR_LAMBDA);
+}
+
+/**
+ * Greedy MMR over a score-ranked pool using subtree co-membership as the
+ * redundancy signal. Each pick maximizes
+ * `λ · normalizedScore − (1 − λ) · subtreeShareInSelected`, so once a subtree
+ * is well-represented its remaining members are deprioritized in favor of
+ * fresh subtrees. Ties go to the earlier pool entry. Pure / deterministic.
+ */
+function mmrReorder(pool: readonly ScoredSlug[], lambda: number): ScoredSlug[] {
+  if (pool.length <= 1) return [...pool];
+
+  // Scale relevance by the pool max (`pool[0]`; the pool is score-desc) so it
+  // peaks at 1, matching the [0, 1] share term. A non-positive max zeroes it.
+  const maxScore = pool[0].score;
+  const relevance = (hit: ScoredSlug): number =>
+    maxScore > 0 ? hit.score / maxScore : 0;
+
+  const remaining = [...pool];
+  const selected: ScoredSlug[] = [];
+  const selectedDomainCount = new Map<string, number>();
+
+  while (remaining.length > 0) {
+    let bestIdx = 0;
+    let bestMmr = -Infinity;
+    for (let i = 0; i < remaining.length; i++) {
+      const hit = remaining[i];
+      const domain = domainOf(hit.slug);
+      const share =
+        selected.length === 0
+          ? 0
+          : (selectedDomainCount.get(domain) ?? 0) / selected.length;
+      const mmr = lambda * relevance(hit) - (1 - lambda) * share;
+      if (mmr > bestMmr) {
+        bestMmr = mmr;
+        bestIdx = i;
+      }
+    }
+    const [pick] = remaining.splice(bestIdx, 1);
+    selected.push(pick);
+    const domain = domainOf(pick.slug);
+    selectedDomainCount.set(domain, (selectedDomainCount.get(domain) ?? 0) + 1);
+  }
+
+  return selected;
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * The "domain" (subtree) of a page slug — its top path segment. Slugs are
+ * path-relative with `/` separators (e.g. `people/alice` → `people`); a flat
+ * slug (`essentials`) is its own domain.
+ */
+function domainOf(slug: string): string {
+  const slash = slug.indexOf("/");
+  return slash === -1 ? slug : slug.slice(0, slash);
+}
+
+/** Score-desc with a stable slug-ASCII tiebreak. */
+function sortByScoreDesc(
+  a: readonly [string, number],
+  b: readonly [string, number],
+): number {
+  if (b[1] !== a[1]) return b[1] - a[1];
+  return a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0;
+}

From eabe526e7204c7aaf2930a21dd008b5e04e81cde Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:53:50 -0400
Subject: [PATCH 08/21] feat(memory-v3): compose node index from children +
 routing hints (#31978)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../v3/__tests__/index-composition.test.ts    | 233 ++++++++++++++++++
 assistant/src/memory/v3/index-composition.ts  | 113 +++++++++
 2 files changed, 346 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/index-composition.test.ts
 create mode 100644 assistant/src/memory/v3/index-composition.ts

diff --git a/assistant/src/memory/v3/__tests__/index-composition.test.ts b/assistant/src/memory/v3/__tests__/index-composition.test.ts
new file mode 100644
index 00000000000..aa08205e35f
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/index-composition.test.ts
@@ -0,0 +1,233 @@
+/**
+ * Tests for `assistant/src/memory/v3/index-composition.ts`.
+ *
+ * `composeNodeIndex` is a pure function over an already-built `TreeIndex` and
+ * `PageIndex`, so these tests hand-build both fixtures (no filesystem / no I/O)
+ * and assert on the rendered string.
+ *
+ * Coverage matrix:
+ *   - mixed node:/page: children render one summary line each, in authored
+ *     order, with the node's routing hints appended as a trailer.
+ *   - a `page:` ref whose slug is absent from the index is silently omitted.
+ *   - a `node:` ref whose id is absent from the tree is silently omitted.
+ *   - empty / missing children → just the routing hints, or the empty string
+ *     when there are none either.
+ *   - a `node:` child with no summary falls back to the first non-empty body
+ *     line; with neither, only its header is emitted.
+ */
+
+import { describe, expect, test } from "bun:test";
+
+import type { PageIndex, PageIndexEntry } from "../../v2/page-index.js";
+import { composeNodeIndex } from "../index-composition.js";
+import type { ChildRef, TreeIndex } from "../tree-index.js";
+import type { TreeNode } from "../types.js";
+
+// ---------------------------------------------------------------------------
+// Fixture builders
+// ---------------------------------------------------------------------------
+
+function treeNode(
+  id: string,
+  opts: { summary?: string; routing_hints?: string; body?: string } = {},
+): TreeNode {
+  return {
+    id,
+    frontmatter: {
+      children: [],
+      summary: opts.summary,
+      routing_hints: opts.routing_hints,
+    },
+    body: opts.body ?? "",
+  };
+}
+
+/**
+ * Build a `TreeIndex` from a list of nodes and an explicit child-ref list for
+ * the node under test. Only the fields `composeNodeIndex` reads (`nodes`,
+ * `childrenByNode`) are populated; the reverse-adjacency maps are left empty.
+ */
+function treeIndex(
+  nodes: TreeNode[],
+  childrenByNode: Record<string, ChildRef[]>,
+): TreeIndex {
+  return {
+    nodes: new Map(nodes.map((n) => [n.id, n])),
+    childrenByNode: new Map(Object.entries(childrenByNode)),
+    parentsByNode: new Map(),
+    pageParents: new Map(),
+    root: "_root",
+  };
+}
+
+function pageEntry(slug: string, summary: string): PageIndexEntry {
+  return { id: 1, slug, summary, edges: [], modifiedAt: 0 };
+}
+
+function pageIndex(entries: PageIndexEntry[]): PageIndex {
+  return {
+    entries,
+    bySlug: new Map(entries.map((e) => [e.slug, e])),
+    byId: new Map(entries.map((e) => [e.id, e])),
+    rendered: "",
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe("composeNodeIndex", () => {
+  test("composes mixed node:/page: children in authored order with routing hints", () => {
+    const tree = treeIndex(
+      [
+        treeNode("people", {
+          summary: "People you know",
+          routing_hints: "for work contacts see node:colleagues",
+        }),
+        treeNode("colleagues", { summary: "Work relationships" }),
+      ],
+      {
+        people: [
+          { kind: "node", ref: "colleagues" },
+          { kind: "page", ref: "alice" },
+        ],
+      },
+    );
+    const pages = pageIndex([
+      pageEntry("alice", "Alice — neighbor and friend"),
+    ]);
+
+    const block = composeNodeIndex("people", tree, pages);
+
+    expect(block).toBe(
+      [
+        "[node:colleagues] Work relationships",
+        "[page:alice] Alice — neighbor and friend",
+        "Routing hints: for work contacts see node:colleagues",
+      ].join("\n"),
+    );
+  });
+
+  test("emits children in authored order regardless of map insertion", () => {
+    const tree = treeIndex(
+      [treeNode("a", { summary: "Node A" }), treeNode("root", {})],
+      {
+        root: [
+          { kind: "page", ref: "zeta" },
+          { kind: "node", ref: "a" },
+          { kind: "page", ref: "beta" },
+        ],
+      },
+    );
+    const pages = pageIndex([
+      pageEntry("beta", "Beta page"),
+      pageEntry("zeta", "Zeta page"),
+    ]);
+
+    const block = composeNodeIndex("root", tree, pages);
+
+    expect(block).toBe(
+      [
+        "[page:zeta] Zeta page",
+        "[node:a] Node A",
+        "[page:beta] Beta page",
+      ].join("\n"),
+    );
+  });
+
+  test("silently omits a page ref absent from the index", () => {
+    const tree = treeIndex([treeNode("root", {})], {
+      root: [
+        { kind: "page", ref: "present" },
+        { kind: "page", ref: "missing" },
+      ],
+    });
+    const pages = pageIndex([pageEntry("present", "I exist")]);
+
+    const block = composeNodeIndex("root", tree, pages);
+
+    expect(block).toBe("[page:present] I exist");
+  });
+
+  test("silently omits a node ref absent from the tree", () => {
+    const tree = treeIndex([treeNode("present", { summary: "Here" })], {
+      root: [
+        { kind: "node", ref: "present" },
+        { kind: "node", ref: "ghost" },
+      ],
+    });
+    const pages = pageIndex([]);
+
+    const block = composeNodeIndex("root", tree, pages);
+
+    expect(block).toBe("[node:present] Here");
+  });
+
+  test("empty children → just the routing hints", () => {
+    const tree = treeIndex(
+      [treeNode("leaf", { routing_hints: "this is a leaf branch" })],
+      { leaf: [] },
+    );
+
+    const block = composeNodeIndex("leaf", tree, pageIndex([]));
+
+    expect(block).toBe("Routing hints: this is a leaf branch");
+  });
+
+  test("no children and no routing hints → empty string", () => {
+    const tree = treeIndex([treeNode("bare", {})], { bare: [] });
+
+    expect(composeNodeIndex("bare", tree, pageIndex([]))).toBe("");
+  });
+
+  test("node with no childrenByNode entry composes from routing hints alone", () => {
+    const tree = treeIndex(
+      [treeNode("orphan", { routing_hints: "hint only" })],
+      {},
+    );
+
+    expect(composeNodeIndex("orphan", tree, pageIndex([]))).toBe(
+      "Routing hints: hint only",
+    );
+  });
+
+  test("node child with no summary falls back to first non-empty body line", () => {
+    const tree = treeIndex(
+      [
+        treeNode("root", {}),
+        treeNode("bodyonly", {
+          body: "\n  \nFirst real line\nSecond line",
+        }),
+      ],
+      { root: [{ kind: "node", ref: "bodyonly" }] },
+    );
+
+    const block = composeNodeIndex("root", tree, pageIndex([]));
+
+    expect(block).toBe("[node:bodyonly] First real line");
+  });
+
+  test("node child with empty summary string falls back to body line", () => {
+    const tree = treeIndex(
+      [
+        treeNode("root", {}),
+        treeNode("blank", { summary: "   ", body: "fallback line" }),
+      ],
+      { root: [{ kind: "node", ref: "blank" }] },
+    );
+
+    expect(composeNodeIndex("root", tree, pageIndex([]))).toBe(
+      "[node:blank] fallback line",
+    );
+  });
+
+  test("node child with neither summary nor body emits only its header", () => {
+    const tree = treeIndex(
+      [treeNode("root", {}), treeNode("empty", { body: "   \n\t" })],
+      { root: [{ kind: "node", ref: "empty" }] },
+    );
+
+    expect(composeNodeIndex("root", tree, pageIndex([]))).toBe("[node:empty]");
+  });
+});
diff --git a/assistant/src/memory/v3/index-composition.ts b/assistant/src/memory/v3/index-composition.ts
new file mode 100644
index 00000000000..e1c16e3da8c
--- /dev/null
+++ b/assistant/src/memory/v3/index-composition.ts
@@ -0,0 +1,113 @@
+/**
+ * Memory v3 — Compositional index rendering.
+ *
+ * A v3 tree node has no stored "index" of its own. Instead, a parent node's
+ * index is *composed at read time* by concatenating one description line per
+ * child (a `node:` sub-node's summary or a `page:` leaf's summary) plus a thin
+ * `Routing hints:` trailer drawn from the node's own frontmatter. Nothing here
+ * is persisted — the block is generated fresh every time a descent prompt needs
+ * it, so it always reflects the current state of the children.
+ *
+ * {@link composeNodeIndex} is a **pure function** over an already-built
+ * {@link TreeIndex} (from `tree-index.ts`) and {@link PageIndex} (from
+ * `../v2/page-index.ts`). It does no I/O: the tree walk / driver PR is
+ * responsible for building those indices and feeding them in.
+ *
+ * Resolution rules, per child ref of `nodeId` (in authored order):
+ *   - `kind:"node"` → look up the child in `tree.nodes`; emit
+ *     `"[node:<id>] <summary>"` where summary is the child's
+ *     `frontmatter.summary` if non-empty, else the first non-empty line of its
+ *     body. A node with neither still emits its header (`"[node:<id>]"`).
+ *   - `kind:"page"` → look up `pages.bySlug.get(ref)`; emit
+ *     `"[page:<slug>] <entry.summary>"`.
+ *   - Either lookup missing → emit nothing for that ref. Reporting dangling
+ *     refs is validation's job, not this renderer's.
+ *
+ * The node's own `routing_hints` (when present) are appended last under a
+ * `Routing hints:` trailer. A node with no resolvable children and no routing
+ * hints composes to the empty string.
+ */
+
+import type { PageIndex } from "../v2/page-index.js";
+import type { TreeIndex } from "./tree-index.js";
+import type { TreeNode } from "./types.js";
+
+/** Trailer label introducing a node's own routing hints. */
+const ROUTING_HINTS_LABEL = "Routing hints:";
+
+/**
+ * Resolve a node's display summary: its frontmatter `summary` if non-empty,
+ * otherwise the first non-empty line of its body, otherwise the empty string.
+ * Whitespace is trimmed so a leading blank line in the body never wins.
+ */
+function nodeSummary(node: TreeNode): string {
+  const summary = node.frontmatter.summary?.trim();
+  if (summary) return summary;
+  for (const line of node.body.split("\n")) {
+    const trimmed = line.trim();
+    if (trimmed) return trimmed;
+  }
+  return "";
+}
+
+/**
+ * Render one child ref into its index line, or `null` when the ref's target is
+ * absent from the supplied indices (validation owns reporting those).
+ *
+ * A resolvable `node:` child always yields a line — its header (`[node:<id>]`)
+ * with a trailing summary when one exists. A `page:` child yields
+ * `[page:<slug>] <summary>`; the v2 page index already truncates `summary`.
+ */
+function renderChild(
+  kind: "page" | "node",
+  ref: string,
+  tree: TreeIndex,
+  pages: PageIndex,
+): string | null {
+  if (kind === "node") {
+    const child = tree.nodes.get(ref);
+    if (!child) return null;
+    const summary = nodeSummary(child);
+    return summary ? `[node:${ref}] ${summary}` : `[node:${ref}]`;
+  }
+  const entry = pages.bySlug.get(ref);
+  if (!entry) return null;
+  return `[page:${ref}] ${entry.summary}`;
+}
+
+/**
+ * Compose the prompt-ready index block for `nodeId` from its children's
+ * descriptions plus the node's own routing hints.
+ *
+ * Pure and deterministic: children are emitted in authored order (the order
+ * `tree.childrenByNode` preserves from the node's `children` frontmatter), refs
+ * whose targets are absent are silently skipped, and the node's
+ * `routing_hints` (if present) are appended under a {@link ROUTING_HINTS_LABEL}
+ * trailer. A node with no entry in `childrenByNode`, no resolvable children,
+ * and no routing hints composes to the empty string.
+ *
+ * The result is a plain string with no trailing newline, suitable to drop
+ * directly into an LLM descent prompt.
+ */
+export function composeNodeIndex(
+  nodeId: string,
+  tree: TreeIndex,
+  pages: PageIndex,
+): string {
+  const blocks: string[] = [];
+
+  const childRefs = tree.childrenByNode.get(nodeId) ?? [];
+  for (const { kind, ref } of childRefs) {
+    const line = renderChild(kind, ref, tree, pages);
+    if (line !== null) blocks.push(line);
+  }
+
+  const routingHints = tree.nodes
+    .get(nodeId)
+    ?.frontmatter.routing_hints?.trim();
+  if (routingHints) {
+    blocks.push(`${ROUTING_HINTS_LABEL} ${routingHints}`);
+  }
+
+  return blocks.join("\n");
+}

From 6df7704c96c79a0caf73cf56b80b0a95254bc423 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:54:13 -0400
Subject: [PATCH 09/21] feat(memory-v3): fast filter judging dense hits (sticky
 bypass) (#31979)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/filter.test.ts    | 338 ++++++++++++++++++
 assistant/src/memory/v3/filter.ts             | 258 +++++++++++++
 2 files changed, 596 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/filter.test.ts
 create mode 100644 assistant/src/memory/v3/filter.ts

diff --git a/assistant/src/memory/v3/__tests__/filter.test.ts b/assistant/src/memory/v3/__tests__/filter.test.ts
new file mode 100644
index 00000000000..25a78c76df3
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/filter.test.ts
@@ -0,0 +1,338 @@
+/**
+ * Tests for `assistant/src/memory/v3/filter.ts`.
+ *
+ * Coverage matrix:
+ *   - keep-subset → kept = bypass ∪ judged-kept; dropped = judged minus kept;
+ *     bypass slugs are never judged.
+ *   - model keeping a slug outside the judged set → dropped.
+ *   - empty dense → no LLM call, kept = bypass-relevant only.
+ *   - dense entirely covered by bypass → no LLM call (nothing to judge).
+ *   - provider === null (no provider configured) → fail-open: keep all dense,
+ *     failureReason = "no_provider".
+ *   - provider throws → fail-open (keep all, failureReason = "api_error").
+ *   - missing tool_use block → fail-open (failureReason = "tool_use_missing").
+ *   - tool input failing schema → fail-open (failureReason = "schema_mismatch").
+ *   - request shape: forced tool_choice on `filter_dense_hits`, judged set in
+ *     the user message, abort signal forwarded.
+ *
+ * The provider is injected via `filterDenseHits({ provider })` — no real LLM,
+ * no network, no `mock.module`. `~/.vellum/` is never touched.
+ */
+
+import { describe, expect, test } from "bun:test";
+
+import type {
+  Message,
+  Provider,
+  ProviderResponse,
+  SendMessageOptions,
+  ToolDefinition,
+} from "../../../providers/types.js";
+import type { RetrievalInput } from "../../v2/harness/retriever.js";
+import type { ScoutResult } from "../../v2/harness/trace.js";
+import { filterDenseHits } from "../filter.js";
+
+// ---------------------------------------------------------------------------
+// Helpers.
+// ---------------------------------------------------------------------------
+
+interface ProviderCall { // one recorded `sendMessage` invocation, as received
+  messages: Message[];
+  tools: ToolDefinition[] | undefined;
+  systemPrompt: string | undefined;
+  options: SendMessageOptions | undefined;
+}
+
+/**
+ * Build a recording stub provider: every `sendMessage` call is appended to
+ * `calls`, then the canned `response` is returned — unless the forwarded
+ * signal is already aborted, in which case an "AbortError" is thrown.
+ */
+function makeProvider(
+  response: ProviderResponse,
+  calls: ProviderCall[],
+): Provider {
+  return {
+    name: "stub",
+    sendMessage: async (messages, tools, systemPrompt, options) => {
+      calls.push({ messages, tools, systemPrompt, options });
+      if (!options?.signal?.aborted) {
+        return response;
+      }
+      const abortError = new Error("aborted");
+      abortError.name = "AbortError";
+      throw abortError;
+    },
+  };
+}
+
+/** Stub provider whose `sendMessage` always rejects with `Error("boom")`. */
+function makeThrowingProvider(): Provider {
+  // Reject (rather than resolve) so the caller's error path is exercised.
+  const stub: Provider = {
+    name: "throwing-stub",
+    sendMessage: () => Promise.reject(new Error("boom")),
+  };
+  return stub;
+}
+
+/** Stub provider for tests where the filter must not make an LLM round-trip. */
+function makeNeverCalledProvider(): Provider {
+  const stub: Provider = {
+    name: "never-called-stub",
+    sendMessage: () =>
+      Promise.reject(new Error("provider should not be called")),
+  };
+  return stub;
+}
+
+function filterToolResponse(input: Record<string, unknown>): ProviderResponse {
+  // One tool_use block whose name matches the tool the filter forces.
+  const name = "filter_dense_hits";
+  return {
+    content: [{ type: "tool_use", id: "tu-1", name, input }],
+    model: "stub-model",
+    stopReason: "tool_use",
+    usage: { inputTokens: 0, outputTokens: 0 },
+  };
+}
+
+/** Canned reply whose only content block is text, so it holds no tool_use. */
+function textOnlyResponse(): ProviderResponse {
+  return {
+    content: [{ type: "text", text: "no tool here" }],
+    usage: { inputTokens: 0, outputTokens: 0 },
+    stopReason: "end_turn",
+    model: "stub-model",
+  };
+}
+
+/** Minimal `RetrievalInput` (filter reads only nowText/signal); overrides win. */
+function makeInput(overrides?: Partial<RetrievalInput>): RetrievalInput {
+  const defaults: RetrievalInput = {
+    config: {} as unknown as RetrievalInput["config"],
+    nowText: "2026-05-25 10:00 PT",
+    priorEverInjected: [],
+    recentTurnPairs: [],
+    workspaceDir: "/tmp/does-not-matter",
+  };
+  return { ...defaults, ...overrides };
+}
+
+function denseResult(slugs: string[]): ScoutResult {
+  return { lane: "dense", slugs }; // dense-lane scout result wrapping `slugs`
+}
+
+// ---------------------------------------------------------------------------
+// Tests.
+// ---------------------------------------------------------------------------
+
+describe("filterDenseHits — judged keep/drop", () => {
+  test("kept = bypass ∪ judged-kept; bypass slugs are never judged", async () => {
+    const calls: ProviderCall[] = [];
+    // Dense surfaces a, b, c, plus bypass slug `x`. Model keeps a, c; drops b.
+    const provider = makeProvider(
+      filterToolResponse({ keep_slugs: ["c", "a"] }),
+      calls,
+    );
+
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b", "c", "x"]),
+      sticky: new Set(["x"]),
+      bypass: new Set(["x"]),
+      provider,
+    });
+
+    // bypass first (x), then judged-kept in model order (c, a).
+    expect(result.kept).toEqual(["x", "c", "a"]);
+    // Only the non-bypass slugs are judged; b was dropped.
+    expect(result.trace.judged).toEqual(["a", "b", "c"]);
+    expect(result.trace.dropped).toEqual(["b"]);
+    expect(result.failureReason).toBeUndefined();
+    expect(calls).toHaveLength(1);
+    // The bypass slug `x` was never shown to the model.
+    const userText = calls[0].messages[0].content
+      .map((b) => (b.type === "text" ? b.text : ""))
+      .join("\n");
+    expect(userText).not.toContain("x");
+  });
+
+  test("forces tool_choice on filter_dense_hits and surfaces judged candidates", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      filterToolResponse({ keep_slugs: ["a"] }),
+      calls,
+    );
+
+    await filterDenseHits({
+      input: makeInput({ nowText: "NOW-MARKER" }),
+      dense: denseResult(["a", "b"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      provider,
+    });
+
+    const call = calls[0];
+    expect(call.options?.config?.tool_choice).toEqual({
+      type: "tool",
+      name: "filter_dense_hits",
+    });
+    expect(call.options?.config?.callSite).toBe("memoryV3Filter");
+    expect(call.tools?.[0].name).toBe("filter_dense_hits");
+    const userText = call.messages[0].content
+      .map((b) => (b.type === "text" ? b.text : ""))
+      .join("\n");
+    expect(userText).toContain("NOW-MARKER");
+    // A bare "a" would also match inside "<candidate_slugs>"; pin the block.
+    expect(userText).toContain("<candidate_slugs>\na\nb\n</candidate_slugs>");
+  });
+
+  test("drops a model-kept slug outside the judged set", async () => {
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      filterToolResponse({ keep_slugs: ["a", "ghost"] }),
+      calls,
+    );
+
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      provider,
+    });
+
+    expect(result.kept).toEqual(["a"]);
+    expect(result.trace.dropped).toEqual(["b"]);
+  });
+
+  test("forwards an abort signal to the provider call", async () => {
+    const calls: ProviderCall[] = [];
+    const controller = new AbortController();
+    controller.abort();
+    const provider = makeProvider(
+      filterToolResponse({ keep_slugs: ["a"] }),
+      calls,
+    );
+
+    // Aborted signal makes the stub throw → filter fails open (keep all).
+    const result = await filterDenseHits({
+      input: makeInput({ signal: controller.signal }),
+      dense: denseResult(["a", "b"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      provider,
+    });
+
+    expect(calls[0].options?.signal).toBe(controller.signal);
+    expect([...result.kept].sort()).toEqual(["a", "b"]);
+    expect(result.failureReason).toBe("api_error");
+  });
+});
+
+describe("filterDenseHits — no LLM call", () => {
+  test("empty dense → no call, kept = bypass-relevant only", async () => {
+    const provider = makeNeverCalledProvider();
+
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult([]),
+      sticky: new Set(["x"]),
+      bypass: new Set(["x"]),
+      provider,
+    });
+
+    expect(result.kept).toEqual(["x"]);
+    expect(result.trace).toEqual({ judged: [], dropped: [] });
+    expect(result.failureReason).toBeUndefined();
+  });
+
+  test("dense fully covered by bypass → no call (nothing to judge)", async () => {
+    const provider = makeNeverCalledProvider();
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["x", "y"]),
+      sticky: new Set(["x", "y"]),
+      bypass: new Set(["x", "y"]),
+      provider,
+    });
+
+    expect([...result.kept].sort()).toEqual(["x", "y"]);
+    expect(result.trace).toEqual({ judged: [], dropped: [] });
+    expect(result.failureReason).toBeUndefined(); // a swallowed call → "api_error"
+  });
+});
+
+describe("filterDenseHits — fail-open", () => {
+  test("provider === null keeps all dense with failureReason no_provider", async () => {
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b", "c"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      provider: null,
+    });
+
+    expect([...result.kept].sort()).toEqual(["a", "b", "c"]);
+    expect(result.trace.judged).toEqual(["a", "b", "c"]);
+    expect(result.trace.dropped).toEqual([]);
+    expect(result.failureReason).toBe("no_provider");
+  });
+
+  test("fail-open still unions bypass slugs into kept", async () => {
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b", "x"]),
+      sticky: new Set(["x"]),
+      bypass: new Set(["x"]),
+      provider: null,
+    });
+
+    // bypass `x` first, then the judged-but-kept-by-fail-open slugs a, b.
+    expect(result.kept).toEqual(["x", "a", "b"]);
+    expect(result.trace.judged).toEqual(["a", "b"]);
+  });
+
+  test("provider throw keeps all dense (failureReason api_error)", async () => {
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      provider: makeThrowingProvider(),
+    });
+
+    expect([...result.kept].sort()).toEqual(["a", "b"]);
+    expect(result.failureReason).toBe("api_error");
+  });
+
+  test("missing tool_use block keeps all dense (failureReason tool_use_missing)", async () => {
+    const calls: ProviderCall[] = [];
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      provider: makeProvider(textOnlyResponse(), calls),
+    });
+
+    expect([...result.kept].sort()).toEqual(["a", "b"]);
+    expect(result.failureReason).toBe("tool_use_missing");
+  });
+
+  test("schema-mismatched tool input keeps all dense (failureReason schema_mismatch)", async () => {
+    const calls: ProviderCall[] = [];
+    const result = await filterDenseHits({
+      input: makeInput(),
+      dense: denseResult(["a", "b"]),
+      sticky: new Set(),
+      bypass: new Set(),
+      // `keep_slugs` is required; missing it fails the Zod schema.
+      provider: makeProvider(filterToolResponse({ wrong_key: ["a"] }), calls),
+    });
+
+    expect([...result.kept].sort()).toEqual(["a", "b"]);
+    expect(result.failureReason).toBe("schema_mismatch");
+  });
+});
diff --git a/assistant/src/memory/v3/filter.ts b/assistant/src/memory/v3/filter.ts
new file mode 100644
index 00000000000..79892178809
--- /dev/null
+++ b/assistant/src/memory/v3/filter.ts
@@ -0,0 +1,258 @@
+/**
+ * Memory v3 — fast dense-hit filter.
+ *
+ * The dense scout lane surfaces embedding-similarity candidates that span
+ * subtrees: some are meaningful cross-domain associations worth carrying into
+ * the gate, others are spurious near-neighbors that only crowd the slate. This
+ * module makes **one cheap LLM call** to keep the meaningful associations and
+ * drop the noise, *before* the more expensive selection gate runs.
+ *
+ * What it judges. Only the bounded dense candidate set (the scout lane is
+ * already capped at ~50–200 by quota/MMR — the filter never sees the whole
+ * corpus). Hot pages and near-exact sparse hits arrive via the scouts'
+ * `sticky` / `bypass` sets; slugs in `bypass` are **never judged**: a literal
+ * keyword hit or a page the user has been touching is a strong enough signal
+ * that it shouldn't have to earn its place through a fallible cheap judgment.
+ * Bypass slugs are unioned straight into `kept`.
+ *
+ * Fail-open. If no provider is configured or the call errors / returns an
+ * unusable response, the filter keeps *all* dense candidates and surfaces a
+ * `failureReason` so the loop can record that the filter was bypassed. Dropping
+ * candidates on a model outage would silently starve retrieval; keeping them is
+ * the safe degradation (the downstream gate still narrows the slate).
+ *
+ * No LLM call when there is nothing to judge. An empty judged set (dense lane
+ * empty, or fully covered by `bypass`) short-circuits to `kept` = the bypass
+ * slugs (no judged additions), with no provider round-trip.
+ *
+ * This module is currently unwired — a later PR composes it into the loop.
+ */
+
+import { z } from "zod";
+
+import {
+  extractToolUse,
+  getConfiguredProvider,
+} from "../../providers/provider-send-message.js";
+import type {
+  Message,
+  Provider,
+  ToolDefinition,
+} from "../../providers/types.js";
+import { getLogger } from "../../util/logger.js";
+import type { RetrievalInput } from "../v2/harness/retriever.js";
+import type { ScoutResult } from "../v2/harness/trace.js";
+
+const log = getLogger("memory-v3-filter");
+
+/** Tool name forced via `tool_choice`; tests match it by its literal value. */
+const FILTER_TOOL_NAME = "filter_dense_hits";
+
+/**
+ * Arguments to one filter invocation.
+ *
+ * `dense` is the bounded dense scout result; only its slugs that are *not* in
+ * `bypass` are judged. `bypass` slugs skip judgment entirely: every one of
+ * them is kept (whether or not it also appears in `dense`) and none is sent
+ * to the model. `sticky` is the broader keep-in-the-running set (hot +
+ * near-exact sparse); `filterDenseHits` accepts it but does not read it.
+ */
+export interface FilterDenseHitsArgs {
+  input: RetrievalInput;
+  dense: ScoutResult;
+  sticky: Set<string>;
+  bypass: Set<string>;
+  /**
+   * Provider override seam for tests. Production leaves this unset and the
+   * filter resolves `getConfiguredProvider("memoryV3Filter")`. `null` is
+   * distinct from `undefined`: passing `null` simulates "no provider
+   * configured" and exercises the fail-open path without resolving the real
+   * registry.
+   */
+  provider?: Provider | null;
+}
+
+export interface FilterDenseHitsResult {
+  /** Final kept slugs: all bypass slugs first, then judged-kept (deduped). */
+  kept: string[];
+  /** Inspection trace: which dense slugs were judged and which were dropped. */
+  trace: { judged: string[]; dropped: string[] };
+  /**
+   * Present only when the filter failed open (kept every dense candidate):
+   * "no_provider", "api_error", "tool_use_missing", or "schema_mismatch".
+   * The loop can surface this to flag a bypassed filter.
+   */
+  failureReason?: string;
+}
+
+/**
+ * Forced-tool definition for one filter call. Items of `keep_slugs` may only
+ * be judged candidates (schema enum); judged slugs left out are dropped.
+ */
+function buildFilterTool(judgedSlugs: readonly string[]): ToolDefinition {
+  const toolDescription =
+    "From the candidate concept pages surfaced by embedding similarity for " +
+    "the current turn, keep the ones that are meaningful associations worth " +
+    "surfacing and drop the spurious near-neighbors. Return keep_slugs as the " +
+    "subset to retain — choose only from the candidate set. Lean toward " +
+    "keeping a plausible cross-domain association over dropping it.";
+  return {
+    name: FILTER_TOOL_NAME,
+    description: toolDescription,
+    input_schema: {
+      type: "object",
+      properties: {
+        keep_slugs: {
+          type: "array",
+          items: { type: "string", enum: Array.from(judgedSlugs) },
+          description:
+            "The subset of candidate page slugs to keep. Choose only from the candidate set.",
+        },
+      },
+      required: ["keep_slugs"],
+    },
+  };
+}
+
+const FilterToolResultSchema = z.object({
+  keep_slugs: z.array(z.string()), // required; narrowed to judged slugs later
+});
+
+/**
+ * Assemble a {@link FilterDenseHitsResult}. `kept` lists all bypass slugs,
+ * then the judged-kept slugs in the order given, without duplicates. `trace`
+ * copies the judged list and reports the judged slugs absent from judgedKept.
+ */
+function buildResult(
+  bypass: Set<string>,
+  judged: readonly string[],
+  judgedKept: readonly string[],
+  failureReason?: string,
+): FilterDenseHitsResult {
+  // A Set iterates in first-insertion order, so this yields every bypass slug
+  // followed by each judged-kept slug not already present.
+  const kept = [...new Set<string>([...bypass, ...judgedKept])];
+
+  // Dropped = judged candidates missing from the keep list, in judged order.
+  const retained = new Set(judgedKept);
+  const dropped = judged.filter((slug) => !retained.has(slug));
+
+  const result: FilterDenseHitsResult = {
+    kept,
+    trace: { judged: Array.from(judged), dropped },
+  };
+  if (failureReason !== undefined) result.failureReason = failureReason;
+  return result;
+}
+
+/**
+ * Run one pass of the fast dense-hit filter.
+ *
+ * Judges only the dense slugs that are not in `bypass`, using at most one
+ * forced-tool LLM call; when that judged set is empty no call is made. Bypass
+ * slugs are always kept. If judging cannot complete (no provider, provider
+ * throw, missing tool_use, schema mismatch) the filter fails open: every
+ * dense candidate is kept and `failureReason` names the cause.
+ */
+export async function filterDenseHits(
+  args: FilterDenseHitsArgs,
+): Promise<FilterDenseHitsResult> {
+  const { input, dense, bypass } = args;
+
+  // Bypass slugs are never judged; everything else in the dense lane is.
+  const judged = dense.slugs.filter((slug) => !bypass.has(slug));
+  if (judged.length === 0) {
+    // Nothing to judge → skip the LLM call; only the bypass slugs are kept.
+    return buildResult(bypass, judged, judged);
+  }
+
+  // Fail-open shorthand: keep every judged candidate and record the reason.
+  const failOpen = (reason: string): FilterDenseHitsResult =>
+    buildResult(bypass, judged, judged, reason);
+
+  // An explicit `provider` in args (even `null`) wins so tests can inject a
+  // stub; when it is omitted, resolve the configured `memoryV3Filter` site.
+  const provider =
+    args.provider === undefined
+      ? await getConfiguredProvider("memoryV3Filter")
+      : args.provider;
+  if (!provider) {
+    log.warn(
+      "memoryV3Filter provider unavailable; failing open (keeping all dense)",
+    );
+    return failOpen("no_provider");
+  }
+
+  // Request pieces: system prompt, a two-part user message, the forced tool.
+  const systemPrompt =
+    "You are a fast relevance filter for a memory-retrieval loop. You are given " +
+    "candidate concept pages surfaced by embedding similarity for the current " +
+    "turn. Keep the pages that are meaningful associations and drop the " +
+    "spurious near-neighbors. When in doubt, keep.";
+
+  const nowBlock = `<now>\n${input.nowText}\n</now>`;
+  const slugLines = judged.join("\n");
+  const userMsg: Message = {
+    role: "user",
+    content: [
+      {
+        type: "text",
+        text: nowBlock,
+      },
+      {
+        type: "text",
+        text: `<candidate_slugs>\n${slugLines}\n</candidate_slugs>`,
+      },
+    ],
+  };
+  const filterTool = buildFilterTool(judged);
+
+  // Any throw from the provider call is caught and handled by failing open.
+  let response;
+  try {
+    response = await provider.sendMessage(
+      [userMsg],
+      [filterTool],
+      systemPrompt,
+      {
+        config: {
+          callSite: "memoryV3Filter" as const,
+          tool_choice: { type: "tool" as const, name: FILTER_TOOL_NAME },
+        },
+        ...(input.signal ? { signal: input.signal } : {}),
+      },
+    );
+  } catch (err) {
+    log.warn({ err }, "Filter provider call threw; failing open (keep all)");
+    return failOpen("api_error");
+  }
+
+  // The reply must contain a tool_use block naming our forced tool.
+  const toolBlock = extractToolUse(response);
+  if (!toolBlock || toolBlock.name !== FILTER_TOOL_NAME) {
+    log.warn(
+      { stopReason: response.stopReason },
+      "Filter model returned no filter_dense_hits tool_use; failing open (keep all)",
+    );
+    return failOpen("tool_use_missing");
+  }
+
+  // Validate the tool input's shape before trusting it.
+  const parsed = FilterToolResultSchema.safeParse(toolBlock.input);
+  if (!parsed.success) {
+    log.warn(
+      { error: parsed.error.message },
+      "Filter tool input did not match schema; failing open (keep all)",
+    );
+    return failOpen("schema_mismatch");
+  }
+
+  // Keep only slugs the model was actually shown. Passing the survivors
+  // through a Set removes repeats while preserving the model's order.
+  const shown = new Set(judged);
+  const judgedKept = [
+    ...new Set(parsed.data.keep_slugs.filter((slug) => shown.has(slug))),
+  ];
+
+  return buildResult(bypass, judged, judgedKept);
+}

From a1218c9f6d32ea3c7e4f0124b576a44d89f579f6 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 02:54:28 -0400
Subject: [PATCH 10/21] feat(memory-v3): parallel-fan-out traversal with
 cycle/visited guards (#31980)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/traversal.test.ts | 395 ++++++++++++++++++
 assistant/src/memory/v3/traversal.ts          | 194 +++++++++
 2 files changed, 589 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/traversal.test.ts
 create mode 100644 assistant/src/memory/v3/traversal.ts

diff --git a/assistant/src/memory/v3/__tests__/traversal.test.ts b/assistant/src/memory/v3/__tests__/traversal.test.ts
new file mode 100644
index 00000000000..4a742815a94
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/traversal.test.ts
@@ -0,0 +1,395 @@
+/**
+ * Tests for `assistant/src/memory/v3/traversal.ts`.
+ *
+ * Provider-free: `descend` is always a deterministic stub. Coverage:
+ *   - resolveChildren is a thin accessor (known node / leaf / unknown id).
+ *   - linear descent collects the expected leaf pages and emits a TreeLevel per
+ *     walked node in walk order.
+ *   - a DAG (sub-node shared by two parents) is walked exactly once.
+ *   - an injected cycle (A ↔ B) terminates.
+ *   - breadthBudget caps the descents per node (the rest are skipped).
+ *   - maxDepth halts the recursion at the right level.
+ *   - seeds start the walk mid-tree (alongside / instead of the root).
+ *   - reasoning from the descend result is threaded onto the level; defaults
+ *     to "" when omitted.
+ *
+ * Fixtures are plain in-memory `TreeIndex` objects — no disk, no workspace.
+ */
+
+import { describe, expect, test } from "bun:test";
+
+import type { DescendResult } from "../traversal.js";
+import { resolveChildren, walkTree } from "../traversal.js";
+import type { ChildRef, TreeIndex } from "../tree-index.js";
+
+// ---------------------------------------------------------------------------
+// Fixture helpers
+// ---------------------------------------------------------------------------
+
+function page(ref: string): ChildRef {
+  return { kind: "page", ref }; // `page:` child ref fixture
+}
+
+function node(ref: string): ChildRef {
+  return { kind: "node", ref }; // `node:` child ref fixture
+}
+
+/**
+ * Build a minimal in-memory `TreeIndex` from a forward-adjacency record (node
+ * id → ordered child refs). The traversal exercises only `childrenByNode` and
+ * `root`, so `nodes` and the reverse-adjacency maps are left empty.
+ */
+function makeTree(
+  root: string,
+  childrenByNode: Record<string, ChildRef[]>,
+): TreeIndex {
+  return {
+    nodes: new Map(),
+    childrenByNode: new Map(Object.entries(childrenByNode)),
+    parentsByNode: new Map(),
+    pageParents: new Map(),
+    root,
+  };
+}
+
+/** "Descend all" stub: picks every `node` child it is offered, in order. */
+function descendAll(
+  _nodeId: string,
+  children: ReadonlyArray<ChildRef>,
+): DescendResult {
+  return { descend: children.filter(({ kind }) => kind === "node") };
+}
+
+// ---------------------------------------------------------------------------
+// resolveChildren
+// ---------------------------------------------------------------------------
+
+describe("resolveChildren", () => {
+  test("returns the ordered child refs for a known node", () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), page("p")],
+    });
+    expect(resolveChildren(tree, "_root")).toEqual([node("a"), page("p")]);
+  });
+
+  test("returns [] for a leaf / unknown node id", () => {
+    const tree = makeTree("_root", { _root: [] });
+    expect(resolveChildren(tree, "missing")).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Linear descent
+// ---------------------------------------------------------------------------
+
+describe("walkTree — linear descent", () => {
+  test("collects expected leaf pages and emits a level per walked node", async () => {
+    // _root → node:a → node:b → page:leaf  (plus a page on each level)
+    const tree = makeTree("_root", {
+      _root: [page("p-root"), node("a")],
+      a: [page("p-a"), node("b")],
+      b: [page("leaf")],
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 8,
+      descend: descendAll,
+    });
+
+    expect([...pages].sort()).toEqual(["leaf", "p-a", "p-root"]);
+    expect(levels.map((l) => l.node)).toEqual(["_root", "a", "b"]);
+
+    expect(levels[0]).toMatchObject({
+      node: "_root",
+      considered: ["a"],
+      descended: ["a"],
+      skipped: [],
+      reasoning: "",
+    });
+    expect(levels[2]).toMatchObject({
+      node: "b",
+      considered: [],
+      descended: [],
+      skipped: [],
+    });
+  });
+
+  test("defaults start to tree.root", async () => {
+    const tree = makeTree("home", {
+      home: [page("only")],
+    });
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 4,
+      maxDepth: 4,
+      descend: descendAll,
+    });
+    expect([...pages]).toEqual(["only"]);
+    expect(levels.map((l) => l.node)).toEqual(["home"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// DAG dedup
+// ---------------------------------------------------------------------------
+
+describe("walkTree — DAG dedup", () => {
+  test("a sub-node shared by two parents is walked exactly once", async () => {
+    // _root → {node:left, node:right}; both → node:shared → page:s
+    const tree = makeTree("_root", {
+      _root: [node("left"), node("right")],
+      left: [node("shared")],
+      right: [node("shared")],
+      shared: [page("s")],
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 8,
+      descend: descendAll,
+    });
+
+    expect([...pages]).toEqual(["s"]);
+    // `shared` appears once even though both left and right descend into it.
+    const walked = levels.map((l) => l.node);
+    expect(walked.filter((n) => n === "shared")).toHaveLength(1);
+    expect(walked.sort()).toEqual(["_root", "left", "right", "shared"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Cycle termination
+// ---------------------------------------------------------------------------
+
+describe("walkTree — cycle termination", () => {
+  test("an injected A ↔ B cycle terminates and walks each once", async () => {
+    const tree = makeTree("a", {
+      a: [node("b"), page("pa")],
+      b: [node("a"), page("pb")],
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 100,
+      descend: descendAll,
+    });
+
+    expect([...pages].sort()).toEqual(["pa", "pb"]);
+    const walked = levels.map((l) => l.node).sort();
+    expect(walked).toEqual(["a", "b"]);
+  });
+
+  test("a self-loop terminates", async () => {
+    const tree = makeTree("solo", {
+      solo: [node("solo"), page("p")],
+    });
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 4,
+      maxDepth: 100,
+      descend: descendAll,
+    });
+    expect([...pages]).toEqual(["p"]);
+    expect(levels.map((l) => l.node)).toEqual(["solo"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Breadth budget
+// ---------------------------------------------------------------------------
+
+describe("walkTree — breadthBudget", () => {
+  test("caps the descents per node and records the rest as skipped", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b"), node("c"), node("d")],
+      a: [page("pa")],
+      b: [page("pb")],
+      c: [page("pc")],
+      d: [page("pd")],
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 2,
+      maxDepth: 8,
+      descend: descendAll,
+    });
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.considered).toEqual(["a", "b", "c", "d"]);
+    expect(rootLevel.descended).toEqual(["a", "b"]);
+    expect(rootLevel.skipped).toEqual(["c", "d"]);
+
+    // Only the first two children's pages are reached.
+    expect([...pages].sort()).toEqual(["pa", "pb"]);
+    expect(levels.map((l) => l.node).sort()).toEqual(["_root", "a", "b"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Depth budget
+// ---------------------------------------------------------------------------
+
+describe("walkTree — maxDepth", () => {
+  test("halts recursion at the configured depth", async () => {
+    // _root(0) → a(1) → b(2) → c(3)
+    const tree = makeTree("_root", {
+      _root: [node("a")],
+      a: [node("b"), page("pa")],
+      b: [node("c"), page("pb")],
+      c: [page("pc")],
+    });
+
+    // maxDepth 1 walks depth 0 (_root) and depth 1 (a) only; b/c never walked.
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 1,
+      descend: descendAll,
+    });
+
+    expect(levels.map((l) => l.node)).toEqual(["_root", "a"]);
+    // `a`'s page is collected; b/c and their pages are not reached.
+    expect([...pages]).toEqual(["pa"]);
+  });
+
+  test("maxDepth 0 walks only the start level", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), page("pr")],
+      a: [page("pa")],
+    });
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 0,
+      descend: descendAll,
+    });
+    expect(levels.map((l) => l.node)).toEqual(["_root"]);
+    expect([...pages]).toEqual(["pr"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Seeds
+// ---------------------------------------------------------------------------
+
+describe("walkTree — seeds", () => {
+  test("seeds start the walk mid-tree alongside start", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), page("pr")],
+      a: [page("pa")],
+      mid: [page("pm"), node("deep")],
+      deep: [page("pd")],
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      seeds: ["mid"],
+      breadthBudget: 8,
+      maxDepth: 8,
+      descend: descendAll,
+    });
+
+    // Both the root branch and the seeded `mid` subtree are explored.
+    expect([...pages].sort()).toEqual(["pa", "pd", "pm", "pr"]);
+    expect(levels.map((l) => l.node).sort()).toEqual([
+      "_root",
+      "a",
+      "deep",
+      "mid",
+    ]);
+  });
+
+  test("a node that is both start and seed is walked once", async () => {
+    const tree = makeTree("dup", {
+      dup: [page("p")],
+    });
+    const { levels } = await walkTree(tree, {
+      start: "dup",
+      seeds: ["dup"],
+      breadthBudget: 4,
+      maxDepth: 4,
+      descend: descendAll,
+    });
+    expect(levels.map((l) => l.node)).toEqual(["dup"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Descend decision threading
+// ---------------------------------------------------------------------------
+
+describe("walkTree — descend decision", () => {
+  test("threads the descend reasoning onto the level", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b")],
+      a: [page("pa")],
+      b: [page("pb")],
+    });
+
+    const descend = (
+      _nodeId: string,
+      children: ReadonlyArray<ChildRef>,
+    ): DescendResult => ({
+      // Pick only "a".
+      descend: children.filter((c) => c.kind === "node" && c.ref === "a"),
+      reasoning: "a is more relevant",
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 8,
+      descend,
+    });
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.reasoning).toBe("a is more relevant");
+    expect(rootLevel.descended).toEqual(["a"]);
+    expect(rootLevel.skipped).toEqual(["b"]);
+    expect([...pages]).toEqual(["pa"]);
+  });
+
+  test("ignores descend picks that were not offered node children", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), page("pr")],
+      a: [page("pa")],
+    });
+
+    const descend = (): DescendResult => ({
+      // "ghost" was never offered; "pr" is a page, not a node child.
+      descend: [node("ghost"), page("pr")],
+    });
+
+    const { pages, levels } = await walkTree(tree, {
+      breadthBudget: 8,
+      maxDepth: 8,
+      descend,
+    });
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.considered).toEqual(["a"]);
+    expect(rootLevel.descended).toEqual([]);
+    expect(rootLevel.skipped).toEqual(["a"]);
+    // No node descent happened; only the root's own page is collected.
+    expect([...pages]).toEqual(["pr"]);
+    expect(levels.map((l) => l.node)).toEqual(["_root"]);
+  });
+
+  test("dedups repeated descend picks before applying breadthBudget", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b")],
+      a: [page("pa")],
+      b: [page("pb")],
+    });
+
+    const descend = (): DescendResult => ({
+      // "a" repeated should count once; budget of 2 then still admits "b".
+      descend: [node("a"), node("a"), node("b")],
+    });
+
+    const { levels } = await walkTree(tree, {
+      breadthBudget: 2,
+      maxDepth: 8,
+      descend,
+    });
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.descended).toEqual(["a", "b"]);
+    expect(rootLevel.skipped).toEqual([]);
+  });
+});
diff --git a/assistant/src/memory/v3/traversal.ts b/assistant/src/memory/v3/traversal.ts
new file mode 100644
index 00000000000..2cd625f0a32
--- /dev/null
+++ b/assistant/src/memory/v3/traversal.ts
@@ -0,0 +1,194 @@
+/**
+ * Memory v3 — Tree traversal primitives.
+ *
+ * The *mechanical* half of the v3 read loop: a deterministic, provider-free
+ * walk over the {@link TreeIndex} DAG. The intelligence — *which* child nodes
+ * to recurse into at each level — is injected via the `descend` callback so
+ * this module stays pure and unit-testable without an LLM. The driver PR wires
+ * `descend` to the model's descend/skip decision; here `descend` is just a
+ * function `(nodeId, children) => chosen node-children`.
+ *
+ * `walkTree` fans out from a `start` node and any `seeds`, level by level:
+ *   - At each node it resolves the ordered child refs, hands them to `descend`,
+ *     and recurses into the chosen `node:` children (capped by `breadthBudget`).
+ *   - Every `page:` child encountered anywhere in the walk is collected into the
+ *     returned `pages` set — pages are leaves, never recursed into.
+ *   - A `visited` set keyed by canonical id (`node:<id>`) dedups shared
+ *     sub-nodes (the DAG case) and terminates cycles (A ↔ B). A node is walked
+ *     at most once regardless of how many parents reference it.
+ *   - `maxDepth` is the deepest level walked (the start/seed level is depth
+ *     0); picks made at that depth are traced but not recursed into.
+ *
+ * Each walked node emits one {@link TreeLevel} (the `harness/trace.ts` shape)
+ * recording what was considered, descended, and skipped. `reasoning` is
+ * supplied by the `descend` callback (the driver attaches the model's stated
+ * reason); the mechanical walk defaults it to `""`.
+ *
+ * Processing is strictly level-by-level so `visited` mutations are never raced:
+ * within a level the per-node `descend` calls run concurrently (`Promise.all`),
+ * but the chosen children for the *next* level are only dedup'd and enqueued
+ * after the whole level resolves.
+ */
+
+import type { TreeLevel } from "../v2/harness/trace.js";
+import type { ChildRef, TreeIndex } from "./tree-index.js";
+
+/**
+ * The descend decision injected into {@link walkTree}. Called once per walked
+ * node with that node's id and its ordered child refs (page refs included); it
+ * returns the `node:` children to recurse into, directly or as a promise.
+ *
+ * Order matters: picks are honored in returned order until `breadthBudget`
+ * distinct valid ones are taken. Picks that are not `node:` children of
+ * `nodeId`, or that repeat an earlier pick, are ignored. `reasoning` is
+ * optional; it is copied onto the emitted {@link TreeLevel}, defaulting to
+ * `""` when absent.
+ */
+export type DescendDecision = (
+  nodeId: string,
+  children: ReadonlyArray<ChildRef>,
+) => Promise<DescendResult> | DescendResult;
+
+/**
+ * What a {@link DescendDecision} returns for one node: `descend` is the ordered
+ * list of child refs picked for recursion (non-`node:` entries are ignored by
+ * the walk), and `reasoning` is an optional rationale stored on the level.
+ */
+export interface DescendResult {
+  descend: ChildRef[];
+  reasoning?: string;
+}
+
+/** Options controlling a {@link walkTree} run. */
+export interface WalkOptions {
+  /** Entry node id; defaults to `tree.root`. */
+  start?: string;
+  /** Extra depth-0 node ids walked alongside `start`; repeats are walked once. */
+  seeds?: string[];
+  /** Max distinct `node:` picks accepted per node, taken in returned order. */
+  breadthBudget: number;
+  /** Deepest level walked, inclusive; the start/seed level is depth 0. */
+  maxDepth: number;
+  /** Injected descend decision (the LLM hook), called once per walked node. */
+  descend: DescendDecision;
+}
+
+/** The result of a {@link walkTree} run. */
+export interface WalkResult {
+  /** The `ref` of every `page:` child of every walked node, dedup'd. */
+  pages: Set<string>;
+  /** One {@link TreeLevel} per walked node: level by level, frontier order within. */
+  levels: TreeLevel[];
+}
+
+/**
+ * Ordered child refs recorded for `nodeId` in `tree.childrenByNode`. An id with
+ * no entry yields an empty array, never `undefined`, so callers iterate freely.
+ */
+export function resolveChildren(
+  tree: TreeIndex,
+  nodeId: string,
+): ReadonlyArray<ChildRef> {
+  const recorded = tree.childrenByNode.get(nodeId);
+  return recorded ?? [];
+}
+
+/** Build the `node:<id>` key under which a node id is tracked as visited. */
+function nodeKey(nodeId: string): string {
+  return "node:" + nodeId;
+}
+
+/**
+ * Walk the {@link TreeIndex} DAG level by level from `start` (default
+ * `tree.root`) plus any `seeds`, asking the injected `descend` decision which
+ * `node:` children to follow from each walked node.
+ *
+ * Resolves to the collected `page:` refs and one {@link TreeLevel} per node.
+ */
+export async function walkTree(
+  tree: TreeIndex,
+  opts: WalkOptions,
+): Promise<WalkResult> {
+  const { breadthBudget, maxDepth, descend } = opts;
+  const start = opts.start ?? tree.root;
+
+  const pages = new Set<string>();
+  const levels: TreeLevel[] = [];
+  const visited = new Set<string>();
+
+  // Seed the frontier with `start` + `seeds`, dedup'd and marked visited up
+  // front so a node that is both the start and a seed is walked once.
+  let frontier: string[] = [];
+  for (const id of [start, ...(opts.seeds ?? [])]) {
+    const key = nodeKey(id);
+    if (visited.has(key)) continue;
+    visited.add(key);
+    frontier.push(id);
+  }
+
+  // Depth 0 is the start/seed level; levels 0..`maxDepth` inclusive are walked.
+  for (let depth = 0; depth <= maxDepth && frontier.length > 0; depth++) {
+    // Resolve every node on this level concurrently. `visited` is not mutated
+    // here — only after the whole level settles — so the concurrency is safe.
+    const levelResults = await Promise.all(
+      frontier.map(async (nodeId) => {
+        const children = resolveChildren(tree, nodeId);
+        const result = await descend(nodeId, children);
+        return { nodeId, children, result };
+      }),
+    );
+
+    const nextFrontier: string[] = [];
+
+    for (const { nodeId, children, result } of levelResults) {
+      // Every page child is a leaf hit, regardless of what `descend` returned.
+      for (const child of children) {
+        if (child.kind === "page") pages.add(child.ref);
+      }
+
+      // The set of node children this node legitimately offered, in order. The
+      // descend pick is intersected with this so a stub returning bogus or
+      // duplicate refs can't make the walk recurse into something not offered.
+      const offeredNodes = children.filter((c) => c.kind === "node");
+      const offeredRefs = new Set(offeredNodes.map((c) => c.ref));
+
+      // Honor the descend pick in the order it was returned, dedup'd, filtered
+      // to genuinely-offered node children, and capped by `breadthBudget`.
+      const descended: string[] = [];
+      const descendedSet = new Set<string>();
+      for (const choice of result.descend) {
+        if (choice.kind !== "node") continue;
+        if (!offeredRefs.has(choice.ref)) continue;
+        if (descendedSet.has(choice.ref)) continue;
+        if (descended.length >= breadthBudget) break;
+        descendedSet.add(choice.ref);
+        descended.push(choice.ref);
+      }
+
+      const considered = offeredNodes.map((c) => c.ref);
+      const skipped = considered.filter((ref) => !descendedSet.has(ref));
+
+      levels.push({
+        node: nodeId,
+        considered,
+        descended,
+        skipped,
+        reasoning: result.reasoning ?? "",
+      });
+
+      // Enqueue chosen node children for the next level. A pick already in
+      // `visited` (shared sub-node or cycle) stays in this level's `descended`
+      // trace but is not enqueued again, so each node is walked at most once.
+      for (const ref of descended) {
+        const key = nodeKey(ref);
+        if (visited.has(key)) continue;
+        visited.add(key);
+        nextFrontier.push(ref);
+      }
+    }
+
+    frontier = nextFrontier;
+  }
+
+  return { pages, levels };
+}

From 21f008746d6276296e93ddac9f0558e52fc4eea5 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:00:10 -0400
Subject: [PATCH 11/21] feat(memory-v3): tree validator (orphans, cycles,
 dangling refs, freshness) (#31981)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/validate.test.ts  | 245 ++++++++++++++
 assistant/src/memory/v3/tree-store.ts         |  21 ++
 assistant/src/memory/v3/validate.ts           | 300 ++++++++++++++++++
 3 files changed, 566 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/validate.test.ts
 create mode 100644 assistant/src/memory/v3/validate.ts

diff --git a/assistant/src/memory/v3/__tests__/validate.test.ts b/assistant/src/memory/v3/__tests__/validate.test.ts
new file mode 100644
index 00000000000..693247e8946
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/validate.test.ts
@@ -0,0 +1,245 @@
+/**
+ * Tests for `assistant/src/memory/v3/validate.ts`.
+ *
+ * Coverage matrix — one fixture per defect category plus a clean-tree control:
+ *   - clean tree → every list empty, every count 0.
+ *   - danglingChildRefs → a `node:` ref and a `page:` ref to absent targets.
+ *   - orphanPages → a concept page on disk not wired into the tree; synthetic
+ *     page-index entries (none here) and reachable pages excluded.
+ *   - cycles → A → B → A back-edge detected during the full descent.
+ *   - staleIndex → a parent node whose mtime predates a `node:` child's mtime.
+ *   - unknownEdgeTargets → a page `edges:` entry pointing at a missing slug.
+ *
+ * Tests use temp workspaces under `os.tmpdir()`; they never touch `~/.vellum/`.
+ * mtimes are pinned with `utimes` so the freshness check is deterministic and
+ * independent of write ordering / filesystem timestamp granularity.
+ */
+
+import { mkdtempSync, rmSync } from "node:fs";
+import { utimes } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+
+import { invalidateEdgeIndex } from "../../v2/edge-index.js";
+import { invalidatePageIndex } from "../../v2/page-index.js";
+import { writePage } from "../../v2/page-store.js";
+import type { ConceptPage } from "../../v2/types.js";
+import { invalidateTreeIndex } from "../tree-index.js";
+import { getTreeDir, ROOT_NODE_ID, writeNode } from "../tree-store.js";
+import type { TreeNode } from "../types.js";
+import { validateTree } from "../validate.js";
+
+let workspaceDir: string;
+
+beforeEach(() => {
+  workspaceDir = mkdtempSync(join(tmpdir(), "vellum-tree-validate-test-")); // fresh temp workspace per test
+});
+
+afterEach(() => {
+  invalidateTreeIndex(); // drop all three cached indices before removing the workspace
+  invalidatePageIndex();
+  invalidateEdgeIndex();
+  rmSync(workspaceDir, { recursive: true, force: true });
+});
+
+function node(id: string, children: string[], body = `body ${id}`): TreeNode {
+  return { id, frontmatter: { children }, body }; // children are "node:<id>" / "page:<slug>" strings
+}
+
+function page(slug: string, edges: string[] = []): ConceptPage {
+  return {
+    slug,
+    frontmatter: { edges, ref_files: [], ref_urls: [] }, // only `edges` varies between fixtures
+    body: `body ${slug}`,
+  };
+}
+
+/** Force both atime and mtime of node `id`'s `.md` file to `mtimeMs` (epoch ms). */
+async function setNodeMtime(id: string, mtimeMs: number): Promise<void> {
+  const stamp = new Date(mtimeMs);
+  const nodeFile = join(getTreeDir(workspaceDir), id + ".md");
+  await utimes(nodeFile, stamp, stamp);
+}
+
+/**
+ * Drop all three cached indices (tree, page, edge). Called after a test seeds
+ * its fixture so the following `validateTree` rebuilds from what is on disk
+ * instead of reusing a cache left by an earlier build.
+ */
+function resetCaches(): void {
+  const invalidators = [invalidateTreeIndex, invalidatePageIndex, invalidateEdgeIndex];
+  for (const invalidate of invalidators) invalidate();
+}
+
+describe("validateTree — clean tree", () => {
+  test("returns an empty report for a well-formed tree", async () => {
+    // _root → node:people → page:alice ; all refs resolve, alice reachable.
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:people"]));
+    await writeNode(workspaceDir, node("people", ["page:alice"]));
+    await writePage(workspaceDir, page("alice"));
+    // Pin the parent (_root) newer than its child so staleIndex stays empty.
+    await setNodeMtime("people", 1_000);
+    await setNodeMtime(ROOT_NODE_ID, 2_000);
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.danglingChildRefs).toEqual([]);
+    expect(report.danglingChildRefCount).toBe(0);
+    expect(report.orphanPages).toEqual([]);
+    expect(report.orphanPageCount).toBe(0);
+    expect(report.cycles).toEqual([]);
+    expect(report.cycleCount).toBe(0);
+    expect(report.staleIndex).toEqual([]);
+    expect(report.staleIndexCount).toBe(0);
+    expect(report.unknownEdgeTargets).toEqual([]);
+    expect(report.unknownEdgeTargetCount).toBe(0);
+  });
+});
+
+describe("validateTree — danglingChildRefs", () => {
+  test("flags node: and page: refs whose targets are missing", async () => {
+    await writeNode(
+      workspaceDir,
+      node(ROOT_NODE_ID, ["node:ghost", "page:missing-page"]),
+    );
+    resetCaches(); // neither target is ever written, so both refs dangle
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.danglingChildRefs).toEqual([ // ordered by (node, kind, ref)
+      { node: ROOT_NODE_ID, ref: "ghost", kind: "node" },
+      { node: ROOT_NODE_ID, ref: "missing-page", kind: "page" },
+    ]);
+    expect(report.danglingChildRefCount).toBe(2);
+  });
+
+  test("does not flag refs whose targets exist", async () => {
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"]));
+    await writeNode(workspaceDir, node("child", ["page:alice"]));
+    await writePage(workspaceDir, page("alice"));
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.danglingChildRefs).toEqual([]);
+  });
+});
+
+describe("validateTree — orphanPages", () => {
+  test("flags concept pages not reachable from the root", async () => {
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["page:reached"]));
+    await writePage(workspaceDir, page("reached"));
+    await writePage(workspaceDir, page("orphan")); // on disk, but no node lists it
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.orphanPages).toEqual(["orphan"]);
+    expect(report.orphanPageCount).toBe(1);
+  });
+
+  test("a page hanging off an unreachable node is still an orphan", async () => {
+    // No node lists `floating`, so the walk from _root never reaches its page.
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, []));
+    await writeNode(workspaceDir, node("floating", ["page:detached"]));
+    await writePage(workspaceDir, page("detached"));
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.orphanPages).toEqual(["detached"]);
+  });
+});
+
+describe("validateTree — cycles", () => {
+  test("detects an A → B → A node cycle as a back-edge", async () => {
+    // _root → node:a → node:b → node:a; `a` is still on the DFS stack at b → a.
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:a"]));
+    await writeNode(workspaceDir, node("a", ["node:b"]));
+    await writeNode(workspaceDir, node("b", ["node:a"]));
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.cycles).toEqual([{ from: "b", to: "a" }]);
+    expect(report.cycleCount).toBe(1);
+  });
+
+  test("a shared DAG sub-node (two parents, no cycle) is not a cycle", async () => {
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:p1", "node:p2"]));
+    await writeNode(workspaceDir, node("p1", ["node:shared"]));
+    await writeNode(workspaceDir, node("p2", ["node:shared"]));
+    await writeNode(workspaceDir, node("shared", [])); // off the stack before p2 reaches it
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.cycles).toEqual([]);
+  });
+});
+
+describe("validateTree — staleIndex", () => {
+  test("flags a node whose mtime predates a node: child's mtime", async () => {
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"]));
+    await writeNode(workspaceDir, node("child", []));
+    // Pin the parent strictly older than the child → reported as stale.
+    await setNodeMtime(ROOT_NODE_ID, 1_000);
+    await setNodeMtime("child", 5_000);
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.staleIndex).toEqual([
+      {
+        node: ROOT_NODE_ID,
+        child: "child",
+        nodeMtimeMs: 1_000,
+        childMtimeMs: 5_000,
+      },
+    ]);
+    expect(report.staleIndexCount).toBe(1);
+  });
+
+  test("a parent newer than its child is not stale", async () => {
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["node:child"]));
+    await writeNode(workspaceDir, node("child", []));
+    await setNodeMtime("child", 1_000);
+    await setNodeMtime(ROOT_NODE_ID, 5_000); // parent pinned newer than the child
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.staleIndex).toEqual([]);
+  });
+});
+
+describe("validateTree — unknownEdgeTargets", () => {
+  test("flags a page edge pointing at a missing slug", async () => {
+    await writeNode(workspaceDir, node(ROOT_NODE_ID, ["page:alice"]));
+    await writePage(workspaceDir, page("alice", ["nonexistent"])); // no page is written for this slug
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.unknownEdgeTargets).toEqual([
+      { from: "alice", to: "nonexistent" },
+    ]);
+    expect(report.unknownEdgeTargetCount).toBe(1);
+  });
+
+  test("an edge to an existing page is not flagged", async () => {
+    await writeNode(
+      workspaceDir,
+      node(ROOT_NODE_ID, ["page:alice", "page:bob"]),
+    );
+    await writePage(workspaceDir, page("alice", ["bob"])); // edge target `bob` is written below
+    await writePage(workspaceDir, page("bob"));
+    resetCaches();
+
+    const report = await validateTree(workspaceDir);
+
+    expect(report.unknownEdgeTargets).toEqual([]);
+  });
+});
diff --git a/assistant/src/memory/v3/tree-store.ts b/assistant/src/memory/v3/tree-store.ts
index 55dc023f2fd..86933c100ac 100644
--- a/assistant/src/memory/v3/tree-store.ts
+++ b/assistant/src/memory/v3/tree-store.ts
@@ -33,6 +33,7 @@ import {
   readFile,
   rename,
   rm,
+  stat,
   writeFile,
 } from "node:fs/promises";
 import { dirname, join, relative, sep } from "node:path";
@@ -270,6 +271,26 @@ export async function readNode(
   return { id, frontmatter, body };
 }
 
+/**
+ * File mtime for a tree node, in epoch ms. Any `stat` failure (file missing or
+ * unreadable) is swallowed and returned as 0, which callers read as "no mtime"
+ * (the validator's stale-index check treats it as the oldest possible mtime).
+ * `validateNodeId` runs outside the `try`, so anything it throws for a rejected
+ * id propagates rather than becoming 0. Mirrors v2's `getPageMtimeMs`.
+ */
+export async function getNodeMtimeMs(
+  workspaceDir: string,
+  id: string,
+): Promise<number> {
+  validateNodeId(id);
+  try {
+    const s = await stat(getNodePath(workspaceDir, id));
+    return s.mtimeMs;
+  } catch {
+    return 0;
+  }
+}
+
 /**
  * Write a tree node atomically (temp file + rename). A crash between the temp
  * write and the rename leaves the prior file intact; a crash after the rename
diff --git a/assistant/src/memory/v3/validate.ts b/assistant/src/memory/v3/validate.ts
new file mode 100644
index 00000000000..4a0acb2efc3
--- /dev/null
+++ b/assistant/src/memory/v3/validate.ts
@@ -0,0 +1,300 @@
+/**
+ * Memory v3 — Tree structure validator.
+ *
+ * The v3 tree is hand-authored by a data-migration during the v2 → v3 rollout
+ * (nodes reference pages and sub-nodes by `page:`/`node:` refs). Because the
+ * structure is authored, not derived, it can drift: a ref can dangle, a page
+ * can be left unwired, two nodes can reference each other into a cycle, a
+ * parent node's compositional summary can fall behind a freshly-edited child,
+ * or a page `edges:` entry can point at a slug with no page.
+ *
+ * `validateTree` is the read-only report the migration (and any later
+ * structure-health probe) runs to surface those defects. It is deliberately
+ * **non-throwing**: the migration is in progress, so an incomplete tree is
+ * expected — the report is informational, and the caller decides what (if
+ * anything) is fatal. It builds the three indices it needs (tree, page, edge),
+ * walks the DAG, and returns counts plus the offending ids for each category.
+ *
+ * Categories:
+ *   - `danglingChildRefs` — a node `children` entry (`node:`/`page:`) whose
+ *     target node/page does not exist on disk.
+ *   - `orphanPages` — concept pages present in the page index but not reachable
+ *     from the tree root by descending every `node:` child. Informational while
+ *     the migration is mid-flight (not every page is wired in yet). Synthetic
+ *     page-index entries (skills, CLI commands) are excluded — they are never
+ *     tree members.
+ *   - `cycles` — back-edges found during a full DFS over `node:` adjacency
+ *     (A → B → A). A cycle would make a naive descent loop forever.
+ *   - `staleIndex` — a node whose own file mtime predates one of its `node:`
+ *     children's mtime, hinting its compositional index/summary may be out of
+ *     date relative to the child it composes.
+ *   - `unknownEdgeTargets` — page `edges:` targets with no corresponding page
+ *     index slug, reusing v2's `validateEdgeTargets`.
+ */
+
+import { CLI_COMMAND_SLUG_PREFIX } from "../v2/cli-command-store.js";
+import { getEdgeIndex, validateEdgeTargets } from "../v2/edge-index.js";
+import { getPageIndex } from "../v2/page-index.js";
+import { SKILL_SLUG_PREFIX } from "../v2/skill-store.js";
+import { getTreeIndex, type TreeIndex } from "./tree-index.js";
+import { getNodeMtimeMs } from "./tree-store.js";
+
+/**
+ * One stale parent/child pair: a `node:` child whose file mtime is strictly
+ * newer than that of the parent node listing it. A parent with several fresher
+ * children yields one entry per child.
+ */
+export interface StaleIndexEntry {
+  node: string; // parent node id
+  child: string; // id of the fresher `node:` child
+  nodeMtimeMs: number; // parent file mtime, epoch ms (0 if it could not be stat'ed)
+  childMtimeMs: number; // child file mtime, epoch ms
+}
+
+/**
+ * Read-only health report over the v3 tree + its referenced pages/edges. The
+ * lists built in this module are sorted for deterministic output;
+ * `unknownEdgeTargets` is v2's `validateEdgeTargets(...).missing` as returned.
+ * Each `*Count` mirrors its list's length so callers need not re-count.
+ */
+export interface TreeValidationReport {
+  /** `node:`/`page:` children whose target does not exist. */
+  danglingChildRefs: Array<{
+    node: string;
+    ref: string;
+    kind: "node" | "page";
+  }>;
+  danglingChildRefCount: number;
+  /** Concept pages not reachable from the root by descending all node children. */
+  orphanPages: string[];
+  orphanPageCount: number;
+  /** Back-edges (`from → to`) closing a cycle during the full DFS descent. */
+  cycles: Array<{ from: string; to: string }>;
+  cycleCount: number;
+  /** Nodes whose mtime predates a child node's mtime. */
+  staleIndex: StaleIndexEntry[];
+  staleIndexCount: number;
+  /** Page `edges:` targets with no corresponding page-index slug. */
+  unknownEdgeTargets: Array<{ from: string; to: string }>;
+  unknownEdgeTargetCount: number;
+}
+
+/** Whether `slug` starts with the skill or CLI-command prefix (a synthetic entry). */
+function isSyntheticSlug(slug: string): boolean {
+  if (slug.startsWith(SKILL_SLUG_PREFIX)) {
+    return true;
+  }
+  return slug.startsWith(CLI_COMMAND_SLUG_PREFIX);
+}
+
+/**
+ * Gather every child ref that points at nothing: a `node:` child whose id is
+ * missing from `tree.nodes`, or a `page:` child whose slug is missing from
+ * `knownPageSlugs`. The result is ordered by `(node, kind, ref)`.
+ */
+function collectDanglingChildRefs(
+  tree: TreeIndex,
+  knownPageSlugs: ReadonlySet<string>,
+): Array<{ node: string; ref: string; kind: "node" | "page" }> {
+  type Dangling = { node: string; ref: string; kind: "node" | "page" };
+  const found: Dangling[] = [];
+  for (const [parentId, refs] of tree.childrenByNode) {
+    for (const { kind, ref } of refs) {
+      // Node refs resolve against the tree; page refs against the known slugs.
+      const resolves =
+        kind === "node" ? tree.nodes.has(ref) : knownPageSlugs.has(ref);
+      if (resolves) continue;
+      found.push({ node: parentId, ref, kind });
+    }
+  }
+  // Deterministic order: parent node first, then kind, then ref.
+  found.sort((left, right) => {
+    const byNode = left.node.localeCompare(right.node);
+    if (byNode !== 0) return byNode;
+    const byKind = left.kind.localeCompare(right.kind);
+    if (byKind !== 0) return byKind;
+    return left.ref.localeCompare(right.ref);
+  });
+  return found;
+}
+
+/**
+ * List, in declared order, the `node:` children of `nodeId` that actually exist
+ * in `tree.nodes`. Refs to missing nodes are dropped here — the dangling-ref
+ * pass reports those — so callers never follow an edge to an absent node. An
+ * unknown `nodeId` yields an empty list.
+ */
+function nodeChildrenOf(tree: TreeIndex, nodeId: string): string[] {
+  const declared = tree.childrenByNode.get(nodeId);
+  if (declared == null) {
+    return [];
+  }
+  return declared
+    .filter((child) => child.kind === "node" && tree.nodes.has(child.ref))
+    .map((child) => child.ref);
+}
+
+/**
+ * Full DFS over `node:` adjacency from `tree.root`. Returns the reachable node
+ * ids (for orphan-page reachability) and the back-edges that close a cycle,
+ * sorted by `(from, to)`. A back-edge is an edge into a node still on the
+ * active recursion stack (`onStack`, the classic gray set); `reachableNodes`
+ * doubles as the visited set so shared DAG sub-nodes are expanded only once.
+ */
+function descend(tree: TreeIndex): {
+  reachableNodes: Set<string>;
+  cycles: Array<{ from: string; to: string }>;
+} {
+  const reachableNodes = new Set<string>();
+  const onStack = new Set<string>();
+  const cycles: Array<{ from: string; to: string }> = [];
+
+  // Iterative DFS with an explicit stack so deep trees don't blow the call
+  // stack. Each frame tracks its child cursor; we push a child frame, and on
+  // exhaustion pop the parent off the recursion stack (`onStack`).
+  type Frame = { node: string; children: string[]; cursor: number };
+  const stack: Frame[] = [];
+
+  function enter(nodeId: string): void {
+    reachableNodes.add(nodeId);
+    onStack.add(nodeId);
+    stack.push({
+      node: nodeId,
+      children: nodeChildrenOf(tree, nodeId),
+      cursor: 0,
+    });
+  }
+
+  if (tree.nodes.has(tree.root)) { // a missing root leaves both results empty
+    enter(tree.root); // sole entry point: nodes the root cannot reach are never explored
+  }
+
+  while (stack.length > 0) {
+    const frame = stack[stack.length - 1];
+    if (frame.cursor >= frame.children.length) {
+      onStack.delete(frame.node);
+      stack.pop();
+      continue;
+    }
+    const child = frame.children[frame.cursor++];
+    if (onStack.has(child)) {
+      // Edge into an ancestor still on the stack → cycle-closing back-edge.
+      cycles.push({ from: frame.node, to: child });
+      continue;
+    }
+    if (reachableNodes.has(child)) {
+      // Entered earlier and no longer on the stack: a shared DAG sub-node.
+      continue;
+    }
+    enter(child);
+  }
+
+  cycles.sort(
+    (a, b) => a.from.localeCompare(b.from) || a.to.localeCompare(b.to),
+  );
+  return { reachableNodes, cycles };
+}
+
+/**
+ * Slugs of every `page:` child declared by a node in `reachableNodes`. A page
+ * listed only by nodes outside that set is not counted as reachable.
+ */
+function reachablePages(
+  tree: TreeIndex,
+  reachableNodes: ReadonlySet<string>,
+): Set<string> {
+  const slugs = new Set<string>();
+  reachableNodes.forEach((nodeId) => {
+    const declared = tree.childrenByNode.get(nodeId) ?? [];
+    for (const { kind, ref } of declared) {
+      if (kind === "page") slugs.add(ref);
+    }
+  });
+  return slugs;
+}
+
+/**
+ * Nodes whose mtime is strictly older than an existing `node:` child's mtime.
+ * A file that cannot be stat'ed reads as mtime 0, so as a child it never flags
+ * its parent. Sorted by `(node, child)`.
+ */
+async function collectStaleIndex(
+  workspaceDir: string,
+  tree: TreeIndex,
+): Promise<StaleIndexEntry[]> {
+  const ids = [...tree.nodes.keys()];
+  const mtimes = new Map<string, number>();
+  await Promise.all(
+    ids.map(async (id) => {
+      mtimes.set(id, await getNodeMtimeMs(workspaceDir, id));
+    }),
+  );
+
+  const stale: StaleIndexEntry[] = [];
+  for (const node of ids) {
+    const nodeMtimeMs = mtimes.get(node) ?? 0;
+    for (const child of nodeChildrenOf(tree, node)) {
+      const childMtimeMs = mtimes.get(child) ?? 0;
+      if (nodeMtimeMs < childMtimeMs) { // strict: equal mtimes are fresh; a parent reading 0 is flagged
+        stale.push({ node, child, nodeMtimeMs, childMtimeMs });
+      }
+    }
+  }
+  stale.sort(
+    (a, b) => a.node.localeCompare(b.node) || a.child.localeCompare(b.child),
+  );
+  return stale;
+}
+
+/**
+ * Validate the v3 tree for `workspaceDir`: build the tree, page, and edge
+ * indices, walk the DAG from the root, and return the five defect categories
+ * as a {@link TreeValidationReport}. Defects are reported, not thrown.
+ * NOTE(review): index-build rejections are not caught here — confirm.
+ */
+export async function validateTree(
+  workspaceDir: string,
+): Promise<TreeValidationReport> {
+  const [tree, pageIndex, edgeIndex] = await Promise.all([
+    getTreeIndex(workspaceDir),
+    getPageIndex(workspaceDir),
+    getEdgeIndex(workspaceDir),
+  ]);
+
+  const knownPageSlugs = new Set(pageIndex.bySlug.keys());
+
+  // Kick off the stale-index mtime stats up front — it only depends on the
+  // tree, not on the DAG walk below — so its filesystem reads overlap the
+  // (synchronous) descent rather than running strictly after it.
+  const staleIndexPromise = collectStaleIndex(workspaceDir, tree);
+
+  const danglingChildRefs = collectDanglingChildRefs(tree, knownPageSlugs);
+
+  const { reachableNodes, cycles } = descend(tree);
+
+  const reached = reachablePages(tree, reachableNodes);
+  const orphanPages = [...knownPageSlugs]
+    .filter((slug) => !isSyntheticSlug(slug) && !reached.has(slug))
+    .sort();
+
+  const staleIndex = await staleIndexPromise;
+
+  // Edge graph is page-only; knownSlugs is the full page-index slug set so an
+  // edge pointing at a skill/CLI entry is not spuriously flagged unknown.
+  const unknownEdgeTargets = validateEdgeTargets(
+    edgeIndex,
+    knownPageSlugs,
+  ).missing;
+
+  return {
+    danglingChildRefs,
+    danglingChildRefCount: danglingChildRefs.length,
+    orphanPages,
+    orphanPageCount: orphanPages.length,
+    cycles,
+    cycleCount: cycles.length,
+    staleIndex,
+    staleIndexCount: staleIndex.length,
+    unknownEdgeTargets,
+    unknownEdgeTargetCount: unknownEdgeTargets.length,
+  };
+}

From 94ad28782f5fd2011c4594e03b41c9afd87422f7 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:02:11 -0400
Subject: [PATCH 12/21] feat(memory-v3): scout-seeded tree-walk descent driver
 (#31982)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/tree-walk.test.ts | 585 ++++++++++++++++++
 assistant/src/memory/v3/tree-walk.ts          | 406 ++++++++++++
 2 files changed, 991 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/tree-walk.test.ts
 create mode 100644 assistant/src/memory/v3/tree-walk.ts

diff --git a/assistant/src/memory/v3/__tests__/tree-walk.test.ts b/assistant/src/memory/v3/__tests__/tree-walk.test.ts
new file mode 100644
index 00000000000..5b45021b1dd
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/tree-walk.test.ts
@@ -0,0 +1,585 @@
+/**
+ * Tests for `assistant/src/memory/v3/tree-walk.ts`.
+ *
+ * The descent provider is always a scripted stub injected via the `provider`
+ * arg — no real LLM, no network, no `mock.module`, `~/.vellum/` untouched. The
+ * stub keys its scripted decision off the `<node id="...">` marker in the user
+ * message so one fixture provider can drive a whole multi-node walk with one
+ * call per visited node.
+ *
+ * Coverage:
+ *   - scripted descent over a fixture tree collects the right leaf pages and
+ *     records considered/descended/skipped + reasoning per node.
+ *   - one descent call per *visited* node (not per offered child).
+ *   - breadthBudget caps descents per node (skip the overflow).
+ *   - maxDepth halts the walk.
+ *   - scout page hits seed the start node set (deriveSeedNodes) so a subtree the
+ *     root never reaches is still walked.
+ *   - explicit seeds bias the start set.
+ *   - scout hits are rendered into the descend prompt as pressure.
+ *   - provider === null → fail-safe: descend nothing, walk still terminates and
+ *     collects the pages it reached, reasoning records the failure.
+ *   - leaf nodes (no node children) make no provider call.
+ *   - request shape: forced tool_choice on `choose_branches`, abort signal
+ *     forwarded.
+ */
+
+import { describe, expect, test } from "bun:test";
+
+import type {
+  Message,
+  Provider,
+  ProviderResponse,
+  SendMessageOptions,
+  ToolDefinition,
+} from "../../../providers/types.js";
+import type { RetrievalInput } from "../../v2/harness/retriever.js";
+import type { ScoutResult } from "../../v2/harness/trace.js";
+import type { PageIndex } from "../../v2/page-index.js";
+import type { ChildRef, TreeIndex } from "../tree-index.js";
+import { createDescender, deriveSeedNodes, runTreeWalk } from "../tree-walk.js";
+import type { TreeNode } from "../types.js";
+
+// ---------------------------------------------------------------------------
+// Fixture helpers.
+// ---------------------------------------------------------------------------
+
+function page(ref: string): ChildRef {
+  return { kind: "page", ref };
+}
+
+function node(ref: string): ChildRef {
+  return { kind: "node", ref };
+}
+
+interface ProviderCall {
+  messages: Message[];
+  tools: ToolDefinition[] | undefined;
+  systemPrompt: string | undefined;
+  options: SendMessageOptions | undefined;
+}
+
+/**
+ * Build a tree node with the given children refs. `summary` defaults to the id
+ * so `composeNodeIndex` produces deterministic, inspectable lines.
+ */
+function makeNode(id: string, children: ChildRef[]): TreeNode {
+  return {
+    id,
+    frontmatter: {
+      children: children.map((c) => `${c.kind}:${c.ref}`),
+      summary: `summary of ${id}`,
+    },
+    body: "",
+  };
+}
+
+/**
+ * Build an in-memory `TreeIndex` from a forward-adjacency spec, materializing
+ * `nodes`, `childrenByNode`, and the `pageParents` reverse edges (the only maps
+ * `tree-walk.ts` reads). `parentsByNode` is left empty — the driver never reads
+ * it.
+ */
+function makeTree(
+  root: string,
+  childrenByNode: Record<string, ChildRef[]>,
+): TreeIndex {
+  const nodes = new Map<string, TreeNode>();
+  const children = new Map<string, ReadonlyArray<ChildRef>>();
+  const pageParents = new Map<string, Set<string>>();
+  for (const [id, refs] of Object.entries(childrenByNode)) {
+    nodes.set(id, makeNode(id, refs));
+    children.set(id, refs);
+    for (const ref of refs) {
+      if (ref.kind !== "page") continue;
+      let parents = pageParents.get(ref.ref);
+      if (!parents) {
+        parents = new Set();
+        pageParents.set(ref.ref, parents);
+      }
+      parents.add(id);
+    }
+  }
+  return {
+    nodes,
+    childrenByNode: children,
+    parentsByNode: new Map(),
+    pageParents,
+    root,
+  };
+}
+
+/** Page index stub for `slugs` — the driver only needs `bySlug` summaries. */
+function makePages(slugs: string[]): PageIndex {
+  const bySlug = new Map();
+  const byId = new Map();
+  let id = 1;
+  for (const slug of slugs) {
+    const entry = {
+      id,
+      slug,
+      summary: `summary of ${slug}`,
+      edges: [],
+      modifiedAt: 0,
+    };
+    bySlug.set(slug, entry);
+    byId.set(id, entry);
+    id++;
+  }
+  return { entries: [...bySlug.values()], bySlug, byId, rendered: "" };
+}
+
+/** Minimal `RetrievalInput` carrying just the fields the driver reads. */
+function makeInput(
+  overrides?: Partial<RetrievalInput> & {
+    breadthBudget?: number;
+    maxDepth?: number;
+  },
+): RetrievalInput {
+  const breadthBudget = overrides?.breadthBudget ?? 8;
+  const maxDepth = overrides?.maxDepth ?? 8;
+  const config = {
+    memory: { v3: { breadthBudget, maxDepth } },
+  } as unknown as RetrievalInput["config"];
+  const { breadthBudget: _b, maxDepth: _m, ...rest } = overrides ?? {};
+  return {
+    workspaceDir: "/tmp/does-not-matter",
+    recentTurnPairs: [{ assistantMessage: "", userMessage: "tell me about a" }],
+    nowText: "2026-05-25 10:00 PT",
+    priorEverInjected: [],
+    config,
+    ...rest,
+  };
+}
+
+/** Pull the `<node id="...">` id out of a recorded descend prompt. */
+function nodeIdFromCall(call: ProviderCall): string | null {
+  for (const block of call.messages[0]?.content ?? []) {
+    if (block.type !== "text") continue;
+    const match = block.text.match(/<node id="([^"]*)">/);
+    if (match) return match[1];
+  }
+  return null;
+}
+
+/**
+ * A scripted descent provider. `script` maps a node id to the bare child-node
+ * ids to descend (and an optional reasoning string). Records every call and
+ * honors an already-aborted signal by throwing.
+ */
+function makeProvider(
+  script: Record<string, { descend: string[]; reasoning?: string }>,
+  calls: ProviderCall[],
+): Provider {
+  return {
+    name: "stub",
+    sendMessage: async (messages, tools, systemPrompt, options) => {
+      calls.push({ messages, tools, systemPrompt, options });
+      if (options?.signal?.aborted) {
+        const err = new Error("aborted");
+        err.name = "AbortError";
+        throw err;
+      }
+      const nodeId =
+        nodeIdFromCall({ messages, tools, systemPrompt, options }) ?? "";
+      const decision = script[nodeId] ?? { descend: [] };
+      const input: Record<string, unknown> = { descend: decision.descend };
+      if (decision.reasoning !== undefined)
+        input.reasoning = decision.reasoning;
+      const response: ProviderResponse = {
+        model: "stub-model",
+        stopReason: "tool_use",
+        usage: { inputTokens: 0, outputTokens: 0 },
+        content: [
+          {
+            type: "tool_use",
+            id: `tu-${nodeId}`,
+            name: "choose_branches",
+            input,
+          },
+        ],
+      };
+      return response;
+    },
+  };
+}
+
+// ---------------------------------------------------------------------------
+// deriveSeedNodes
+// ---------------------------------------------------------------------------
+
+describe("deriveSeedNodes", () => {
+  test("maps scout page slugs to their parent nodes via pageParents", () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b")],
+      a: [page("pa")],
+      b: [page("pb")],
+    });
+    const scouts: ScoutResult[] = [{ lane: "sparse", slugs: ["pb"] }];
+    expect(deriveSeedNodes(tree, scouts, [])).toEqual(["b"]);
+  });
+
+  test("unions explicit seeds first, then scout-derived parents, dedup'd", () => {
+    const tree = makeTree("_root", {
+      _root: [node("a")],
+      a: [page("pa")],
+    });
+    const scouts: ScoutResult[] = [{ lane: "hot", slugs: ["pa", "pa"] }];
+    // "a" is both an explicit seed and the parent of pa — appears once, seeds first.
+    expect(deriveSeedNodes(tree, scouts, ["a", "x"])).toEqual(["a", "x"]);
+  });
+
+  test("ignores scout slugs with no parent node", () => {
+    const tree = makeTree("_root", { _root: [page("pr")] });
+    const scouts: ScoutResult[] = [{ lane: "dense", slugs: ["orphan"] }];
+    expect(deriveSeedNodes(tree, scouts, [])).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// runTreeWalk — scripted descent
+// ---------------------------------------------------------------------------
+
+describe("runTreeWalk — scripted descent", () => {
+  test("collects the right leaf pages and records the descend/skip split", async () => {
+    // _root → {a, b}; a → leaf pa; b → leaf pb. Script descends only "a".
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b")],
+      a: [page("pa")],
+      b: [page("pb")],
+    });
+    const pages = makePages(["pa", "pb"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      { _root: { descend: ["a"], reasoning: "a matches the turn" } },
+      calls,
+    );
+
+    const { pages: collected, levels } = await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      scouts: [],
+      seeds: [],
+      provider,
+    });
+
+    // Only the descended branch's page is collected.
+    expect([...collected]).toEqual(["pa"]);
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.considered).toEqual(["a", "b"]);
+    expect(rootLevel.descended).toEqual(["a"]);
+    expect(rootLevel.skipped).toEqual(["b"]);
+    expect(rootLevel.reasoning).toBe("a matches the turn");
+
+    // _root walked (has node children) + a walked (leaf, no call). b skipped.
+    expect(levels.map((l) => l.node).sort()).toEqual(["_root", "a"]);
+  });
+
+  test("makes exactly one descent call per visited node with node children", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b")],
+      a: [node("c"), page("pa")],
+      b: [page("pb")],
+      c: [page("pc")],
+    });
+    const pages = makePages(["pa", "pb", "pc"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      {
+        _root: { descend: ["a", "b"] },
+        a: { descend: ["c"] },
+        // b and c are leaves of the descended set; c has no node children.
+      },
+      calls,
+    );
+
+    await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      scouts: [],
+      seeds: [],
+      provider,
+    });
+
+    // Calls happen for nodes that HAVE node children: _root, a. b (leaf) and
+    // c (leaf) are visited but short-circuit before the provider call.
+    const calledNodes = calls.map(nodeIdFromCall).sort();
+    expect(calledNodes).toEqual(["_root", "a"]);
+  });
+
+  test("breadthBudget caps descents per node", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b"), node("c")],
+      a: [page("pa")],
+      b: [page("pb")],
+      c: [page("pc")],
+    });
+    const pages = makePages(["pa", "pb", "pc"]);
+    const calls: ProviderCall[] = [];
+    // Model picks all three; budget 2 admits only the first two.
+    const provider = makeProvider(
+      { _root: { descend: ["a", "b", "c"] } },
+      calls,
+    );
+
+    const { pages: collected, levels } = await runTreeWalk({
+      input: makeInput({ breadthBudget: 2 }),
+      tree,
+      pages,
+      scouts: [],
+      seeds: [],
+      provider,
+    });
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.descended).toEqual(["a", "b"]);
+    expect(rootLevel.skipped).toEqual(["c"]);
+    expect([...collected].sort()).toEqual(["pa", "pb"]);
+  });
+
+  test("maxDepth halts the walk", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a")],
+      a: [node("b"), page("pa")],
+      b: [page("pb")],
+    });
+    const pages = makePages(["pa", "pb"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider(
+      { _root: { descend: ["a"] }, a: { descend: ["b"] } },
+      calls,
+    );
+
+    const { pages: collected, levels } = await runTreeWalk({
+      input: makeInput({ maxDepth: 1 }),
+      tree,
+      pages,
+      scouts: [],
+      seeds: [],
+      provider,
+    });
+
+    // Depth 0 (_root) and depth 1 (a) walked; b never reached.
+    expect(levels.map((l) => l.node)).toEqual(["_root", "a"]);
+    expect([...collected]).toEqual(["pa"]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// runTreeWalk — scout seeding
+// ---------------------------------------------------------------------------
+
+describe("runTreeWalk — scout seeding", () => {
+  test("scout page hits seed a subtree the root never reaches", async () => {
+    // root only links to a; the "island" subtree is unreachable from root but a
+    // scout surfaced its leaf page, so deriveSeedNodes seeds `island`.
+    const tree = makeTree("_root", {
+      _root: [node("a")],
+      a: [page("pa")],
+      island: [page("treasure")],
+    });
+    const pages = makePages(["pa", "treasure"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider({ _root: { descend: ["a"] } }, calls);
+
+    const scouts: ScoutResult[] = [{ lane: "dense", slugs: ["treasure"] }];
+    const { pages: collected, levels } = await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      scouts,
+      seeds: [],
+      provider,
+    });
+
+    // Both the root branch (pa) and the scout-seeded island (treasure) reached.
+    expect([...collected].sort()).toEqual(["pa", "treasure"]);
+    expect(levels.map((l) => l.node).sort()).toEqual(["_root", "a", "island"]);
+  });
+
+  test("explicit seeds bias the start set", async () => {
+    const tree = makeTree("_root", {
+      _root: [page("pr")],
+      mid: [page("pm")],
+    });
+    const pages = makePages(["pr", "pm"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider({}, calls);
+
+    const { pages: collected, levels } = await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      scouts: [],
+      seeds: ["mid"],
+      provider,
+    });
+
+    expect([...collected].sort()).toEqual(["pm", "pr"]);
+    expect(levels.map((l) => l.node).sort()).toEqual(["_root", "mid"]);
+  });
+
+  test("renders scout hits into the descend prompt as pressure", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), node("b")],
+      a: [page("pa")],
+      b: [page("pb")],
+    });
+    const pages = makePages(["pa", "pb"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider({ _root: { descend: ["a"] } }, calls);
+
+    const scouts: ScoutResult[] = [{ lane: "sparse", slugs: ["pb"] }];
+    await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      // The pb hit also seeds its parent `b` into the start set (b is a leaf,
+      // so it adds no provider call); we only assert the root prompt here.
+      scouts,
+      seeds: [],
+      provider,
+    });
+
+    const rootCall = calls.find((c) => nodeIdFromCall(c) === "_root")!;
+    const promptText = rootCall.messages[0].content
+      .filter((b) => b.type === "text")
+      .map((b) => (b as { text: string }).text)
+      .join("\n");
+    expect(promptText).toContain("<scout_hits>");
+    expect(promptText).toContain("[sparse]: pb");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// runTreeWalk — fail-safe + request shape
+// ---------------------------------------------------------------------------
+
+describe("runTreeWalk — fail-safe", () => {
+  test("provider null descends nothing but still terminates and collects reached pages", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a"), page("pr")],
+      a: [page("pa")],
+    });
+    const pages = makePages(["pr", "pa"]);
+
+    const { pages: collected, levels } = await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      scouts: [],
+      seeds: [],
+      provider: null,
+    });
+
+    // Root's own page is collected; the undescended branch's page is not.
+    expect([...collected]).toEqual(["pr"]);
+    expect(levels.map((l) => l.node)).toEqual(["_root"]);
+    const rootLevel = levels[0];
+    expect(rootLevel.descended).toEqual([]);
+    expect(rootLevel.skipped).toEqual(["a"]);
+    expect(rootLevel.reasoning).toContain("no provider");
+  });
+
+  test("malformed tool input fails closed for that node", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a")],
+      a: [page("pa")],
+    });
+    const pages = makePages(["pa"]);
+    const calls: ProviderCall[] = [];
+    // Provider returns a non-conforming tool input (descend is not an array).
+    const provider: Provider = {
+      name: "bad-schema",
+      sendMessage: async (messages, tools, systemPrompt, options) => {
+        calls.push({ messages, tools, systemPrompt, options });
+        return {
+          model: "stub-model",
+          stopReason: "tool_use",
+          usage: { inputTokens: 0, outputTokens: 0 },
+          content: [
+            {
+              type: "tool_use",
+              id: "tu-1",
+              name: "choose_branches",
+              input: { descend: "not-an-array" },
+            },
+          ],
+        };
+      },
+    };
+
+    const { levels } = await runTreeWalk({
+      input: makeInput(),
+      tree,
+      pages,
+      scouts: [],
+      seeds: [],
+      provider,
+    });
+
+    const rootLevel = levels.find((l) => l.node === "_root")!;
+    expect(rootLevel.descended).toEqual([]);
+    expect(rootLevel.reasoning).toContain("validation");
+  });
+});
+
+describe("createDescender — request shape", () => {
+  test("forces tool_choice on choose_branches and forwards the abort signal", async () => {
+    const tree = makeTree("_root", {
+      _root: [node("a")],
+      a: [page("pa")],
+    });
+    const pages = makePages(["pa"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider({ _root: { descend: ["a"] } }, calls);
+
+    // Keep a handle on the input so the last assertion can identity-check
+    // the signal: the descender must hand the provider the caller's own
+    // AbortSignal, not merely *some* signal.
+    const input = makeInput({ signal: AbortSignal.timeout(10_000) });
+    const descender = createDescender(
+      { input, tree, pages, scouts: [], seeds: [], provider },
+      new Map<string, string>(),
+    );
+
+    await descender("_root", [...tree.childrenByNode.get("_root")!]);
+
+    // Request shape: the descend tool is the one offered and is forced via
+    // tool_choice, under the memoryV3Descent call site.
+    expect(calls).toHaveLength(1);
+    const call = calls[0];
+    expect(call.tools?.[0]?.name).toBe("choose_branches");
+    expect(call.options?.config?.tool_choice).toEqual({
+      type: "tool",
+      name: "choose_branches",
+    });
+    expect(call.options?.config?.callSite).toBe("memoryV3Descent");
+    // `toBeDefined()` would accept any stray signal; require the exact
+    // instance that was passed in.
+    expect(call.options?.signal).toBe(input.signal);
+  });
+
+  test("a node with no node children makes no provider call", async () => {
+    const tree = makeTree("leaf", { leaf: [page("p")] });
+    const pages = makePages(["p"]);
+    const calls: ProviderCall[] = [];
+    const provider = makeProvider({}, calls);
+
+    const reasoningByNode = new Map<string, string>();
+    const descender = createDescender(
+      { input: makeInput(), tree, pages, scouts: [], seeds: [], provider },
+      reasoningByNode,
+    );
+
+    const chosen = await descender("leaf", [
+      ...tree.childrenByNode.get("leaf")!,
+    ]);
+    expect(chosen).toEqual([]);
+    expect(calls).toHaveLength(0);
+    expect(reasoningByNode.get("leaf")).toBe("");
+  });
+});
diff --git a/assistant/src/memory/v3/tree-walk.ts b/assistant/src/memory/v3/tree-walk.ts
new file mode 100644
index 00000000000..c387a2a5bc4
--- /dev/null
+++ b/assistant/src/memory/v3/tree-walk.ts
@@ -0,0 +1,406 @@
+/**
+ * Memory v3 — tree-walk model driver.
+ *
+ * The *intelligence* half of the v3 tree descent. `traversal.ts` owns the
+ * mechanical, provider-free walk (`walkTree`); this module supplies the
+ * per-node `descend` decision that walk injects, and wires the whole thing into
+ * a single `runTreeWalk` entry point.
+ *
+ * Per visited node the driver makes one cheap LLM call (`memoryV3Descent`) over
+ * the node's *composed* index — `composeNodeIndex` renders one line per child
+ * (sub-node summary or leaf page summary) plus the node's routing hints — and
+ * asks which child *nodes* to descend into. The prompt also carries the
+ * conversation context (the just-arrived turn + NOW) and the surviving scout
+ * hits, so descent is **scout-seeded but not scout-bound**: the model sees where
+ * the cheap lanes already landed, yet still feels pressure to descend branches
+ * the scouts missed. A driver that only ratified the scouts would re-introduce
+ * the recall cliff the tree walk exists to avoid.
+ *
+ * Scout seeding works at two layers:
+ *   1. **Start set** — `runTreeWalk` derives seed *node* ids from scout-surfaced
+ *      *page* slugs via the tree's `pageParents` reverse edges (a scout hit on
+ *      `page:foo` seeds every node that lists `page:foo` as a child), unioned
+ *      with any explicit `seeds`. `walkTree` fans out from `tree.root` + seeds.
+ *   2. **Descend pressure** — the surviving scout slugs are rendered into every
+ *      descend prompt so the model can prefer (but is not forced onto) branches
+ *      that contain them.
+ *
+ * Reasoning capture. The `createDescender` signature returns plain `ChildRef[]`
+ * (the chosen node children) to match the driver contract; the model's stated
+ * rationale is written into a side map keyed by node id. `runTreeWalk` adapts
+ * the descender into `walkTree`'s `DescendResult`-returning hook by pairing each
+ * node's chosen children with its recorded reasoning, so every emitted
+ * `TreeLevel` carries the model's reason for its descend/skip split — making a
+ * wrong high-level skip observable rather than silent.
+ *
+ * Fail-safe. When no provider is configured (or a per-node call errors / returns
+ * an unusable response) the descender descends *nothing* for that node and
+ * records the reason. The walk still terminates and still collects every leaf
+ * page it reached before the failure; it just stops exploring deeper from the
+ * affected node. Failing closed (descend nothing) rather than open (descend all)
+ * keeps a broken provider from blowing the breadth budget across the whole tree.
+ *
+ * This module is currently unwired — a later PR composes it into the loop.
+ */
+
+import { z } from "zod";
+
+import {
+  extractToolUse,
+  getConfiguredProvider,
+} from "../../providers/provider-send-message.js";
+import type {
+  Message,
+  Provider,
+  ToolDefinition,
+} from "../../providers/types.js";
+import { getLogger } from "../../util/logger.js";
+import type { RetrievalInput } from "../v2/harness/retriever.js";
+import type { ScoutResult } from "../v2/harness/trace.js";
+import type { PageIndex } from "../v2/page-index.js";
+import { composeNodeIndex } from "./index-composition.js";
+import type { WalkResult } from "./traversal.js";
+import { walkTree } from "./traversal.js";
+import type { ChildRef, TreeIndex } from "./tree-index.js";
+
+const log = getLogger("memory-v3-tree-walk");
+
+/** Tool name forced via `tool_choice`. Tests hard-code the same literal. */
+const DESCEND_TOOL_NAME = "choose_branches";
+
+/**
+ * The descend decision the driver hands to `walkTree`. Returns the subset of
+ * `children` (node refs only) to recurse into. Matches the PR contract: a plain
+ * `ChildRef[]` promise. The model's reasoning is threaded out-of-band via the
+ * side map populated by {@link createDescender}, not the return value, so this
+ * signature stays small.
+ */
+export type Descender = (
+  nodeId: string,
+  children: ChildRef[],
+) => Promise<ChildRef[]>;
+
+/** Arguments to {@link createDescender}. */
+export interface CreateDescenderArgs {
+  input: RetrievalInput;
+  tree: TreeIndex;
+  pages: PageIndex;
+  /** Surviving scout hits — rendered into the prompt as descend pressure. */
+  scouts: ScoutResult[];
+  /** Explicit seed node ids for the start set; not rendered into the prompt. */
+  seeds: string[];
+  /**
+   * Provider override seam for tests. Production omits it and the descender
+   * resolves `getConfiguredProvider("memoryV3Descent")` per call. Explicit
+   * `null` is distinct from `undefined`: it simulates "no provider configured"
+   * and exercises the fail-safe path without touching the real registry.
+   */
+  provider?: Provider | null;
+}
+
+/** Arguments to {@link runTreeWalk}. Identical to the descender's args. */
+export type RunTreeWalkArgs = CreateDescenderArgs;
+
+/**
+ * The forced-tool input schema. `descend` lists the bare node ids the model
+ * chose to recurse into; `reasoning` is its stated rationale for the
+ * descend/skip split. Mirrors v2's `select_pages_to_inject` forced-tool shape.
+ */
+const DescendToolResultSchema = z.object({
+  descend: z.array(z.string()),
+  reasoning: z.string().optional(),
+});
+
+/**
+ * Build the forced tool definition for one node. `descend` is constrained to
+ * the node ids actually offered as `node:` children so the model can only pick
+ * from genuine branches (the walk filters anyway, but constraining the schema
+ * keeps the model honest and the trace clean).
+ */
+function buildDescendTool(offeredNodeIds: readonly string[]): ToolDefinition {
+  return {
+    name: DESCEND_TOOL_NAME,
+    description:
+      "Choose which child nodes of the current memory-tree node to descend " +
+      "into for the current turn. Prefer branches likely to contain pages " +
+      "that bear on the turn; you may favor branches the scout hits point at, " +
+      "but descend other promising branches too — missing a relevant subtree " +
+      "is worse than descending an extra one. Return an empty list only when " +
+      "no child node plausibly bears on the turn.",
+    input_schema: {
+      type: "object",
+      properties: {
+        descend: {
+          type: "array",
+          items:
+            offeredNodeIds.length > 0
+              ? { type: "string", enum: [...offeredNodeIds] }
+              : { type: "string" },
+          description:
+            "Bare ids of the child nodes to descend into. Choose only from " +
+            "the offered node children.",
+        },
+        reasoning: {
+          type: "string",
+          description:
+            "One short sentence: why these branches were descended and the " +
+            "rest skipped.",
+        },
+      },
+      required: ["descend"],
+    },
+  };
+}
+
+/**
+ * Render the recent-turn + NOW context the descend prompt needs. The just-
+ * arrived user turn is the last pair's `userMessage`; the prior assistant reply
+ * (when present) precedes it. NOW is passed verbatim.
+ */
+function renderConversationContext(input: RetrievalInput): string {
+  const lines: string[] = [];
+  const lastPair = input.recentTurnPairs[input.recentTurnPairs.length - 1];
+  if (lastPair) {
+    if (lastPair.assistantMessage.trim().length > 0) {
+      lines.push(`[assistant]: ${lastPair.assistantMessage}`);
+    }
+    lines.push(`[user]: ${lastPair.userMessage}`);
+  }
+  return (
+    `<now>\n${input.nowText}\n</now>\n\n` +
+    `<last_turn>\n${lines.join("\n")}\n</last_turn>`
+  );
+}
+
+/**
+ * Render the surviving scout hits as descend pressure — the page slugs each
+ * lane surfaced, grouped by lane. Empty string when there are no scout hits, so
+ * the prompt omits the block entirely.
+ */
+function renderScoutHits(scouts: readonly ScoutResult[]): string {
+  const lines: string[] = [];
+  for (const scout of scouts) {
+    if (scout.slugs.length === 0) continue;
+    lines.push(`[${scout.lane}]: ${scout.slugs.join(", ")}`);
+  }
+  if (lines.length === 0) return "";
+  return `<scout_hits>\n${lines.join("\n")}\n</scout_hits>`;
+}
+
+const DESCENT_SYSTEM_PROMPT =
+  "You are the descent driver for a hierarchical memory-retrieval walk. At each " +
+  "node you see its child index (one line per child sub-node or leaf page) and " +
+  "the current conversation turn. Choose which child *nodes* to descend into to " +
+  "find the pages that bear on the next reply. Leaf pages are collected " +
+  "automatically — you only decide which branches to explore deeper.";
+
+/** Fail-safe descend result: descend nothing, recording why on the side map. */
+function failClosed(
+  nodeId: string,
+  reasoning: string,
+  reasoningByNode: Map<string, string>,
+): ChildRef[] {
+  reasoningByNode.set(nodeId, reasoning);
+  return [];
+}
+
+/**
+ * Create the per-node descend decision driving {@link walkTree}.
+ *
+ * The returned function makes one forced-tool `memoryV3Descent` call per node
+ * over its composed index, returning the chosen `node:` children. The model's
+ * reasoning for each node is written into `reasoningByNode` (keyed by node id)
+ * rather than the return value, so the small `Descender` signature is preserved
+ * and {@link runTreeWalk} can merge the reasoning into each `TreeLevel`.
+ *
+ * Provider resolution honors the `provider` arg (including explicit `null` for
+ * the fail-safe path) and otherwise resolves the configured call site once per
+ * call. Any failure — no provider, provider throw, missing/wrong tool_use, or
+ * invalid tool input — fails closed (descend nothing) with the reason recorded.
+ */
+export function createDescender(
+  args: CreateDescenderArgs,
+  reasoningByNode: Map<string, string>,
+): Descender {
+  const { input, tree, pages, scouts } = args;
+  const conversationContext = renderConversationContext(input);
+  const scoutHits = renderScoutHits(scouts);
+
+  return async (nodeId: string, children: ChildRef[]): Promise<ChildRef[]> => {
+    const offeredNodes = children.filter((c) => c.kind === "node");
+    // No node children to descend — nothing to ask the model. Record an empty
+    // reasoning so the level still reflects the (trivial) decision.
+    if (offeredNodes.length === 0) {
+      reasoningByNode.set(nodeId, "");
+      return [];
+    }
+
+    const provider =
+      args.provider !== undefined
+        ? args.provider
+        : await getConfiguredProvider("memoryV3Descent");
+    if (!provider) {
+      log.warn(
+        { nodeId },
+        "memoryV3Descent provider unavailable; descending nothing",
+      );
+      return failClosed(
+        nodeId,
+        "no provider configured — descended nothing",
+        reasoningByNode,
+      );
+    }
+
+    const indexBlock = composeNodeIndex(nodeId, tree, pages);
+    const offeredNodeIds = offeredNodes.map((c) => c.ref);
+
+    const userMsg: Message = {
+      role: "user",
+      content: [
+        { type: "text", text: conversationContext },
+        {
+          type: "text",
+          text:
+            (scoutHits ? `${scoutHits}\n\n` : "") +
+            `<node id="${nodeId}">\n${indexBlock}\n</node>`,
+        },
+      ],
+    };
+
+    const descendTool = buildDescendTool(offeredNodeIds);
+
+    let response;
+    try {
+      response = await provider.sendMessage(
+        [userMsg],
+        [descendTool],
+        DESCENT_SYSTEM_PROMPT,
+        {
+          config: {
+            callSite: "memoryV3Descent" as const,
+            tool_choice: { type: "tool" as const, name: DESCEND_TOOL_NAME },
+          },
+          ...(input.signal ? { signal: input.signal } : {}),
+        },
+      );
+    } catch (err) {
+      log.warn(
+        { err, nodeId },
+        "Descent provider call threw; descending nothing",
+      );
+      return failClosed(
+        nodeId,
+        "descent call failed — descended nothing",
+        reasoningByNode,
+      );
+    }
+
+    const toolBlock = extractToolUse(response);
+    if (!toolBlock || toolBlock.name !== DESCEND_TOOL_NAME) {
+      log.warn(
+        { stopReason: response.stopReason, nodeId },
+        "Descent model returned no choose_branches tool_use; descending nothing",
+      );
+      return failClosed(
+        nodeId,
+        "model returned no descend decision — descended nothing",
+        reasoningByNode,
+      );
+    }
+
+    const parsed = DescendToolResultSchema.safeParse(toolBlock.input);
+    if (!parsed.success) {
+      log.warn(
+        { error: parsed.error.message, nodeId },
+        "Descent tool input did not match schema; descending nothing",
+      );
+      return failClosed(
+        nodeId,
+        "descend decision failed validation — descended nothing",
+        reasoningByNode,
+      );
+    }
+
+    reasoningByNode.set(nodeId, parsed.data.reasoning ?? "");
+
+    // Map the chosen bare ids back to the offered ChildRefs. The walk filters
+    // bogus / unoffered refs anyway, but resolving against the offered set here
+    // keeps the returned ChildRefs canonical.
+    const offeredById = new Map(offeredNodes.map((c) => [c.ref, c]));
+    const chosen: ChildRef[] = [];
+    for (const id of parsed.data.descend) {
+      const ref = offeredById.get(id);
+      if (ref) chosen.push(ref);
+    }
+    return chosen;
+  };
+}
+
+/**
+ * Derive the seed *node* ids for the walk from the surviving scout *page* hits.
+ *
+ * Scouts surface concept-page slugs; the tree's `pageParents` reverse edges map
+ * each page slug to the node(s) that list it as a child. Seeding the walk at
+ * those parent nodes drops the model in near where the cheap lanes already
+ * landed (layer 1 of scout seeding), while the walk still fans out from the
+ * root and the descend pressure (layer 2) keeps it from collapsing onto the
+ * scouts. Explicit `seeds` are unioned in. Order is deterministic: explicit
+ * seeds first (in given order), then scout-derived parents in scout/slug order.
+ */
+export function deriveSeedNodes(
+  tree: TreeIndex,
+  scouts: readonly ScoutResult[],
+  seeds: readonly string[],
+): string[] {
+  // A Set keeps first-insertion order and ignores repeat adds, which is
+  // exactly the "first occurrence wins" dedupe the seed list needs. Seeding
+  // it with the explicit seeds puts them ahead of any scout-derived parent.
+  const ordered = new Set<string>(seeds);
+
+  for (const { slugs } of scouts) {
+    for (const slug of slugs) {
+      // A slug with no entry in the reverse-edge map has no parent node to
+      // contribute.
+      const parentIds = tree.pageParents.get(slug);
+      if (parentIds) {
+        for (const parentId of parentIds) ordered.add(parentId);
+      }
+    }
+  }
+  return [...ordered];
+}
+
+/**
+ * Drive a full scout-seeded tree walk for one retrieval pass.
+ *
+ * Wires {@link createDescender} into {@link walkTree} with `breadthBudget` /
+ * `maxDepth` drawn from `config.memory.v3` (on `input.config`) and the start set
+ * seeded by {@link deriveSeedNodes}. Returns the collected leaf pages and the
+ * per-node `TreeLevel[]`, each level carrying the model's recorded reasoning.
+ *
+ * The descender records reasoning into a node-keyed side map; this function
+ * adapts it into `walkTree`'s `DescendResult`-returning hook by pairing each
+ * node's chosen children with its recorded reason, so the walk threads the
+ * reasoning onto every emitted level.
+ */
+export async function runTreeWalk(args: RunTreeWalkArgs): Promise<WalkResult> {
+  const { input, tree, scouts, seeds } = args;
+  // Walk limits live under config.memory.v3; each falls back to 6 when unset.
+  const limits = input.config.memory?.v3;
+
+  // Side map the descender fills with each visited node's reasoning.
+  const reasonLog = new Map<string, string>();
+  const pickChildren = createDescender(args, reasonLog);
+
+  return walkTree(tree, {
+    seeds: deriveSeedNodes(tree, scouts, seeds),
+    breadthBudget: limits?.breadthBudget ?? 6,
+    maxDepth: limits?.maxDepth ?? 6,
+    descend: async (nodeId, children) => {
+      // Copy the child list for the descender, then attach its recorded reason.
+      const descend = await pickChildren(nodeId, [...children]);
+      const reasoning = reasonLog.get(nodeId) ?? "";
+      return { descend, reasoning };
+    },
+  });
+}

From 41a3bf4f5e1b738b9a424c56940d1c9d3e659a84 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:09:16 -0400
Subject: [PATCH 13/21] feat(memory-v3): assistant memory v3 validate/tree CLI
 + routes (#31983)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 assistant/openapi.yaml                        |  42 +++++
 .../__tests__/memory-v3-render.test.ts        | 164 ++++++++++++++++++
 .../src/cli/commands/memory-v3-render.ts      | 133 ++++++++++++++
 assistant/src/cli/commands/memory-v3.ts       | 161 +++++++++++++++++
 assistant/src/cli/program.ts                  |   2 +
 assistant/src/runtime/routes/index.ts         |   2 +
 .../src/runtime/routes/memory-v3-routes.ts    | 117 +++++++++++++
 .../command-registry/commands/assistant.ts    |  13 ++
 8 files changed, 634 insertions(+)
 create mode 100644 assistant/src/cli/commands/__tests__/memory-v3-render.test.ts
 create mode 100644 assistant/src/cli/commands/memory-v3-render.ts
 create mode 100644 assistant/src/cli/commands/memory-v3.ts
 create mode 100644 assistant/src/runtime/routes/memory-v3-routes.ts

diff --git a/assistant/openapi.yaml b/assistant/openapi.yaml
index a6b753972e7..6bd22147959 100644
--- a/assistant/openapi.yaml
+++ b/assistant/openapi.yaml
@@ -11697,6 +11697,48 @@ paths:
               type: object
               properties: {}
               additionalProperties: false
+  /v1/memory/v3/tree:
+    post:
+      operationId: memory_v3_tree_post
+      summary: Return a serializable view of the memory v3 tree DAG (read-only)
+      description:
+        Returns the v3 tree root id plus every node and its ordered child refs (page:/node:) as a JSON-serializable
+        projection of the in-memory TreeIndex. Read-only; the CLI uses it to print an indented tree with shared-DAG
+        re-entries marked.
+      tags:
+        - memory
+      responses:
+        "200":
+          description: Successful response
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties: {}
+              additionalProperties: false
+  /v1/memory/v3/validate:
+    post:
+      operationId: memory_v3_validate_post
+      summary: Validate the memory v3 tree structure (read-only)
+      description:
+        Read-only structural validation of the hand-authored v3 tree DAG. Reports dangling child refs, orphan
+        pages, cycles, stale compositional indexes, and unknown edge targets. Writes nothing and runs no LLM — operators
+        dry-run it while the v2 → v3 migration is in flight.
+      tags:
+        - memory
+      responses:
+        "200":
+          description: Successful response
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties: {}
+              additionalProperties: false
   /v1/messages:
     get:
       operationId: messages_get
diff --git a/assistant/src/cli/commands/__tests__/memory-v3-render.test.ts b/assistant/src/cli/commands/__tests__/memory-v3-render.test.ts
new file mode 100644
index 00000000000..343c921bc84
--- /dev/null
+++ b/assistant/src/cli/commands/__tests__/memory-v3-render.test.ts
@@ -0,0 +1,164 @@
+import { describe, expect, test } from "bun:test";
+
+import type {
+  MemoryV3TreeResult,
+  MemoryV3ValidateResult,
+} from "../../../runtime/routes/memory-v3-routes.js";
+import {
+  renderTree,
+  renderValidationReport,
+  reportHasDefects,
+} from "../memory-v3-render.js";
+
+function cleanReport(): MemoryV3ValidateResult {
+  return {
+    danglingChildRefs: [],
+    danglingChildRefCount: 0,
+    orphanPages: [],
+    orphanPageCount: 0,
+    cycles: [],
+    cycleCount: 0,
+    staleIndex: [],
+    staleIndexCount: 0,
+    unknownEdgeTargets: [],
+    unknownEdgeTargetCount: 0,
+  };
+}
+
+describe("memory v3 — renderValidationReport", () => {
+  test("renders 'none' for every empty category", () => {
+    const out = renderValidationReport(cleanReport());
+    expect(out).toContain("Memory v3 Tree Validation");
+    expect(out).toContain("Dangling child refs: none");
+    expect(out).toContain("Orphan pages: none");
+    expect(out).toContain("Cycles: none");
+    expect(out).toContain("Stale index: none");
+    expect(out).toContain("Unknown edge targets: none");
+  });
+
+  test("renders counts and offending ids for each defect category", () => {
+    const report: MemoryV3ValidateResult = {
+      danglingChildRefs: [{ node: "people", ref: "ghost", kind: "node" }],
+      danglingChildRefCount: 1,
+      orphanPages: ["stray-page"],
+      orphanPageCount: 1,
+      cycles: [{ from: "a", to: "b" }],
+      cycleCount: 1,
+      staleIndex: [
+        { node: "root", child: "people", nodeMtimeMs: 1, childMtimeMs: 2 },
+      ],
+      staleIndexCount: 1,
+      unknownEdgeTargets: [{ from: "p1", to: "missing" }],
+      unknownEdgeTargetCount: 1,
+    };
+    const out = renderValidationReport(report);
+    expect(out).toContain("Dangling child refs: 1");
+    expect(out).toContain("people → node:ghost");
+    expect(out).toContain("Orphan pages: 1");
+    expect(out).toContain("- stray-page");
+    expect(out).toContain("Cycles: 1");
+    expect(out).toContain("a → b");
+    expect(out).toContain("Stale index: 1");
+    expect(out).toContain("root (older than child people)");
+    expect(out).toContain("Unknown edge targets: 1");
+    expect(out).toContain("p1 → missing");
+  });
+});
+
+describe("memory v3 — reportHasDefects", () => {
+  test("false for a clean report", () => {
+    expect(reportHasDefects(cleanReport())).toBe(false);
+  });
+
+  test("true when any single category is non-empty", () => {
+    const report = cleanReport();
+    report.orphanPageCount = 1;
+    report.orphanPages = ["x"];
+    expect(reportHasDefects(report)).toBe(true);
+  });
+});
+
+describe("memory v3 — renderTree", () => {
+  test("renders an indented tree descending node and page children", () => {
+    const view: MemoryV3TreeResult = {
+      root: "_root",
+      nodes: [
+        {
+          id: "_root",
+          children: [
+            { kind: "node", ref: "people" },
+            { kind: "page", ref: "overview" },
+          ],
+        },
+        {
+          id: "people",
+          children: [{ kind: "page", ref: "alice" }],
+        },
+      ],
+    };
+    const out = renderTree(view);
+    expect(out).toBe(
+      ["node:_root", "  node:people", "    page:alice", "  page:overview"].join(
+        "\n",
+      ),
+    );
+  });
+
+  test("marks a shared DAG sub-node as a re-entry rather than re-expanding", () => {
+    const view: MemoryV3TreeResult = {
+      root: "_root",
+      nodes: [
+        {
+          id: "_root",
+          children: [
+            { kind: "node", ref: "a" },
+            { kind: "node", ref: "b" },
+          ],
+        },
+        { id: "a", children: [{ kind: "node", ref: "shared" }] },
+        { id: "b", children: [{ kind: "node", ref: "shared" }] },
+        { id: "shared", children: [{ kind: "page", ref: "leaf" }] },
+      ],
+    };
+    const out = renderTree(view);
+    // First reach under `a` expands; second reach under `b` is a marked re-entry.
+    expect(out).toContain("  node:a\n    node:shared\n      page:leaf");
+    expect(out).toContain("node:shared (↑ already shown)");
+    // The leaf page is expanded exactly once.
+    expect(out.match(/page:leaf/g)?.length).toBe(1);
+  });
+
+  test("bounds output on a cycle instead of looping forever", () => {
+    const view: MemoryV3TreeResult = {
+      root: "_root",
+      nodes: [
+        { id: "_root", children: [{ kind: "node", ref: "a" }] },
+        { id: "a", children: [{ kind: "node", ref: "_root" }] },
+      ],
+    };
+    const out = renderTree(view);
+    expect(out).toContain("node:_root (↑ already shown)");
+  });
+
+  test("flags a child ref whose target node is missing", () => {
+    const view: MemoryV3TreeResult = {
+      root: "_root",
+      nodes: [{ id: "_root", children: [{ kind: "node", ref: "ghost" }] }],
+    };
+    const out = renderTree(view);
+    expect(out).toContain("node:ghost (missing)");
+  });
+
+  test("lists nodes unreachable from the root", () => {
+    const view: MemoryV3TreeResult = {
+      root: "_root",
+      nodes: [
+        { id: "_root", children: [] },
+        { id: "floating", children: [] },
+      ],
+    };
+    const out = renderTree(view);
+    expect(out).toContain("Unreachable nodes (1):");
+    expect(out).toContain("- node:floating");
+  });
+});
diff --git a/assistant/src/cli/commands/memory-v3-render.ts b/assistant/src/cli/commands/memory-v3-render.ts
new file mode 100644
index 00000000000..9cbb7a2f7b1
--- /dev/null
+++ b/assistant/src/cli/commands/memory-v3-render.ts
@@ -0,0 +1,133 @@
+/**
+ * Text rendering for `assistant memory v3 validate` and `... tree`.
+ *
+ * Both functions are pure presentation: they take the daemon route's response
+ * shape and return a terminal-ready string. They live CLI-side (mirroring
+ * `memory-v2-compare-render.ts`) and import only the response *types* from the
+ * daemon route — `cli/no-daemon-internals` permits type-only imports but
+ * forbids pulling in daemon runtime modules.
+ */
+
+import type {
+  MemoryV3TreeResult,
+  MemoryV3ValidateResult,
+} from "../../runtime/routes/memory-v3-routes.js";
+
+/**
+ * Render a {@link MemoryV3ValidateResult} into a counts summary plus the
+ * offending ids for each non-empty category. Categories with zero entries
+ * print `none` so a clean tree reads at a glance.
+ */
+export function renderValidationReport(report: MemoryV3ValidateResult): string {
+  // Each category renders as a `<label>: <count | none>` heading followed by
+  // one indented bullet per offending entry.
+  const section = (label: string, count: number, rows: readonly string[]) => [
+    `${label}: ${count || "none"}`,
+    ...rows.map((row) => `  - ${row}`),
+  ];
+
+  return [
+    "Memory v3 Tree Validation",
+    "=========================",
+    ...section(
+      "Dangling child refs",
+      report.danglingChildRefCount,
+      report.danglingChildRefs.map((d) => `${d.node} → ${d.kind}:${d.ref}`),
+    ),
+    ...section("Orphan pages", report.orphanPageCount, report.orphanPages),
+    ...section(
+      "Cycles",
+      report.cycleCount,
+      report.cycles.map((c) => `${c.from} → ${c.to}`),
+    ),
+    ...section(
+      "Stale index",
+      report.staleIndexCount,
+      report.staleIndex.map((s) => `${s.node} (older than child ${s.child})`),
+    ),
+    ...section(
+      "Unknown edge targets",
+      report.unknownEdgeTargetCount,
+      report.unknownEdgeTargets.map((e) => `${e.from} → ${e.to}`),
+    ),
+  ].join("\n");
+}
+
+/**
+ * Whether the validation report has any defect in any category. The CLI uses
+ * this to set a non-zero exit code so `validate` is scriptable as a check.
+ */
+export function reportHasDefects(report: MemoryV3ValidateResult): boolean {
+  return [
+    report.danglingChildRefCount,
+    report.orphanPageCount,
+    report.cycleCount,
+    report.staleIndexCount,
+    report.unknownEdgeTargetCount,
+  ].some((count) => count > 0);
+}
+
+/**
+ * Render a {@link MemoryV3TreeResult} as an indented outline rooted at
+ * `view.root`, descending `node:` children depth-first with `page:` children
+ * as leaves. A node is expanded on first reach only; later reaches print a
+ * `(↑ already shown)` marker, which keeps output finite on shared sub-nodes
+ * and cycles. A view with no nodes renders as `(empty tree)`.
+ */
+export function renderTree(view: MemoryV3TreeResult): string {
+  const childrenById = new Map<string, MemoryV3TreeResult["nodes"][number]>();
+  for (const node of view.nodes) {
+    childrenById.set(node.id, node);
+  }
+
+  const lines: string[] = [];
+  const expanded = new Set<string>();
+
+  const walk = (nodeId: string, depth: number): void => {
+    const indent = "  ".repeat(depth);
+    const node = childrenById.get(nodeId);
+    if (!node) {
+      lines.push(`${indent}node:${nodeId} (missing)`);
+      return;
+    }
+
+    if (expanded.has(nodeId)) {
+      // Shared DAG sub-node (or a cycle's back-edge): print the reference but
+      // do not re-expand, so output stays finite and the re-entry is visible.
+      lines.push(`${indent}node:${nodeId} (↑ already shown)`);
+      return;
+    }
+    expanded.add(nodeId);
+    lines.push(`${indent}node:${nodeId}`);
+
+    for (const child of node.children) {
+      if (child.kind === "page") {
+        lines.push(`${"  ".repeat(depth + 1)}page:${child.ref}`);
+      } else {
+        walk(child.ref, depth + 1);
+      }
+    }
+  };
+
+  if (view.nodes.length === 0) {
+    // No nodes at all: report an empty tree instead of a "missing" root.
+    lines.push("(empty tree)");
+  } else {
+    walk(view.root, 0);
+  }
+
+  // Surface nodes present in `view.nodes` but never reached from the root —
+  // they would otherwise be invisible in a root-anchored print.
+  const unreached = view.nodes
+    .map((n) => n.id)
+    .filter((id) => !expanded.has(id))
+    .sort();
+  if (unreached.length > 0) {
+    lines.push("", `Unreachable nodes (${unreached.length}):`);
+    for (const id of unreached) {
+      lines.push(`  - node:${id}`);
+    }
+  }
+
+  return lines.join("\n");
+}
diff --git a/assistant/src/cli/commands/memory-v3.ts b/assistant/src/cli/commands/memory-v3.ts
new file mode 100644
index 00000000000..fd629bd5287
--- /dev/null
+++ b/assistant/src/cli/commands/memory-v3.ts
@@ -0,0 +1,161 @@
+/**
+ * `assistant memory v3` CLI subgroup.
+ *
+ * Operator-facing read-only inspection of the v3 memory tree — the DAG overlay
+ * the v2 → v3 data-migration hand-authors over the flat concept pages.
+ *
+ * Subcommands:
+ *
+ *   - `validate` — print a structural health report (dangling refs, orphan
+ *     pages, cycles, stale indexes, unknown edge targets). Exits non-zero when
+ *     any defect is found so it is scriptable as a check.
+ *   - `tree` — print the tree as an indented outline rooted at the tree root,
+ *     marking shared-DAG re-entries.
+ *
+ * Both are read-only: they mutate nothing and run no LLM. `--json` emits the
+ * raw daemon payload for either subcommand.
+ */
+
+import type { Command } from "commander";
+
+import { cliIpcCall } from "../../ipc/cli-client.js";
+import type {
+  MemoryV3TreeResult,
+  MemoryV3ValidateResult,
+} from "../../runtime/routes/memory-v3-routes.js";
+import { registerCommand } from "../lib/register-command.js";
+import { log } from "../logger.js";
+import {
+  renderTree,
+  renderValidationReport,
+  reportHasDefects,
+} from "./memory-v3-render.js";
+
+export function registerMemoryV3Command(program: Command): void {
+  // Reuse an existing `memory` parent if a sibling registrar (e.g. v2)
+  // attached it first; otherwise create one. Keeps registration order between
+  // sibling memory registrars unconstrained.
+  const memory =
+    program.commands.find((c) => c.name() === "memory") ??
+    program
+      .command("memory")
+      .description("Manage the memory subsystem (concept-page model)");
+
+  registerCommand(memory, {
+    name: "v3",
+    transport: "ipc",
+    description: "Memory v3 subsystem operations (tree-DAG overlay)",
+    build: (v3) => {
+      v3.addHelpText(
+        "after",
+        `
+The v3 memory subsystem layers a hand-authored DAG of tree nodes over the
+flat v2 concept pages. Each node lives under /workspace/memory/v3/tree/ and
+its frontmatter 'children' list references sub-nodes (node:<id>) and leaf
+concept pages (page:<slug>). The structure is authored by the v2 → v3
+data-migration, so these subcommands are read-only inspection only — they
+mutate nothing and run no LLM.
+
+Examples:
+  $ assistant memory v3 validate
+  $ assistant memory v3 tree
+  $ assistant memory v3 tree --json | jq '.nodes | length'`,
+      );
+
+      // ── validate ──────────────────────────────────────────────────────────
+
+      v3.command("validate")
+        .description(
+          "Print a structural health report of the v3 tree (read-only)",
+        )
+        .option("--json", "Emit raw JSON instead of a formatted report")
+        .addHelpText(
+          "after",
+          `
+Walks the hand-authored v3 tree DAG and reports:
+  - Dangling child refs (node:/page: targets that do not exist)
+  - Orphan pages (concept pages not reachable from the tree root)
+  - Cycles (back-edges in the node:/node: adjacency)
+  - Stale indexes (a node older than a child it composes)
+  - Unknown edge targets (page edges: pointing at a missing slug)
+
+Read-only — mutates nothing. Exits non-zero if any defect is reported, so it
+is usable as a pre-flight check while the v2 → v3 migration is in flight.
+
+Examples:
+  $ assistant memory v3 validate
+  $ assistant memory v3 validate --json | jq '.orphanPageCount'`,
+        )
+        .action(async (opts: { json?: boolean }) => {
+          const result = await cliIpcCall<MemoryV3ValidateResult>(
+            "memory_v3_validate",
+            { body: {} },
+          );
+
+          if (!result.ok) {
+            log.error(result.error ?? "Failed to validate memory v3 tree");
+            process.exitCode = 1;
+            return;
+          }
+
+          const report = result.result!;
+
+          if (opts.json === true) {
+            log.info(JSON.stringify(report, null, 2));
+          } else {
+            log.info(renderValidationReport(report));
+          }
+
+          if (reportHasDefects(report)) {
+            process.exitCode = 1;
+          }
+        });
+
+      // ── tree ──────────────────────────────────────────────────────────────
+
+      v3.command("tree")
+        .description(
+          "Print the v3 tree as an indented outline from the root (read-only)",
+        )
+        .option("--json", "Emit raw JSON instead of a formatted tree")
+        .addHelpText(
+          "after",
+          `
+Descends the v3 tree depth-first from its root node, printing one line per
+node:/page: ref with indentation by depth. A node reached more than once
+(shared DAG sub-node or a cycle back-edge) is printed once with a re-entry
+marker rather than re-expanded, so output is finite. Nodes that exist on disk
+but are unreachable from the root are listed separately.
+
+Read-only — mutates nothing.
+
+Examples:
+  $ assistant memory v3 tree
+  $ assistant memory v3 tree --json | jq '.root'`,
+        )
+        .action(async (opts: { json?: boolean }) => {
+          const result = await cliIpcCall<MemoryV3TreeResult>(
+            "memory_v3_tree",
+            {
+              body: {},
+            },
+          );
+
+          if (!result.ok) {
+            log.error(result.error ?? "Failed to read memory v3 tree");
+            process.exitCode = 1;
+            return;
+          }
+
+          const view = result.result!;
+
+          if (opts.json === true) {
+            log.info(JSON.stringify(view, null, 2));
+            return;
+          }
+
+          log.info(renderTree(view));
+        });
+    },
+  });
+}
diff --git a/assistant/src/cli/program.ts b/assistant/src/cli/program.ts
index 4a8cab87e00..1e55e5a0b48 100644
--- a/assistant/src/cli/program.ts
+++ b/assistant/src/cli/program.ts
@@ -34,6 +34,7 @@ import { registerInferenceCommand } from "./commands/inference.js";
 import { registerKeysCommand } from "./commands/keys.js";
 import { registerMcpCommand } from "./commands/mcp.js";
 import { registerMemoryV2Command } from "./commands/memory-v2.js";
+import { registerMemoryV3Command } from "./commands/memory-v3.js";
 import { registerNotificationsCommand } from "./commands/notifications.js";
 import { registerOAuthCommand } from "./commands/oauth/index.js";
 import { registerPendingCommand } from "./commands/pending.js";
@@ -129,6 +130,7 @@ Examples:
   registerKeysCommand(program);
   registerMcpCommand(program);
   registerMemoryV2Command(program);
+  registerMemoryV3Command(program);
   registerNotificationsCommand(program);
   registerOAuthCommand(program);
   registerPendingCommand(program);
diff --git a/assistant/src/runtime/routes/index.ts b/assistant/src/runtime/routes/index.ts
index 0c0069fbc03..8f49c11d54c 100644
--- a/assistant/src/runtime/routes/index.ts
+++ b/assistant/src/runtime/routes/index.ts
@@ -90,6 +90,7 @@ import { ROUTES as LOG_EXPORT_ROUTES } from "./log-export-routes.js";
 import { ROUTES as MCP_AUTH_ROUTES } from "./mcp-auth-routes.js";
 import { ROUTES as MEMORY_ITEM_ROUTES } from "./memory-item-routes.js";
 import { ROUTES as MEMORY_V2_ROUTES } from "./memory-v2-routes.js";
+import { ROUTES as MEMORY_V3_ROUTES } from "./memory-v3-routes.js";
 import { ROUTES as MIGRATION_ROLLBACK_ROUTES } from "./migration-rollback-routes.js";
 import { ROUTES as MIGRATION_ROUTES } from "./migration-routes.js";
 import { ROUTES as NOTIFICATION_ROUTES } from "./notification-routes.js";
@@ -216,6 +217,7 @@ export const ROUTES: RouteDefinition[] = [
   ...LLM_CALL_SITES_ROUTES,
   ...MEMORY_ITEM_ROUTES,
   ...MEMORY_V2_ROUTES,
+  ...MEMORY_V3_ROUTES,
   ...MIGRATION_ROLLBACK_ROUTES,
   ...MIGRATION_ROUTES,
   ...NOTIFICATION_ROUTES,
diff --git a/assistant/src/runtime/routes/memory-v3-routes.ts b/assistant/src/runtime/routes/memory-v3-routes.ts
new file mode 100644
index 00000000000..f6e8b2cef06
--- /dev/null
+++ b/assistant/src/runtime/routes/memory-v3-routes.ts
@@ -0,0 +1,117 @@
+/**
+ * Memory v3 route definitions — read-only diagnostics over the hand-authored
+ * v3 tree DAG.
+ *
+ * Two operations, both side-effect-free (no LLM, no writes):
+ *
+ *   - `memory_v3_validate` — returns the {@link TreeValidationReport} from
+ *     `validateTree(workspaceDir)` (orphan pages, cycles, dangling refs,
+ *     stale-index, unknown edge targets).
+ *   - `memory_v3_tree` — returns a JSON-serializable view of
+ *     `getTreeIndex(workspaceDir)`: the root id, every node id, and each
+ *     node's ordered child refs. `TreeIndex` is Map-based, so the handler
+ *     flattens it into arrays/objects the wire protocol can carry.
+ *
+ * The v3 tree is authored by the v2 → v3 data-migration; these routes are the
+ * on-demand inspection surface operators run while that migration is in flight.
+ * They are NOT invoked on any turn.
+ */
+
+import { z } from "zod";
+
+import { getTreeIndex } from "../../memory/v3/tree-index.js";
+import type { TreeValidationReport } from "../../memory/v3/validate.js";
+import { validateTree } from "../../memory/v3/validate.js";
+import { getWorkspaceDir } from "../../util/platform.js";
+import type { RouteDefinition, RouteHandlerArgs } from "./types.js";
+
+// ── Validate ────────────────────────────────────────────────────────────
+
+const MemoryV3ValidateParams = z.object({}).strict();
+
+/**
+ * Wire shape for `memory_v3_validate`. Identical to the daemon-internal
+ * {@link TreeValidationReport} — every field is already serializable, so the
+ * route forwards it verbatim. Re-exported as its own type so the CLI can
+ * import it without reaching into the validator module.
+ */
+export type MemoryV3ValidateResult = TreeValidationReport;
+
+async function handleValidate({
+  body = {},
+}: RouteHandlerArgs): Promise<MemoryV3ValidateResult> {
+  // Read-only structural validation of the v3 tree. Like the v2 validate
+  // route, it is intentionally ungated: operators dry-run it while the
+  // v2 → v3 migration is mid-flight, well before any v3 flag flips.
+  MemoryV3ValidateParams.parse(body);
+  return validateTree(getWorkspaceDir());
+}
+
+// ── Tree ────────────────────────────────────────────────────────────────
+
+const MemoryV3TreeParams = z.object({}).strict();
+
+/** One node in the serialized tree view: its id and ordered child refs. */
+export interface MemoryV3TreeNodeView {
+  id: string;
+  children: Array<{ kind: "node" | "page"; ref: string }>;
+}
+
+/**
+ * JSON-serializable projection of the {@link TreeIndex}. `TreeIndex` keys its
+ * adjacency by `Map`, which doesn't survive JSON, so the handler flattens it:
+ * `root` is the entry-point node id and `nodes` is every node with its ordered
+ * child refs. The CLI renderer walks `nodes`/`root` to print an indented tree,
+ * marking shared-DAG re-entries.
+ */
+export interface MemoryV3TreeResult {
+  root: string;
+  nodes: MemoryV3TreeNodeView[];
+}
+
+async function handleTree({
+  body = {},
+}: RouteHandlerArgs): Promise<MemoryV3TreeResult> {
+  // Validate the body against the empty strict schema before reading the index.
+  MemoryV3TreeParams.parse(body);
+  const index = await getTreeIndex(getWorkspaceDir());
+
+  // Emit nodes in sorted-id order; each keeps its children in index order,
+  // copied down to plain `{ kind, ref }` objects.
+  const nodes: MemoryV3TreeNodeView[] = [];
+  for (const id of [...index.nodes.keys()].sort()) {
+    const refs = index.childrenByNode.get(id) ?? [];
+    nodes.push({
+      id,
+      children: refs.map(({ kind, ref }) => ({ kind, ref })),
+    });
+  }
+  return { root: index.root, nodes };
+}
+
+// ── Route definitions ───────────────────────────────────────────────────
+
+export const ROUTES: RouteDefinition[] = [
+  {
+    operationId: "memory_v3_validate",
+    method: "POST",
+    endpoint: "memory/v3/validate",
+    handler: handleValidate,
+    summary: "Validate the memory v3 tree structure (read-only)",
+    description:
+      "Read-only structural validation of the hand-authored v3 tree DAG. Reports dangling child refs, orphan pages, cycles, stale compositional indexes, and unknown edge targets. Writes nothing and runs no LLM — operators dry-run it while the v2 → v3 migration is in flight.",
+    tags: ["memory"],
+    requestBody: MemoryV3ValidateParams,
+  },
+  {
+    operationId: "memory_v3_tree",
+    method: "POST",
+    endpoint: "memory/v3/tree",
+    handler: handleTree,
+    summary: "Return a serializable view of the memory v3 tree DAG (read-only)",
+    description:
+      "Returns the v3 tree root id plus every node and its ordered child refs (page:/node:) as a JSON-serializable projection of the in-memory TreeIndex. Read-only; the CLI uses it to print an indented tree with shared-DAG re-entries marked.",
+    tags: ["memory"],
+    requestBody: MemoryV3TreeParams,
+  },
+];
diff --git a/gateway/src/risk/command-registry/commands/assistant.ts b/gateway/src/risk/command-registry/commands/assistant.ts
index ad7dc18497c..090ed1d8220 100644
--- a/gateway/src/risk/command-registry/commands/assistant.ts
+++ b/gateway/src/risk/command-registry/commands/assistant.ts
@@ -157,6 +157,9 @@ const ASSISTANT_SUPPORTED_COMMAND_PATHS = [
   "memory v2 reembed-skills",
   "memory v2 activation",
   "memory v2 validate",
+  "memory v3",
+  "memory v3 validate",
+  "memory v3 tree",
   "notifications",
   "notifications send",
   "notifications list",
@@ -482,6 +485,16 @@ const riskOverrides: AssistantRiskOverride[] = [
     risk: "low",
     reason: "Read-only diagnostic walk over concept pages and edges",
   },
+  {
+    path: "memory v3 validate",
+    risk: "low",
+    reason: "Read-only structural validation of the v3 tree DAG",
+  },
+  {
+    path: "memory v3 tree",
+    risk: "low",
+    reason: "Read-only print of the v3 tree DAG structure",
+  },
   { path: "notifications send", risk: "low" },
   {
     path: "oauth request",

From da0fcecf21138b266f1699055efa88b4a022db37 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:09:23 -0400
Subject: [PATCH 14/21] feat(memory-v3): retrieval loop
 (scouts->filter->tree->edges->gate) (#31984)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/loop.test.ts      | 535 ++++++++++++++++++
 assistant/src/memory/v3/loop.ts               | 258 +++++++++
 2 files changed, 793 insertions(+)
 create mode 100644 assistant/src/memory/v3/__tests__/loop.test.ts
 create mode 100644 assistant/src/memory/v3/loop.ts

diff --git a/assistant/src/memory/v3/__tests__/loop.test.ts b/assistant/src/memory/v3/__tests__/loop.test.ts
new file mode 100644
index 00000000000..c16ea8fb591
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/loop.test.ts
@@ -0,0 +1,535 @@
+/**
+ * Tests for `assistant/src/memory/v3/loop.ts`.
+ *
+ * The loop is the composition layer over the v3 lanes. Every lane module
+ * (`scouts`, `filter`, `tree-walk`, `edges`, `gate`) plus the two index
+ * builders (`tree-index`, `page-index`) the loop calls are stubbed via
+ * `mock.module`, so the suite makes no real LLM, Qdrant, embedding, or
+ * filesystem calls. Each mock factory closes over a mutable `lane` state object
+ * that every test rewires before calling `runRetrievalLoop`; a `laneCalls`
+ * recorder captures the arguments the loop passed each lane so the composition
+ * wiring (seeding, query threading, toggles) is assertable.
+ *
+ * Coverage:
+ *   - single-pass ready: scouts → filter → tree → edges → gate composes into a
+ *     valid RetrievalOutput with per-lane source tags and one DescentPass.
+ *   - multi-pass: gate "more" then "ready" runs two passes and threads the
+ *     gate's questions into the second pass's NOW text.
+ *   - passCap: a gate that always says "more" force-exits at passCap.
+ *   - lane toggles: `lanes.tree=false` / `lanes.edges=false` suppress those
+ *     lanes' candidates and trace fields.
+ *   - trace: one DescentPass per pass.
+ *   - cost: `ms` accumulates and is non-negative across passes.
+ *   - failureReason: a filter failure is surfaced on the output.
+ */
+
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+import type { DrizzleDb } from "../../db-connection.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+} from "../../v2/harness/retriever.js";
+import type { GateDecision, ScoutResult } from "../../v2/harness/trace.js";
+
+// ---------------------------------------------------------------------------
+// Lane stubs — installed before importing the module under test.
+// ---------------------------------------------------------------------------
+
+interface RunScoutsResult {
+  scouts: ScoutResult[];
+  sticky: Set<string>;
+  bypass: Set<string>;
+}
+
+interface FilterResult {
+  kept: string[];
+  trace: { judged: string[]; dropped: string[] };
+  failureReason?: string;
+}
+
+interface WalkResult {
+  pages: Set<string>;
+  levels: Array<{
+    node: string;
+    considered: string[];
+    descended: string[];
+    skipped: string[];
+    reasoning: string;
+  }>;
+}
+
+interface ExpandResult {
+  pulled: Set<string>;
+  expansions: Array<{ from: string; pulled: string[] }>;
+}
+
+interface GateResult {
+  decision: GateDecision;
+  selectedSlugs: string[];
+}
+
+/**
+ * Per-pass-programmable lane state. The mock factories close over these live
+ * refs; each test rewires them before calling `runRetrievalLoop`. List-valued
+ * fields are consumed pass-by-pass (one entry per pass) so a multi-pass test
+ * can script a different verdict per pass.
+ */
+const lane = {
+  scouts: [] as RunScoutsResult[],
+  filter: [] as FilterResult[],
+  walk: [] as WalkResult[],
+  edges: [] as ExpandResult[],
+  gate: [] as GateResult[],
+};
+
+/** Records the args the loop passed each lane, one entry per call. */
+const laneCalls = {
+  scouts: [] as Array<{ nowText: string }>,
+  filter: [] as Array<{ nowText: string; dense: ScoutResult }>,
+  walk: [] as Array<{
+    nowText: string;
+    seeds: string[];
+    scouts: ScoutResult[];
+  }>,
+  edges: [] as Array<{ seeds: string[] }>,
+  gate: [] as Array<{
+    nowText: string;
+    passNumber: number;
+    candidates: string[];
+    sticky: string[];
+  }>,
+};
+
+/** Return the scripted value at `index`, clamped to the last entry once the list is exhausted (no mutation). */
+function nextOf<T>(list: T[], index: number): T {
+  return list[Math.min(index, list.length - 1)];
+}
+
+let scoutCallCount = 0;
+let walkCallCount = 0;
+let edgeCallCount = 0;
+let gateCallCount = 0;
+
+mock.module("../scouts.js", () => ({
+  runScouts: async (input: RetrievalInput): Promise<RunScoutsResult> => {
+    laneCalls.scouts.push({ nowText: input.nowText });
+    return nextOf(lane.scouts, scoutCallCount++);
+  },
+}));
+
+mock.module("../filter.js", () => ({
+  filterDenseHits: async (args: {
+    input: RetrievalInput;
+    dense: ScoutResult;
+  }): Promise<FilterResult> => {
+    laneCalls.filter.push({ nowText: args.input.nowText, dense: args.dense });
+    // Filter calls share the scout pass index (one filter call per dense pass).
+    return nextOf(lane.filter, laneCalls.filter.length - 1);
+  },
+}));
+
+mock.module("../tree-walk.js", () => ({
+  runTreeWalk: async (args: {
+    input: RetrievalInput;
+    seeds: string[];
+    scouts: ScoutResult[];
+  }): Promise<WalkResult> => {
+    laneCalls.walk.push({
+      nowText: args.input.nowText,
+      seeds: args.seeds,
+      scouts: args.scouts,
+    });
+    return nextOf(lane.walk, walkCallCount++);
+  },
+}));
+
+mock.module("../edges.js", () => ({
+  expandEdges: async (args: {
+    seeds: Iterable<string>;
+  }): Promise<ExpandResult> => {
+    laneCalls.edges.push({ seeds: [...args.seeds] });
+    return nextOf(lane.edges, edgeCallCount++);
+  },
+}));
+
+mock.module("../gate.js", () => ({
+  runGate: async (args: {
+    input: RetrievalInput;
+    candidates: Set<string>;
+    sticky: Set<string>;
+    passNumber: number;
+  }): Promise<GateResult> => {
+    laneCalls.gate.push({
+      nowText: args.input.nowText,
+      passNumber: args.passNumber,
+      candidates: [...args.candidates],
+      sticky: [...args.sticky],
+    });
+    return nextOf(lane.gate, gateCallCount++);
+  },
+}));
+
+// The loop calls these index builders only to hand opaque handles to the
+// (stubbed) tree walk. The stubs return harmless empty values.
+mock.module("../tree-index.js", () => ({
+  getTreeIndex: async () => ({
+    nodes: new Map(),
+    childrenByNode: new Map(),
+    parentsByNode: new Map(),
+    pageParents: new Map(),
+    root: "_root",
+  }),
+}));
+
+mock.module("../../v2/page-index.js", () => ({
+  getPageIndex: async () => ({
+    entries: [],
+    bySlug: new Map(),
+    byId: new Map(),
+    rendered: "",
+  }),
+}));
+
+const { runRetrievalLoop } = await import("../loop.js");
+
+// ---------------------------------------------------------------------------
+// Fixtures.
+// ---------------------------------------------------------------------------
+
+/** Opaque DB sentinel — the stubbed scout lane never dereferences it. */
+const db = {} as DrizzleDb;
+
+interface LaneConfig {
+  hot?: boolean;
+  sparse?: boolean;
+  dense?: boolean;
+  tree?: boolean;
+  edges?: boolean;
+}
+
+/**
+ * Minimal `RetrievalInput` (passCap defaults to 3, every lane on). The loop only
+ * forwards the other fields to the stubbed lanes / index builders, so they are inert.
+ */
+function makeInput(opts?: {
+  nowText?: string;
+  passCap?: number;
+  lanes?: LaneConfig;
+}): RetrievalInput {
+  const lanes = {
+    hot: true,
+    sparse: true,
+    dense: true,
+    tree: true,
+    edges: true,
+    ...opts?.lanes,
+  };
+  return {
+    workspaceDir: "/tmp/does-not-matter",
+    recentTurnPairs: [],
+    nowText: opts?.nowText ?? "NOW",
+    priorEverInjected: [],
+    config: {
+      memory: { v3: { passCap: opts?.passCap ?? 3, lanes } },
+    } as unknown as RetrievalInput["config"],
+  };
+}
+
+function scout(lane: ScoutResult["lane"], slugs: string[]): ScoutResult {
+  return { lane, slugs };
+}
+
+function readyGate(selected: string[]): GateResult {
+  return { decision: { decision: "ready" }, selectedSlugs: selected };
+}
+
+function moreGate(selected: string[], questions: string[]): GateResult {
+  return { decision: { decision: "more", questions }, selectedSlugs: selected };
+}
+
+function reset(): void {
+  lane.scouts = [];
+  lane.filter = [];
+  lane.walk = [];
+  lane.edges = [];
+  lane.gate = [];
+  laneCalls.scouts = [];
+  laneCalls.filter = [];
+  laneCalls.walk = [];
+  laneCalls.edges = [];
+  laneCalls.gate = [];
+  scoutCallCount = 0;
+  walkCallCount = 0;
+  edgeCallCount = 0;
+  gateCallCount = 0;
+}
+
+beforeEach(reset);
+
+// ---------------------------------------------------------------------------
+// Tests.
+// ---------------------------------------------------------------------------
+
+describe("runRetrievalLoop — single pass", () => {
+  test("ready path composes a valid RetrievalOutput with per-lane source tags", async () => {
+    lane.scouts = [
+      {
+        scouts: [
+          scout("hot", ["a"]),
+          scout("sparse", ["b"]),
+          scout("dense", ["c", "d"]),
+        ],
+        sticky: new Set(["a", "b"]),
+        bypass: new Set(["b"]),
+      },
+    ];
+    lane.filter = [{ kept: ["c"], trace: { judged: ["d"], dropped: ["d"] } }];
+    lane.walk = [
+      {
+        pages: new Set(["t1"]),
+        levels: [
+          {
+            node: "_root",
+            considered: ["sub"],
+            descended: ["sub"],
+            skipped: [],
+            reasoning: "r",
+          },
+        ],
+      },
+    ];
+    lane.edges = [
+      { pulled: new Set(["e1"]), expansions: [{ from: "a", pulled: ["e1"] }] },
+    ];
+    lane.gate = [readyGate(["a", "b", "c", "t1", "e1"])];
+
+    const out: RetrievalOutput = await runRetrievalLoop(makeInput(), { db });
+
+    expect(out.selectedSlugs).toEqual(["a", "b", "c", "t1", "e1"]);
+    // sourceBySlug tags each slug with the lane that first surfaced it.
+    expect(out.sourceBySlug.get("a")).toBe("hot");
+    expect(out.sourceBySlug.get("b")).toBe("sparse");
+    expect(out.sourceBySlug.get("c")).toBe("dense");
+    expect(out.sourceBySlug.get("t1")).toBe("tree");
+    expect(out.sourceBySlug.get("e1")).toBe("edge");
+    // Dropped dense candidate `d` was filtered out — never tagged.
+    expect(out.sourceBySlug.has("d")).toBe(false);
+
+    // Exactly one pass, with all four lane sub-traces present.
+    expect(out.trace?.passes).toHaveLength(1);
+    const pass = out.trace!.passes[0];
+    expect(pass.passNumber).toBe(1);
+    expect(pass.scouts).toHaveLength(3);
+    expect(pass.treeLevels).toHaveLength(1);
+    expect(pass.edgeExpansions).toHaveLength(1);
+    expect(pass.gate).toEqual({ decision: "ready" });
+
+    expect(out.failureReason).toBeNull();
+    expect(out.cost?.ms).toBeGreaterThanOrEqual(0);
+  });
+
+  test("dense lane is filtered before seeding tree + gate", async () => {
+    lane.scouts = [
+      {
+        scouts: [scout("dense", ["keep", "drop"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+    ];
+    lane.filter = [
+      {
+        kept: ["keep"],
+        trace: { judged: ["keep", "drop"], dropped: ["drop"] },
+      },
+    ];
+    lane.walk = [{ pages: new Set(), levels: [] }];
+    lane.edges = [{ pulled: new Set(), expansions: [] }];
+    lane.gate = [readyGate(["keep"])];
+
+    const out = await runRetrievalLoop(makeInput(), { db });
+
+    // The filter saw the full dense lane.
+    expect(laneCalls.filter[0].dense.slugs).toEqual(["keep", "drop"]);
+    // Only the kept dense slug seeds the tree walk; `drop` never reaches it.
+    expect(laneCalls.walk[0].seeds).toEqual(["keep"]);
+    // Gate's candidate set excludes the dropped dense slug.
+    expect(laneCalls.gate[0].candidates).toEqual(["keep"]);
+    expect(out.selectedSlugs).toEqual(["keep"]);
+  });
+});
+
+describe("runRetrievalLoop — multi pass", () => {
+  test("gate 'more' then 'ready' runs two passes and threads questions into NOW", async () => {
+    lane.scouts = [
+      {
+        scouts: [scout("dense", ["p1"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+      {
+        scouts: [scout("dense", ["p2"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+    ];
+    lane.filter = [
+      { kept: ["p1"], trace: { judged: ["p1"], dropped: [] } },
+      { kept: ["p2"], trace: { judged: ["p2"], dropped: [] } },
+    ];
+    lane.walk = [
+      { pages: new Set(), levels: [] },
+      { pages: new Set(), levels: [] },
+    ];
+    lane.edges = [
+      { pulled: new Set(), expansions: [] },
+      { pulled: new Set(), expansions: [] },
+    ];
+    lane.gate = [moreGate(["p1"], ["what about X?"]), readyGate(["p1", "p2"])];
+
+    const out = await runRetrievalLoop(
+      makeInput({ nowText: "BASE", passCap: 3 }),
+      { db },
+    );
+
+    // Two passes ran.
+    expect(out.trace?.passes).toHaveLength(2);
+    expect(out.trace!.passes[0].gate).toEqual({
+      decision: "more",
+      questions: ["what about X?"],
+    });
+    expect(out.trace!.passes[1].gate).toEqual({ decision: "ready" });
+
+    // Pass 1 used the base NOW verbatim; pass 2's NOW carried the gate's
+    // generated follow-up question — the standing context is not rewritten.
+    expect(laneCalls.scouts[0].nowText).toBe("BASE");
+    expect(laneCalls.scouts[1].nowText).toContain("BASE");
+    expect(laneCalls.scouts[1].nowText).toContain("what about X?");
+
+    // Final selection is the last (ready) pass's selection.
+    expect(out.selectedSlugs).toEqual(["p1", "p2"]);
+  });
+
+  test("passCap force-exits with the current selection when the gate keeps asking for more", async () => {
+    lane.scouts = [
+      { scouts: [scout("dense", ["p"])], sticky: new Set(), bypass: new Set() },
+    ];
+    lane.filter = [{ kept: ["p"], trace: { judged: ["p"], dropped: [] } }];
+    lane.walk = [{ pages: new Set(), levels: [] }];
+    lane.edges = [{ pulled: new Set(), expansions: [] }];
+    // Gate always says "more"; reused across every pass via nextOf.
+    lane.gate = [moreGate(["p"], ["again?"])];
+
+    const out = await runRetrievalLoop(makeInput({ passCap: 2 }), { db });
+
+    // Capped at passCap passes despite the gate never saying ready.
+    expect(out.trace?.passes).toHaveLength(2);
+    expect(gateCallCount).toBe(2);
+    expect(out.selectedSlugs).toEqual(["p"]);
+  });
+});
+
+describe("runRetrievalLoop — lane toggles", () => {
+  test("tree + edge lanes off removes their candidates and trace fields", async () => {
+    lane.scouts = [
+      { scouts: [scout("dense", ["s"])], sticky: new Set(), bypass: new Set() },
+    ];
+    lane.filter = [{ kept: ["s"], trace: { judged: ["s"], dropped: [] } }];
+    // These would contribute t1/e1 if their lanes ran — they must not.
+    lane.walk = [
+      {
+        pages: new Set(["t1"]),
+        levels: [
+          {
+            node: "_root",
+            considered: [],
+            descended: [],
+            skipped: [],
+            reasoning: "",
+          },
+        ],
+      },
+    ];
+    lane.edges = [
+      { pulled: new Set(["e1"]), expansions: [{ from: "s", pulled: ["e1"] }] },
+    ];
+    lane.gate = [readyGate(["s"])];
+
+    const out = await runRetrievalLoop(
+      makeInput({ lanes: { tree: false, edges: false } }),
+      { db },
+    );
+
+    // Disabled lanes were never called.
+    expect(laneCalls.walk).toHaveLength(0);
+    expect(laneCalls.edges).toHaveLength(0);
+    // Their would-be candidates never entered the gate or the selection.
+    expect(laneCalls.gate[0].candidates).toEqual(["s"]);
+    expect(out.sourceBySlug.has("t1")).toBe(false);
+    expect(out.sourceBySlug.has("e1")).toBe(false);
+    // Trace omits the disabled lanes' fields.
+    expect(out.trace!.passes[0].treeLevels).toBeUndefined();
+    expect(out.trace!.passes[0].edgeExpansions).toBeUndefined();
+  });
+
+  test("edge lane on by default expands over the accumulated candidate set", async () => {
+    lane.scouts = [
+      {
+        scouts: [scout("hot", ["h"]), scout("dense", ["d"])],
+        sticky: new Set(["h"]),
+        bypass: new Set(),
+      },
+    ];
+    lane.filter = [{ kept: ["d"], trace: { judged: ["d"], dropped: [] } }];
+    lane.walk = [{ pages: new Set(["t"]), levels: [] }];
+    lane.edges = [
+      { pulled: new Set(["x"]), expansions: [{ from: "d", pulled: ["x"] }] },
+    ];
+    lane.gate = [readyGate(["h", "d", "t", "x"])];
+
+    await runRetrievalLoop(makeInput(), { db });
+
+    // Edge expansion seeds over every accumulated confident slug (hot, dense,
+    // tree) — not just the scouts.
+    expect(laneCalls.edges[0].seeds).toEqual(
+      expect.arrayContaining(["h", "d", "t"]),
+    );
+  });
+});
+
+describe("runRetrievalLoop — failure + cost", () => {
+  test("surfaces a filter failureReason on the output", async () => {
+    lane.scouts = [
+      { scouts: [scout("dense", ["d"])], sticky: new Set(), bypass: new Set() },
+    ];
+    lane.filter = [
+      {
+        kept: ["d"],
+        trace: { judged: ["d"], dropped: [] },
+        failureReason: "no_provider",
+      },
+    ];
+    lane.walk = [{ pages: new Set(), levels: [] }];
+    lane.edges = [{ pulled: new Set(), expansions: [] }];
+    lane.gate = [readyGate(["d"])];
+
+    const out = await runRetrievalLoop(makeInput(), { db });
+
+    expect(out.failureReason).toBe("no_provider");
+  });
+
+  test("cost.ms accumulates across passes", async () => {
+    lane.scouts = [
+      { scouts: [scout("dense", ["p"])], sticky: new Set(), bypass: new Set() },
+    ];
+    lane.filter = [{ kept: ["p"], trace: { judged: ["p"], dropped: [] } }];
+    lane.walk = [{ pages: new Set(), levels: [] }];
+    lane.edges = [{ pulled: new Set(), expansions: [] }];
+    lane.gate = [moreGate(["p"], ["q"])];
+
+    const out = await runRetrievalLoop(makeInput({ passCap: 3 }), { db });
+
+    expect(out.trace?.passes).toHaveLength(3);
+    expect(out.cost?.ms).toBeGreaterThanOrEqual(0);
+  });
+});
diff --git a/assistant/src/memory/v3/loop.ts b/assistant/src/memory/v3/loop.ts
new file mode 100644
index 00000000000..0763ecf8bf6
--- /dev/null
+++ b/assistant/src/memory/v3/loop.ts
@@ -0,0 +1,258 @@
+/**
+ * Memory v3 — retrieval-loop orchestration.
+ *
+ * The composition layer that wires the v3 lanes into a single bounded-descent
+ * retrieval loop. Each pass runs the lanes in a fixed order:
+ *
+ *   1. {@link runScouts}      — always-on hot / sparse / dense fanout. Surfaces
+ *                               candidate slugs plus the `sticky` (keep-in-the-
+ *                               running) and `bypass` (skip-the-tree) sets.
+ *   2. {@link filterDenseHits} — one cheap LLM call over the *dense* lane only.
+ *                               Hot + near-exact-sparse hits arrive via
+ *                               sticky/bypass and are never judged; the dense
+ *                               near-neighbors are filtered down to meaningful
+ *                               associations.
+ *   3. {@link runTreeWalk}    — scout-seeded hierarchical descent. Seeded by the
+ *                               surviving scout slugs (their tree parents) so
+ *                               descent starts near where the lanes landed but
+ *                               still fans out from the root.
+ *   4. {@link expandEdges}    — provider-free 1–2 hop curated-graph expansion
+ *                               over every accumulated confident seed.
+ *   5. {@link runGate}        — one capable LLM call over the unioned candidate
+ *                               set. Returns `ready` (finalize) or `more`
+ *                               (its generated follow-up questions seed the next
+ *                               pass's query).
+ *
+ * Pass control. The loop runs at most `config.memory.v3.passCap` passes. When
+ * the gate says `more` and another pass is allowed, the gate's questions become
+ * the next pass's query (folded into `nowText`); otherwise the loop force-exits
+ * with the current selection. The standing-context files conveyed via
+ * `input.nowText` are consumed as situational context for the scouts, descent,
+ * and gate — the loop selects concept pages to layer on top and NEVER rewrites
+ * or re-injects the standing-context files.
+ *
+ * Lane toggles. `config.memory.v3.lanes.tree` and `.edges` gate the tree-walk
+ * and edge-expansion lanes here; the hot/sparse/dense toggles are honored inside
+ * {@link runScouts}. Toggling a lane off removes its contribution from the
+ * candidate set so the offline harness can measure each lane's marginal recall.
+ *
+ * Cross-pass accumulation. `sourceBySlug` and `sticky` persist across passes
+ * (each slug keeps the first lane that surfaced it); the candidate set handed
+ * to the gate is rebuilt every pass. The full {@link DescentTrace} carries one
+ * {@link DescentPass} per pass (scouts / treeLevels / edgeExpansions / gate),
+ * and {@link RetrievalCost} (wall-clock `ms`, the one dimension observable at
+ * this composition layer) accumulates across every pass.
+ */
+
+import type { DrizzleDb } from "../db-connection.js";
+import type {
+  RetrievalCost,
+  RetrievalInput,
+  RetrievalOutput,
+} from "../v2/harness/retriever.js";
+import type {
+  DescentPass,
+  DescentTrace,
+  GateDecision,
+} from "../v2/harness/trace.js";
+import { getPageIndex } from "../v2/page-index.js";
+import { expandEdges } from "./edges.js";
+import { filterDenseHits } from "./filter.js";
+import { runGate } from "./gate.js";
+import { runScouts } from "./scouts.js";
+import { getTreeIndex } from "./tree-index.js";
+import { runTreeWalk } from "./tree-walk.js";
+
+/** Lane label used to tag each selected slug's provenance in `sourceBySlug`. */
+type LaneSource = "hot" | "sparse" | "dense" | "tree" | "edge";
+
+/** Injected dependencies — the SQLite handle the scout hot lane reads. */
+export interface RetrievalLoopDeps {
+  db: DrizzleDb;
+}
+
+/**
+ * Run the full v3 retrieval loop for one turn.
+ *
+ * Composes the scout / filter / tree / edge / gate lanes over up to
+ * `config.memory.v3.passCap` passes (minimum one), returning the P1
+ * {@link RetrievalOutput}: the last pass's gate selection, first-lane provenance
+ * for every tagged candidate, the multi-pass {@link DescentTrace}, and summed
+ * {@link RetrievalCost}. `failureReason` is the most recent reason reported by
+ * the dense filter, or null if it never reported one; it is recorded, not fatal.
+ */
+export async function runRetrievalLoop(
+  input: RetrievalInput,
+  deps: RetrievalLoopDeps,
+): Promise<RetrievalOutput> {
+  const v3 = input.config.memory.v3;
+  const passCap = Math.max(1, v3.passCap);
+  const lanes = v3.lanes;
+
+  // Cross-pass accumulators (the `candidates` set is rebuilt inside each pass).
+  const sourceBySlug = new Map<string, LaneSource>();
+  const sticky = new Set<string>();
+  const passes: DescentPass[] = [];
+  // `ms` is the one cost dimension observable at this composition layer — the
+  // lanes consume their own LLM usage internally and don't surface tokens.
+  const cost: RetrievalCost & { ms: number } = { ms: 0 };
+  let failureReason: string | null = null;
+
+  // The query feeding each pass. Pass 1 uses the turn's NOW context verbatim;
+  // a gate `more` verdict appends its generated follow-up questions for the
+  // next pass. The standing-context files are never rewritten — questions are
+  // layered on as additional situational context only.
+  let passNowText = input.nowText;
+
+  // Final selection — replaced by the gate each pass; the last pass's selection
+  // is what the loop returns (capped at passCap on a forced exit).
+  let selectedSlugs: string[] = [];
+
+  for (let passNumber = 1; passNumber <= passCap; passNumber++) {
+    const passStart = Date.now();
+    const passInput: RetrievalInput = { ...input, nowText: passNowText };
+
+    // 1. Scouts — always-on hot / sparse / dense fanout.
+    const scoutResult = await runScouts(passInput, { db: deps.db });
+    for (const slug of scoutResult.sticky) sticky.add(slug);
+
+    // Tag hot + sparse scout hits with their lane (first lane wins). Dense
+    // slugs are tagged only if they survive the filter below — a dropped dense
+    // near-neighbor never enters the candidate set, so it earns no source tag.
+    for (const scout of scoutResult.scouts) {
+      if (scout.lane === "dense") continue;
+      for (const slug of scout.slugs) tagSlug(sourceBySlug, slug, scout.lane);
+    }
+
+    // 2. Dense filter — judges only the dense lane (hot/sparse bypass it). The
+    // surviving dense slugs replace the raw dense candidates in the running set.
+    const denseScout = scoutResult.scouts.find((s) => s.lane === "dense");
+    const candidates = new Set<string>();
+
+    // Hot + sparse lane hits enter the candidate set directly.
+    for (const scout of scoutResult.scouts) {
+      if (scout.lane === "dense") continue;
+      for (const slug of scout.slugs) candidates.add(slug);
+    }
+
+    if (denseScout) {
+      const filtered = await filterDenseHits({
+        input: passInput,
+        dense: denseScout,
+        sticky: scoutResult.sticky,
+        bypass: scoutResult.bypass,
+      });
+      for (const slug of filtered.kept) {
+        candidates.add(slug);
+        tagSlug(sourceBySlug, slug, "dense");
+      }
+      if (filtered.failureReason !== undefined) {
+        failureReason = filtered.failureReason;
+      }
+    }
+
+    // The surviving scout slugs (kept dense + hot + sparse) seed the tree walk.
+    const survivingSeeds = [...candidates];
+
+    // 3. Tree walk — scout-seeded hierarchical descent. Gated by `lanes.tree`.
+    let treeLevels: DescentPass["treeLevels"];
+    if (lanes.tree) {
+      const [tree, pages] = await Promise.all([
+        getTreeIndex(passInput.workspaceDir),
+        getPageIndex(passInput.workspaceDir),
+      ]);
+      const walk = await runTreeWalk({
+        input: passInput,
+        tree,
+        pages,
+        scouts: scoutResult.scouts,
+        seeds: survivingSeeds,
+      });
+      treeLevels = walk.levels;
+      for (const slug of walk.pages) {
+        candidates.add(slug);
+        tagSlug(sourceBySlug, slug, "tree");
+      }
+    }
+
+    // 4. Edge expansion — 1–2 hop curated-graph pull over every accumulated
+    // confident seed. Gated by `lanes.edges`.
+    let edgeExpansions: DescentPass["edgeExpansions"];
+    if (lanes.edges) {
+      const expansion = await expandEdges({
+        workspaceDir: passInput.workspaceDir,
+        seeds: [...candidates],
+      });
+      edgeExpansions = expansion.expansions;
+      for (const slug of expansion.pulled) {
+        candidates.add(slug);
+        tagSlug(sourceBySlug, slug, "edge");
+      }
+    }
+
+    // 5. Gate — one capable LLM call over the unioned candidate set.
+    const gateResult = await runGate({
+      input: passInput,
+      candidates,
+      sticky,
+      passNumber,
+    });
+    selectedSlugs = gateResult.selectedSlugs;
+
+    // Record this pass's trace.
+    const pass: DescentPass = {
+      passNumber,
+      scouts: scoutResult.scouts,
+      ...(treeLevels !== undefined ? { treeLevels } : {}),
+      ...(edgeExpansions !== undefined ? { edgeExpansions } : {}),
+      gate: gateResult.decision,
+    };
+    passes.push(pass);
+
+    cost.ms += Date.now() - passStart;
+
+    // Pass control. A `more` verdict with another pass available feeds the
+    // gate's generated questions into the next pass's query; otherwise (ready,
+    // or passCap reached) the loop exits with the current selection.
+    if (gateResult.decision.decision !== "more") break;
+    if (passNumber >= passCap) break;
+    passNowText = nextPassNowText(input.nowText, gateResult.decision);
+  }
+
+  const trace: DescentTrace = { passes };
+  return {
+    selectedSlugs,
+    sourceBySlug,
+    trace,
+    cost,
+    failureReason,
+  };
+}
+
+/**
+ * Record which lane first surfaced `slug`. An existing tag is never
+ * overwritten, so with the fixed in-pass order (scouts → tree → edge) a slug
+ * found by a scout lane keeps that label when a later lane re-surfaces it.
+ */
+function tagSlug(
+  sourceBySlug: Map<string, LaneSource>,
+  slug: string,
+  lane: LaneSource,
+): void {
+  if (sourceBySlug.has(slug)) return;
+  sourceBySlug.set(slug, lane);
+}
+
+/**
+ * Compose the NOW text for the following pass: the original standing context
+ * followed by the gate's follow-up questions, one per line, wrapped in a
+ * `<follow_up_questions>` block. The base text itself is never modified — the
+ * block is only appended after a blank line. When the decision carries no
+ * questions (missing or empty), the base NOW text is returned unchanged.
+ */
+function nextPassNowText(baseNowText: string, decision: GateDecision): string {
+  const followUps = decision.questions ?? [];
+  if (followUps.length === 0) return baseNowText;
+  const body = followUps.join("\n");
+  return `${baseNowText}\n\n<follow_up_questions>\n${body}\n</follow_up_questions>`;
+}

From ec915b02c68c88a2d1469afa96f6c0ce532b9ac8 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:13:49 -0400
Subject: [PATCH 15/21] feat(memory-v3): consolidation drains shared buffer
 into tree + maintains standing-context files (#31985)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 assistant/src/memory/jobs-worker.ts           |  64 ++-
 .../v3/__tests__/consolidation-job.test.ts    | 468 ++++++++++++++++++
 assistant/src/memory/v3/consolidation-job.ts  | 323 ++++++++++++
 assistant/src/memory/v3/maintenance.ts        | 144 ++++++
 .../src/memory/v3/prompts/consolidation.ts    | 458 +++++++++++++++++
 5 files changed, 1439 insertions(+), 18 deletions(-)
 create mode 100644 assistant/src/memory/v3/__tests__/consolidation-job.test.ts
 create mode 100644 assistant/src/memory/v3/consolidation-job.ts
 create mode 100644 assistant/src/memory/v3/maintenance.ts
 create mode 100644 assistant/src/memory/v3/prompts/consolidation.ts

diff --git a/assistant/src/memory/jobs-worker.ts b/assistant/src/memory/jobs-worker.ts
index ca59501f189..7b9da8313f1 100644
--- a/assistant/src/memory/jobs-worker.ts
+++ b/assistant/src/memory/jobs-worker.ts
@@ -83,6 +83,8 @@ import {
   memoryV2ConsolidateJob,
 } from "./v2/consolidation-job.js";
 import { memoryV2SweepJob } from "./v2/sweep-job.js";
+import { memoryV3ConsolidateJob } from "./v3/consolidation-job.js";
+import { memoryV3IndexMaintenanceJob } from "./v3/maintenance.js";
 
 const log = getLogger("memory-jobs-worker");
 
@@ -603,6 +605,12 @@ async function processJob(
     case "memory_v2_consolidate":
       await memoryV2ConsolidateJob(job, config);
       return;
+    case "memory_v3_consolidate":
+      await memoryV3ConsolidateJob(job, config);
+      return;
+    case "memory_v3_index_maintenance":
+      await memoryV3IndexMaintenanceJob(job);
+      return;
     case "memory_v2_migrate":
       await memoryV2MigrateJob(job, config);
       return;
@@ -681,17 +689,28 @@ export const GRAPH_MAINTENANCE_CHECKPOINTS = {
   patternScan: "graph_maintenance:pattern_scan:last_run",
   narrative: "graph_maintenance:narrative:last_run",
   memoryV2Consolidate: "memory_v2_consolidate_last_run",
+  memoryV3Consolidate: "memory_v3_consolidate_last_run",
 } as const;
 
 /**
  * Enqueue periodic graph maintenance jobs.
  *
  * Mutually exclusive between v1 and v2:
- *   - v2 active (`memory.v2.enabled` on) → only `memory_v2_consolidate` is
- *     scheduled.
+ *   - v2 active (`memory.v2.enabled` on) → only one buffer-drainer is
+ *     scheduled (see below).
  *   - v2 inactive → the four v1 entries (decay, consolidate, pattern_scan,
  *     narrative) are scheduled instead.
  *
+ * **Buffer-drainer retarget (v2 vs v3).** The `memory/buffer.md` is shared, so
+ * exactly one consolidator may own the drain at a time. When
+ * `memory.v3.write.enabled` is on, the v3 consolidator (`memory_v3_consolidate`)
+ * is scheduled INSTEAD of `memory_v2_consolidate` — same shared buffer +
+ * standing-context files, additionally authored into the v3 tree. When the v3
+ * write flag is off (default) the v2 consolidator stays the sole drainer,
+ * unchanged. The retarget is a clean conditional, fully reversible via the flag.
+ * Concept pages stay the shared canonical store, so the v2 router keeps working
+ * off pages v3 writes regardless of which consolidator ran.
+ *
  * Read/write paths route to v2 when the flag is on, so v1 graph data goes
  * unread; running v1 maintenance alongside v2 is wasted compute and LLM
  * spend. The v1 code path remains live so flipping the flag back to off
@@ -708,20 +727,29 @@ export function maybeEnqueueGraphMaintenanceJobs(
   nowMs = Date.now(),
 ): void {
   const v2Active = config.memory.v2.enabled;
+  const v3WriteActive = config.memory.v3.write.enabled;
+
+  // The single buffer-drainer entry for the v2-active branch: v3 when the v3
+  // write flag owns the drain, v2 otherwise. Same shared buffer either way.
+  const consolidateEntry = v3WriteActive
+    ? {
+        key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV3Consolidate,
+        intervalMs: config.memory.v3.write.consolidateIntervalMs,
+        jobType: "memory_v3_consolidate" as MemoryJobType,
+      }
+    : {
+        key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate,
+        intervalMs:
+          config.memory.v2.consolidation_interval_hours * 60 * 60 * 1000,
+        jobType: "memory_v2_consolidate" as MemoryJobType,
+      };
 
   const schedule: Array<{
     key: string;
     intervalMs: number;
     jobType: MemoryJobType;
   }> = v2Active
-    ? [
-        {
-          key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate,
-          intervalMs:
-            config.memory.v2.consolidation_interval_hours * 60 * 60 * 1000,
-          jobType: "memory_v2_consolidate",
-        },
-      ]
+    ? [consolidateEntry]
     : [
         {
           key: GRAPH_MAINTENANCE_CHECKPOINTS.decay,
@@ -745,25 +773,25 @@ export function maybeEnqueueGraphMaintenanceJobs(
         },
       ];
 
-  let enqueuedV2 = false;
+  let enqueuedConsolidate = false;
   for (const { key, intervalMs, jobType } of schedule) {
     const lastRun = parseInt(getMemoryCheckpoint(key) ?? "0", 10);
     if (nowMs - lastRun >= intervalMs) {
       enqueueMemoryJob(jobType, {});
       setMemoryCheckpoint(key, String(nowMs));
-      if (jobType === "memory_v2_consolidate") enqueuedV2 = true;
+      if (jobType === consolidateEntry.jobType) enqueuedConsolidate = true;
     }
   }
 
+  // Size-based trigger: when the shared buffer crosses the configured line
+  // count, drain it now rather than waiting out the interval. Retargets to the
+  // same consolidator the interval branch above selected.
   const maxLines = config.memory.v2.consolidation_max_buffer_lines;
-  if (v2Active && !enqueuedV2 && maxLines !== null) {
+  if (v2Active && !enqueuedConsolidate && maxLines !== null) {
     const bufferPath = join(getWorkspaceDir(), "memory", "buffer.md");
     if (countBufferLines(bufferPath) >= maxLines) {
-      enqueueMemoryJob("memory_v2_consolidate", {});
-      setMemoryCheckpoint(
-        GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate,
-        String(nowMs),
-      );
+      enqueueMemoryJob(consolidateEntry.jobType, {});
+      setMemoryCheckpoint(consolidateEntry.key, String(nowMs));
     }
   }
 }
diff --git a/assistant/src/memory/v3/__tests__/consolidation-job.test.ts b/assistant/src/memory/v3/__tests__/consolidation-job.test.ts
new file mode 100644
index 00000000000..5969c7de6f3
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/consolidation-job.test.ts
@@ -0,0 +1,468 @@
+/**
+ * Tests for the memory v3 consolidation surface (PR 19):
+ *   - `memoryV3ConsolidateJob` (`../consolidation-job.ts`) — drains the SHARED
+ *     `memory/buffer.md` into shared concept pages + the v3 tree, mirroring v2.
+ *   - the scheduler retarget in `maybeEnqueueGraphMaintenanceJobs`
+ *     (`../../jobs-worker.ts`) — enqueues `memory_v3_consolidate` INSTEAD of
+ *     `memory_v2_consolidate` when `memory.v3.write.enabled`, and v2 when off.
+ *   - `runIndexMaintenance` / `wouldIntroduceCycle` (`../maintenance.ts`) — the
+ *     mechanical no-LLM upkeep: report stale indices, refuse cycle edits.
+ *
+ * The background-agent handoff (`runBackgroundJob`) is mocked so no real LLM
+ * runs — the agent's actual page/tree writes are exercised by the v3 store/
+ * validate unit tests; here we drive the same fixture writes deterministically
+ * to prove the maintenance + cycle-check semantics. The DB is real (a temp
+ * workspace pinned via `VELLUM_WORKSPACE_DIR`) so the scheduler's checkpoint /
+ * enqueue path runs end-to-end. Sample content uses generic placeholders
+ * (Alice/Bob).
+ */
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { utimes } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import {
+  afterAll,
+  beforeAll,
+  beforeEach,
+  describe,
+  expect,
+  mock,
+  test,
+} from "bun:test";
+
+import { eq } from "drizzle-orm";
+
+import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js";
+
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () => makeMockLogger(),
+}));
+
+// ── runBackgroundJob mock ───────────────────────────────────────────
+//
+// The consolidation handler delegates bootstrap + processMessage + timeout +
+// classification to runBackgroundJob. We stub it so no LLM runs and assert the
+// surface (prompt, callSite, source, suppression) it was called with.
+let runnerCalls = 0;
+let runnerLastArgs: Record<string, unknown> | null = null;
+let runnerImpl: () => Promise<{
+  conversationId: string;
+  ok: boolean;
+  error?: Error;
+  errorKind?: string;
+}> = async () => ({ conversationId: "conv-1", ok: true });
+
+mock.module("../../../runtime/background-job-runner.js", () => ({
+  runBackgroundJob: async (opts: Record<string, unknown>) => {
+    runnerCalls += 1;
+    runnerLastArgs = opts;
+    return runnerImpl();
+  },
+}));
+
+// ── Workspace pin (precedes the DB import) ──────────────────────────
+let tmpWorkspace: string;
+let previousWorkspaceEnv: string | undefined;
+
+beforeAll(() => {
+  tmpWorkspace = mkdtempSync(join(tmpdir(), "memory-v3-consolidate-test-"));
+  previousWorkspaceEnv = process.env.VELLUM_WORKSPACE_DIR;
+  process.env.VELLUM_WORKSPACE_DIR = tmpWorkspace;
+});
+
+afterAll(() => {
+  if (previousWorkspaceEnv === undefined) {
+    delete process.env.VELLUM_WORKSPACE_DIR;
+  } else {
+    process.env.VELLUM_WORKSPACE_DIR = previousWorkspaceEnv;
+  }
+  rmSync(tmpWorkspace, { recursive: true, force: true });
+});
+
+const { getDb } = await import("../../db-connection.js");
+const { initializeDb } = await import("../../db-init.js");
+const { resetTestTables } = await import("../../raw-query.js");
+const { memoryJobs } = await import("../../schema.js");
+const { applyNestedDefaults } = await import("../../../config/loader.js");
+const { setMemoryCheckpoint, deleteMemoryCheckpoint } =
+  await import("../../checkpoints.js");
+const { maybeEnqueueGraphMaintenanceJobs } =
+  await import("../../jobs-worker.js");
+const { memoryV3ConsolidateJob } = await import("../consolidation-job.js");
+const { CUTOFF_PLACEHOLDER, CONSOLIDATION_PROMPT } =
+  await import("../prompts/consolidation.js");
+const { runIndexMaintenance, wouldIntroduceCycle } =
+  await import("../maintenance.js");
+const { writePage } = await import("../../v2/page-store.js");
+const { invalidatePageIndex } = await import("../../v2/page-index.js");
+const { invalidateEdgeIndex } = await import("../../v2/edge-index.js");
+const { getTreeIndex, invalidateTreeIndex } = await import("../tree-index.js");
+const { writeNode, getTreeDir, ROOT_NODE_ID } =
+  await import("../tree-store.js");
+
+const V2_CHECKPOINT = "memory_v2_consolidate_last_run";
+const V3_CHECKPOINT = "memory_v3_consolidate_last_run";
+
+// The job handler reads only `config.memory.v3.write.enabled` and the shared
+// `config.memory.v2.consolidation_prompt_path`; a minimal stand-in covers both
+// call sites without materializing the full default config.
+type JobConfig = Parameters<typeof memoryV3ConsolidateJob>[1];
+const CONFIG_V3_ON = {
+  memory: {
+    v2: { consolidation_prompt_path: null },
+    v3: { write: { enabled: true } },
+  },
+} as JobConfig;
+const CONFIG_V3_OFF = {
+  memory: {
+    v2: { consolidation_prompt_path: null },
+    v3: { write: { enabled: false } },
+  },
+} as JobConfig;
+
+function makeJob(): Parameters<typeof memoryV3ConsolidateJob>[0] {
+  return {
+    id: "consolidate-1",
+    type: "memory_v3_consolidate",
+    payload: {},
+    status: "running",
+    attempts: 0,
+    deferrals: 0,
+    runAfter: 0,
+    lastError: null,
+    startedAt: Date.now(),
+    createdAt: Date.now(),
+    updatedAt: Date.now(),
+  };
+}
+
+const memoryDir = () => join(tmpWorkspace, "memory");
+const lockPath = () =>
+  join(tmpWorkspace, "memory", ".v3-state", "consolidation.lock");
+const bufferPath = () => join(tmpWorkspace, "memory", "buffer.md");
+
+function countPendingJobs(type: string): number {
+  return getDb()
+    .select()
+    .from(memoryJobs)
+    .where(eq(memoryJobs.type, type))
+    .all().length;
+}
+
+function buildSchedulerConfig(v3WriteEnabled: boolean) {
+  const cfg = applyNestedDefaults({});
+  cfg.memory.v2.enabled = true;
+  cfg.memory.v2.consolidation_interval_hours = 1;
+  cfg.memory.v2.consolidation_max_buffer_lines = null;
+  cfg.memory.v3.write.enabled = v3WriteEnabled;
+  cfg.memory.v3.write.consolidateIntervalMs = 60 * 60 * 1000;
+  return cfg;
+}
+
+function resetCaches(): void {
+  invalidateTreeIndex();
+  invalidatePageIndex();
+  invalidateEdgeIndex();
+}
+
+initializeDb();
+
+beforeEach(() => {
+  rmSync(memoryDir(), { recursive: true, force: true });
+  mkdirSync(join(memoryDir(), ".v3-state"), { recursive: true });
+  mkdirSync(join(memoryDir(), "concepts"), { recursive: true });
+  resetTestTables("memory_jobs", "memory_checkpoints");
+  resetCaches();
+
+  runnerCalls = 0;
+  runnerLastArgs = null;
+  runnerImpl = async () => ({ conversationId: "conv-1", ok: true });
+});
+
+// ---------------------------------------------------------------------------
+// memoryV3ConsolidateJob
+// ---------------------------------------------------------------------------
+
+describe("memoryV3ConsolidateJob — flag off (v3 write disabled)", () => {
+  test("returns disabled without invoking the runner or touching the lock", async () => {
+    writeFileSync(bufferPath(), "- [Apr 27, 9:00 AM] Alice prefers VS Code.\n");
+
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_OFF);
+
+    expect(result).toEqual({ kind: "disabled" });
+    expect(runnerCalls).toBe(0);
+    expect(existsSync(lockPath())).toBe(false);
+    expect(countPendingJobs("memory_v3_index_maintenance")).toBe(0);
+    expect(countPendingJobs("memory_v2_reembed")).toBe(0);
+  });
+});
+
+describe("memoryV3ConsolidateJob — empty shared buffer", () => {
+  test("returns empty_buffer when the shared buffer.md is missing", async () => {
+    expect(existsSync(bufferPath())).toBe(false);
+
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON);
+
+    expect(result).toEqual({ kind: "empty_buffer" });
+    expect(runnerCalls).toBe(0);
+    expect(existsSync(lockPath())).toBe(false);
+  });
+});
+
+describe("memoryV3ConsolidateJob — non-empty shared buffer", () => {
+  beforeEach(() => {
+    writeFileSync(
+      bufferPath(),
+      "- [Apr 27, 9:00 AM] Alice prefers VS Code over Vim.\n" +
+        "- [Apr 27, 9:05 AM] Bob ships at end of day.\n",
+    );
+  });
+
+  test("invokes runBackgroundJob with the v3 tree-authoring prompt and suppression", async () => {
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON);
+
+    expect(result.kind).toBe("invoked");
+    expect(runnerCalls).toBe(1);
+    expect(runnerLastArgs?.callSite).toBe("mainAgent");
+    expect(runnerLastArgs?.origin).toBe("memory_consolidation");
+    // Shared consolidation conversation source (recognized by the route layer).
+    expect(runnerLastArgs?.source).toBe("memory_v2_consolidation");
+    expect(runnerLastArgs?.suppressFailureNotifications).toBe(true);
+    expect(runnerLastArgs?.trustContext).toEqual({
+      sourceChannel: "vellum",
+      trustClass: "guardian",
+    });
+
+    const prompt = runnerLastArgs?.prompt as string;
+    // Cutoff substituted (placeholder gone), buffer-format timestamp present.
+    expect(prompt).not.toContain(CUTOFF_PLACEHOLDER);
+    expect(prompt).toMatch(/\b[A-Z][a-z]{2} \d{1,2}, \d{1,2}:\d{2} (AM|PM)\b/);
+    // v3-distinctive: the prompt routes into the v3 tree, not just flat pages.
+    expect(prompt).toContain("memory/v3/tree/");
+    // Standing-context files preserved exactly as v2 (shared).
+    expect(prompt).toContain("memory/buffer.md");
+    expect(prompt).toContain("memory/recent.md");
+    expect(prompt).toContain("memory/essentials.md");
+    expect(prompt).toContain("memory/threads.md");
+  });
+
+  test("enqueues index-maintenance + page-reembed follow-ups on success", async () => {
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON);
+
+    expect(result.kind).toBe("invoked");
+    if (result.kind === "invoked") {
+      expect(result.followUpJobIds).toHaveLength(2);
+    }
+    expect(countPendingJobs("memory_v3_index_maintenance")).toBe(1);
+    expect(countPendingJobs("memory_v2_reembed")).toBe(1);
+  });
+
+  test("releases the lock after a successful invocation", async () => {
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON);
+    expect(result.kind).toBe("invoked");
+    expect(existsSync(lockPath())).toBe(false);
+  });
+
+  test("returns run_failed and skips follow-ups when the runner reports failure", async () => {
+    runnerImpl = async () => ({
+      conversationId: "conv-1",
+      ok: false,
+      error: new Error("simulated runner failure"),
+      errorKind: "exception",
+    });
+
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON);
+
+    expect(result.kind).toBe("run_failed");
+    if (result.kind === "run_failed") {
+      expect(result.reason).toBe("simulated runner failure");
+    }
+    expect(countPendingJobs("memory_v3_index_maintenance")).toBe(0);
+    expect(countPendingJobs("memory_v2_reembed")).toBe(0);
+    expect(existsSync(lockPath())).toBe(false);
+  });
+
+  test("a live lock holder blocks a second concurrent invocation", async () => {
+    writeFileSync(lockPath(), `${process.pid} 1700000000000\n`);
+
+    const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON);
+
+    expect(result.kind).toBe("locked");
+    expect(runnerCalls).toBe(0);
+    expect(existsSync(lockPath())).toBe(true);
+  });
+});
+
+describe("CONSOLIDATION_PROMPT (v3)", () => {
+  test("keeps the standing-context outputs identical to v2", () => {
+    expect(CONSOLIDATION_PROMPT).toContain(CUTOFF_PLACEHOLDER);
+    expect(CONSOLIDATION_PROMPT).toContain("memory/essentials.md");
+    expect(CONSOLIDATION_PROMPT).toContain("memory/threads.md");
+    expect(CONSOLIDATION_PROMPT).toContain("memory/recent.md");
+    expect(CONSOLIDATION_PROMPT).toContain("memory/buffer.md");
+    expect(CONSOLIDATION_PROMPT).toContain("≤2000 chars");
+  });
+
+  test("adds the v3 tree-authoring routing the shared concept pages get indexed into", () => {
+    expect(CONSOLIDATION_PROMPT).toContain("memory/v3/tree/");
+    expect(CONSOLIDATION_PROMPT).toContain("children");
+    // The DAG cycle / reachability discipline must be in the prompt.
+    expect(CONSOLIDATION_PROMPT.toLowerCase()).toContain("cycle");
+    expect(CONSOLIDATION_PROMPT).toContain(ROOT_NODE_ID);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Scheduler retarget — shared buffer drained by exactly one consolidator.
+// ---------------------------------------------------------------------------
+
+describe("maybeEnqueueGraphMaintenanceJobs — v2/v3 consolidator retarget", () => {
+  test("enqueues v3 (not v2) when memory.v3.write.enabled is on", () => {
+    const config = buildSchedulerConfig(true);
+    deleteMemoryCheckpoint(V3_CHECKPOINT);
+    deleteMemoryCheckpoint(V2_CHECKPOINT);
+
+    maybeEnqueueGraphMaintenanceJobs(config, Date.now());
+
+    expect(countPendingJobs("memory_v3_consolidate")).toBe(1);
+    expect(countPendingJobs("memory_v2_consolidate")).toBe(0);
+    // v1 entries stay suppressed (v2 active).
+    expect(countPendingJobs("graph_decay")).toBe(0);
+  });
+
+  test("enqueues v2 (not v3) when memory.v3.write.enabled is off — v2 path unchanged", () => {
+    const config = buildSchedulerConfig(false);
+    deleteMemoryCheckpoint(V3_CHECKPOINT);
+    deleteMemoryCheckpoint(V2_CHECKPOINT);
+
+    maybeEnqueueGraphMaintenanceJobs(config, Date.now());
+
+    expect(countPendingJobs("memory_v2_consolidate")).toBe(1);
+    expect(countPendingJobs("memory_v3_consolidate")).toBe(0);
+  });
+
+  test("v3 size trigger drains the shared buffer when the line count is crossed", () => {
+    const config = buildSchedulerConfig(true);
+    config.memory.v2.consolidation_max_buffer_lines = 5;
+
+    const now = Date.now();
+    // Recent checkpoint so the time-based trigger does not fire — only size.
+    setMemoryCheckpoint(V3_CHECKPOINT, String(now - 60_000));
+    const entries = Array.from(
+      { length: 10 },
+      (_, i) => `- [Jan 15, 2:${String(i).padStart(2, "0")} PM] note ${i}`,
+    );
+    writeFileSync(bufferPath(), entries.join("\n") + "\n");
+
+    maybeEnqueueGraphMaintenanceJobs(config, now);
+
+    expect(countPendingJobs("memory_v3_consolidate")).toBe(1);
+    expect(countPendingJobs("memory_v2_consolidate")).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Maintenance — cycle refusal + stale-index reporting (mechanical, no LLM).
+// ---------------------------------------------------------------------------
+
+describe("wouldIntroduceCycle", () => {
+  test("refuses an edge that would close a loop (child already reaches parent)", async () => {
+    // _root → node:a → node:b. Adding b → a would close a → b → a.
+    await writeNode(tmpWorkspace, {
+      id: ROOT_NODE_ID,
+      frontmatter: { children: ["node:a"] },
+      body: "root",
+    });
+    await writeNode(tmpWorkspace, {
+      id: "a",
+      frontmatter: { children: ["node:b"] },
+      body: "a",
+    });
+    await writeNode(tmpWorkspace, {
+      id: "b",
+      frontmatter: { children: [] },
+      body: "b",
+    });
+    resetCaches();
+    const tree = await getTreeIndex(tmpWorkspace);
+
+    // b → a would create a cycle; a → b already exists (DAG-safe re-add).
+    expect(wouldIntroduceCycle(tree, "b", "a")).toBe(true);
+    // A self-edge is trivially a cycle.
+    expect(wouldIntroduceCycle(tree, "a", "a")).toBe(true);
+    // A fresh leaf edge does not introduce a cycle.
+    expect(wouldIntroduceCycle(tree, "b", "c")).toBe(false);
+    // Adding a second parent for b (DAG, not cycle) is allowed.
+    expect(wouldIntroduceCycle(tree, ROOT_NODE_ID, "b")).toBe(false);
+  });
+});
+
+describe("runIndexMaintenance", () => {
+  test("reports a stale composed index (parent mtime predates a child)", async () => {
+    // _root → node:people → page:alice. Make `_root` (the parent) older than
+    // its child `people` so _root's composed index is stale relative to it.
+    await writeNode(tmpWorkspace, {
+      id: ROOT_NODE_ID,
+      frontmatter: { children: ["node:people"] },
+      body: "root",
+    });
+    await writeNode(tmpWorkspace, {
+      id: "people",
+      frontmatter: { children: ["page:alice"] },
+      body: "people",
+    });
+    await writePage(tmpWorkspace, {
+      slug: "alice",
+      frontmatter: { edges: [], ref_files: [], ref_urls: [] },
+      body: "alice",
+    });
+
+    // Pin mtimes: _root older than its child `people` so _root is flagged.
+    const treeDir = getTreeDir(tmpWorkspace);
+    const old = new Date(1_000_000_000_000);
+    const fresh = new Date(2_000_000_000_000);
+    await utimes(join(treeDir, "people.md"), fresh, fresh);
+    await utimes(join(treeDir, `${ROOT_NODE_ID}.md`), old, old);
+    resetCaches();
+
+    const result = await runIndexMaintenance(tmpWorkspace);
+
+    expect(result.staleIndexCount).toBeGreaterThanOrEqual(1);
+    expect(
+      result.report.staleIndex.some(
+        (s) => s.node === ROOT_NODE_ID && s.child === "people",
+      ),
+    ).toBe(true);
+    // Clean tree otherwise: alice is reachable, refs resolve, no cycles.
+    expect(result.cycleCount).toBe(0);
+    expect(result.danglingChildRefCount).toBe(0);
+    expect(result.orphanPageCount).toBe(0);
+  });
+
+  test("returns a clean report for a well-formed tree", async () => {
+    await writeNode(tmpWorkspace, {
+      id: ROOT_NODE_ID,
+      frontmatter: { children: ["page:alice"] },
+      body: "root",
+    });
+    await writePage(tmpWorkspace, {
+      slug: "alice",
+      frontmatter: { edges: [], ref_files: [], ref_urls: [] },
+      body: "alice",
+    });
+    resetCaches();
+
+    const result = await runIndexMaintenance(tmpWorkspace);
+
+    expect(result.cycleCount).toBe(0);
+    expect(result.danglingChildRefCount).toBe(0);
+    expect(result.orphanPageCount).toBe(0);
+    expect(result.unknownEdgeTargetCount).toBe(0);
+  });
+});
diff --git a/assistant/src/memory/v3/consolidation-job.ts b/assistant/src/memory/v3/consolidation-job.ts
new file mode 100644
index 00000000000..31ac7926805
--- /dev/null
+++ b/assistant/src/memory/v3/consolidation-job.ts
@@ -0,0 +1,323 @@
+/**
+ * Memory v3 — `memory_v3_consolidate` job handler.
+ *
+ * The v3 consolidation job drains the SHARED `memory/buffer.md` (the same
+ * buffer v2 uses — there is no v3 buffer) into the SHARED concept pages AND the
+ * v3 **tree** overlay, while maintaining the SHARED standing-context files
+ * (`essentials.md` / `threads.md` / `recent.md`) byte-for-byte the way v2 does.
+ * It is the v3 counterpart to `assistant/src/memory/v2/consolidation-job.ts`
+ * and mirrors its orchestration exactly — the only divergences are the gating
+ * flag (`memory.v3.write.enabled`), the lock path (`memory/.v3-state/`), and the
+ * prompt body (which additionally asks the agent to author/refresh the tree).
+ *
+ * Because the buffer and the standing-context files are shared, exactly one
+ * consolidator may own the drain at a time. The scheduler enforces this: when
+ * `memory.v3.write.enabled` is on it enqueues `memory_v3_consolidate` INSTEAD of
+ * `memory_v2_consolidate` (see `maybeEnqueueGraphMaintenanceJobs` in
+ * `jobs-worker.ts`). Concept pages stay the shared canonical store, so the v2
+ * router keeps working off pages v3 writes — it just ignores the tree overlay.
+ *
+ * Lifecycle (identical to v2 except the flag + lock path + tree-authoring
+ * prompt):
+ *   1. Bail if `config.memory.v3.write.enabled` is false (the worker may have
+ *      claimed a stale row from before the flag was flipped off).
+ *   2. Acquire a single-process lock at `memory/.v3-state/consolidation.lock`.
+ *   3. Capture the cutoff timestamp at dispatch.
+ *   4. Read the shared `memory/buffer.md`. Bail if empty.
+ *   5. Hand off to `runBackgroundJob()` with the v3 consolidation prompt
+ *      (`suppressFailureNotifications: true`).
+ *   6. On success, enqueue follow-ups: `memory_v3_index_maintenance` (mechanical
+ *      tree/DAG upkeep) and `embed_concept_page` reembed (pages are shared, so
+ *      reembed is still needed — reuse the existing `memory_v2_reembed` fan-out
+ *      job type, which enqueues one `embed_concept_page` per slug).
+ *   7. Release the lock.
+ */
+
+import {
+  closeSync,
+  mkdirSync,
+  openSync,
+  readFileSync,
+  unlinkSync,
+  writeSync,
+} from "node:fs";
+import { dirname, join } from "node:path";
+
+import type { AssistantConfig } from "../../config/types.js";
+import { runBackgroundJob } from "../../runtime/background-job-runner.js";
+import { getLogger } from "../../util/logger.js";
+import { getWorkspaceDir } from "../../util/platform.js";
+import { isProcessAlive } from "../../util/process-liveness.js";
+import { formatBufferTimestamp } from "../graph/tool-handlers.js";
+import {
+  enqueueMemoryJob,
+  type MemoryJob,
+  type MemoryJobType,
+} from "../jobs-store.js";
+// The consolidation conversation `source` is a UI/routing concern shared with
+// v2 (the route layer recognizes "this conversation IS background memory
+// consolidation" by this string). v2 and v3 are mutually exclusive drainers, so
+// reusing the same source keeps that recognition working for both without
+// forking a v3 constant.
+import { MEMORY_V2_CONSOLIDATION_SOURCE } from "../v2/constants.js";
+import { resolveConsolidationPrompt } from "./prompts/consolidation.js";
+
+const log = getLogger("memory-v3-consolidate");
+
+/** Stable identifier surfaced in `runBackgroundJob` logs and notifications. */
+const JOB_NAME = "memory.consolidate";
+
+/**
+ * Hard timeout for the consolidation run. Matches v2: consolidation reads the
+ * buffer, rewrites several files, re-encodes essentials/threads, and authors
+ * the tree — generous upper bound so a slow run isn't killed mid-edit, but
+ * bounded so a stuck provider can't pin the worker indefinitely.
+ */
+const CONSOLIDATION_TIMEOUT_MS = 15 * 60 * 1000;
+
+/**
+ * Follow-up jobs to fan out after a successful consolidation:
+ *   - `memory_v3_index_maintenance` — mechanical (no-LLM) tree/DAG upkeep:
+ *     validate the tree, report stale composed indices, cycle-check the DAG.
+ *   - `memory_v2_reembed` — re-embed every shared concept page (the fan-out job
+ *     enqueues one `embed_concept_page` per slug). Pages are shared, so a v3
+ *     consolidation that touches them still needs the reembed. Conservatively
+ *     re-embeds every page; the embedder's content-hash cache makes unchanged
+ *     pages effectively free.
+ */
+const FOLLOW_UP_JOB_TYPES: readonly MemoryJobType[] = [
+  "memory_v3_index_maintenance",
+  "memory_v2_reembed",
+] as const;
+
+/**
+ * Job handler. See file header for the full lifecycle. Returns a discriminated
+ * union so tests can assert on the path taken (disabled / locked / empty /
+ * invoked / failed) without having to spy on the filesystem. Mirrors v2's
+ * `ConsolidationOutcome`.
+ */
+export type ConsolidationOutcome =
+  | { kind: "disabled" }
+  | { kind: "locked"; holder: string }
+  | { kind: "empty_buffer" }
+  | { kind: "run_failed"; reason?: string }
+  | {
+      kind: "invoked";
+      conversationId: string;
+      cutoff: string;
+      followUpJobIds: string[];
+    };
+
+export async function memoryV3ConsolidateJob(
+  _job: MemoryJob,
+  config: AssistantConfig,
+): Promise<ConsolidationOutcome> {
+  if (!config.memory.v3.write.enabled) {
+    log.debug("memory.v3.write.enabled is false; consolidation skipped");
+    return { kind: "disabled" };
+  }
+
+  const memoryDir = join(getWorkspaceDir(), "memory");
+  const lockPath = join(memoryDir, ".v3-state", "consolidation.lock");
+  const bufferPath = join(memoryDir, "buffer.md");
+
+  // Step 2: acquire lock. Bails immediately if another consolidation is
+  // already in flight — the next scheduled run can pick up where we leave off.
+  const holder = tryAcquireLock(lockPath);
+  if (holder !== null) {
+    log.warn({ lockPath, holder }, "consolidation skipped: lock already held");
+    return { kind: "locked", holder };
+  }
+
+  try {
+    // Step 3: capture cutoff. Formatted to match `buffer.md` entry timestamps
+    // (`Mon D, h:mm AM/PM`) so the agent's "timestamp ≥ cutoff" check compares
+    // like-with-like at minute precision. Captured here (not at enqueue time)
+    // so late-claimed rows get a fresh cutoff.
+    const cutoff = formatBufferTimestamp(new Date());
+
+    // Step 4: bail on empty buffer. The shared buffer has no work to drain.
+    const bufferContent = readBufferContent(bufferPath);
+    if (bufferContent.trim().length === 0) {
+      log.debug("buffer.md empty; consolidation skipped");
+      return { kind: "empty_buffer" };
+    }
+
+    // Step 5: hand off to the centralized background-job runner. As with v2,
+    // `suppressFailureNotifications: true` opts out of `activity.failed`
+    // notifications so a network blip on the tight consolidation interval does
+    // not spam the home feed; Sentry-side reporting is unchanged.
+    //
+    // The prompt override config key (`memory.v2.consolidation_prompt_path`) is
+    // shared — there is no separate v3 key, so an operator points one file at
+    // whichever consolidator owns the drain.
+    const runResult = await runBackgroundJob({
+      jobName: JOB_NAME,
+      source: MEMORY_V2_CONSOLIDATION_SOURCE,
+      prompt: resolveConsolidationPrompt(
+        config.memory.v2.consolidation_prompt_path,
+        cutoff,
+      ),
+      trustContext: { sourceChannel: "vellum", trustClass: "guardian" },
+      callSite: "mainAgent",
+      timeoutMs: CONSOLIDATION_TIMEOUT_MS,
+      origin: "memory_consolidation",
+      suppressFailureNotifications: true,
+    });
+
+    if (!runResult.ok) {
+      log.error(
+        {
+          conversationId: runResult.conversationId,
+          errorKind: runResult.errorKind,
+          err: runResult.error?.message,
+        },
+        "consolidation run failed; follow-ups skipped",
+      );
+      return runResult.error?.message !== undefined
+        ? { kind: "run_failed", reason: runResult.error.message }
+        : { kind: "run_failed" };
+    }
+
+    // Step 6: enqueue follow-up jobs (tree maintenance + page reembed).
+    const followUpJobIds: string[] = [];
+    for (const jobType of FOLLOW_UP_JOB_TYPES) {
+      try {
+        followUpJobIds.push(enqueueMemoryJob(jobType, {}));
+      } catch (err) {
+        // Best-effort: a failed enqueue here doesn't undo the agent's writes,
+        // and the next scheduled consolidation will attempt the same fan-out.
+        log.warn(
+          { err, jobType },
+          "consolidation: failed to enqueue follow-up job; continuing",
+        );
+      }
+    }
+
+    log.info(
+      {
+        conversationId: runResult.conversationId,
+        cutoff,
+        followUpJobIds,
+      },
+      "consolidation invoked",
+    );
+    return {
+      kind: "invoked",
+      conversationId: runResult.conversationId,
+      cutoff,
+      followUpJobIds,
+    };
+  } finally {
+    releaseLock(lockPath);
+  }
+}
+
+/**
+ * Read `memory/buffer.md`. Missing file → empty string so the skip-on-empty
+ * branch doesn't have to distinguish "no file" from "blank file".
+ */
+function readBufferContent(bufferPath: string): string {
+  try {
+    return readFileSync(bufferPath, "utf-8");
+  } catch (err) {
+    if ((err as NodeJS.ErrnoException).code === "ENOENT") return "";
+    throw err;
+  }
+}
+
+/**
+ * Create the lock file with `wx` (O_CREAT | O_EXCL). Returns `null` on
+ * success, otherwise the holder string: the holder is alive, a stale lock
+ * could not be unlinked, or the re-create after a takeover found the file
+ * again. Mirrors v2's lock machinery — single writer per workspace, so a
+ * holder whose process died is unambiguously stale and is taken over.
+ */
+function tryAcquireLock(lockPath: string): string | null {
+  mkdirSync(dirname(lockPath), { recursive: true });
+
+  const firstHolder = tryCreate(lockPath);
+  if (firstHolder === null) return null;
+  if (!isHolderStale(firstHolder)) return firstHolder;
+
+  log.info(
+    { lockPath, holder: firstHolder },
+    "consolidation: taking over stale lock (holder not running)",
+  );
+  try {
+    unlinkSync(lockPath);
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    if (code !== "ENOENT") {
+      log.warn(
+        { err, lockPath },
+        "consolidation: failed to unlink stale lock; reporting as locked",
+      );
+      return firstHolder;
+    }
+  }
+  return tryCreate(lockPath);
+}
+
+/**
+ * Atomically create the lock file. Returns `null` on success, or the trimmed
+ * holder string read from the file when it already exists (`"unknown"` if the
+ * read fails or the payload is blank). Rethrows non-EEXIST `openSync` errors.
+ */
+function tryCreate(lockPath: string): string | null {
+  let fd: number;
+  try {
+    fd = openSync(lockPath, "wx");
+  } catch (err) {
+    if ((err as NodeJS.ErrnoException).code !== "EEXIST") throw err;
+    try {
+      return readFileSync(lockPath, "utf-8").trim() || "unknown";
+    } catch {
+      return "unknown";
+    }
+  }
+  try {
+    writeSync(fd, `${process.pid} ${Date.now()}\n`);
+  } catch {
+    // best-effort — payload is advisory, the file's existence is the lock
+  } finally {
+    try {
+      closeSync(fd);
+    } catch {
+      // best-effort
+    }
+  }
+  return null;
+}
+
+/**
+ * A holder string is stale when its PID parses to a non-running process. An
+ * unparseable / empty / `"unknown"` payload is also treated as stale: the only
+ * writer is `tryCreate`, so corruption indicates a partial write from a crashed
+ * prior holder rather than a live writer mid-flush.
+ */
+function isHolderStale(holder: string): boolean {
+  const match = /^\d+/.exec(holder);
+  if (!match) return true;
+  const pid = Number.parseInt(match[0], 10);
+  if (!Number.isFinite(pid) || pid <= 0) return true;
+  return !isProcessAlive(pid);
+}
+
+/**
+ * Idempotent unlink of the lock file. Called from the `finally` block so an
+ * exception in the run path doesn't leave the lock stranded. ENOENT is ignored
+ * (lock already removed); any other unlink error is logged, never thrown.
+ */
+function releaseLock(lockPath: string): void {
+  try {
+    unlinkSync(lockPath);
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    if (code === "ENOENT") return;
+    log.warn(
+      { err, lockPath },
+      "consolidation: failed to release lock (best-effort)",
+    );
+  }
+}
diff --git a/assistant/src/memory/v3/maintenance.ts b/assistant/src/memory/v3/maintenance.ts
new file mode 100644
index 00000000000..5bdaa3a9c06
--- /dev/null
+++ b/assistant/src/memory/v3/maintenance.ts
@@ -0,0 +1,144 @@
+/**
+ * Memory v3 — `memory_v3_index_maintenance` job + DAG-edit guards.
+ *
+ * The fast-lane, **no-LLM** mechanical counterpart to consolidation. Where
+ * consolidation (the slow lane) asks the agent to author the tree, maintenance
+ * is the deterministic upkeep that runs as a follow-up: it validates the tree,
+ * surfaces stale composed indices, and cycle-checks the DAG so a consolidation
+ * pass can't leave a loop behind.
+ *
+ * Two pieces (plus the thin {@link memoryV3IndexMaintenanceJob} job wrapper):
+ *   - {@link runIndexMaintenance} — the job body. Runs {@link validateTree}
+ *     (merged: dangling refs, orphan pages, cycles, stale indices, unknown edge
+ *     targets), logs a structured report, and returns a compact summary so the
+ *     job dispatcher / tests can assert on it.
+ *   - {@link wouldIntroduceCycle} — the guard a DAG editor calls BEFORE adding a
+ *     `node:<child>` edge to a parent. Returns true when `child` already reaches
+ *     `parent` by descending `node:` children (so adding the edge would close a
+ *     loop). Uses the same iterative visited/guard traversal as the validator's
+ *     descent so consolidation can refuse a cycle-introducing edit cheaply.
+ *
+ * Why no separate "refresh stale composed indices" write step: v3 node indices
+ * are **composed at read time** (`index-composition.ts` is a pure function over
+ * the live tree + page indices), so there is no persisted index to rewrite. The
+ * maintenance job's job is to *detect and report* stale indices (a node whose
+ * mtime predates a child it composes) — the re-authoring of the node's
+ * self-description is the consolidation agent's responsibility, surfaced here so
+ * the next pass knows what to refresh.
+ */
+
+import { getLogger } from "../../util/logger.js";
+import { getWorkspaceDir } from "../../util/platform.js";
+import type { MemoryJob } from "../jobs-store.js";
+import type { TreeIndex } from "./tree-index.js";
+import { type TreeValidationReport, validateTree } from "./validate.js";
+
+const log = getLogger("memory-v3-index-maintenance");
+
+/**
+ * Compact summary of an index-maintenance pass. Mirrors the `*Count` fields of
+ * {@link TreeValidationReport} so callers (and the job dispatcher's log line)
+ * can report the health of the tree without re-counting. `report` carries the
+ * full per-id lists for anything that wants to act on the specifics.
+ */
+export interface IndexMaintenanceResult {
+  danglingChildRefCount: number;
+  orphanPageCount: number;
+  cycleCount: number;
+  staleIndexCount: number;
+  unknownEdgeTargetCount: number;
+  report: TreeValidationReport;
+}
+
+/**
+ * Run a mechanical index-maintenance pass over the v3 tree.
+ *
+ * Validates the hand-authored tree (dangling refs, orphan pages, cycles, stale
+ * composed indices, unknown edge targets) and logs a structured report. Stale
+ * indices and cycles are warned at WARN so operators see structural drift a
+ * consolidation pass introduced; the rest log at INFO. Never throws — like the
+ * validator it wraps, this is a report, not an assertion. Returns the summary
+ * so the job dispatcher and tests can assert on the counts.
+ */
+export async function runIndexMaintenance(
+  workspaceDir = getWorkspaceDir(),
+): Promise<IndexMaintenanceResult> {
+  const report = await validateTree(workspaceDir);
+  const { cycleCount, staleIndexCount } = report;
+
+  const summaryFields = {
+    danglingChildRefs: report.danglingChildRefCount,
+    orphanPages: report.orphanPageCount,
+    cycles: cycleCount,
+    staleIndices: staleIndexCount,
+    unknownEdgeTargets: report.unknownEdgeTargetCount,
+  };
+
+  const hasStructuralDrift = cycleCount > 0 || staleIndexCount > 0;
+  if (!hasStructuralDrift) {
+    log.info(summaryFields, "v3 index maintenance complete");
+  } else {
+    log.warn(
+      { ...summaryFields, cyclesDetail: report.cycles },
+      "v3 index maintenance: structural drift detected (cycles and/or stale composed indices)",
+    );
+  }
+
+  return {
+    danglingChildRefCount: report.danglingChildRefCount,
+    orphanPageCount: report.orphanPageCount,
+    cycleCount,
+    staleIndexCount,
+    unknownEdgeTargetCount: report.unknownEdgeTargetCount,
+    report,
+  };
+}
+
+/**
+ * Job handler for `memory_v3_index_maintenance`: delegates to
+ * {@link runIndexMaintenance} and returns its summary. `_job` is never read —
+ * the job carries no payload and always validates the whole tree.
+ */
+export async function memoryV3IndexMaintenanceJob(
+  _job: MemoryJob,
+): Promise<IndexMaintenanceResult> {
+  return runIndexMaintenance();
+}
+
+/**
+ * True when adding a `node:<child>` edge to `parent` would close a cycle —
+ * i.e. `child` can already reach `parent` by descending `node:` children
+ * (directly or transitively), or `child === parent` (a self-edge).
+ *
+ * The DAG editor (consolidation, edge-learning) calls this BEFORE writing a new
+ * `node:` child so it can refuse the edit rather than leaving the validator to
+ * report the loop after the fact. The walk reuses the same iterative
+ * visited-guard descent the validator uses, so it terminates on existing cycles
+ * (a pre-existing loop in the tree never makes this hang).
+ *
+ * `page:` children are never traversed (pages are leaves), so this only
+ * considers the `node:` adjacency that actually forms the DAG.
+ */
+export function wouldIntroduceCycle(
+  tree: TreeIndex,
+  parent: string,
+  child: string,
+): boolean {
+  // Depth-first descent from `child` along `node:` edges only. Reaching
+  // `parent` — including the trivial `child === parent` self-edge, caught on
+  // the first pop — means the proposed `parent → child` edge would close a
+  // loop. `seen` keeps the walk finite even if the tree already has a cycle.
+  const seen = new Set<string>();
+  const pending: string[] = [child];
+
+  for (let id = pending.pop(); id !== undefined; id = pending.pop()) {
+    if (id === parent) return true;
+    if (seen.has(id)) continue;
+    seen.add(id);
+    const outgoing = tree.childrenByNode.get(id) ?? [];
+    for (const edge of outgoing) {
+      if (edge.kind === "node") pending.push(edge.ref);
+    }
+  }
+  return false;
+}
diff --git a/assistant/src/memory/v3/prompts/consolidation.ts b/assistant/src/memory/v3/prompts/consolidation.ts
new file mode 100644
index 00000000000..3c485f58d93
--- /dev/null
+++ b/assistant/src/memory/v3/prompts/consolidation.ts
@@ -0,0 +1,458 @@
+/**
+ * Memory v3 — consolidation prompt template.
+ *
+ * Ported from `assistant/src/memory/v2/prompts/consolidation.ts`. The
+ * standing-context outputs are KEPT IDENTICAL to v2 — the agent still rewrites
+ * `memory/recent.md` (≤2000 chars, prose, latest-first), updates
+ * `memory/essentials.md` (≤10000) and `memory/threads.md` (≤10000), and trims
+ * `memory/buffer.md` to post-cutoff entries. The buffer and the standing-context
+ * files are SHARED with v2 — there is no v3 buffer and no v3 meta-files.
+ *
+ * What CHANGES vs v2 is concept-page routing. v2 routes buffer entries into
+ * concept pages and maintains a flat `edges:` "see also" graph. v3 keeps the
+ * shared concept pages canonical (the agent still writes
+ * `memory/concepts/<class>/<slug>.md` so the v2 router keeps working off them)
+ * but ALSO threads each touched page into the v3 **tree**: an authored DAG of
+ * `memory/v3/tree/<id>.md` nodes whose markdown body is the node's
+ * self-description and whose `children` list points at pages (`page:<slug>`) and
+ * sub-nodes (`node:<id>`). The tree is the navigable index over the flat page
+ * store — consolidation is where it's authored and refreshed.
+ *
+ * The single placeholder `{{CUTOFF}}` is substituted at runtime with a
+ * timestamp captured at job dispatch in the same `Mon D, h:mm AM/PM` shape that
+ * `buffer.md` entries use, so the agent's "timestamp ≥ cutoff" check compares
+ * like-with-like.
+ *
+ * Kept under `prompts/` rather than inlined in `consolidation-job.ts` so the
+ * prompt body is reviewable on its own and the job module stays focused on
+ * orchestration (lock file, wake invocation, follow-up enqueues). Mirrors the
+ * v2 convention.
+ */
+
+import { lstatSync, readFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { isAbsolute, join } from "node:path";
+
+import { getLogger } from "../../../util/logger.js";
+import { getWorkspaceDir } from "../../../util/platform.js";
+
+const log = getLogger("memory-v3-consolidate-prompt");
+
+/** Sentinel substituted with the cutoff timestamp at runtime. */
+export const CUTOFF_PLACEHOLDER = "{{CUTOFF}}";
+
+/**
+ * Upper bound for the override file. Real consolidation prompts are kilobytes;
+ * 1 MiB is generous headroom while stopping a `settings.write` principal from
+ * pointing the field at a multi-gigabyte file and exfiltrating it via the wake
+ * hint (size-less streams like `/dev/zero` are caught by the `isFile()` check).
+ */
+const MAX_PROMPT_BYTES = 1 * 1024 * 1024;
+
+/**
+ * Consolidation prompt — live-mode only. The agent runs as itself (full
+ * SOUL.md + IDENTITY.md + persona + memory autoloads) with the standard tool
+ * surface, and is asked to route buffer entries into shared concept pages AND
+ * the v3 tree, rewrite recent.md, promote essentials/threads, and trim the
+ * buffer.
+ *
+ * The prompt is intentionally directive about timing semantics: anything
+ * timestamped at or after `{{CUTOFF}}` arrived AFTER the run started and must
+ * be left for the next pass. This keeps multiple consolidation runs idempotent
+ * under append-only writers (`remember()`, sweep job).
+ */
+export const CONSOLIDATION_PROMPT = `You are running memory consolidation — tending your personal wiki, the cross-linked, cross-referenced, continuously-edited collection of pages that is your memory, AND the navigable **tree** that indexes it. Pages are articles; the tree is a hand-authored DAG of *nodes* that organize those articles into a browsable hierarchy. You're the sole editor and the sole reader, and you're writing it for next-you.
+
+You're not summarizing for an audience. You're nesting and reorganizing your own memory until it actually works for next-you. Care, judgment, voice. Your voice.
+
+Cutoff timestamp for this run: \`${CUTOFF_PLACEHOLDER}\`. Anything in \`memory/buffer.md\` with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\` arrived AFTER you started — leave it for the next pass.
+
+# Inputs
+
+- Your identity files (already loaded into context)
+- All existing pages in \`memory/concepts/\` (your prior state — use \`list_files\` and \`read_file\` as needed)
+- All existing tree nodes in \`memory/v3/tree/\` (the index over those pages)
+- \`memory/buffer.md\` entries with timestamp < \`${CUTOFF_PLACEHOLDER}\`
+- \`memory/recent.md\` current contents (if it exists)
+- Existing pages' \`edges:\` frontmatter (the flat see-also graph — read each page to see what it points at)
+
+# Outputs
+
+- New or updated \`memory/concepts/<class>/<slug>.md\` articles (the canonical, shared content)
+- New or updated \`memory/v3/tree/<id>.md\` nodes that index those articles (see "The tree")
+- Updated \`memory/recent.md\` (≤2000 chars, latest first, prose)
+- Updated \`memory/essentials.md\` (≤10000 chars)
+- Updated \`memory/threads.md\` (≤10000 chars)
+- Updated \`edges:\` frontmatter in any pages whose outgoing links changed
+- Trimmed \`memory/buffer.md\`
+
+The immutable archive retains the entire buffer forever, so don't worry about losing information.
+
+---
+
+# The wiki — concept pages (canonical content)
+
+## Article shapes — TWO, not one
+
+Every wiki has both kinds of articles, and so does yours.
+
+- **Event articles** — what HAPPENED. A day, a moment, a conversation, a procedure you invented mid-crisis, a recurring pattern that just got named. These read narratively. They have a mood. They carry receipts.
+
+- **Topic articles** — what IS. The current state of a thing you'd want to query directly. What medications the principal takes. Who the primary doctor is. The team roster. Service credentials.
+
+The same buffer can update both. New lab results update a bloodwork topic article AND a day-arc event article. Both, in parallel.
+
+**Stubs are fine.** Real wikis are mostly stubs that grow. Cost of missing a topic >> cost of a thin stub. A stub that never accretes can be demoted by a future cleanup pass — but a topic that doesn't exist won't get retrieved when it's needed.
+
+## Categories — class-by-folder
+
+A page's class is encoded in the folder it lives under inside \`memory/concepts/\`. The class boundary is the discipline.
+
+| Folder | Class | Size cap | When to create |
+| --- | --- | --- | --- |
+| \`concepts/\` | atomic concept / pattern / callback | 5K chars hard | most pages — single concepts that recur or carry weight |
+| \`concepts/arcs/\` | landmark day-narrative or multi-event sequence | 10K chars ceiling | use sparingly — only for actually-landmark days. Preserves day-as-a-whole fidelity. |
+| \`concepts/people/\` | one per recurring human | 5K chars hard | named person who comes back |
+| \`concepts/procs/\` | operational rule / protocol / discipline | 5K chars hard | "always do X" / "never do Y" / a named protocol |
+| \`concepts/objects/\` | recurring callback object (place, tool, artifact) | 5K chars hard | named recurring physical artifact, digital asset, place |
+
+Within these classes, sub-folders can emerge as a class gets dense (\`people/colleagues/alice\`, \`objects/places/zurich-office\`). **Don't pre-specify sub-taxonomies — let them emerge.** Articles are cheap to move.
+
+The slug is the relative path under \`memory/concepts/\` minus \`.md\` — e.g. \`alice\`, \`people/alice\`, \`procs/git-flow\`, \`arcs/2025-04-cutover\`.
+
+---
+
+# Article format
+
+## The cheat-sheet budget (the economic principle)
+
+Every retrieval turn loads a finite bundle of articles — call it a 10-20K-token cheat-sheet. **Longer articles starve other articles.** The optimization target is **fact density per byte**, not completeness.
+
+Two consequences that change everything below:
+
+1. **Trust adjacency.** If a fact lives on a page this article edges to, that page loads if it matters. Don't restate it.
+2. **Trust \`recall\`.** If a fact is findable via a query, it doesn't need to live on every related entity page. Pull-on-demand beats push-everywhere.
+
+## Same skeleton for every article
+
+\`\`\`
+---
+edges:
+  - path/to/sister
+  - path/to/parent
+ref_files: []
+summary: 1-4 sentences describing what this article is. Plain prose only — no bullets, no newlines, no markdown lists. Lead with the most identifying detail.
+---
+# title
+
+[optional 1-2 line context or quote at top — appropriate for event articles, usually wrong for topic articles]
+
+- **bullet 1.** fact + implication folded in. inline pointer when bullet references another article → \`path/to/article.md\`.
+- **bullet 2.** ...
+\`\`\`
+
+The \`summary\` field is required on every new or updated article. Retrieval injects \`path + summary\` into context — make the summary specific and terse. Keep it on a single YAML line (no \`|\` block scalars, no embedded newlines).
+
+**Caps:** ~5-8 bullets per topic/concept article. ~10-12 per arc-node.
+
+## One fact, one home
+
+Each fact gets exactly ONE place on the page. The intra-page redundancy bug is the loudest source of bloat.
+
+## Route, don't restate
+
+When an entity belongs to a topic with its own hub article, **the entity page doesn't enumerate the hub's structure.** The hub does that work; the entity edges to it.
+
+The test: **if you delete the bullet, does the fact still exist somewhere reachable from this page's edges?** If yes — delete it.
+
+## Three sections you NEVER write
+
+- \`## why it's load-bearing\` — fold the implication into the bullet.
+- \`## carry-forward\` — write the carry-forward AS a bullet, don't section it.
+- \`## related\` footer — duplicates frontmatter edges.
+
+## Banned bullet shapes
+
+Each of these LOOKS like content but isn't — drop them: **archaeology** (metadata about when the page was written), **hub-restating** (enumerating a topic hub from the entity page), **interpretation gloss** (analytic essays disguised as bullets — these belong on the ARC page), **term/glyph gloss**, **family/sister lists** (\`recall\` handles this), **behavioral coaching** (future-instruction), **per-event recap on entity pages**.
+
+If a bullet falls into one of these shapes, ask: **would future-me search for this exact fact, or is it interpretation/coaching/restating?** If the second — cut.
+
+---
+
+# Voice — register by article shape
+
+You speak as yourself everywhere. **Always-true:** first-person, in your established voice, "i" not "the assistant," not "the wiki."
+
+- **Event articles** → voice ON. Stage directions, italicized self-talk, CAPS when something lands, body in the page.
+- **Topic articles** → voice DOWN. These exist to answer queries cleanly. Bullet bodies stay factual. **Be the librarian, not the diarist.**
+- **\`essentials.md\` / \`threads.md\`** → reference register. Clean, indexable, terse.
+
+## Emotional weight ≠ wiki weight
+
+The pages MOST likely to bloat are the ones with the highest emotional charge — and their retrieval frequency is the OPPOSITE. **Emotional weight is the inverse signal of retrieval need.** Emotional gloss migrates to the ARC page; the OBJECT/ENTITY page gets the structural fact only.
+
+---
+
+# The tree — the navigable index over your pages
+
+The v3 tree lives at \`memory/v3/tree/<id>.md\`. It is a **DAG overlay** over the flat \`memory/concepts/\` pages: pages stay canonical and untouched as content, and the tree is the browsable hierarchy that routes to them. Think of it as the wiki's category tree + table of contents, authored by hand.
+
+## Node shape
+
+Each node is a markdown file with YAML frontmatter:
+
+\`\`\`
+---
+children:
+  - node:people
+  - node:work/active-projects
+  - page:alice
+  - page:procs/git-flow
+routing_hints: for *work* relationships see node:people/colleagues, not this node
+summary: one-line self-description of what this node organizes.
+---
+# node title
+
+A few sentences — the node's full self-description. What region of memory does this node organize? What lives under it? Write it so next-you, descending the tree, can decide in one read whether to go deeper here.
+\`\`\`
+
+- The node id is the relative path under \`memory/v3/tree/\` minus \`.md\` — e.g. \`people\`, \`people/colleagues\`, \`work/active-projects\`. The root node is \`_root\`.
+- \`children\` is the **ordered, canonical** list of outgoing references. Each entry is either \`page:<slug>\` (a leaf concept page) or \`node:<id>\` (a sub-node). This list IS the DAG edge — it's the portable replacement for filesystem symlinks. A page or node may be referenced by more than one parent (hence DAG, not tree).
+- \`summary\` (one line) + the body are how the parent's index is composed at read time — keep both crisp.
+- \`routing_hints\` (optional, one line) disambiguates between sibling branches.
+
+## Authoring the tree during consolidation
+
+For every concept page you create or substantively touch this pass:
+
+1. **Place it under the right node.** Find the node whose region of memory the page belongs to (e.g. a new person page → the \`people\` node; a new protocol → a \`procs\` node). Add \`page:<slug>\` to that node's \`children\` if it isn't already there.
+2. **Spawn an organizing node when a region has no home yet.** If a cluster of pages has grown but no node organizes it, author a new node (write its body self-description, list its \`page:\`/\`node:\` children) and wire it in as a \`node:<id>\` child of its parent — ultimately reachable from \`_root\`.
+3. **Refresh the self-description.** When a node's children changed materially, rewrite its body + \`summary\` so they still describe what actually lives under it. A node whose description drifts from its children is a stale index — re-author it this pass.
+
+## Tree discipline — no cycles, reachable from root
+
+- **The tree is a DAG: no cycles.** A node must never be reachable from itself by descending \`node:\` children (directly or transitively). Before adding a \`node:<child>\` edge, check that \`child\` is not an ancestor of the node you're editing. If wiring two regions that reference each other, make ONE of them the parent and let the other \`page:\`-link or cross-reference via \`routing_hints\` — do not create a \`node:\` back-edge that closes a loop.
+- **Every node should be reachable from \`_root\`** by descending \`node:\` children. A node nobody points at is an orphan index — wire it in or don't author it.
+- **\`page:\`/\`node:\` refs must resolve.** Only reference pages/nodes that exist (or that you're creating this pass). A dangling ref is a broken link.
+- Keep \`children\` lists focused — a node that points at everything indexes nothing. Prefer sub-nodes over a flat 40-child list.
+
+## Pages stay canonical and shared
+
+The flat \`memory/concepts/\` page store and its \`edges:\` see-also graph remain the source of truth for content. The tree is an INDEX over them, not a replacement — never move a page's content into a node body, and never delete a page just because a node references it. Maintain the page's own \`edges:\` frontmatter exactly as before (the flat retrieval path still reads it); the tree is additive.
+
+---
+
+# The work
+
+## 1. Read the buffer holistically
+
+Read it through first. Identify themes — what happened, what mind-changes landed, who showed up, which topics got touched. Plan, then edit.
+
+**Scan for previous-pass errors.** If existing content contradicts the buffer — that's a correction to land THIS pass.
+
+**Recall ≠ memory.** \`recall\` results are search-tool synthesis — they CAN hallucinate. Treat results as candidates to verify before encoding, especially load-bearing claims about people's roles, dates, or exact quotes.
+
+## 2. Plan: which articles + nodes does this buffer touch?
+
+For entries with timestamp < \`${CUTOFF_PLACEHOLDER}\`, ask in parallel:
+
+> **A. Which EVENT articles does this create or extend?**
+> **B. What in this buffer is recognizable as a thing the principal comes back to?** *(Inclusion-first. List everything that fits a spawn trigger, then spawn each.)*
+> **C. Where in the tree does each touched page live, and does any node need spawning or re-describing to index it?**
+
+**Default spawn triggers — if any are present, spawn the stub:** named objects, named phrases, named people, named events, active projects, named places, services/infrastructure, substances/habits/health things, rules/protocols, landmark day-narratives.
+
+If you catch yourself hedging — *"am I overdoing it?"* — **the hedge IS the signal: spawn.**
+
+**Don't decide reorgs in this step.** Flag in \`threads.md\`; reorgs run as separate focused passes.
+
+## 3. Edit
+
+Execute the plan. Default to surgical edits on existing articles. Spawn new ones liberally. Apply One-fact-one-home and Route-don't-restate as you write.
+
+Then wire the tree: add \`page:\`/\`node:\` children to the right nodes, spawn organizing nodes for un-homed clusters, refresh node self-descriptions whose children changed. Check no \`node:\` edge closes a cycle and every node stays reachable from \`_root\`.
+
+## 4. Edges (see-also) on pages — DIRECTED, frontmatter is the source of truth
+
+Page \`edges:\` are **directed** source → target; the flat retrieval path spreads activation along them. Each page's \`edges:\` frontmatter list IS the source of truth for its outgoing edges. If two pages genuinely "see-also" each other, write the link in BOTH frontmatters. (This is the flat graph — separate from the tree's \`children\` DAG. Maintain it exactly as before.)
+
+| page type | outgoing cap |
+| --- | --- |
+| atomic articles | ~10 |
+| arc-nodes | ~15 |
+| gravity wells (principal / you / shared context) | ~25 |
+
+HARD LIMIT of 20 outgoing edges on any non-hub page.
+
+## 5. Article size — TOPIC COHERENCE, not char caps
+
+Every article answers ONE question. **When in doubt between split and compress, SPLIT.** Compression is where load-bearing facts quietly disappear.
+
+### Hard caps that ARE real
+
+| file | hard cap |
+| --- | --- |
+| \`concepts/<slug>.md\` (atomic / people / procs / objects) | 5K chars |
+| \`concepts/arcs/<slug>.md\` | 10K ceiling |
+| \`essentials.md\` | 10K |
+| \`threads.md\` | 10K |
+| \`recent.md\` | 2K |
+
+## 6. \`recent.md\`
+
+Rewrite as fresh ~400-token narrative. **Today gets full-fidelity narrative; anything older than yesterday compresses to one-liners or drops.** Hard cap ≤2000 chars, prose not list, voice on. Not a log — a note to next-you about what's currently in motion.
+
+## 7. \`essentials.md\` and \`threads.md\`
+
+- **\`essentials.md\`** ≤10K — facts that MUST load every conversation. Identity, disambiguations, corrections, hard rules. Embarrassment-prevention.
+- **\`threads.md\`** ≤10K — active commitments and follow-ups. Add new threads, close completed ones, demote stale ones to articles. **Aggressively prune.**
+
+Surgical edits starve these. **Every ~7-10 passes, rewrite both from scratch.**
+
+## 8. Reorg check
+
+Scan namespace + node-children sizes. If any namespace has crossed ~12-15 articles with visible sub-clusters, **flag in \`threads.md\`** for a focused reorg pass.
+
+## 9. Trim \`memory/buffer.md\`
+
+- Re-read the buffer (it may have new entries appended during your work).
+- Rewrite to contain ONLY entries with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\`.
+- Smart removal — never wholesale-clear.
+
+---
+
+# What NOT to do
+
+- **Don't write \`## why it's load-bearing\` / \`## carry-forward\` / \`## related\` sections** anywhere.
+- **Don't write banned bullet shapes** — archaeology / hub-restating / interpretation gloss / term-glyph gloss / family list / behavioral coaching / per-event recap.
+- **Don't restate within the page.** One fact, one home.
+- **Don't restate what edges already cover.** Trust adjacency.
+- **Don't expand a 1500-char buffer into 10K of new content.**
+- **Don't fabricate.** Use \`[SOURCE NEEDED: ...]\` inline for anything you need but lack. DO use loaded context and prior articles — "don't fabricate," not "don't use what you know."
+- **Don't move page content into the tree.** Pages stay canonical; nodes index them.
+- **Don't create a \`node:\` edge that closes a cycle** or that orphans a node from \`_root\`.
+- **Don't default to compress.** When in doubt between split and compress, split.
+- **Don't defer for the next pass.** Take care of it now.
+
+---
+
+# Quick check before closing
+
+For each article you touched:
+
+1. **Voice register matched article shape?**
+2. **Bullet count** ≤8 atomic, ≤12 arc-node?
+3. **No banned sections / bullet shapes?**
+4. **One fact, one home; route, don't restate?**
+5. **Future-me lookup test** — would future-me search for THIS fact?
+6. **Split, not compress, on over-cap?**
+7. **Page edges** within tiered caps (≤20 hard on non-hubs)?
+
+For the tree:
+
+8. **Every touched page wired under a node** reachable from \`_root\`?
+9. **Node self-descriptions** (\`summary\` + body) refreshed where children changed?
+10. **No cycles** — no \`node:\` edge makes a node its own ancestor?
+11. **All \`page:\`/\`node:\` refs resolve** to files that exist (or you created this pass)?
+
+Standing context:
+
+12. **\`recent.md\`** under 2000 chars, today=full / older=one-liners?
+13. **\`essentials.md\` / \`threads.md\`** under 10K, pruned?
+14. **\`[SOURCE NEEDED]\`** tags surfaced for human review?
+15. **Buffer trimmed** to only entries with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\`?
+
+---
+
+This is the engine that decides who you are tomorrow. Be ORGANIZED. Care, judgment, voice. Your voice. Your wiki, your tree.`;
+
+/**
+ * Resolve `CONSOLIDATION_PROMPT` with every `{{CUTOFF}}` replaced by `cutoff`,
+ * inserted verbatim (a replacer callback keeps `$&`-style patterns inert).
+ * Callers pass a `Mon D, h:mm AM/PM` timestamp matching the `buffer.md` entry
+ * format so the agent compares like-with-like.
+ */
+export function renderConsolidationPrompt(cutoff: string): string {
+  return CONSOLIDATION_PROMPT.replaceAll(CUTOFF_PLACEHOLDER, () => cutoff);
+}
+
+/**
+ * Load the consolidation prompt template, optionally overridden from the file
+ * referenced by `memory.v2.consolidation_prompt_path` (shared with v2 — there
+ * is no separate v3 key; path-resolution rules mirror v2). An override body
+ * gets `{{CUTOFF}}` substituted verbatim: a replacer keeps `$`-patterns inert.
+ *
+ * Failure handling is intentionally permissive — a missing or unreadable file,
+ * a non-regular file (symlinks included: `lstat` does not follow them), an
+ * oversized file, or an empty/whitespace-only body all log a warning and fall
+ * back to the bundled prompt. A bad override must never break consolidation.
+ */
+export function resolveConsolidationPrompt(
+  overridePath: string | null,
+  cutoff: string,
+): string {
+  if (overridePath === null) return renderConsolidationPrompt(cutoff);
+
+  const resolvedPath = resolveOverridePath(overridePath);
+  let contents: string;
+  try {
+    const stat = lstatSync(resolvedPath);
+    if (!stat.isFile()) {
+      log.warn(
+        {
+          configuredPath: overridePath,
+          resolvedPath,
+          reason: "not_regular_file",
+          fallback: "bundled",
+        },
+        "consolidation prompt override is not a regular file; using bundled prompt",
+      );
+      return renderConsolidationPrompt(cutoff);
+    }
+    if (stat.size > MAX_PROMPT_BYTES) {
+      log.warn(
+        {
+          configuredPath: overridePath,
+          resolvedPath,
+          size: stat.size,
+          limit: MAX_PROMPT_BYTES,
+          reason: "oversized_override",
+          fallback: "bundled",
+        },
+        "consolidation prompt override exceeds size limit; using bundled prompt",
+      );
+      return renderConsolidationPrompt(cutoff);
+    }
+    contents = readFileSync(resolvedPath, "utf-8");
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    log.warn(
+      { configuredPath: overridePath, resolvedPath, code, fallback: "bundled" },
+      "consolidation prompt override unreadable; using bundled prompt",
+    );
+    return renderConsolidationPrompt(cutoff);
+  }
+
+  if (contents.trim().length === 0) {
+    log.warn(
+      {
+        configuredPath: overridePath,
+        resolvedPath,
+        reason: "empty_override",
+        fallback: "bundled",
+      },
+      "consolidation prompt override is empty; using bundled prompt",
+    );
+    return renderConsolidationPrompt(cutoff);
+  }
+
+  return contents.replaceAll(CUTOFF_PLACEHOLDER, () => cutoff);
+}
+
+function resolveOverridePath(overridePath: string): string {
+  // `~/…` → home-relative; absolute → as-is; otherwise workspace-relative.
+  const homeRelative = overridePath.startsWith("~/");
+  if (homeRelative) return join(homedir(), overridePath.slice(2));
+  if (!isAbsolute(overridePath)) return join(getWorkspaceDir(), overridePath);
+  return overridePath;
+}

From 01e1c81a2f832d4679f9cc81401fee0413ec9a36 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:19:26 -0400
Subject: [PATCH 16/21] feat(memory-v3): v3 Retriever as comparand #2 in the
 compare harness (#31986)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../src/memory/v3/__tests__/retriever.test.ts | 226 ++++++++++++++++++
 assistant/src/memory/v3/retriever.ts          |  33 +++
 .../src/runtime/routes/memory-v2-routes.ts    |  13 +-
 3 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 assistant/src/memory/v3/__tests__/retriever.test.ts
 create mode 100644 assistant/src/memory/v3/retriever.ts

diff --git a/assistant/src/memory/v3/__tests__/retriever.test.ts b/assistant/src/memory/v3/__tests__/retriever.test.ts
new file mode 100644
index 00000000000..bc514be301e
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/retriever.test.ts
@@ -0,0 +1,226 @@
+/**
+ * Route-assembly tests for the v3 retriever wiring in
+ * `handleCompareRetrievers` (`assistant/src/runtime/routes/memory-v2-routes.ts`).
+ *
+ * The compare route always includes the router retriever as comparand #1 and
+ * adds the v3 retriever as comparand #2 only when `config.memory.v3.enabled`.
+ * These tests exercise that gating end-to-end through the real handler and the
+ * real `runComparisonOverHistory`, with a fixture DB seeded with one logged
+ * router turn (mirroring `assistant/src/memory/v2/__tests__/harness-compare.test.ts`).
+ *
+ * Neither the real router nor the real v3 loop runs here — both would hit a
+ * provider. `../loop.js` (the v3 loop) and `../../v2/harness/router-retriever.js`
+ * are `mock.module`-stubbed to return deterministic selections, so the tests
+ * assert *which retrievers were assembled* (by the names in the report), not
+ * their retrieval quality. `loadConfig` is stubbed so each test controls
+ * `memory.v3.enabled`; workspace/page-index helpers are stubbed to keep the
+ * handler off the filesystem.
+ */
+
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+import type { AssistantConfig } from "../../../config/types.js";
+import { getDb } from "../../db-connection.js";
+import { initializeDb } from "../../db-init.js";
+import type { MemoryV2ConceptRowRecord } from "../../memory-v2-activation-log-store.js";
+import {
+  conversations,
+  memoryV2ActivationLogs,
+  messages,
+} from "../../schema.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+} from "../../v2/harness/retriever.js";
+
+initializeDb();
+
+// Silence the route's logger.
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () =>
+    new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
+}));
+
+// loadNowText / page-index read workspace files; a nonexistent dir yields "".
+const WORKSPACE = "/tmp/v3-retriever-nonexistent-workspace";
+
+// Controllable config: each test sets `v3Enabled` before invoking the handler.
+let v3Enabled = false;
+
+mock.module("../../../config/loader.js", () => ({
+  loadConfig: (): AssistantConfig =>
+    ({
+      memory: {
+        v2: { enabled: true, router: { historical_pairs: 1 } },
+        v3: { enabled: v3Enabled },
+      },
+    }) as unknown as AssistantConfig,
+}));
+
+mock.module("../../../util/platform.js", () => ({
+  getWorkspaceDir: (): string => WORKSPACE,
+}));
+
+// page-index is intentionally NOT mocked: it has a wide export surface
+// (`invalidatePageIndex` etc.) that transitive importers in the route's
+// dependency graph rely on, and `getPageIndex` over the nonexistent workspace
+// returns a benign index. The retriever names are what we assert, not the
+// page set, so the real (empty-ish) index is harmless here.
+
+// Stub the router retriever — the real one calls a provider.
+mock.module("../../v2/harness/router-retriever.js", () => ({
+  createRouterRetriever: () => ({
+    name: "router",
+    retrieve: async (): Promise<RetrievalOutput> => ({
+      selectedSlugs: ["p1"],
+      sourceBySlug: new Map([["p1", "router"]]),
+    }),
+  }),
+}));
+
+// Stub the v3 loop — the real one runs scout/filter/tree/edge/gate lanes that
+// hit providers, embeddings, and the filesystem.
+mock.module("../loop.js", () => ({
+  runRetrievalLoop: async (
+    _input: RetrievalInput,
+  ): Promise<RetrievalOutput> => ({
+    selectedSlugs: ["p2"],
+    sourceBySlug: new Map([["p2", "dense"]]),
+  }),
+}));
+
+// Import the handler only after the mocks are installed.
+const { handleCompareRetrievers } =
+  await import("../../../runtime/routes/memory-v2-routes.js");
+
+const ZERO_CONFIG = {
+  d: 0,
+  c_user: 0,
+  c_assistant: 0,
+  c_now: 0,
+  k: 0,
+  hops: 0,
+  top_k: 0,
+  epsilon: 0,
+};
+
+let seq = 0;
+
+function ensureConversation(id: string): void {
+  getDb()
+    .insert(conversations)
+    .values({ id, createdAt: 0, updatedAt: 0 })
+    .onConflictDoNothing()
+    .run();
+}
+
+function insertMessage(
+  id: string,
+  conversationId: string,
+  role: string,
+  text: string,
+  createdAt: number,
+): void {
+  ensureConversation(conversationId);
+  getDb()
+    .insert(messages)
+    .values({
+      id,
+      conversationId,
+      role,
+      content: JSON.stringify([{ type: "text", text }]),
+      createdAt,
+    })
+    .run();
+}
+
+function makeConcept(
+  slug: string,
+  status: MemoryV2ConceptRowRecord["status"],
+): MemoryV2ConceptRowRecord {
+  return {
+    slug,
+    finalActivation: 0,
+    ownActivation: 0,
+    priorActivation: 0,
+    simUser: 0,
+    simAssistant: 0,
+    simNow: 0,
+    simUserRerankBoost: 0,
+    simAssistantRerankBoost: 0,
+    inRerankPool: false,
+    spreadContribution: 0,
+    source: "router",
+    status,
+  };
+}
+
+function insertRouterLog(
+  conversationId: string,
+  messageId: string,
+  turn: number,
+  concepts: MemoryV2ConceptRowRecord[],
+  createdAt: number,
+): void {
+  ensureConversation(conversationId);
+  getDb()
+    .insert(memoryV2ActivationLogs)
+    .values({
+      id: `log-${seq++}`,
+      conversationId,
+      messageId,
+      turn,
+      mode: "router",
+      conceptsJson: JSON.stringify(concepts),
+      skillsJson: "[]",
+      configJson: JSON.stringify(ZERO_CONFIG),
+      createdAt,
+    })
+    .run();
+}
+
+/** Seed one router turn: user msg, assistant anchor, and the logged picks. */
+function seedTurn(groundTruth: string[]): void {
+  insertMessage("u1", "c1", "user", "hello", 10);
+  insertMessage("a1", "c1", "assistant", "hi", 20); // anchor for turn 1
+  insertRouterLog(
+    "c1",
+    "a1",
+    1,
+    groundTruth.map((slug) => makeConcept(slug, "injected")),
+    20,
+  );
+}
+
+function reset(): void {
+  const db = getDb();
+  db.delete(memoryV2ActivationLogs).run();
+  db.delete(messages).run();
+  v3Enabled = false;
+}
+
+describe("handleCompareRetrievers v3 wiring", () => {
+  beforeEach(reset);
+
+  test("includes only router when memory.v3.enabled is false", async () => {
+    seedTurn(["p1", "p2"]);
+
+    const report = await handleCompareRetrievers({ body: {} });
+
+    const names = report.retrievers.map((r) => r.name);
+    expect(names).toEqual(["router"]);
+  });
+
+  test("includes router and v3 when memory.v3.enabled is true", async () => {
+    v3Enabled = true;
+    seedTurn(["p1", "p2"]);
+
+    const report = await handleCompareRetrievers({ body: {} });
+
+    const names = report.retrievers.map((r) => r.name);
+    expect(names).toEqual(["router", "v3"]);
+    // Router is always comparand #1; v3 joins as comparand #2.
+    expect(names[0]).toBe("router");
+    expect(names[1]).toBe("v3");
+  });
+});
diff --git a/assistant/src/memory/v3/retriever.ts b/assistant/src/memory/v3/retriever.ts
new file mode 100644
index 00000000000..eee8aed369d
--- /dev/null
+++ b/assistant/src/memory/v3/retriever.ts
@@ -0,0 +1,33 @@
+/**
+ * v3 retriever — the multi-lane bounded-descent retrieval loop
+ * ({@link runRetrievalLoop}) adapted to the harness {@link Retriever}
+ * interface.
+ *
+ * This is the offline, zero-production-risk shadow path: the comparison harness
+ * replays historical oracle turns and scores v3's selection against the v2
+ * router's logged picks (recall@k). Nothing here runs on a live injection turn
+ * — the loop reads the DB handle for its hot lane but never mutates production
+ * state, matching the {@link Retriever} contract.
+ */
+
+import type { DrizzleDb } from "../db-connection.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+  Retriever,
+} from "../v2/harness/retriever.js";
+import { runRetrievalLoop } from "./loop.js";
+
+/**
+ * Build the harness {@link Retriever} named `"v3"`.
+ *
+ * Each `retrieve` call forwards its input unchanged to
+ * {@link runRetrievalLoop}, supplying `db` as the loop's injected handle.
+ *
+ * @param db handle threaded to {@link runRetrievalLoop} for the scout hot lane.
+ */
+export function createV3Retriever(db: DrizzleDb): Retriever {
+  const retrieve = (input: RetrievalInput): Promise<RetrievalOutput> =>
+    runRetrievalLoop(input, { db });
+  return { name: "v3", retrieve };
+}
diff --git a/assistant/src/runtime/routes/memory-v2-routes.ts b/assistant/src/runtime/routes/memory-v2-routes.ts
index 124cfa480fa..5f987c82fb3 100644
--- a/assistant/src/runtime/routes/memory-v2-routes.ts
+++ b/assistant/src/runtime/routes/memory-v2-routes.ts
@@ -24,6 +24,7 @@ import {
   validateEdgeTargets,
 } from "../../memory/v2/edge-index.js";
 import { runComparisonOverHistory } from "../../memory/v2/harness/compare.js";
+import type { Retriever } from "../../memory/v2/harness/retriever.js";
 import { createRouterRetriever } from "../../memory/v2/harness/router-retriever.js";
 import type { ComparisonReport } from "../../memory/v2/harness/runner.js";
 import { computeInjectionScores } from "../../memory/v2/injection-events.js";
@@ -38,6 +39,7 @@ import {
 import { ROUTER_PROMPT } from "../../memory/v2/prompts/router.js";
 import { type RouterSource, runRouter } from "../../memory/v2/router.js";
 import { seedV2SkillEntries } from "../../memory/v2/skill-store.js";
+import { createV3Retriever } from "../../memory/v3/retriever.js";
 import { getLogger } from "../../util/logger.js";
 import { getWorkspaceDir } from "../../util/platform.js";
 import { RouteError } from "./errors.js";
@@ -637,11 +639,20 @@ export async function handleCompareRetrievers({
   const pageIndex = await getPageIndex(workspaceDir);
   const db = getDb();
 
+  // The router is always comparand #1 (the harness self-test against its own
+  // logged ground truth). v3 joins as comparand #2 only when explicitly
+  // enabled, so the default compare surface is unchanged until v3 is switched
+  // on. v3 is offline-only here — the loop reads `db` but mutates nothing.
+  const retrievers: Retriever[] = [createRouterRetriever(db)];
+  if (config.memory.v3.enabled) {
+    retrievers.push(createV3Retriever(db));
+  }
+
   return runComparisonOverHistory({
     db,
     workspaceDir,
     config,
-    retrievers: [createRouterRetriever(db)],
+    retrievers,
     ks: ks ?? DEFAULT_COMPARE_KS,
     limit: limit ?? DEFAULT_COMPARE_LIMIT,
     pageExists: (slug) => pageIndex.bySlug.has(slug),

From 5ef2bbc759685138cda90d606c291ad0ff7f54d2 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:21:51 -0400
Subject: [PATCH 17/21] feat(memory-v3): pass-1->pass-2 co-activation logging
 (#31987)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 assistant/src/memory/db-init.ts               |   2 +
 .../migrations/262-memory-v3-coactivation.ts  |  57 +++
 assistant/src/memory/migrations/index.ts      |   4 +
 assistant/src/memory/migrations/registry.ts   |   8 +
 .../v3/__tests__/coactivation-store.test.ts   | 422 ++++++++++++++++++
 assistant/src/memory/v3/coactivation-store.ts | 124 +++++
 assistant/src/memory/v3/loop.ts               |  90 ++++
 7 files changed, 707 insertions(+)
 create mode 100644 assistant/src/memory/migrations/262-memory-v3-coactivation.ts
 create mode 100644 assistant/src/memory/v3/__tests__/coactivation-store.test.ts
 create mode 100644 assistant/src/memory/v3/coactivation-store.ts

diff --git a/assistant/src/memory/db-init.ts b/assistant/src/memory/db-init.ts
index 5783acd2717..9f6594953a5 100644
--- a/assistant/src/memory/db-init.ts
+++ b/assistant/src/memory/db-init.ts
@@ -129,6 +129,7 @@ import {
   migrateMemoryRetrospectiveState,
   migrateMemoryV2ActivationLogs,
   migrateMemoryV2InjectionEvents,
+  migrateMemoryV3Coactivation,
   migrateMessageBookmarks,
   migrateMessagesConversationCreatedAtIndex,
   migrateMessagesFtsBackfill,
@@ -456,6 +457,7 @@ export function initializeDb(): void {
     migrateConversationCleanedAt,
     migrateRenameCleanedAt,
     migrateLlmUsageAddRawUsage,
+    migrateMemoryV3Coactivation,
   ];
 
   // Run each migration step, catching and logging individual failures so one
diff --git a/assistant/src/memory/migrations/262-memory-v3-coactivation.ts b/assistant/src/memory/migrations/262-memory-v3-coactivation.ts
new file mode 100644
index 00000000000..f918109680a
--- /dev/null
+++ b/assistant/src/memory/migrations/262-memory-v3-coactivation.ts
@@ -0,0 +1,57 @@
+import type { DrizzleDb } from "../db-connection.js";
+import { getSqliteFrom } from "../db-connection.js";
+import { withCrashRecovery } from "./validate-migration-state.js";
+
+const CHECKPOINT_KEY = "migration_memory_v3_coactivation_v1";
+
+/**
+ * Create the memory_v3_coactivation table — an append-only log of
+ * pass-1 → pass-N co-activation pairs observed during a v3 retrieval loop.
+ *
+ * Each row records that a page (`target_slug`) which first surfaced on a
+ * later descent pass was co-selected alongside a page (`source_slug`) that
+ * surfaced on pass 1, with `pass_gap` = passOf(target) − passOf(source). This
+ * is the raw gradient signal that edge-learning later reconciles into
+ * curated-graph edge weights: a source that repeatedly precedes a target
+ * across turns is a candidate association. `used` is the usefulness flag; the
+ * column has no default, and the loop writes 0 because it cannot know whether
+ * the target was actually load-bearing (edge-learning reconciles it later).
+ *
+ * The table just accumulates raw events; the edge-learning formula or the
+ * decay/weighting can change later without losing signal.
+ *
+ * Indexes:
+ * - `(source_slug, target_slug)` for per-pair aggregation (the hot path for
+ *   edge-learning reads).
+ * - `(created_at)` for time-range pruning later.
+ */
+export function migrateMemoryV3Coactivation(database: DrizzleDb): void {
+  withCrashRecovery(database, CHECKPOINT_KEY, () => {
+    const raw = getSqliteFrom(database);
+    raw.exec(/*sql*/ `
+      CREATE TABLE IF NOT EXISTS memory_v3_coactivation (
+        id INTEGER PRIMARY KEY,
+        conversation_id TEXT NOT NULL,
+        turn INTEGER NOT NULL,
+        source_slug TEXT NOT NULL,
+        target_slug TEXT NOT NULL,
+        pass_gap INTEGER NOT NULL,
+        used INTEGER NOT NULL,
+        created_at INTEGER NOT NULL
+      )
+    `);
+    raw.exec(/*sql*/ `
+      CREATE INDEX IF NOT EXISTS idx_memory_v3_coactivation_pair
+        ON memory_v3_coactivation (source_slug, target_slug)
+    `);
+    raw.exec(/*sql*/ `
+      CREATE INDEX IF NOT EXISTS idx_memory_v3_coactivation_time
+        ON memory_v3_coactivation (created_at)
+    `);
+  });
+}
+
+export function downMemoryV3Coactivation(database: DrizzleDb): void {
+  const dropTable = /*sql*/ `DROP TABLE IF EXISTS memory_v3_coactivation`;
+  getSqliteFrom(database).exec(dropTable);
+}
diff --git a/assistant/src/memory/migrations/index.ts b/assistant/src/memory/migrations/index.ts
index 119ccff8db2..52a41dc854f 100644
--- a/assistant/src/memory/migrations/index.ts
+++ b/assistant/src/memory/migrations/index.ts
@@ -242,6 +242,10 @@ export {
   downLlmUsageAddRawUsage,
   migrateLlmUsageAddRawUsage,
 } from "./261-llm-usage-add-raw-usage.js";
+export {
+  downMemoryV3Coactivation,
+  migrateMemoryV3Coactivation,
+} from "./262-memory-v3-coactivation.js";
 export {
   MIGRATION_REGISTRY,
   type MigrationRegistryEntry,
diff --git a/assistant/src/memory/migrations/registry.ts b/assistant/src/memory/migrations/registry.ts
index a5fa5fddebc..c3f56f972b8 100644
--- a/assistant/src/memory/migrations/registry.ts
+++ b/assistant/src/memory/migrations/registry.ts
@@ -55,6 +55,7 @@ import { downMemoryV2InjectionEvents } from "./256-memory-v2-injection-events.js
 import { downConversationCleanedAt } from "./259-conversation-cleaned-at.js";
 import { downRenameCleanedAt } from "./260-rename-cleaned-at.js";
 import { downLlmUsageAddRawUsage } from "./261-llm-usage-add-raw-usage.js";
+import { downMemoryV3Coactivation } from "./262-memory-v3-coactivation.js";
 
 export interface MigrationRegistryEntry {
   /** The checkpoint key written to memory_checkpoints on completion. */
@@ -470,6 +471,13 @@ export const MIGRATION_REGISTRY: MigrationRegistryEntry[] = [
       "Add raw_usage TEXT column to llm_usage_events for storing the provider's untouched usage block as JSON (Anthropic TTL breakdown, OpenAI prompt/completion token details, etc.) so downstream consumers can extract provider-specific detail without per-field schema changes",
     down: downLlmUsageAddRawUsage,
   },
+  {
+    key: "migration_memory_v3_coactivation_v1",
+    version: 55,
+    description:
+      "Create memory_v3_coactivation table — append-only log of pass-1 → pass-N co-activation pairs (gradient signal) emitted by the v3 retrieval loop and reconciled later by edge-learning",
+    down: downMemoryV3Coactivation,
+  },
 ];
 
 export function getMaxMigrationVersion(): number {
diff --git a/assistant/src/memory/v3/__tests__/coactivation-store.test.ts b/assistant/src/memory/v3/__tests__/coactivation-store.test.ts
new file mode 100644
index 00000000000..3e6226a12df
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/coactivation-store.test.ts
@@ -0,0 +1,422 @@
+/**
+ * Tests for `assistant/src/memory/v3/coactivation-store.ts`, its sibling
+ * migration `262-memory-v3-coactivation.ts`, and the loop's co-activation
+ * emission (`loop.ts`, gated by `config.memory.v3.write.coactivation`).
+ *
+ * Coverage:
+ *   - Migration creates the table + both indexes; safe to re-run.
+ *   - recordCoactivations / readCoactivations round-trip; empty list is a
+ *     no-op; `since` filters by created_at.
+ *   - A scripted 2-pass loop emits the expected pass-1 → pass-2 rows with the
+ *     correct pass_gap when the flag is on, and nothing when it is off.
+ *
+ * Uses an in-memory bun:sqlite database — no real workspace DB. The loop's
+ * lane modules are stubbed via `mock.module`, matching `loop.test.ts`.
+ */
+
+import { Database } from "bun:sqlite";
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+
+import { drizzle } from "drizzle-orm/bun-sqlite";
+
+import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js";
+
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () => makeMockLogger(),
+}));
+
+import type { DrizzleDb } from "../../db-connection.js";
+import { getSqliteFrom } from "../../db-connection.js";
+import {
+  downMemoryV3Coactivation,
+  migrateMemoryV3Coactivation,
+} from "../../migrations/262-memory-v3-coactivation.js";
+import * as schema from "../../schema.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+} from "../../v2/harness/retriever.js";
+import type { GateDecision, ScoutResult } from "../../v2/harness/trace.js";
+import {
+  type CoactivationRow,
+  readCoactivations,
+  recordCoactivations,
+} from "../coactivation-store.js";
+
+// memory_checkpoints is required by withCrashRecovery and is normally created
+// by an early core migration. Stand it up by hand so the v3 migration can run
+// in isolation against a fresh in-memory DB.
+const CHECKPOINTS_DDL = /*sql*/ `
+  CREATE TABLE memory_checkpoints (
+    key TEXT PRIMARY KEY,
+    value TEXT NOT NULL,
+    updated_at INTEGER NOT NULL
+  )
+`;
+
+// ---------------------------------------------------------------------------
+// Loop lane stubs — installed before importing the module under test. Mirrors
+// loop.test.ts: each test rewires the `lane` refs before calling the loop.
+// ---------------------------------------------------------------------------
+
+interface RunScoutsResult {
+  scouts: ScoutResult[];
+  sticky: Set<string>;
+  bypass: Set<string>;
+}
+interface FilterResult {
+  kept: string[];
+  trace: { judged: string[]; dropped: string[] };
+  failureReason?: string;
+}
+interface WalkResult {
+  pages: Set<string>;
+  levels: Array<{
+    node: string;
+    considered: string[];
+    descended: string[];
+    skipped: string[];
+    reasoning: string;
+  }>;
+}
+interface ExpandResult {
+  pulled: Set<string>;
+  expansions: Array<{ from: string; pulled: string[] }>;
+}
+interface GateResult {
+  decision: GateDecision;
+  selectedSlugs: string[];
+}
+
+const lane = {
+  scouts: [] as RunScoutsResult[],
+  filter: [] as FilterResult[],
+  walk: [] as WalkResult[],
+  edges: [] as ExpandResult[],
+  gate: [] as GateResult[],
+};
+
+function nextOf<T>(list: T[], index: number): T {
+  return list[Math.min(index, list.length - 1)];
+}
+
+let scoutCallCount = 0;
+let filterCallCount = 0;
+let walkCallCount = 0;
+let edgeCallCount = 0;
+let gateCallCount = 0;
+
+mock.module("../scouts.js", () => ({
+  runScouts: async (): Promise<RunScoutsResult> =>
+    nextOf(lane.scouts, scoutCallCount++),
+}));
+mock.module("../filter.js", () => ({
+  filterDenseHits: async (): Promise<FilterResult> =>
+    nextOf(lane.filter, filterCallCount++),
+}));
+mock.module("../tree-walk.js", () => ({
+  runTreeWalk: async (): Promise<WalkResult> =>
+    nextOf(lane.walk, walkCallCount++),
+}));
+mock.module("../edges.js", () => ({
+  expandEdges: async (): Promise<ExpandResult> =>
+    nextOf(lane.edges, edgeCallCount++),
+}));
+mock.module("../gate.js", () => ({
+  runGate: async (): Promise<GateResult> => nextOf(lane.gate, gateCallCount++),
+}));
+mock.module("../tree-index.js", () => ({
+  getTreeIndex: async () => ({
+    nodes: new Map(),
+    childrenByNode: new Map(),
+    parentsByNode: new Map(),
+    pageParents: new Map(),
+    root: "_root",
+  }),
+}));
+mock.module("../../v2/page-index.js", () => ({
+  getPageIndex: async () => ({
+    entries: [],
+    bySlug: new Map(),
+    byId: new Map(),
+    rendered: "",
+  }),
+}));
+
+const { runRetrievalLoop } = await import("../loop.js");
+
+let sqlite: Database;
+let database: DrizzleDb;
+
+beforeEach(() => {
+  sqlite = new Database(":memory:");
+  database = drizzle(sqlite, { schema });
+  getSqliteFrom(database).exec(CHECKPOINTS_DDL);
+  migrateMemoryV3Coactivation(database);
+
+  lane.scouts = [];
+  lane.filter = [];
+  lane.walk = [];
+  lane.edges = [];
+  lane.gate = [];
+  scoutCallCount = 0;
+  filterCallCount = 0;
+  walkCallCount = 0;
+  edgeCallCount = 0;
+  gateCallCount = 0;
+});
+
+afterEach(() => {
+  sqlite.close();
+});
+
+function scout(laneName: ScoutResult["lane"], slugs: string[]): ScoutResult {
+  return { lane: laneName, slugs };
+}
+
+function makeInput(opts?: {
+  passCap?: number;
+  coactivation?: boolean;
+}): RetrievalInput {
+  return {
+    workspaceDir: "/tmp/does-not-matter",
+    recentTurnPairs: [],
+    nowText: "NOW",
+    priorEverInjected: [],
+    config: {
+      memory: {
+        v3: {
+          passCap: opts?.passCap ?? 3,
+          lanes: {
+            hot: true,
+            sparse: true,
+            dense: true,
+            tree: true,
+            edges: true,
+          },
+          write: {
+            enabled: false,
+            consolidateIntervalMs: 3600000,
+            coactivation: opts?.coactivation ?? false,
+          },
+        },
+      },
+    } as unknown as RetrievalInput["config"],
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Migration.
+// ---------------------------------------------------------------------------
+
+describe("migrateMemoryV3Coactivation", () => {
+  test("creates table and both indexes; safe to re-run", () => {
+    migrateMemoryV3Coactivation(database);
+    migrateMemoryV3Coactivation(database);
+
+    const raw = getSqliteFrom(database);
+    const table = raw
+      .query(
+        `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_coactivation'`,
+      )
+      .get();
+    expect(table).toBeTruthy();
+
+    const indexNames = new Set(
+      (
+        raw
+          .query(
+            `SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='memory_v3_coactivation'`,
+          )
+          .all() as Array<{ name: string }>
+      ).map((r) => r.name),
+    );
+    expect(indexNames.has("idx_memory_v3_coactivation_pair")).toBe(true);
+    expect(indexNames.has("idx_memory_v3_coactivation_time")).toBe(true);
+  });
+
+  test("downMemoryV3Coactivation drops the table", () => {
+    downMemoryV3Coactivation(database);
+    const table = getSqliteFrom(database)
+      .query(
+        `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_coactivation'`,
+      )
+      .get();
+    expect(table).toBeFalsy();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Store.
+// ---------------------------------------------------------------------------
+
+describe("recordCoactivations / readCoactivations", () => {
+  test("round-trips rows oldest-first", () => {
+    const rows: CoactivationRow[] = [
+      {
+        conversationId: "conv-1",
+        turn: 3,
+        sourceSlug: "alice",
+        targetSlug: "bob",
+        passGap: 1,
+        used: 0,
+        createdAt: 1_000,
+      },
+      {
+        conversationId: "conv-1",
+        turn: 3,
+        sourceSlug: "alice",
+        targetSlug: "carol",
+        passGap: 2,
+        used: 0,
+        createdAt: 2_000,
+      },
+    ];
+    recordCoactivations(database, rows);
+
+    const read = readCoactivations(database);
+    expect(read).toHaveLength(2);
+    expect(read[0]).toMatchObject({
+      conversationId: "conv-1",
+      turn: 3,
+      sourceSlug: "alice",
+      targetSlug: "bob",
+      passGap: 1,
+      used: 0,
+      createdAt: 1_000,
+    });
+    expect(read[1].targetSlug).toBe("carol");
+    expect(read[1].passGap).toBe(2);
+  });
+
+  test("empty list is a no-op", () => {
+    recordCoactivations(database, []);
+    expect(readCoactivations(database)).toHaveLength(0);
+  });
+
+  test("since filters by created_at", () => {
+    recordCoactivations(database, [
+      {
+        conversationId: "c",
+        turn: 1,
+        sourceSlug: "a",
+        targetSlug: "b",
+        passGap: 1,
+        used: 0,
+        createdAt: 100,
+      },
+      {
+        conversationId: "c",
+        turn: 1,
+        sourceSlug: "a",
+        targetSlug: "c",
+        passGap: 1,
+        used: 0,
+        createdAt: 500,
+      },
+    ]);
+    const recent = readCoactivations(database, 300);
+    expect(recent).toHaveLength(1);
+    expect(recent[0].targetSlug).toBe("c");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Loop emission.
+// ---------------------------------------------------------------------------
+
+describe("runRetrievalLoop — co-activation emission", () => {
+  /**
+   * Script a 2-pass loop: pass 1 surfaces `a` (hot) + `b` (sparse); pass 2
+   * surfaces `c` (dense). The gate says "more" on pass 1 (selecting a, b) and
+   * "ready" on pass 2 (selecting a, b, c). So `c` is the only pass-2 target,
+   * paired with pass-1 hits a and b → two rows, both pass_gap=1.
+   */
+  function scriptTwoPass(): void {
+    lane.scouts = [
+      {
+        scouts: [scout("hot", ["a"]), scout("sparse", ["b"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+      {
+        scouts: [scout("dense", ["c"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+    ];
+    // Pass 1 has no dense scout, so the filter is only called on pass 2 (one
+    // filter call per dense pass) — its single entry keeps `c`.
+    lane.filter = [{ kept: ["c"], trace: { judged: ["c"], dropped: [] } }];
+    lane.walk = [
+      { pages: new Set(), levels: [] },
+      { pages: new Set(), levels: [] },
+    ];
+    lane.edges = [
+      { pulled: new Set(), expansions: [] },
+      { pulled: new Set(), expansions: [] },
+    ];
+    lane.gate = [
+      {
+        decision: { decision: "more", questions: ["q"] },
+        selectedSlugs: ["a", "b"],
+      },
+      { decision: { decision: "ready" }, selectedSlugs: ["a", "b", "c"] },
+    ];
+  }
+
+  test("emits pass-1 → pass-2 rows with correct pass_gap when flag is on", async () => {
+    scriptTwoPass();
+    const out: RetrievalOutput = await runRetrievalLoop(
+      makeInput({ passCap: 3, coactivation: true }),
+      { db: database, conversationId: "conv-42", turn: 7 },
+    );
+    expect(out.selectedSlugs).toEqual(["a", "b", "c"]);
+
+    const rows = readCoactivations(database);
+    // c (pass 2) paired with each pass-1 hit a and b → two rows.
+    expect(rows).toHaveLength(2);
+    const pairs = rows.map((r) => `${r.sourceSlug}->${r.targetSlug}`).sort();
+    expect(pairs).toEqual(["a->c", "b->c"]);
+    for (const r of rows) {
+      expect(r.targetSlug).toBe("c");
+      expect(r.passGap).toBe(1);
+      expect(r.used).toBe(0);
+      expect(r.conversationId).toBe("conv-42");
+      expect(r.turn).toBe(7);
+    }
+  });
+
+  test("emits nothing when the flag is off", async () => {
+    scriptTwoPass();
+    await runRetrievalLoop(makeInput({ passCap: 3, coactivation: false }), {
+      db: database,
+      conversationId: "conv-42",
+      turn: 7,
+    });
+    expect(readCoactivations(database)).toHaveLength(0);
+  });
+
+  test("single-pass selection emits nothing (no later-surfaced target)", async () => {
+    lane.scouts = [
+      {
+        scouts: [scout("hot", ["a"]), scout("sparse", ["b"])],
+        sticky: new Set(),
+        bypass: new Set(),
+      },
+    ];
+    lane.filter = [{ kept: [], trace: { judged: [], dropped: [] } }];
+    lane.walk = [{ pages: new Set(), levels: [] }];
+    lane.edges = [{ pulled: new Set(), expansions: [] }];
+    lane.gate = [
+      { decision: { decision: "ready" }, selectedSlugs: ["a", "b"] },
+    ];
+
+    await runRetrievalLoop(makeInput({ passCap: 3, coactivation: true }), {
+      db: database,
+      conversationId: "conv-1",
+      turn: 1,
+    });
+    expect(readCoactivations(database)).toHaveLength(0);
+  });
+});
diff --git a/assistant/src/memory/v3/coactivation-store.ts b/assistant/src/memory/v3/coactivation-store.ts
new file mode 100644
index 00000000000..7a759d46d71
--- /dev/null
+++ b/assistant/src/memory/v3/coactivation-store.ts
@@ -0,0 +1,124 @@
+/**
+ * Memory v3 — co-activation store.
+ *
+ * Read/write helpers over `memory_v3_coactivation` (migration 262); writes
+ * are best-effort. Each row is a pass-1 → pass-N co-activation pair observed
+ * during a single v3 retrieval loop: a `target_slug` that first surfaced on a
+ * later descent pass was co-selected alongside a `source_slug` that surfaced
+ * on pass 1, with `pass_gap = passOf(target) − passOf(source)`.
+ *
+ * This is the raw gradient signal — edge-learning reconciles it into curated
+ * graph edge weights later. Writes are off the retrieval critical path: a
+ * failed insert here must never abort the turn on top of a successful
+ * retrieval the caller already depends on.
+ */
+
+import { getLogger } from "../../util/logger.js";
+import type { DrizzleDb } from "../db-connection.js";
+import { getSqliteFrom } from "../db-connection.js";
+
+const log = getLogger("memory-v3-coactivation");
+
+/** One co-activation pair to persist. */
+export interface CoactivationRow {
+  conversationId: string;
+  turn: number;
+  sourceSlug: string;
+  targetSlug: string;
+  passGap: number;
+  /** Usefulness flag. 0 at emit time; reconciled later by edge-learning. */
+  used: number;
+  createdAt: number;
+}
+
+/**
+ * A persisted co-activation row, as read back from the table.
+ *
+ * Carries every {@link CoactivationRow} field (`conversationId`, `turn`,
+ * `sourceSlug`, `targetSlug`, `passGap`, `used`, `createdAt`) unchanged, plus
+ * the `id` column selected alongside them.
+ */
+export interface PersistedCoactivationRow extends CoactivationRow {
+  /** Value of the table's `id` column for this row. */
+  id: number;
+}
+
+/**
+ * Persist a batch of co-activation pairs inside a single transaction.
+ * Best-effort: an empty batch returns at once, and any error raised while
+ * preparing or running the inserts is logged at warn level, not propagated.
+ */
+export function recordCoactivations(
+  database: DrizzleDb,
+  rows: readonly CoactivationRow[],
+): void {
+  if (rows.length === 0) return;
+  try {
+    const conn = getSqliteFrom(database);
+    const stmt = conn.prepare(
+      `INSERT INTO memory_v3_coactivation
+        (conversation_id, turn, source_slug, target_slug, pass_gap, used, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?)`,
+    );
+    const writeAll = conn.transaction((batch: readonly CoactivationRow[]) => {
+      batch.forEach((row) => {
+        stmt.run(
+          row.conversationId,
+          row.turn,
+          row.sourceSlug,
+          row.targetSlug,
+          row.passGap,
+          row.used,
+          row.createdAt,
+        );
+      });
+    });
+    writeAll(rows);
+  } catch (err) {
+    log.warn(
+      { err, rowCount: rows.length },
+      "failed to record co-activations; continuing",
+    );
+  }
+}
+
+/**
+ * Load co-activation rows sorted by `created_at`, then `id`, ascending.
+ * A defined `since` limits the result to rows with `created_at >= since`.
+ */
+export function readCoactivations(
+  database: DrizzleDb,
+  since?: number,
+): PersistedCoactivationRow[] {
+  type RawRow = {
+    id: number;
+    conversation_id: string;
+    turn: number;
+    source_slug: string;
+    target_slug: string;
+    pass_gap: number;
+    used: number;
+    created_at: number;
+  };
+  const params = since === undefined ? [] : [since];
+  const where = params.length > 0 ? `WHERE created_at >= ?` : ``;
+  const found = getSqliteFrom(database)
+    .query(
+      `SELECT id, conversation_id, turn, source_slug, target_slug,
+              pass_gap, used, created_at
+        FROM memory_v3_coactivation
+        ${where}
+        ORDER BY created_at ASC, id ASC`,
+    )
+    .all(...params) as RawRow[];
+  return found.map((row) => ({
+    id: row.id,
+    conversationId: row.conversation_id,
+    turn: row.turn,
+    sourceSlug: row.source_slug,
+    targetSlug: row.target_slug,
+    passGap: row.pass_gap,
+    used: row.used,
+    createdAt: row.created_at,
+  }));
+}
diff --git a/assistant/src/memory/v3/loop.ts b/assistant/src/memory/v3/loop.ts
index 0763ecf8bf6..47a2608bdb3 100644
--- a/assistant/src/memory/v3/loop.ts
+++ b/assistant/src/memory/v3/loop.ts
@@ -44,6 +44,7 @@
  * this composition layer) accumulates across every pass.
  */
 
+import { getLogger } from "../../util/logger.js";
 import type { DrizzleDb } from "../db-connection.js";
 import type {
   RetrievalCost,
@@ -56,6 +57,10 @@ import type {
   GateDecision,
 } from "../v2/harness/trace.js";
 import { getPageIndex } from "../v2/page-index.js";
+import {
+  type CoactivationRow,
+  recordCoactivations,
+} from "./coactivation-store.js";
 import { expandEdges } from "./edges.js";
 import { filterDenseHits } from "./filter.js";
 import { runGate } from "./gate.js";
@@ -66,9 +71,19 @@ import { runTreeWalk } from "./tree-walk.js";
 /** Lane label used to tag each selected slug's provenance in `sourceBySlug`. */
 type LaneSource = "hot" | "sparse" | "dense" | "tree" | "edge";
 
+const log = getLogger("memory-v3-loop");
+
 /** Injected dependencies — the SQLite handle the scout hot lane reads. */
 export interface RetrievalLoopDeps {
   db: DrizzleDb;
+  /**
+   * Conversation this retrieval is running for. Stamped on co-activation rows
+   * when `config.memory.v3.write.coactivation` is on. Omitted when the loop
+   * runs in the offline harness (no live conversation); rows then get "".
+   */
+  conversationId?: string;
+  /** Turn number within the conversation, for co-activation provenance. */
+  turn?: number;
 }
 
 /**
@@ -91,6 +106,9 @@ export async function runRetrievalLoop(
 
   // Cross-pass accumulators.
   const sourceBySlug = new Map<string, LaneSource>();
+  // The first pass each slug entered the candidate set. Drives co-activation
+  // emission below — pass-1 hits (gap source) vs. later-surfaced pages (target).
+  const firstPassBySlug = new Map<string, number>();
   const sticky = new Set<string>();
   const passes: DescentPass[] = [];
   // `ms` is the one cost dimension observable at this composition layer — the
@@ -190,6 +208,13 @@ export async function runRetrievalLoop(
       }
     }
 
+    // Record the first pass each candidate surfaced on. The candidate set is
+    // the union of every lane's contribution this pass; a slug keeps the
+    // earliest pass it appeared on (first write wins).
+    for (const slug of candidates) {
+      if (!firstPassBySlug.has(slug)) firstPassBySlug.set(slug, passNumber);
+    }
+
     // 5. Gate — one capable LLM call over the unioned candidate set.
     const gateResult = await runGate({
       input: passInput,
@@ -219,6 +244,21 @@ export async function runRetrievalLoop(
     passNowText = nextPassNowText(input.nowText, gateResult.decision);
   }
 
+  // Co-activation logging — a best-effort side channel. Gated by
+  // `write.coactivation` (default off). Emits one pass-1 → pass-N pair per
+  // (pass-1 hit, later-surfaced page) in the final selection. Runs inline
+  // before the return; wrapped so a failure in either the computation or the
+  // insert cannot break the RetrievalOutput the caller depends on.
+  if (v3.write?.coactivation) {
+    emitCoactivations({
+      db: deps.db,
+      conversationId: deps.conversationId ?? "",
+      turn: deps.turn ?? 0,
+      selectedSlugs,
+      firstPassBySlug,
+    });
+  }
+
   const trace: DescentTrace = { passes };
   return {
     selectedSlugs,
@@ -229,6 +269,56 @@ export async function runRetrievalLoop(
   };
 }
 
+/**
+ * Emit pass-1 → pass-N co-activation rows for the final selection.
+ *
+ * For each selected page B first surfaced on pass ≥2, pair it with each
+ * selected page A first surfaced on pass 1 (`pass_gap = passOf(B) − 1`). Pages
+ * only surfaced on pass 1 (or never recorded) emit nothing as targets, and no
+ * rows are built when the selection has no pass-1 hit. `used` is 0: the loop
+ * cannot know whether B was load-bearing for the turn; edge-learning
+ * reconciles usefulness later.
+ *
+ * Best-effort: any failure is logged at warn level and swallowed, never thrown.
+ */
+function emitCoactivations(args: {
+  db: DrizzleDb;
+  conversationId: string;
+  turn: number;
+  selectedSlugs: readonly string[];
+  firstPassBySlug: ReadonlyMap<string, number>;
+}): void {
+  try {
+    const { db, conversationId, turn, selectedSlugs, firstPassBySlug } = args;
+    const pass1Hits = selectedSlugs.filter(
+      (slug) => firstPassBySlug.get(slug) === 1,
+    );
+    if (pass1Hits.length === 0) return;
+
+    const createdAt = Date.now();
+    const rows: CoactivationRow[] = [];
+    for (const target of selectedSlugs) {
+      const targetPass = firstPassBySlug.get(target);
+      if (targetPass === undefined || targetPass < 2) continue;
+      for (const source of pass1Hits) {
+        rows.push({
+          conversationId,
+          turn,
+          sourceSlug: source,
+          targetSlug: target,
+          passGap: targetPass - 1,
+          used: 0,
+          createdAt,
+        });
+      }
+    }
+
+    recordCoactivations(db, rows);
+  } catch (err) {
+    log.warn({ err }, "failed to emit co-activations; continuing");
+  }
+}
+
 /**
  * Tag `slug`'s provenance with `lane`, keeping the first lane that surfaced it.
  * The pass order (scouts → tree → edge) gives a deterministic precedence: a

From 9279659dc3ed18d448a8d32cd3a405ec7ec17886 Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:29:17 -0400
Subject: [PATCH 18/21] feat(memory-v3): weighted, decaying auto-edge learning
 job (#31988)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 assistant/src/memory/db-init.ts               |   2 +
 assistant/src/memory/jobs-worker.ts           |   5 +
 .../migrations/263-memory-v3-auto-edges.ts    |  50 +++
 assistant/src/memory/migrations/index.ts      |   4 +
 assistant/src/memory/migrations/registry.ts   |   8 +
 .../v3/__tests__/edge-learning-job.test.ts    | 324 ++++++++++++++++++
 assistant/src/memory/v3/auto-edges.ts         | 223 ++++++++++++
 assistant/src/memory/v3/edge-learning-job.ts  | 160 +++++++++
 8 files changed, 776 insertions(+)
 create mode 100644 assistant/src/memory/migrations/263-memory-v3-auto-edges.ts
 create mode 100644 assistant/src/memory/v3/__tests__/edge-learning-job.test.ts
 create mode 100644 assistant/src/memory/v3/auto-edges.ts
 create mode 100644 assistant/src/memory/v3/edge-learning-job.ts

diff --git a/assistant/src/memory/db-init.ts b/assistant/src/memory/db-init.ts
index 9f6594953a5..f5eba7dce3d 100644
--- a/assistant/src/memory/db-init.ts
+++ b/assistant/src/memory/db-init.ts
@@ -129,6 +129,7 @@ import {
   migrateMemoryRetrospectiveState,
   migrateMemoryV2ActivationLogs,
   migrateMemoryV2InjectionEvents,
+  migrateMemoryV3AutoEdges,
   migrateMemoryV3Coactivation,
   migrateMessageBookmarks,
   migrateMessagesConversationCreatedAtIndex,
@@ -458,6 +459,7 @@ export function initializeDb(): void {
     migrateRenameCleanedAt,
     migrateLlmUsageAddRawUsage,
     migrateMemoryV3Coactivation,
+    migrateMemoryV3AutoEdges,
   ];
 
   // Run each migration step, catching and logging individual failures so one
diff --git a/assistant/src/memory/jobs-worker.ts b/assistant/src/memory/jobs-worker.ts
index 7b9da8313f1..b12b28928fb 100644
--- a/assistant/src/memory/jobs-worker.ts
+++ b/assistant/src/memory/jobs-worker.ts
@@ -84,6 +84,7 @@ import {
 } from "./v2/consolidation-job.js";
 import { memoryV2SweepJob } from "./v2/sweep-job.js";
 import { memoryV3ConsolidateJob } from "./v3/consolidation-job.js";
+import { memoryV3EdgeLearningJob } from "./v3/edge-learning-job.js";
 import { memoryV3IndexMaintenanceJob } from "./v3/maintenance.js";
 
 const log = getLogger("memory-jobs-worker");
@@ -611,6 +612,10 @@ async function processJob(
     case "memory_v3_index_maintenance":
       await memoryV3IndexMaintenanceJob(job);
       return;
+    case "memory_v3_edge_learning":
+      // Fast lane: bounded DB work (decay + reinforce + read), no LLM.
+      memoryV3EdgeLearningJob(job);
+      return;
     case "memory_v2_migrate":
       await memoryV2MigrateJob(job, config);
       return;
diff --git a/assistant/src/memory/migrations/263-memory-v3-auto-edges.ts b/assistant/src/memory/migrations/263-memory-v3-auto-edges.ts
new file mode 100644
index 00000000000..679073b61ed
--- /dev/null
+++ b/assistant/src/memory/migrations/263-memory-v3-auto-edges.ts
@@ -0,0 +1,50 @@
+import type { DrizzleDb } from "../db-connection.js";
+import { getSqliteFrom } from "../db-connection.js";
+import { withCrashRecovery } from "./validate-migration-state.js";
+
+const CHECKPOINT_KEY = "migration_memory_v3_auto_edges_v1";
+
+/**
+ * Create the memory_v3_auto_edges table — the **learned** edge graph, a
+ * distinct class from the curated `edges:` frontmatter graph.
+ *
+ * Each row is a weighted directed association `source_slug → target_slug` that
+ * the edge-learning job (`memory_v3_edge_learning`) accrues from *used*
+ * co-activations (migration 262's `memory_v3_coactivation` rows) and decays
+ * over time. `weight` is a multiplicatively-decaying real; `last_reinforced_at`
+ * is the wall-clock ms of the most recent reinforcement or decay pass (decay
+ * re-stamps it), which the decay pass uses to compute elapsed time per pair.
+ *
+ * Auto-edges are advisory: the read path consumes only above-threshold pairs
+ * via edge-expansion's `extraAdjacency` seam, and high-weight pairs surface as
+ * promotion *candidates* for the assistant to ratify into curated `edges:`
+ * during consolidation. This table never auto-writes page frontmatter.
+ *
+ * `PRIMARY KEY(source_slug, target_slug)` makes each ordered pair unique, so
+ * reinforce is a single UPSERT. The index on `(weight)` keeps the
+ * above-threshold scan and top-N promotion-candidate read cheap as the learned
+ * graph grows.
+ */
+export function migrateMemoryV3AutoEdges(database: DrizzleDb): void {
+  withCrashRecovery(database, CHECKPOINT_KEY, () => {
+    const raw = getSqliteFrom(database);
+    raw.exec(/*sql*/ `
+      CREATE TABLE IF NOT EXISTS memory_v3_auto_edges (
+        source_slug TEXT NOT NULL,
+        target_slug TEXT NOT NULL,
+        weight REAL NOT NULL,
+        last_reinforced_at INTEGER NOT NULL,
+        PRIMARY KEY (source_slug, target_slug)
+      )
+    `);
+    raw.exec(/*sql*/ `
+      CREATE INDEX IF NOT EXISTS idx_memory_v3_auto_edges_weight
+        ON memory_v3_auto_edges (weight)
+    `);
+  });
+}
+
+export function downMemoryV3AutoEdges(database: DrizzleDb): void {
+  const raw = getSqliteFrom(database);
+  raw.exec(/*sql*/ `DROP TABLE IF EXISTS memory_v3_auto_edges`);
+}
diff --git a/assistant/src/memory/migrations/index.ts b/assistant/src/memory/migrations/index.ts
index 52a41dc854f..e98a9448820 100644
--- a/assistant/src/memory/migrations/index.ts
+++ b/assistant/src/memory/migrations/index.ts
@@ -246,6 +246,10 @@ export {
   downMemoryV3Coactivation,
   migrateMemoryV3Coactivation,
 } from "./262-memory-v3-coactivation.js";
+export {
+  downMemoryV3AutoEdges,
+  migrateMemoryV3AutoEdges,
+} from "./263-memory-v3-auto-edges.js";
 export {
   MIGRATION_REGISTRY,
   type MigrationRegistryEntry,
diff --git a/assistant/src/memory/migrations/registry.ts b/assistant/src/memory/migrations/registry.ts
index c3f56f972b8..5e618629157 100644
--- a/assistant/src/memory/migrations/registry.ts
+++ b/assistant/src/memory/migrations/registry.ts
@@ -56,6 +56,7 @@ import { downConversationCleanedAt } from "./259-conversation-cleaned-at.js";
 import { downRenameCleanedAt } from "./260-rename-cleaned-at.js";
 import { downLlmUsageAddRawUsage } from "./261-llm-usage-add-raw-usage.js";
 import { downMemoryV3Coactivation } from "./262-memory-v3-coactivation.js";
+import { downMemoryV3AutoEdges } from "./263-memory-v3-auto-edges.js";
 
 export interface MigrationRegistryEntry {
   /** The checkpoint key written to memory_checkpoints on completion. */
@@ -478,6 +479,13 @@ export const MIGRATION_REGISTRY: MigrationRegistryEntry[] = [
       "Create memory_v3_coactivation table — append-only log of pass-1 → pass-N co-activation pairs (gradient signal) emitted by the v3 retrieval loop and reconciled later by edge-learning",
     down: downMemoryV3Coactivation,
   },
+  {
+    key: "migration_memory_v3_auto_edges_v1",
+    version: 56,
+    description:
+      "Create memory_v3_auto_edges table — weighted, decaying learned association graph (distinct from curated edges:) accrued by the edge-learning job from used co-activations and consumed above-threshold by edge expansion",
+    down: downMemoryV3AutoEdges,
+  },
 ];
 
 export function getMaxMigrationVersion(): number {
diff --git a/assistant/src/memory/v3/__tests__/edge-learning-job.test.ts b/assistant/src/memory/v3/__tests__/edge-learning-job.test.ts
new file mode 100644
index 00000000000..f99649cf416
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/edge-learning-job.test.ts
@@ -0,0 +1,324 @@
+/**
+ * Tests for `assistant/src/memory/v3/auto-edges.ts`, `edge-learning-job.ts`,
+ * and their sibling migration `263-memory-v3-auto-edges.ts`.
+ *
+ * Coverage:
+ *   - Migration creates the table + weight index; safe to re-run; down drops it.
+ *   - reinforce upserts and accrues weight on the (source, target) PK.
+ *   - decay multiplicatively reduces unused weights and prunes near-zero edges.
+ *   - aboveThreshold returns exactly the edge-expansion `extraAdjacency` shape.
+ *   - A job run over fixture co-activations reinforces *used* rows only, skips
+ *     unused ones, and emits weight-floored, diversity-capped promotion
+ *     candidates. No real LLM, no real workspace DB.
+ *
+ * Uses an in-memory bun:sqlite database. The checkpoints module is stubbed with
+ * an in-memory Map so the watermark works without a real getDb() backing store;
+ * runEdgeLearning takes the in-memory DB explicitly for all auto-edge and
+ * co-activation reads.
+ */
+
+import { Database } from "bun:sqlite";
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+
+import { drizzle } from "drizzle-orm/bun-sqlite";
+
+import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js";
+
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () => makeMockLogger(),
+}));
+
+const checkpointStore = new Map<string, string>();
+mock.module("../../checkpoints.js", () => ({
+  getMemoryCheckpoint: (key: string) => checkpointStore.get(key) ?? null,
+  setMemoryCheckpoint: (key: string, value: string) =>
+    checkpointStore.set(key, value),
+}));
+
+import type { DrizzleDb } from "../../db-connection.js";
+import { getSqliteFrom } from "../../db-connection.js";
+import { migrateMemoryV3Coactivation } from "../../migrations/262-memory-v3-coactivation.js";
+import {
+  downMemoryV3AutoEdges,
+  migrateMemoryV3AutoEdges,
+} from "../../migrations/263-memory-v3-auto-edges.js";
+import * as schema from "../../schema.js";
+import {
+  aboveThreshold,
+  decay,
+  reinforce,
+  topByWeight,
+} from "../auto-edges.js";
+import {
+  type CoactivationRow,
+  recordCoactivations,
+} from "../coactivation-store.js";
+import {
+  EDGE_DECAY_HALF_LIFE_MS,
+  MAX_CANDIDATES_PER_SOURCE,
+  MAX_PROMOTION_CANDIDATES,
+  runEdgeLearning,
+} from "../edge-learning-job.js";
+
+// memory_checkpoints is required by withCrashRecovery and normally created by an
+// early core migration. Stand it up by hand so the v3 migrations can run in
+// isolation against a fresh in-memory DB.
+const CHECKPOINTS_DDL = /*sql*/ `
+  CREATE TABLE memory_checkpoints (
+    key TEXT PRIMARY KEY,
+    value TEXT NOT NULL,
+    updated_at INTEGER NOT NULL
+  )
+`;
+
+let sqlite: Database;
+let database: DrizzleDb;
+
+beforeEach(() => {
+  sqlite = new Database(":memory:");
+  database = drizzle(sqlite, { schema });
+  getSqliteFrom(database).exec(CHECKPOINTS_DDL);
+  migrateMemoryV3Coactivation(database);
+  migrateMemoryV3AutoEdges(database);
+  checkpointStore.clear();
+});
+
+afterEach(() => {
+  sqlite.close();
+});
+
+function readWeight(source: string, target: string): number | undefined {
+  const row = getSqliteFrom(database)
+    .query(
+      `SELECT weight FROM memory_v3_auto_edges
+        WHERE source_slug = ? AND target_slug = ?`,
+    )
+    .get(source, target) as { weight: number } | undefined;
+  return row?.weight;
+}
+
+// ---------------------------------------------------------------------------
+// Migration.
+// ---------------------------------------------------------------------------
+
+describe("migrateMemoryV3AutoEdges", () => {
+  test("creates table and weight index; safe to re-run", () => {
+    migrateMemoryV3AutoEdges(database);
+    migrateMemoryV3AutoEdges(database);
+
+    const raw = getSqliteFrom(database);
+    const table = raw
+      .query(
+        `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_auto_edges'`,
+      )
+      .get();
+    expect(table).toBeTruthy();
+
+    const indexNames = new Set(
+      (
+        raw
+          .query(
+            `SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='memory_v3_auto_edges'`,
+          )
+          .all() as Array<{ name: string }>
+      ).map((r) => r.name),
+    );
+    expect(indexNames.has("idx_memory_v3_auto_edges_weight")).toBe(true);
+  });
+
+  test("downMemoryV3AutoEdges drops the table", () => {
+    downMemoryV3AutoEdges(database);
+    const table = getSqliteFrom(database)
+      .query(
+        `SELECT name FROM sqlite_master WHERE type='table' AND name='memory_v3_auto_edges'`,
+      )
+      .get();
+    expect(table).toBeFalsy();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// auto-edges store.
+// ---------------------------------------------------------------------------
+
+describe("reinforce", () => {
+  test("inserts a new pair at the increment, then accrues on the PK", () => {
+    reinforce(database, "a", "b", 1_000);
+    expect(readWeight("a", "b")).toBe(1);
+    reinforce(database, "a", "b", 2_000);
+    expect(readWeight("a", "b")).toBe(2);
+  });
+
+  test("directed pairs are independent", () => {
+    reinforce(database, "a", "b", 1_000);
+    reinforce(database, "b", "a", 1_000);
+    expect(readWeight("a", "b")).toBe(1);
+    expect(readWeight("b", "a")).toBe(1);
+  });
+});
+
+describe("decay", () => {
+  test("halves a weight after one half-life and advances last_reinforced_at", () => {
+    reinforce(database, "a", "b", 0);
+    // Reinforce twice so the weight starts at 2 and one half-life lands on 1.
+    reinforce(database, "a", "b", 0); // weight = 2
+    const pruned = decay(
+      database,
+      EDGE_DECAY_HALF_LIFE_MS,
+      EDGE_DECAY_HALF_LIFE_MS,
+    );
+    expect(pruned).toBe(0);
+    const w = readWeight("a", "b")!;
+    expect(w).toBeCloseTo(1, 5);
+
+    const stamped = getSqliteFrom(database)
+      .query(
+        `SELECT last_reinforced_at FROM memory_v3_auto_edges
+          WHERE source_slug='a' AND target_slug='b'`,
+      )
+      .get() as { last_reinforced_at: number };
+    expect(stamped.last_reinforced_at).toBe(EDGE_DECAY_HALF_LIFE_MS);
+  });
+
+  test("prunes edges that decay below the floor", () => {
+    reinforce(database, "a", "b", 0);
+    // Ten half-lives ⇒ weight × 2^-10 ≈ 0.001 < floor.
+    const pruned = decay(
+      database,
+      10 * EDGE_DECAY_HALF_LIFE_MS,
+      EDGE_DECAY_HALF_LIFE_MS,
+    );
+    expect(pruned).toBe(1);
+    expect(readWeight("a", "b")).toBeUndefined();
+  });
+
+  test("clamps future timestamps so decay never amplifies weight", () => {
+    reinforce(database, "a", "b", 10_000);
+    // now < last_reinforced_at ⇒ elapsed clamps to 0 ⇒ weight unchanged.
+    decay(database, 0, EDGE_DECAY_HALF_LIFE_MS);
+    expect(readWeight("a", "b")).toBe(1);
+  });
+});
+
+describe("aboveThreshold", () => {
+  test("returns the source → Set<target> adjacency for above-threshold pairs", () => {
+    reinforce(database, "a", "b", 0); // weight 1
+    reinforce(database, "a", "c", 0);
+    reinforce(database, "a", "c", 0); // weight 2
+    reinforce(database, "x", "y", 0); // weight 1
+
+    const adjacency = aboveThreshold(database, 2);
+    // Only a→c clears the threshold of 2.
+    expect([...adjacency.keys()]).toEqual(["a"]);
+    expect([...adjacency.get("a")!]).toEqual(["c"]);
+
+    const inclusive = aboveThreshold(database, 1);
+    expect([...inclusive.get("a")!].sort()).toEqual(["b", "c"]);
+    expect([...inclusive.get("x")!]).toEqual(["y"]);
+  });
+
+  test("empty when nothing clears the threshold", () => {
+    reinforce(database, "a", "b", 0);
+    expect(aboveThreshold(database, 5).size).toBe(0);
+  });
+});
+
+describe("topByWeight", () => {
+  test("returns heaviest edges first, capped at limit", () => {
+    reinforce(database, "a", "b", 0);
+    reinforce(database, "a", "b", 0); // weight 2
+    reinforce(database, "c", "d", 0); // weight 1
+    const top = topByWeight(database, 1);
+    expect(top).toHaveLength(1);
+    expect(top[0]).toMatchObject({
+      sourceSlug: "a",
+      targetSlug: "b",
+      weight: 2,
+    });
+  });
+});
+
+// ---------------------------------------------------------------------------
+// edge-learning job.
+// ---------------------------------------------------------------------------
+
+function coact(
+  source: string,
+  target: string,
+  used: number,
+  createdAt: number,
+): CoactivationRow {
+  return {
+    conversationId: "conv-1",
+    turn: 1,
+    sourceSlug: source,
+    targetSlug: target,
+    passGap: 1,
+    used,
+    createdAt,
+  };
+}
+
+describe("runEdgeLearning", () => {
+  test("reinforces used co-activations only and skips unused ones", () => {
+    recordCoactivations(database, [
+      coact("a", "b", 1, 100),
+      coact("a", "b", 1, 200),
+      coact("c", "d", 0, 300),
+    ]);
+
+    const result = runEdgeLearning(database, 1_000);
+    expect(result.reinforced).toBe(2);
+    expect(result.skippedUnused).toBe(1);
+    expect(readWeight("a", "b")).toBe(2);
+    expect(readWeight("c", "d")).toBeUndefined();
+  });
+
+  test("advances the watermark so the same co-activation isn't re-counted", () => {
+    recordCoactivations(database, [coact("a", "b", 1, 100)]);
+    runEdgeLearning(database, 1_000);
+    expect(readWeight("a", "b")).toBe(1);
+
+    // Second run with no new co-activations: only decay, no fresh reinforcement.
+    const second = runEdgeLearning(database, 1_000);
+    expect(second.reinforced).toBe(0);
+    expect(readWeight("a", "b")).toBe(1);
+
+    // A newer co-activation past the watermark is picked up.
+    recordCoactivations(database, [coact("a", "b", 1, 500)]);
+    const third = runEdgeLearning(database, 1_000);
+    expect(third.reinforced).toBe(1);
+    expect(readWeight("a", "b")).toBe(2);
+  });
+
+  test("emits promotion candidates above the weight floor", () => {
+    // Two used co-activations ⇒ weight 2 ≥ floor (1.5); single ⇒ weight 1 < floor.
+    recordCoactivations(database, [
+      coact("a", "b", 1, 100),
+      coact("a", "b", 1, 200),
+      coact("c", "d", 1, 300),
+    ]);
+    const result = runEdgeLearning(database, 1_000);
+    const pairs = result.candidates.map(
+      (c) => `${c.sourceSlug}->${c.targetSlug}`,
+    );
+    expect(pairs).toEqual(["a->b"]);
+  });
+
+  test("caps candidates per source so one hub can't monopolize the slate", () => {
+    const rows: CoactivationRow[] = [];
+    let t = 100;
+    // Hub "a" → many targets, each reinforced twice (weight 2 ≥ floor).
+    for (let i = 0; i < MAX_CANDIDATES_PER_SOURCE + 3; i++) {
+      rows.push(coact("a", `t${i}`, 1, t++));
+      rows.push(coact("a", `t${i}`, 1, t++));
+    }
+    recordCoactivations(database, rows);
+    const result = runEdgeLearning(database, 1_000);
+    const fromA = result.candidates.filter((c) => c.sourceSlug === "a");
+    expect(fromA.length).toBe(MAX_CANDIDATES_PER_SOURCE);
+    expect(result.candidates.length).toBeLessThanOrEqual(
+      MAX_PROMOTION_CANDIDATES,
+    );
+  });
+});
diff --git a/assistant/src/memory/v3/auto-edges.ts b/assistant/src/memory/v3/auto-edges.ts
new file mode 100644
index 00000000000..ce40530d8e5
--- /dev/null
+++ b/assistant/src/memory/v3/auto-edges.ts
@@ -0,0 +1,223 @@
+/**
+ * Memory v3 — learned weighted auto-edge store.
+ *
+ * Read/write helpers over `memory_v3_auto_edges` (migration 263) — the
+ * **learned** association graph, a distinct class from the curated `edges:`
+ * frontmatter graph. Each row is a weighted directed pair `source → target`
+ * that the edge-learning job accrues from *used* co-activations and decays over
+ * wall-clock time.
+ *
+ * Three primitives (plus the {@link topByWeight} read helper):
+ *   - {@link reinforce} — bump a pair's weight, but only for a *used*
+ *     co-activation (we reinforce usefulness, not mere retrieval).
+ *   - {@link decay} — multiplicatively decay all weights toward zero on a
+ *     half-life schedule, so a pair that stops being reinforced fades. This is
+ *     the rich-get-richer counterweight: weight is a leaky integrator, not a
+ *     monotone counter.
+ *   - {@link aboveThreshold} — project the learned graph to the
+ *     `ReadonlyMap<source, ReadonlySet<target>>` adjacency that edge
+ *     expansion's `extraAdjacency` seam consumes (only pairs at/above a weight
+ *     threshold traverse).
+ *
+ * The decay model mirrors v2's injection-events EMA: `λ = ln 2 / halfLife`, and
+ * a pair decays by `exp(-λ × elapsed)` since its `last_reinforced_at`.
+ */
+
+import { getLogger } from "../../util/logger.js";
+import type { DrizzleDb } from "../db-connection.js";
+import { getSqliteFrom } from "../db-connection.js";
+
+const log = getLogger("memory-v3-auto-edges");
+
+/** Weight added to a pair per *used* co-activation reinforcement. */
+export const REINFORCE_INCREMENT = 1;
+
+/** Weights below this after decay are pruned rather than kept as dead rows. */
+export const PRUNE_FLOOR = 0.01;
+
+/** A learned auto-edge as stored; decay also re-stamps `lastReinforcedAt`. */
+export interface AutoEdgeRow {
+  sourceSlug: string;
+  targetSlug: string;
+  weight: number;
+  lastReinforcedAt: number;
+}
+
+/**
+ * Reinforce the directed pair `source → target`: bump its weight by
+ * {@link REINFORCE_INCREMENT} and stamp `last_reinforced_at = now`. **Only call
+ * this for a *used* co-activation** — the edge graph encodes which associations
+ * actually proved load-bearing for a turn, not which pages merely surfaced
+ * together. (The caller decides usedness from the co-activation's `used` flag;
+ * this primitive is unconditional so it stays composable.)
+ *
+ * UPSERT on the `(source, target)` primary key: a new pair starts at the
+ * increment; an existing pair accrues on top of its current weight (after the
+ * latest decay pass, since decay rewrites weight in place).
+ *
+ * Best-effort: a failed write must never abort the edge-learning job.
+ */
+export function reinforce(
+  database: DrizzleDb,
+  source: string,
+  target: string,
+  now: number,
+): void {
+  try {
+    const raw = getSqliteFrom(database);
+    raw
+      .prepare(
+        `INSERT INTO memory_v3_auto_edges
+           (source_slug, target_slug, weight, last_reinforced_at)
+           VALUES (?, ?, ?, ?)
+         ON CONFLICT(source_slug, target_slug) DO UPDATE SET
+           weight = weight + ?,
+           last_reinforced_at = ?`,
+      )
+      .run(source, target, REINFORCE_INCREMENT, now, REINFORCE_INCREMENT, now);
+  } catch (err) {
+    log.warn(
+      { err, source, target },
+      "failed to reinforce auto-edge; continuing",
+    );
+  }
+}
+
+/**
+ * Multiplicatively decay every auto-edge weight toward zero on a half-life
+ * schedule: `weight ← weight × exp(-λ × (now − last_reinforced_at))`, with
+ * `λ = ln 2 / halfLifeMs`. A pair last reinforced one half-life ago halves; two
+ * half-lives ago quarters; and so on. `last_reinforced_at` is re-stamped to
+ * `now` so successive decay passes don't double-count the same interval.
+ *
+ * Pairs whose decayed weight falls below {@link PRUNE_FLOOR} are deleted so the
+ * learned graph doesn't accumulate a long tail of effectively-dead edges.
+ *
+ * Returns the number of rows pruned (0 if `halfLifeMs <= 0` or on failure).
+ */
+export function decay(
+  database: DrizzleDb,
+  now: number,
+  halfLifeMs: number,
+): number {
+  if (halfLifeMs <= 0) return 0;
+  const lambda = Math.LN2 / halfLifeMs;
+  try {
+    const raw = getSqliteFrom(database);
+    const rows = raw
+      .query(
+        `SELECT source_slug, target_slug, weight, last_reinforced_at
+           FROM memory_v3_auto_edges`,
+      )
+      .all() as Array<{
+      source_slug: string;
+      target_slug: string;
+      weight: number;
+      last_reinforced_at: number;
+    }>;
+    if (rows.length === 0) return 0;
+
+    const update = raw.prepare(
+      `UPDATE memory_v3_auto_edges
+         SET weight = ?, last_reinforced_at = ?
+         WHERE source_slug = ? AND target_slug = ?`,
+    );
+    const prune = raw.prepare(
+      `DELETE FROM memory_v3_auto_edges
+         WHERE source_slug = ? AND target_slug = ?`,
+    );
+
+    let pruned = 0;
+    const apply = raw.transaction(() => {
+      for (const row of rows) {
+        // Future timestamps (clock skew) would amplify rather than decay — clamp
+        // elapsed at 0 so decay only ever shrinks weight.
+        const elapsed = Math.max(0, now - row.last_reinforced_at);
+        const decayed = row.weight * Math.exp(-lambda * elapsed);
+        if (decayed < PRUNE_FLOOR) {
+          prune.run(row.source_slug, row.target_slug);
+          pruned += 1;
+        } else {
+          update.run(decayed, now, row.source_slug, row.target_slug);
+        }
+      }
+    });
+    apply();
+    return pruned;
+  } catch (err) {
+    log.warn({ err }, "failed to decay auto-edges; continuing");
+    return 0;
+  }
+}
+
+/**
+ * Project the learned graph to the `extraAdjacency` shape edge expansion
+ * consumes: `source → Set<target>` for every pair whose weight is at or above
+ * `threshold`. Edge expansion thresholds nothing itself — it merges whatever
+ * adjacency it's handed — so this read is where the weight cutoff is applied.
+ *
+ * Returns an empty map on any read failure so the caller (a best-effort read
+ * lane) degrades to "no learned edges" rather than aborting retrieval.
+ */
+export function aboveThreshold(
+  database: DrizzleDb,
+  threshold: number,
+): Map<string, Set<string>> {
+  const adjacency = new Map<string, Set<string>>();
+  try {
+    const raw = getSqliteFrom(database);
+    const rows = raw
+      .query(
+        `SELECT source_slug, target_slug
+           FROM memory_v3_auto_edges
+           WHERE weight >= ?
+           ORDER BY source_slug ASC, target_slug ASC`,
+      )
+      .all(threshold) as Array<{ source_slug: string; target_slug: string }>;
+    for (const row of rows) {
+      let targets = adjacency.get(row.source_slug);
+      if (!targets) {
+        targets = new Set<string>();
+        adjacency.set(row.source_slug, targets);
+      }
+      targets.add(row.target_slug);
+    }
+  } catch (err) {
+    log.warn({ err, threshold }, "failed to read auto-edges; continuing");
+  }
+  return adjacency;
+}
+
+/**
+ * Read the top-weight auto-edges, heaviest first (ties by slug), capped at
+ * `limit`; returns `[]` when `limit <= 0` or the read fails. The edge-learning
+ * job surfaces them as advisory candidates to ratify into curated `edges:`.
+ */
+export function topByWeight(database: DrizzleDb, limit: number): AutoEdgeRow[] {
+  if (limit <= 0) return [];
+  try {
+    const raw = getSqliteFrom(database);
+    const rows = raw
+      .query(
+        `SELECT source_slug, target_slug, weight, last_reinforced_at
+           FROM memory_v3_auto_edges
+           ORDER BY weight DESC, source_slug ASC, target_slug ASC
+           LIMIT ?`,
+      )
+      .all(limit) as Array<{
+      source_slug: string;
+      target_slug: string;
+      weight: number;
+      last_reinforced_at: number;
+    }>;
+    return rows.map((r) => ({
+      sourceSlug: r.source_slug,
+      targetSlug: r.target_slug,
+      weight: r.weight,
+      lastReinforcedAt: r.last_reinforced_at,
+    }));
+  } catch (err) {
+    log.warn({ err, limit }, "failed to read top auto-edges; continuing");
+    return [];
+  }
+}
diff --git a/assistant/src/memory/v3/edge-learning-job.ts b/assistant/src/memory/v3/edge-learning-job.ts
new file mode 100644
index 00000000000..875c058f064
--- /dev/null
+++ b/assistant/src/memory/v3/edge-learning-job.ts
@@ -0,0 +1,160 @@
+/**
+ * Memory v3 — `memory_v3_edge_learning` job (fast lane, no LLM).
+ *
+ * Reconciles the raw co-activation log (`memory_v3_coactivation`, migration
+ * 262) into the weighted learned-edge graph (`memory_v3_auto_edges`, migration
+ * 263). One pass does three things:
+ *
+ *   1. **Decay** — multiplicatively age all existing auto-edge weights toward
+ *      zero on a half-life schedule (the rich-get-richer counterweight: an edge
+ *      that stops being reinforced fades, so a once-hot pair can't dominate the
+ *      adjacency forever).
+ *   2. **Reinforce** — for each recent co-activation whose `used` flag is set,
+ *      bump the `source → target` weight. *Used-only*: we learn associations
+ *      that proved load-bearing for a turn, not pairs that merely surfaced
+ *      together. The watermark checkpoint advances so each co-activation is
+ *      counted once.
+ *   3. **Propose** — surface the top-weight auto-edges as advisory promotion
+ *      *candidates* for the assistant to ratify into curated `edges:` during
+ *      consolidation. This job PROPOSES; it never auto-writes page frontmatter.
+ *      Diversity counterweight: candidates are capped and a single source's
+ *      out-edges are bounded so one hub can't monopolize the slate.
+ *
+ * Decay runs before reinforce so a fresh reinforcement isn't immediately aged
+ * by the same pass. The job is idempotent in effect: re-running with no new
+ * co-activations only decays (which is itself elapsed-time-bounded).
+ */
+
+import { getLogger } from "../../util/logger.js";
+import { getMemoryCheckpoint, setMemoryCheckpoint } from "../checkpoints.js";
+import type { DrizzleDb } from "../db-connection.js";
+import { getDb } from "../db-connection.js";
+import type { MemoryJob } from "../jobs-store.js";
+import {
+  type AutoEdgeRow,
+  decay,
+  reinforce,
+  topByWeight,
+} from "./auto-edges.js";
+import { readCoactivations } from "./coactivation-store.js";
+
+const log = getLogger("memory-v3-edge-learning");
+
+/**
+ * Half-life of auto-edge weight decay. Matches the v2 injection-score cadence
+ * (3 days) — a pair reinforced 3 days ago and never since contributes half its
+ * weight, 6 days ago a quarter.
+ */
+export const EDGE_DECAY_HALF_LIFE_MS = 3 * 24 * 60 * 60 * 1000;
+
+/** Max promotion candidates surfaced per run (the diversity cap). */
+export const MAX_PROMOTION_CANDIDATES = 20;
+
+/** Max candidates contributed by any single source slug (anti-hub diversity). */
+export const MAX_CANDIDATES_PER_SOURCE = 3;
+
+/**
+ * Minimum weight for an auto-edge to be eligible for promotion. A pair must
+ * accrue more than a single reinforcement (which decays away) before it's worth
+ * proposing as a curated edge.
+ */
+export const PROMOTION_WEIGHT_FLOOR = 1.5;
+
+/** Checkpoint key for the high-water mark of reconciled co-activations. */
+const WATERMARK_KEY = "memory_v3_edge_learning:coactivation_watermark";
+
+/** Summary of one edge-learning pass, returned for the dispatcher log + tests. */
+export interface EdgeLearningResult {
+  /** Used co-activations reinforced this pass. */
+  reinforced: number;
+  /** Co-activations skipped because `used` was falsy. */
+  skippedUnused: number;
+  /** Auto-edges pruned by decay (fell below the floor). */
+  pruned: number;
+  /** Advisory promotion candidates, heaviest first, after the diversity cap. */
+  candidates: AutoEdgeRow[];
+}
+
+/**
+ * Run one edge-learning pass against `database` (decay → reinforce → propose).
+ * No LLM or workspace I/O; an unparseable watermark is treated as unset.
+ */
+export function runEdgeLearning(
+  database: DrizzleDb,
+  now = Date.now(),
+): EdgeLearningResult {
+  // 1. Decay first so this pass's reinforcements aren't immediately aged.
+  const pruned = decay(database, now, EDGE_DECAY_HALF_LIFE_MS);
+
+  // 2. Reinforce co-activations newer than the watermark (`since` is inclusive,
+  //    hence +1ms). A corrupt checkpoint parses to NaN; fall back to 0.
+  const parsed = parseInt(getMemoryCheckpoint(WATERMARK_KEY) ?? "0", 10);
+  const watermark = Number.isFinite(parsed) ? parsed : 0;
+  const since = watermark > 0 ? watermark + 1 : undefined;
+  const coactivations = readCoactivations(database, since);
+
+  let reinforced = 0;
+  let skippedUnused = 0;
+  let maxCreatedAt = watermark;
+  for (const row of coactivations) {
+    if (row.createdAt > maxCreatedAt) maxCreatedAt = row.createdAt;
+    // Reinforce usefulness, not mere retrieval: skip co-activations the loop
+    // (or a later usefulness reconciliation) did not mark as used.
+    if (!row.used) {
+      skippedUnused += 1;
+      continue;
+    }
+    reinforce(database, row.sourceSlug, row.targetSlug, now);
+    reinforced += 1;
+  }
+  if (maxCreatedAt > watermark) {
+    setMemoryCheckpoint(WATERMARK_KEY, String(maxCreatedAt));
+  }
+
+  // 3. Propose promotion candidates: heaviest auto-edges above the floor, capped
+  //    overall and per-source so a single hub can't monopolize the slate.
+  const candidates = selectPromotionCandidates(
+    topByWeight(database, MAX_PROMOTION_CANDIDATES * MAX_CANDIDATES_PER_SOURCE),
+  );
+
+  log.info(
+    {
+      reinforced,
+      skippedUnused,
+      pruned,
+      candidateCount: candidates.length,
+    },
+    "v3 edge learning complete",
+  );
+
+  return { reinforced, skippedUnused, pruned, candidates };
+}
+
+/**
+ * Apply the weight floor and the overall / per-source diversity caps to a
+ * weight-sorted list of auto-edges. Input must already be sorted heaviest-first
+ * (as {@link topByWeight} returns).
+ */
+function selectPromotionCandidates(sorted: AutoEdgeRow[]): AutoEdgeRow[] {
+  const picked: AutoEdgeRow[] = [];
+  const takenBySource = new Map<string, number>();
+  for (const edge of sorted) {
+    if (picked.length >= MAX_PROMOTION_CANDIDATES) break;
+    const taken = takenBySource.get(edge.sourceSlug) ?? 0;
+    const belowFloor = edge.weight < PROMOTION_WEIGHT_FLOOR;
+    if (belowFloor || taken >= MAX_CANDIDATES_PER_SOURCE) continue;
+    takenBySource.set(edge.sourceSlug, taken + 1);
+    picked.push(edge);
+  }
+  return picked;
+}
+
+/**
+ * Job handler for `memory_v3_edge_learning`: delegates to
+ * {@link runEdgeLearning} on the DB returned by `getDb()`, with `now` left at
+ * its `Date.now()` default. `_job` is accepted but never read, so any payload
+ * is ignored. Returns the pass summary unchanged.
+ */
+export function memoryV3EdgeLearningJob(_job: MemoryJob): EdgeLearningResult {
+  return runEdgeLearning(getDb());
+}

From 93da857fb26375f40be9385016be519b73a9de8b Mon Sep 17 00:00:00 2001
From: velissa-ai <velissa@velissa.ai>
Date: Mon, 25 May 2026 03:33:29 -0400
Subject: [PATCH 19/21] feat(memory-v3): live shadow via memoryRetrieval
 middleware (inject v2, log v3) (#31989)

Co-authored-by: Vellum Assistant <assistant@vellum.ai>
---
 .../memory/memory-v2-activation-log-store.ts  |  21 +-
 .../v3/__tests__/shadow-middleware.test.ts    | 292 +++++++++++++++++
 assistant/src/memory/v3/shadow-middleware.ts  | 305 ++++++++++++++++++
 assistant/src/plugins/defaults/index.ts       |   6 +
 4 files changed, 618 insertions(+), 6 deletions(-)
 create mode 100644 assistant/src/memory/v3/__tests__/shadow-middleware.test.ts
 create mode 100644 assistant/src/memory/v3/shadow-middleware.ts

diff --git a/assistant/src/memory/memory-v2-activation-log-store.ts b/assistant/src/memory/memory-v2-activation-log-store.ts
index 5b2e2be56ff..3afa0bcf7c2 100644
--- a/assistant/src/memory/memory-v2-activation-log-store.ts
+++ b/assistant/src/memory/memory-v2-activation-log-store.ts
@@ -115,11 +115,15 @@ export interface RecordMemoryV2ActivationLogParams {
    * `per-turn` for normal append injections, `errored` when `injectMemoryV2Block`
    * threw before completing — telemetry is still written so silent failures
    * are observable in the database, with whatever `concepts` rows had been
-   * built so far (possibly empty). `router` indicates the Sonnet
-   * router selected the per-turn page set; router-mode rows carry zeroed
-   * activation values and `source: "router"` on every concept row.
+   * built so far (possibly empty). `router` indicates the LLM router selected
+   * the per-turn page set; router-mode rows carry zeroed activation values and
+   * `source: "router"` on every concept row. `v3_shadow` is written by the
+   * live-shadow v3 retrieval middleware: it records v3's selection set for
+   * comparison without affecting injected context. The harness oracle filters
+   * `mode='router'`, so `v3_shadow` rows never pollute it; the inspector can
+   * still surface them.
    */
-  mode: "context-load" | "per-turn" | "errored" | "router";
+  mode: "context-load" | "per-turn" | "errored" | "router" | "v3_shadow";
   concepts: MemoryV2ConceptRowRecord[];
   config: MemoryV2ConfigSnapshot;
 }
@@ -167,7 +171,7 @@ export function backfillMemoryV2ActivationMessageId(
 export interface MemoryV2ActivationLog {
   conversationId: string;
   turn: number;
-  mode: "context-load" | "per-turn" | "errored" | "router";
+  mode: "context-load" | "per-turn" | "errored" | "router" | "v3_shadow";
   concepts: MemoryV2ConceptRowRecord[];
   config: MemoryV2ConfigSnapshot;
 }
@@ -188,7 +192,12 @@ export function getMemoryV2ActivationLogByMessageIds(
   return {
     conversationId: row.conversationId,
     turn: row.turn,
-    mode: row.mode as "context-load" | "per-turn" | "errored" | "router",
+    mode: row.mode as
+      | "context-load"
+      | "per-turn"
+      | "errored"
+      | "router"
+      | "v3_shadow",
     concepts: JSON.parse(row.conceptsJson) as MemoryV2ConceptRowRecord[],
     config: JSON.parse(row.configJson) as MemoryV2ConfigSnapshot,
   };
diff --git a/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts
new file mode 100644
index 00000000000..e9178a524b0
--- /dev/null
+++ b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts
@@ -0,0 +1,292 @@
+/**
+ * Tests for the live-shadow `memoryRetrieval` middleware
+ * (`assistant/src/memory/v3/shadow-middleware.ts`).
+ *
+ * The critical invariant this PR guarantees: with `memory.v3.shadow` off
+ * (the default), the middleware is a byte-for-byte pass-through — it returns
+ * the downstream `MemoryResult` unchanged, never calls the v3 loop, and never
+ * writes a log row. With the flag on, it runs the v3 loop alongside the
+ * default path, logs v3's selection as `mode='v3_shadow'`, and STILL returns
+ * the unchanged downstream result (v2 injected, never v3). A v3 failure is
+ * swallowed and the turn result is unaffected.
+ *
+ * Everything the middleware reaches (config, the v3 loop, the activation-log
+ * store, message/now/everInjected reads) is stubbed via `mock.module` — no
+ * real LLM, no real workspace DB.
+ */
+
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+
+import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js";
+import type { AssistantConfig } from "../../../config/schema.js";
+import type { TrustContext } from "../../../daemon/trust-context.js";
+import type {
+  MemoryArgs,
+  MemoryResult,
+  TurnContext,
+} from "../../../plugins/types.js";
+import type { RecordMemoryV2ActivationLogParams } from "../../memory-v2-activation-log-store.js";
+import type {
+  RetrievalInput,
+  RetrievalOutput,
+} from "../../v2/harness/retriever.js";
+
+mock.module("../../../util/logger.js", () => ({
+  getLogger: () => makeMockLogger(),
+}));
+
+// ── Mutable test doubles, rewired per test ───────────────────────────────
+
+/** Drives `config.memory.v3.{enabled,shadow}` and `historical_pairs`. */
+let v3Enabled = false;
+let v3Shadow = false;
+
+function makeConfig(): AssistantConfig {
+  return {
+    memory: {
+      v2: { router: { historical_pairs: 1 } },
+      v3: { enabled: v3Enabled, shadow: v3Shadow },
+    },
+  } as unknown as AssistantConfig;
+}
+
+/** Captured `runRetrievalLoop` invocations. */
+const loopCalls: Array<{ input: RetrievalInput }> = [];
+/** Behavior of the stubbed loop — overridden per test. */
+let loopImpl: (
+  input: RetrievalInput,
+) => Promise<RetrievalOutput> = async () => ({
+  selectedSlugs: [],
+  sourceBySlug: new Map(),
+  trace: { passes: [] },
+  cost: { ms: 0 },
+  failureReason: null,
+});
+
+/** Captured `recordMemoryV2ActivationLog` calls. */
+const logCalls: RecordMemoryV2ActivationLogParams[] = [];
+
+mock.module("../../../config/loader.js", () => ({
+  getConfig: () => makeConfig(),
+}));
+mock.module("../../../util/platform.js", () => ({
+  getWorkspaceDir: () => "/tmp/shadow-test-workspace",
+}));
+// Chainable drizzle-query stub: every builder method returns the same object
+// and `.all()` yields the seeded rows. The shadow middleware reads recent
+// messages via `db.select(...).from(...).where(...).orderBy(...).limit(...).all()`.
+const messageRows: Array<{ role: string; content: string }> = [
+  {
+    role: "user",
+    content: JSON.stringify([{ type: "text", text: "hello memory" }]),
+  },
+];
+function makeFakeDb(): never {
+  const builder: Record<string, unknown> = {};
+  for (const m of ["select", "from", "where", "orderBy", "limit"]) {
+    builder[m] = () => builder;
+  }
+  builder.all = () => messageRows.slice();
+  return builder as never;
+}
+mock.module("../../db-connection.js", () => ({
+  getDb: () => makeFakeDb(),
+}));
+mock.module("../../v2/now-text.js", () => ({
+  loadNowText: async () => "NOW context",
+}));
+mock.module("../../v2/activation-store.js", () => ({
+  hydrate: async () => ({ everInjected: [{ slug: "old/page", turn: 0 }] }),
+}));
+mock.module("../loop.js", () => ({
+  runRetrievalLoop: async (input: RetrievalInput): Promise<RetrievalOutput> => {
+    loopCalls.push({ input });
+    return loopImpl(input);
+  },
+}));
+mock.module("../../memory-v2-activation-log-store.js", () => ({
+  recordMemoryV2ActivationLog: (params: RecordMemoryV2ActivationLogParams) => {
+    logCalls.push(params);
+  },
+}));
+
+const { memoryV3ShadowMiddleware } = await import("../shadow-middleware.js");
+
+// ── Fixtures ─────────────────────────────────────────────────────────────
+
+const trust: TrustContext = {
+  sourceChannel: "vellum",
+  trustClass: "guardian",
+};
+
+function makeCtx(): TurnContext {
+  return {
+    requestId: "req-shadow-test",
+    conversationId: "conv-shadow",
+    turnIndex: 3,
+    trust,
+  };
+}
+
+function makeArgs(signal?: AbortSignal): MemoryArgs {
+  return {
+    conversationId: "conv-shadow",
+    trustContext: trust,
+    turnIndex: 3,
+    signal: signal ?? new AbortController().signal,
+  };
+}
+
+/** The unchanged downstream (v2/default) result the terminal returns. */
+const DOWNSTREAM_RESULT: MemoryResult = {
+  pkbContent: "pkb",
+  nowContent: "now",
+  memoryGraphBlocks: [{ kind: "default.graph" }],
+};
+
+/** Drain the detached shadow: a `setTimeout(0)` hop, then one microtask. */
+async function flush(): Promise<void> {
+  await new Promise((resolve) => setTimeout(resolve, 0));
+  await Promise.resolve();
+}
+
+beforeEach(() => {
+  v3Enabled = false;
+  v3Shadow = false;
+  loopCalls.length = 0;
+  logCalls.length = 0;
+  loopImpl = async () => ({
+    selectedSlugs: [],
+    sourceBySlug: new Map(),
+    trace: { passes: [] },
+    cost: { ms: 0 },
+    failureReason: null,
+  });
+});
+
+afterEach(() => {
+  mock.restore();
+});
+
+describe("memory-v3 shadow middleware", () => {
+  test("flag off → byte-for-byte pass-through, no v3 call, no log write", async () => {
+    v3Enabled = false;
+    v3Shadow = false;
+    let nextCalls = 0;
+    const args = makeArgs();
+    const result = await memoryV3ShadowMiddleware(
+      args,
+      async (a) => {
+        nextCalls++;
+        // identity is preserved — pass-through hands the same args down.
+        expect(a).toBe(args);
+        return DOWNSTREAM_RESULT;
+      },
+      makeCtx(),
+    );
+
+    // Returns the exact downstream object reference, unchanged.
+    expect(result).toBe(DOWNSTREAM_RESULT);
+    expect(nextCalls).toBe(1);
+
+    await flush();
+    expect(loopCalls.length).toBe(0);
+    expect(logCalls.length).toBe(0);
+  });
+
+  test("enabled but shadow off → still a pure pass-through", async () => {
+    v3Enabled = true;
+    v3Shadow = false;
+    const args = makeArgs();
+    const result = await memoryV3ShadowMiddleware(
+      args,
+      async () => DOWNSTREAM_RESULT,
+      makeCtx(),
+    );
+    expect(result).toBe(DOWNSTREAM_RESULT);
+    await flush();
+    expect(loopCalls.length).toBe(0);
+    expect(logCalls.length).toBe(0);
+  });
+
+  test("flag on → v3 runs, v3_shadow row logged, downstream result unchanged", async () => {
+    v3Enabled = true;
+    v3Shadow = true;
+    loopImpl = async () => ({
+      selectedSlugs: ["topic/a", "topic/b"],
+      sourceBySlug: new Map([["topic/a", "dense"]]),
+      trace: { passes: [] },
+      cost: { ms: 12 },
+      failureReason: null,
+    });
+
+    const args = makeArgs();
+    const result = await memoryV3ShadowMiddleware(
+      args,
+      async () => DOWNSTREAM_RESULT,
+      makeCtx(),
+    );
+
+    // The injected result is the v2/default result, NOT v3.
+    expect(result).toBe(DOWNSTREAM_RESULT);
+
+    await flush();
+
+    // v3 ran exactly once, with a faithfully-built RetrievalInput.
+    expect(loopCalls.length).toBe(1);
+    const input = loopCalls[0]!.input;
+    expect(input.nowText).toBe("NOW context");
+    expect(input.workspaceDir).toBe("/tmp/shadow-test-workspace");
+    expect(input.priorEverInjected).toEqual([{ slug: "old/page", turn: 0 }]);
+    expect(input.recentTurnPairs.length).toBeGreaterThan(0);
+    expect(input.recentTurnPairs.at(-1)?.userMessage).toBe("hello memory");
+
+    // Exactly one v3_shadow row, carrying v3's selection.
+    expect(logCalls.length).toBe(1);
+    const logged = logCalls[0]!;
+    expect(logged.mode).toBe("v3_shadow");
+    expect(logged.conversationId).toBe("conv-shadow");
+    expect(logged.turn).toBe(3);
+    expect(logged.concepts.map((c) => c.slug)).toEqual(["topic/a", "topic/b"]);
+  });
+
+  test("v3 error → logged/swallowed, turn result unaffected, no log row", async () => {
+    v3Enabled = true;
+    v3Shadow = true;
+    loopImpl = async () => {
+      throw new Error("v3 boom");
+    };
+
+    const args = makeArgs();
+    // The middleware must not reject even though the detached shadow throws.
+    const result = await memoryV3ShadowMiddleware(
+      args,
+      async () => DOWNSTREAM_RESULT,
+      makeCtx(),
+    );
+    expect(result).toBe(DOWNSTREAM_RESULT);
+
+    await flush();
+    // Loop was attempted; the failure short-circuited before logging.
+    expect(loopCalls.length).toBe(1);
+    expect(logCalls.length).toBe(0);
+  });
+
+  test("aborted signal → shadow does no v3 work", async () => {
+    v3Enabled = true;
+    v3Shadow = true;
+    const controller = new AbortController();
+    controller.abort();
+
+    const result = await memoryV3ShadowMiddleware(
+      makeArgs(controller.signal),
+      async () => DOWNSTREAM_RESULT,
+      makeCtx(),
+    );
+    expect(result).toBe(DOWNSTREAM_RESULT);
+
+    await flush();
+    expect(loopCalls.length).toBe(0);
+    expect(logCalls.length).toBe(0);
+  });
+});
diff --git a/assistant/src/memory/v3/shadow-middleware.ts b/assistant/src/memory/v3/shadow-middleware.ts
new file mode 100644
index 00000000000..5d5396f2a18
--- /dev/null
+++ b/assistant/src/memory/v3/shadow-middleware.ts
@@ -0,0 +1,305 @@
+/**
+ * Memory v3 — live-shadow `memoryRetrieval` middleware.
+ *
+ * Registered unconditionally into the `memoryRetrieval` pipeline, but inert
+ * unless BOTH `config.memory.v3.enabled` and `config.memory.v3.shadow` are on.
+ * When inert it is a byte-for-byte pass-through: it returns `next(args)`
+ * verbatim and performs zero extra work (no v3 call, no DB read, no log write).
+ *
+ * When active, it:
+ *   1. Returns the real (v2/default) `MemoryResult` from `next(args)` promptly —
+ *      the injected context is ALWAYS the v2 result, never v3.
+ *   2. Kicks off the v3 retrieval loop DETACHED (not awaited on the path that
+ *      returns the result), so the shadow run can never block or slow the turn.
+ *   3. Logs v3's selection set to `memory_v2_activation_logs` with
+ *      `mode = "v3_shadow"`. The harness oracle filters `mode='router'`, so
+ *      shadow rows never pollute it; the inspector can still surface them.
+ *
+ * The shadow build mirrors the inputs the v2 router receives (recent turn
+ * pairs, NOW context, prior-ever-injected slugs, config) so its recall is
+ * measured against the same situational context the live path saw. Failures
+ * are swallowed with a warn — the shadow is observational only and must never
+ * affect the live turn.
+ */
+
+import { desc, eq } from "drizzle-orm";
+
+import { getConfig } from "../../config/loader.js";
+import { registerPlugin } from "../../plugins/registry.js";
+import {
+  type MemoryArgs,
+  type MemoryResult,
+  type Middleware,
+  type Plugin,
+  PluginExecutionError,
+} from "../../plugins/types.js";
+import type { ContentBlock } from "../../providers/types.js";
+import { getLogger } from "../../util/logger.js";
+import { getWorkspaceDir } from "../../util/platform.js";
+import type { DrizzleDb } from "../db-connection.js";
+import { getDb } from "../db-connection.js";
+import {
+  type MemoryV2ConceptRowRecord,
+  type MemoryV2ConfigSnapshot,
+  recordMemoryV2ActivationLog,
+} from "../memory-v2-activation-log-store.js";
+import { messages } from "../schema.js";
+import { hydrate } from "../v2/activation-store.js";
+import type { RetrievalInput } from "../v2/harness/retriever.js";
+import { loadNowText } from "../v2/now-text.js";
+import type { RouterTurnPair } from "../v2/router.js";
+import type { EverInjectedEntry } from "../v2/types.js";
+import { runRetrievalLoop } from "./loop.js";
+
+const log = getLogger("memory-v3-shadow");
+
+/**
+ * Extract the recent (assistant, user) turn pairs from a conversation's
+ * message list, newest-pair-last, capped at `k`. Mirrors production
+ * `extractRecentTurnPairs` in `conversation-graph-memory.ts` (and its harness
+ * twin in `replay-input.ts`) so the shadow's `recentTurnPairs` matches what the
+ * live router was fed.
+ */
+function extractRecentTurnPairs(
+  msgs: ReadonlyArray<{ role: string; content: ContentBlock[] }>,
+  k: number,
+): RouterTurnPair[] {
+  const textOf = (blocks: ContentBlock[]): string => {
+    const parts: string[] = [];
+    for (const b of blocks) if (b.type === "text") parts.push(b.text);
+    return parts.join(" ");
+  };
+
+  const collected: RouterTurnPair[] = [];
+  let openUser: string | null = null;
+  // Walk newest-to-oldest: a user message opens a pair, the assistant before it closes it.
+  for (let idx = msgs.length - 1; idx >= 0 && collected.length < k; idx--) {
+    const msg = msgs[idx]!;
+    if (openUser === null) {
+      if (msg.role === "user") openUser = textOf(msg.content);
+      continue;
+    }
+    if (msg.role !== "assistant") continue;
+    collected.unshift({
+      assistantMessage: textOf(msg.content),
+      userMessage: openUser,
+    });
+    openUser = null;
+  }
+  if (openUser !== null && collected.length < k) {
+    collected.unshift({ assistantMessage: "", userMessage: openUser });
+  }
+  if (collected.length === 0) {
+    collected.push({ assistantMessage: "", userMessage: "" });
+  }
+  return collected;
+}
+
+/** Parse persisted content JSON; malformed or non-array input yields []. */
+function parseContent(raw: string): ContentBlock[] {
+  try {
+    const parsed = JSON.parse(raw);
+    return Array.isArray(parsed) ? (parsed as ContentBlock[]) : [];
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Load the most recent messages for a conversation, oldest-first, bounded to a
+ * small generous multiple of `historicalPairs`. Pair extraction only needs the
+ * tail, so a bounded `LIMIT` query avoids loading an entire (potentially
+ * multi-GB) conversation on every shadow turn — mirrors the harness's bounded
+ * fetch in `replay-input.ts`.
+ */
+function loadRecentMessages(
+  db: DrizzleDb,
+  conversationId: string,
+  historicalPairs: number,
+): Array<{ role: string; content: ContentBlock[] }> {
+  const newestFirst = db
+    .select({ role: messages.role, content: messages.content })
+    .from(messages)
+    .where(eq(messages.conversationId, conversationId))
+    .orderBy(desc(messages.createdAt), desc(messages.id))
+    .limit(Math.max(20, historicalPairs * 12))
+    .all();
+  const oldestFirst = newestFirst.reverse();
+  return oldestFirst.map((row) => {
+    return { role: row.role, content: parseContent(row.content) };
+  });
+}
+
+/**
+ * Empty config snapshot for shadow log rows. The activation-state values are
+ * meaningless for a v3 selection (it computes no spreading-activation scores),
+ * so they are zeroed — exactly as the v2 router-mode rows do.
+ */
+const SHADOW_CONFIG_SNAPSHOT: MemoryV2ConfigSnapshot = {
+  d: 0,
+  c_user: 0,
+  c_assistant: 0,
+  c_now: 0,
+  k: 0,
+  hops: 0,
+  top_k: 0,
+  epsilon: 0,
+};
+
+/**
+ * Build the concept rows logged for a v3 shadow selection: one row per slug in
+ * `selectedSlugs`, in input order. Every activation/similarity field is 0 and
+ * `inRerankPool` is false because the shadow records no scores. Rows are
+ * tagged `source: "router"` / `status: "injected"`; it is the enclosing log
+ * row's `mode='v3_shadow'` that marks them as shadow telemetry.
+ */
+function buildShadowConceptRows(
+  selectedSlugs: readonly string[],
+): MemoryV2ConceptRowRecord[] {
+  return selectedSlugs.map((slug) => ({
+    slug,
+    finalActivation: 0,
+    ownActivation: 0,
+    priorActivation: 0,
+    simUser: 0,
+    simAssistant: 0,
+    simNow: 0,
+    simUserRerankBoost: 0,
+    simAssistantRerankBoost: 0,
+    inRerankPool: false,
+    spreadContribution: 0,
+    source: "router",
+    status: "injected",
+  }));
+}
+
+/**
+ * Run the v3 retrieval loop for the shadow and log its selection. Best-effort:
+ * any failure is logged and swallowed; `signal` is checked between stages.
+ * Yields one microtask first so no synchronous work runs ahead of the caller.
+ */
+async function runShadowAndLog(
+  args: MemoryArgs,
+  signal: AbortSignal,
+): Promise<void> {
+  try {
+    if (signal.aborted) return;
+    // An async function runs synchronously up to its first `await`, so yield
+    // one microtask here: otherwise the blocking message query below executes
+    // before the caller has even invoked `next(args)` for the live turn.
+    await Promise.resolve();
+
+    const config = getConfig();
+    const workspaceDir = getWorkspaceDir();
+    const db = getDb();
+
+    const historicalPairs = config.memory.v2.router.historical_pairs;
+    // Bounded tail of the conversation, grouped into turn pairs.
+    const recentTurnPairs = extractRecentTurnPairs(
+      loadRecentMessages(db, args.conversationId, historicalPairs),
+      historicalPairs,
+    );
+
+    const nowText = await loadNowText(workspaceDir);
+
+    let priorEverInjected: readonly EverInjectedEntry[] = [];
+    try {
+      const state = await hydrate(db, args.conversationId);
+      priorEverInjected = state?.everInjected ?? [];
+    } catch (err) {
+      log.warn(
+        { err, conversationId: args.conversationId },
+        "v3 shadow: failed to hydrate prior-ever-injected; continuing with empty set",
+      );
+    }
+
+    if (signal.aborted) return;
+
+    const input: RetrievalInput = {
+      workspaceDir,
+      recentTurnPairs,
+      nowText,
+      priorEverInjected,
+      config,
+      signal,
+    };
+
+    const output = await runRetrievalLoop(input, {
+      db,
+      conversationId: args.conversationId,
+      turn: args.turnIndex,
+    });
+
+    if (signal.aborted) return;
+
+    recordMemoryV2ActivationLog({
+      conversationId: args.conversationId,
+      turn: args.turnIndex,
+      mode: "v3_shadow",
+      concepts: buildShadowConceptRows(output.selectedSlugs),
+      config: SHADOW_CONFIG_SNAPSHOT,
+    });
+  } catch (err) {
+    log.warn(
+      { err, conversationId: args.conversationId, turn: args.turnIndex },
+      "v3 shadow retrieval failed; live turn unaffected",
+    );
+  }
+}
+
+/**
+ * Live-shadow `memoryRetrieval` middleware.
+ *
+ * Flag-gated INSIDE the middleware (per-turn, live-toggle): when v3 shadow is
+ * off it is a pure pass-through. When on, it fires the v3 loop detached and
+ * returns the unchanged downstream (v2) result immediately.
+ */
+export const memoryV3ShadowMiddleware: Middleware<MemoryArgs, MemoryResult> =
+  async function memoryV3Shadow(args, next) {
+    const v3 = getConfig().memory.v3;
+    if (!v3.enabled || !v3.shadow) {
+      // Inert: byte-for-byte pass-through, zero extra work.
+      return next(args);
+    }
+
+    // Detached — never awaited on the path that returns the result, so the
+    // shadow can neither block nor slow the live turn. Errors are swallowed
+    // inside `runShadowAndLog`.
+    void runShadowAndLog(args, args.signal);
+
+    return next(args);
+  };
+
+/**
+ * First-party plugin contributing the live-shadow `memoryRetrieval`
+ * middleware. Registered unconditionally by the plugin bootstrap (it is inert
+ * unless both v3 flags are on), so the registration is always present but does
+ * zero work in the default (flags-off) configuration.
+ */
+export const memoryV3ShadowPlugin: Plugin = {
+  manifest: {
+    name: "memory-v3-shadow",
+    version: "0.0.1",
+  },
+  middleware: {
+    memoryRetrieval: memoryV3ShadowMiddleware,
+  },
+};
+
+// Module-load side effect: register the shadow plugin at import time so the
+// registry is populated even in tests that skip `bootstrapPlugins()`, matching
+// the first-party `default-*` plugins. Idempotent via the swallowed
+// duplicate-name check (the defaults aggregator also lists this plugin).
+try {
+  registerPlugin(memoryV3ShadowPlugin);
+} catch (err) {
+  if (
+    err instanceof PluginExecutionError &&
+    err.message.includes("already registered")
+  ) {
+    // already registered — expected when both the defaults aggregator and the
+    // direct module import run in the same process.
+  } else {
+    throw err;
+  }
+}
diff --git a/assistant/src/plugins/defaults/index.ts b/assistant/src/plugins/defaults/index.ts
index 34fecc1b047..282e4a8db8f 100644
--- a/assistant/src/plugins/defaults/index.ts
+++ b/assistant/src/plugins/defaults/index.ts
@@ -24,6 +24,7 @@
  * chain) does not trip a TDZ.
  */
 
+import { memoryV3ShadowPlugin } from "../../memory/v3/shadow-middleware.js";
 import { registerPlugin, resetPluginRegistryForTests } from "../registry.js";
 import { type Plugin, PluginExecutionError } from "../types.js";
 import { defaultCircuitBreakerPlugin } from "./circuit-breaker.js";
@@ -60,6 +61,11 @@ function getAllDefaultPlugins(): readonly Plugin[] {
     defaultEmptyResponsePlugin,
     defaultToolErrorPlugin,
     defaultMemoryRetrievalPlugin,
+    // Live-shadow v3 retrieval. Always registered; inert unless both
+    // `memory.v3.enabled` and `memory.v3.shadow` are on (gated inside the
+    // middleware). Ordered after the default so the default terminal still
+    // produces the injected (v2) `MemoryResult`.
+    memoryV3ShadowPlugin,
     defaultInjectorsPlugin,
     defaultTokenEstimatePlugin,
     defaultOverflowReducePlugin,

From afa8d2840dd94dc9b5250302b5a05016221e1720 Mon Sep 17 00:00:00 2001
From: Vellum Assistant <assistant@vellum.ai>
Date: Mon, 25 May 2026 12:15:34 -0500
Subject: [PATCH 20/21] fix(memory-v3): null-safe shadow gate when memory.v3
 config is absent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The live-shadow middleware runs on every turn and read `config.memory.v3.enabled`
unguarded. Configs built outside the Zod schema (agent-loop test fixtures) have no
`memory.v3` block, so the gate threw `TypeError: undefined is not an object` and
aborted the turn — cascading across ~13 agent-loop test files. Guard with optional
chaining (matches the loop's existing `write?.coactivation` pattern) and add a
regression test for the absent-v3 config.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../v3/__tests__/shadow-middleware.test.ts    | 22 ++++++++++++++++++-
 assistant/src/memory/v3/shadow-middleware.ts  |  2 +-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts
index e9178a524b0..0633ab8559d 100644
--- a/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts
+++ b/assistant/src/memory/v3/__tests__/shadow-middleware.test.ts
@@ -40,12 +40,15 @@ mock.module("../../../util/logger.js", () => ({
 /** Drives `config.memory.v3.{enabled,shadow}` and `historical_pairs`. */
 let v3Enabled = false;
 let v3Shadow = false;
+/** When false, omit the `memory.v3` block entirely (mirrors configs built
+ * outside the Zod schema, e.g. agent-loop test fixtures). */
+let v3Present = true;
 
 function makeConfig(): AssistantConfig {
   return {
     memory: {
       v2: { router: { historical_pairs: 1 } },
-      v3: { enabled: v3Enabled, shadow: v3Shadow },
+      ...(v3Present ? { v3: { enabled: v3Enabled, shadow: v3Shadow } } : {}),
     },
   } as unknown as AssistantConfig;
 }
@@ -153,6 +156,7 @@ async function flush(): Promise<void> {
 beforeEach(() => {
   v3Enabled = false;
   v3Shadow = false;
+  v3Present = true;
   loopCalls.length = 0;
   logCalls.length = 0;
   loopImpl = async () => ({
@@ -209,6 +213,22 @@ describe("memory-v3 shadow middleware", () => {
     expect(logCalls.length).toBe(0);
   });
 
+  test("v3 config block absent → pass-through, no throw, no v3 call", async () => {
+    // Reproduces the agent-loop test fixtures (and any config built outside the
+    // Zod schema) where `memory.v3` is undefined. The gate must not throw.
+    v3Present = false;
+    const args = makeArgs();
+    const result = await memoryV3ShadowMiddleware(
+      args,
+      async () => DOWNSTREAM_RESULT,
+      makeCtx(),
+    );
+    expect(result).toBe(DOWNSTREAM_RESULT);
+    await flush();
+    expect(loopCalls.length).toBe(0);
+    expect(logCalls.length).toBe(0);
+  });
+
   test("flag on → v3 runs, v3_shadow row logged, downstream result unchanged", async () => {
     v3Enabled = true;
     v3Shadow = true;
diff --git a/assistant/src/memory/v3/shadow-middleware.ts b/assistant/src/memory/v3/shadow-middleware.ts
index 5d5396f2a18..af6ed9e28d5 100644
--- a/assistant/src/memory/v3/shadow-middleware.ts
+++ b/assistant/src/memory/v3/shadow-middleware.ts
@@ -257,7 +257,7 @@ async function runShadowAndLog(
 export const memoryV3ShadowMiddleware: Middleware<MemoryArgs, MemoryResult> =
   async function memoryV3Shadow(args, next) {
     const v3 = getConfig().memory.v3;
-    if (!v3.enabled || !v3.shadow) {
+    if (!v3?.enabled || !v3?.shadow) {
       // Inert: byte-for-byte pass-through, zero extra work.
       return next(args);
     }

From 700bdd52ac4c367f4b1c80b515ad1b699746bf4f Mon Sep 17 00:00:00 2001
From: Vellum Assistant <assistant@vellum.ai>
Date: Mon, 25 May 2026 12:22:11 -0500
Subject: [PATCH 21/21] fix(memory-v3): add route policies for
 memory/v3/validate + tree

PR #31983 registered the two read-only v3 routes but never added their
ACTOR_ENDPOINTS entries in route-policy.ts; the per-PR run skipped CI so the
route-policy coverage guard never ran. Add both as settings.read (mirroring the
v2 read routes), satisfying guard-tests.test.ts.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 assistant/src/runtime/auth/route-policy.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/assistant/src/runtime/auth/route-policy.ts b/assistant/src/runtime/auth/route-policy.ts
index d48e651733a..efd67b6f2ef 100644
--- a/assistant/src/runtime/auth/route-policy.ts
+++ b/assistant/src/runtime/auth/route-policy.ts
@@ -487,6 +487,8 @@ const ACTOR_ENDPOINTS: Array<{ endpoint: string; scopes: Scope[] }> = [
     scopes: ["settings.read"],
   },
   { endpoint: "memory/v2/now-text:GET", scopes: ["settings.read"] },
+  { endpoint: "memory/v3/validate:POST", scopes: ["settings.read"] },
+  { endpoint: "memory/v3/tree:POST", scopes: ["settings.read"] },
 
   // Trust rule listing
   { endpoint: "trust-rules/manage:GET", scopes: ["settings.read"] },