From c0c464cbbd0a1728038f0d2941a93618020294ec Mon Sep 17 00:00:00 2001
From: Vellum Assistant
Date: Mon, 25 May 2026 01:40:00 -0500
Subject: [PATCH] feat(memory-v3): config schema + cheap/capable LLM call sites

---
 assistant/src/__tests__/llm-resolver.test.ts  |  86 +++++++++++--
 assistant/src/config/call-site-defaults.ts    |   4 +
 .../schemas/__tests__/memory-v2.test.ts       | 116 +++++++++++++++-
 .../src/config/schemas/call-site-catalog.ts   |  21 ++++
 assistant/src/config/schemas/llm.ts           |   3 +
 assistant/src/config/schemas/memory-v2.ts     | 121 ++++++++++++++++++
 assistant/src/config/schemas/memory.ts        |   3 +-
 7 files changed, 343 insertions(+), 11 deletions(-)

diff --git a/assistant/src/__tests__/llm-resolver.test.ts b/assistant/src/__tests__/llm-resolver.test.ts
index 9b0e9ad626e..86feaf8c2b6 100644
--- a/assistant/src/__tests__/llm-resolver.test.ts
+++ b/assistant/src/__tests__/llm-resolver.test.ts
@@ -2,7 +2,10 @@ import { describe, expect, test } from "bun:test";
 
 import { z } from "zod";
 
-import { resolveCallSiteConfig, resolveDefaultProfileKey } from "../config/llm-resolver.js";
+import {
+  resolveCallSiteConfig,
+  resolveDefaultProfileKey,
+} from "../config/llm-resolver.js";
 import { type LLMCallSite, LLMSchema } from "../config/schemas/llm.js";
 
 const fullDefault = {
@@ -690,13 +693,28 @@ describe("resolveCallSiteConfig", () => {
   });
 
   const callSites: LLMCallSite[] = [
-    "mainAgent", "subagentSpawn", "heartbeatAgent", "filingAgent",
-    "compactionAgent", "analyzeConversation", "callAgent",
-    "memoryExtraction", "memoryConsolidation", "memoryRetrieval",
-    "memoryRouter", "recall", "conversationSummarization",
-    "commitMessage", "conversationStarters", "replySuggestion",
-    "conversationTitle", "identityIntro", "emptyStateGreeting",
-    "notificationDecision", "interactionClassifier", "inference",
+    "mainAgent",
+    "subagentSpawn",
+    "heartbeatAgent",
+    "filingAgent",
+    "compactionAgent",
+    "analyzeConversation",
+    "callAgent",
+    "memoryExtraction",
+    "memoryConsolidation",
+    "memoryRetrieval",
+    "memoryRouter",
+    "recall",
+    "conversationSummarization",
+    "commitMessage",
+    "conversationStarters",
+    "replySuggestion",
+    "conversationTitle",
+    "identityIntro",
+    "emptyStateGreeting",
+    "notificationDecision",
+    "interactionClassifier",
+    "inference",
   ];
 
   for (const cs of callSites) {
@@ -778,7 +796,10 @@ describe("resolveCallSiteConfig", () => {
         provider_connection: "anthropic-managed",
       },
       profiles: {
-        fireworks: { provider: "fireworks", model: "accounts/fireworks/models/kimi-k2p5" },
+        fireworks: {
+          provider: "fireworks",
+          model: "accounts/fireworks/models/kimi-k2p5",
+        },
       },
       activeProfile: "fireworks",
     });
@@ -874,3 +895,50 @@ describe("resolveDefaultProfileKey", () => {
     );
   });
 });
+
+describe("memory v3 call sites resolve through the standard resolver", () => {
+  const llm = LLMSchema.parse({
+    default: fullDefault,
+    profiles: {
+      balanced: { provider: "anthropic", model: "claude-sonnet-4-7" },
+      "cost-optimized": {
+        provider: "anthropic",
+        model: "claude-haiku-4-5-20251001",
+      },
+    },
+  });
+
+  test("memoryV3Filter and memoryV3Descent resolve to the cost-optimized profile", () => {
+    expect(resolveDefaultProfileKey("memoryV3Filter", llm)).toBe(
+      "cost-optimized",
+    );
+    expect(resolveDefaultProfileKey("memoryV3Descent", llm)).toBe(
+      "cost-optimized",
+    );
+    expect(resolveCallSiteConfig("memoryV3Filter", llm).model).toBe(
+      "claude-haiku-4-5-20251001",
+    );
+    expect(resolveCallSiteConfig("memoryV3Descent", llm).model).toBe(
+      "claude-haiku-4-5-20251001",
+    );
+  });
+
+  test("memoryV3Gate resolves to the balanced (capable) profile", () => {
+    expect(resolveDefaultProfileKey("memoryV3Gate", llm)).toBe("balanced");
+    expect(resolveCallSiteConfig("memoryV3Gate", llm).model).toBe(
+      "claude-sonnet-4-7",
+    );
+  });
+
+  test("v3 call sites are addressable as call-site override keys", () => {
+    const overridden = LLMSchema.parse({
+      default: fullDefault,
+      callSites: {
+        memoryV3Gate: { model: "claude-opus-4-7" },
+      },
+    });
+    expect(resolveCallSiteConfig("memoryV3Gate", overridden).model).toBe(
+      "claude-opus-4-7",
+    );
+  });
+});
diff --git a/assistant/src/config/call-site-defaults.ts b/assistant/src/config/call-site-defaults.ts
index 36fbe925750..e988dbe2e9a 100644
--- a/assistant/src/config/call-site-defaults.ts
+++ b/assistant/src/config/call-site-defaults.ts
@@ -47,6 +47,10 @@ export const CALL_SITE_DEFAULTS: Record = {
   memoryV2Migration: { profile: "cost-optimized" },
   memoryV2Sweep: { profile: "cost-optimized" },
   memoryV2Consolidation: { profile: "balanced" },
+  // memory v3: cheap filter + descent, capable gate.
+  memoryV3Filter: { profile: "cost-optimized" },
+  memoryV3Descent: { profile: "cost-optimized" },
+  memoryV3Gate: { profile: "balanced" },
   conversationSummarization: { profile: "cost-optimized" },
   conversationTitle: { profile: "cost-optimized" },
   approvalCopy: { profile: "cost-optimized" },
diff --git a/assistant/src/config/schemas/__tests__/memory-v2.test.ts b/assistant/src/config/schemas/__tests__/memory-v2.test.ts
index ef55cca2c9c..5ca74e76bba 100644
--- a/assistant/src/config/schemas/__tests__/memory-v2.test.ts
+++ b/assistant/src/config/schemas/__tests__/memory-v2.test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, test } from "bun:test";
 
 import { MemoryConfigSchema } from "../memory.js";
-import { MemoryV2ConfigSchema } from "../memory-v2.js";
+import { MemoryV2ConfigSchema, MemoryV3ConfigSchema } from "../memory-v2.js";
 
 describe("MemoryV2ConfigSchema", () => {
   test("parses an empty object to documented defaults", () => {
@@ -212,6 +212,120 @@ describe("MemoryV2ConfigSchema", () => {
   });
 });
 
+describe("MemoryV3ConfigSchema", () => {
+  test("parses an empty object to documented defaults", () => {
+    const parsed = MemoryV3ConfigSchema.parse({});
+    expect(parsed).toEqual({
+      enabled: false,
+      shadow: false,
+      passCap: 3,
+      breadthBudget: 6,
+      maxDepth: 6,
+      denseQuota: { activeDomain: 30, offDomain: 8 },
+      lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true },
+      ks: [5, 10, 25, 50],
+    });
+  });
+
+  test("parses undefined to the same defaults (top-level .default)", () => {
+    expect(MemoryV3ConfigSchema.parse(undefined)).toEqual(
+      MemoryV3ConfigSchema.parse({}),
+    );
+  });
+
+  test("defaults to disabled for backwards compatibility", () => {
+    expect(MemoryV3ConfigSchema.parse({}).enabled).toBe(false);
+    expect(MemoryV3ConfigSchema.parse({}).shadow).toBe(false);
+  });
+
+  test("accepts explicit scalar overrides", () => {
+    const parsed = MemoryV3ConfigSchema.parse({
+      enabled: true,
+      shadow: true,
+      passCap: 5,
+      breadthBudget: 10,
+      maxDepth: 8,
+    });
+    expect(parsed.enabled).toBe(true);
+    expect(parsed.shadow).toBe(true);
+    expect(parsed.passCap).toBe(5);
+    expect(parsed.breadthBudget).toBe(10);
+    expect(parsed.maxDepth).toBe(8);
+  });
+
+  test("accepts explicit denseQuota override", () => {
+    const parsed = MemoryV3ConfigSchema.parse({
+      denseQuota: { activeDomain: 50, offDomain: 12 },
+    });
+    expect(parsed.denseQuota).toEqual({ activeDomain: 50, offDomain: 12 });
+  });
+
+  test("accepts a partial denseQuota override and defaults the rest", () => {
+    const parsed = MemoryV3ConfigSchema.parse({
+      denseQuota: { activeDomain: 50 },
+    });
+    expect(parsed.denseQuota).toEqual({ activeDomain: 50, offDomain: 8 });
+  });
+
+  test("accepts a partial lanes override and defaults the rest", () => {
+    const parsed = MemoryV3ConfigSchema.parse({ lanes: { dense: false } });
+    expect(parsed.lanes).toEqual({
+      hot: true,
+      sparse: true,
+      dense: false,
+      tree: true,
+      edges: true,
+    });
+  });
+
+  test("accepts an explicit ks override", () => {
+    const parsed = MemoryV3ConfigSchema.parse({ ks: [1, 3, 7] });
+    expect(parsed.ks).toEqual([1, 3, 7]);
+  });
+
+  test("rejects a non-boolean enabled", () => {
+    expect(() => MemoryV3ConfigSchema.parse({ enabled: "yes" })).toThrow();
+  });
+
+  test("rejects a non-integer passCap", () => {
+    expect(() => MemoryV3ConfigSchema.parse({ passCap: 2.5 })).toThrow();
+  });
+
+  test("rejects non-number ks entries", () => {
+    expect(() => MemoryV3ConfigSchema.parse({ ks: ["a"] })).toThrow();
+  });
+});
+
+describe("MemoryConfigSchema integration with v3 block", () => {
+  test("includes a v3 block defaulting to disabled when v3 is omitted", () => {
+    const parsed = MemoryConfigSchema.parse({});
+    expect(parsed.v3).toBeDefined();
+    expect(parsed.v3.enabled).toBe(false);
+    expect(parsed.v3.shadow).toBe(false);
+    expect(parsed.v3.passCap).toBe(3);
+    expect(parsed.v3.lanes.dense).toBe(true);
+    expect(parsed.v3.ks).toEqual([5, 10, 25, 50]);
+  });
+
+  test("leaves pre-existing configs (no v3 key) otherwise unchanged", () => {
+    // A config authored before v3 existed parses fine and its v2 block is
+    // untouched; the v3 block is purely additive.
+    const parsed = MemoryConfigSchema.parse({ v2: { top_k: 50 } });
+    expect(parsed.v2.top_k).toBe(50);
+    expect(parsed.v3.enabled).toBe(false);
+  });
+
+  test("propagates v3 overrides through MemoryConfigSchema", () => {
+    const parsed = MemoryConfigSchema.parse({
+      v3: { enabled: true, passCap: 4 },
+    });
+    expect(parsed.v3.enabled).toBe(true);
+    expect(parsed.v3.passCap).toBe(4);
+    // Non-overridden v3 fields keep their defaults.
+    expect(parsed.v3.maxDepth).toBe(6);
+  });
+});
+
 describe("MemoryConfigSchema integration with v2 block", () => {
   test("parses an empty memory config and includes a v2 block with defaults", () => {
     const parsed = MemoryConfigSchema.parse({});
diff --git a/assistant/src/config/schemas/call-site-catalog.ts b/assistant/src/config/schemas/call-site-catalog.ts
index 5552889d7cb..7d0417b4f1f 100644
--- a/assistant/src/config/schemas/call-site-catalog.ts
+++ b/assistant/src/config/schemas/call-site-catalog.ts
@@ -121,6 +121,27 @@ const CATALOG_RECORD: CatalogRecord = {
       "Selects which concept pages to inject for the next agent turn by routing over a cached page index.",
     domain: "memory",
   },
+  memoryV3Filter: {
+    id: "memoryV3Filter",
+    displayName: "Memory V3 Filter",
+    description:
+      "Cheaply filters the V3 multi-lane candidate set before descent.",
+    domain: "memory",
+  },
+  memoryV3Descent: {
+    id: "memoryV3Descent",
+    displayName: "Memory V3 Descent",
+    description:
+      "Drives the V3 bounded-descent traversal through the memory tree.",
+    domain: "memory",
+  },
+  memoryV3Gate: {
+    id: "memoryV3Gate",
+    displayName: "Memory V3 Gate",
+    description:
+      "Final capable gate that decides which V3 candidates are injected for the next turn.",
+    domain: "memory",
+  },
   memoryV2Consolidation: {
     id: "memoryV2Consolidation",
     displayName: "Memory V2 Consolidation",
diff --git a/assistant/src/config/schemas/llm.ts b/assistant/src/config/schemas/llm.ts
index 10103b86b1d..e6a53c85fae 100644
--- a/assistant/src/config/schemas/llm.ts
+++ b/assistant/src/config/schemas/llm.ts
@@ -49,6 +49,9 @@ export const LLMCallSiteEnum = z.enum([
   "memoryV2Migration",
   "memoryV2Sweep",
   "memoryRouter",
+  "memoryV3Filter",
+  "memoryV3Descent",
+  "memoryV3Gate",
   "memoryV2Consolidation",
   "memoryRetrospective",
   "recall",
diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts
index 45a076a778e..11360a89e31 100644
--- a/assistant/src/config/schemas/memory-v2.ts
+++ b/assistant/src/config/schemas/memory-v2.ts
@@ -388,3 +388,124 @@ export const MemoryV2ConfigSchema = z
   });
 
 export type MemoryV2Config = z.infer<typeof MemoryV2ConfigSchema>;
+
+/**
+ * Memory v3 (multi-lane, bounded-descent retrieval) configuration.
+ *
+ * Additive scaffolding only — defaults to `enabled: false` so existing
+ * configs are untouched and the v3 retrieval loop stays inert until later
+ * PRs wire it up. Every field carries a default and the whole block is
+ * `.default(...)`-wrapped so a config that omits `memory.v3` entirely still
+ * parses to these documented defaults.
+ */
+export const MemoryV3ConfigSchema = z
+  .object({
+    enabled: z
+      .boolean({ error: "memory.v3.enabled must be a boolean" })
+      .default(false)
+      .describe(
+        "Whether the v3 memory subsystem (multi-lane bounded-descent retrieval) is enabled. Off by default until the v3 loop is wired up.",
+      ),
+    shadow: z
+      .boolean({ error: "memory.v3.shadow must be a boolean" })
+      .default(false)
+      .describe(
+        "Live-shadow toggle: when on, the v3 retrieval loop runs alongside the active path for comparison without affecting injected context. Consumed by a later PR.",
+      ),
+    passCap: z
+      .number({ error: "memory.v3.passCap must be a number" })
+      .int("memory.v3.passCap must be an integer")
+      .default(3)
+      .describe(
+        "Maximum number of retrieval passes (router → descent rounds) the v3 loop may run per turn.",
+      ),
+    breadthBudget: z
+      .number({ error: "memory.v3.breadthBudget must be a number" })
+      .int("memory.v3.breadthBudget must be an integer")
+      .default(6)
+      .describe(
+        "Per-pass breadth budget — the number of frontier candidates the v3 loop may expand at each step.",
+      ),
+    maxDepth: z
+      .number({ error: "memory.v3.maxDepth must be a number" })
+      .int("memory.v3.maxDepth must be an integer")
+      .default(6)
+      .describe(
+        "Maximum descent depth the v3 loop traverses through the memory tree before stopping.",
+      ),
+    denseQuota: z
+      .object({
+        activeDomain: z
+          .number({
+            error: "memory.v3.denseQuota.activeDomain must be a number",
+          })
+          .default(30)
+          .describe(
+            "Dense-lane candidate quota allocated to the conversation's active domain.",
+          ),
+        offDomain: z
+          .number({ error: "memory.v3.denseQuota.offDomain must be a number" })
+          .default(8)
+          .describe(
+            "Dense-lane candidate quota allocated to off-domain (exploratory) retrieval.",
+          ),
+      })
+      .default({ activeDomain: 30, offDomain: 8 })
+      .describe(
+        "Dense-lane candidate quotas split between the active domain and off-domain exploration.",
+      ),
+    lanes: z
+      .object({
+        hot: z
+          .boolean()
+          .default(true)
+          .describe("Whether the hot (recently-touched) retrieval lane is on."),
+        sparse: z
+          .boolean()
+          .default(true)
+          .describe("Whether the sparse (BM25-style keyword) lane is on."),
+        dense: z
+          .boolean()
+          .default(true)
+          .describe("Whether the dense (embedding-similarity) lane is on."),
+        tree: z
+          .boolean()
+          .default(true)
+          .describe("Whether the tree (hierarchical descent) lane is on."),
+        edges: z
+          .boolean()
+          .default(true)
+          .describe("Whether the edges (graph-adjacency) lane is on."),
+      })
+      .default({
+        hot: true,
+        sparse: true,
+        dense: true,
+        tree: true,
+        edges: true,
+      })
+      .describe(
+        "Per-lane on/off toggles for the v3 multi-lane retrieval fanout. All lanes on by default.",
+      ),
+    ks: z
+      .array(z.number({ error: "memory.v3.ks entries must be numbers" }))
+      .default([5, 10, 25, 50])
+      .describe(
+        "Evaluation top-K cutoffs the v3 loop reports metrics at (e.g. recall@K).",
+      ),
+  })
+  .default({
+    enabled: false,
+    shadow: false,
+    passCap: 3,
+    breadthBudget: 6,
+    maxDepth: 6,
+    denseQuota: { activeDomain: 30, offDomain: 8 },
+    lanes: { hot: true, sparse: true, dense: true, tree: true, edges: true },
+    ks: [5, 10, 25, 50],
+  })
+  .describe(
+    "Memory v3 — multi-lane bounded-descent retrieval. Additive scaffolding, disabled by default.",
+  );
+
+export type MemoryV3Config = z.infer<typeof MemoryV3ConfigSchema>;
diff --git a/assistant/src/config/schemas/memory.ts b/assistant/src/config/schemas/memory.ts
index 4a3822ebb06..4ba15e3b044 100644
--- a/assistant/src/config/schemas/memory.ts
+++ b/assistant/src/config/schemas/memory.ts
@@ -16,7 +16,7 @@ import {
   MemorySegmentationConfigSchema,
   QdrantConfigSchema,
 } from "./memory-storage.js";
-import { MemoryV2ConfigSchema } from "./memory-v2.js";
+import { MemoryV2ConfigSchema, MemoryV3ConfigSchema } from "./memory-v2.js";
 
 export const MemoryConfigSchema = z
   .object({
@@ -50,6 +50,7 @@ export const MemoryConfigSchema = z
       MemorySummarizationConfigSchema.parse({}),
     ),
     v2: MemoryV2ConfigSchema.default(MemoryV2ConfigSchema.parse({})),
+    v3: MemoryV3ConfigSchema.default(MemoryV3ConfigSchema.parse({})),
     retrospective: MemoryRetrospectiveConfigSchema.default(
       MemoryRetrospectiveConfigSchema.parse({}),
     ),