diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts index c9628ee6d2f..f40040cd7d6 100644 --- a/assistant/src/config/schemas/memory-v2.ts +++ b/assistant/src/config/schemas/memory-v2.ts @@ -199,7 +199,7 @@ export const MemoryV2ConfigSchema = z .boolean() .default(false) .describe( - "Whether to apply cross-encoder reranking as an additive boost to the user + assistant similarity channels. Disabled by default — opt in once measured.", + "Whether to apply cross-encoder reranking as an additive A_o boost on the user + assistant channels. Disabled by default — opt in once measured.", ), top_k: z .number() @@ -208,7 +208,7 @@ export const MemoryV2ConfigSchema = z .max(200) .default(50) .describe( - "Number of top-fused candidates per `simBatch` call to send through the reranker. Tail candidates keep their pure fused score.", + "Number of candidates from the top of the pre-rerank-A_o pool to send through the reranker. Tail candidates contribute zero rerank boost and keep their pure fused activation.", ), alpha: z .number() @@ -216,7 +216,7 @@ export const MemoryV2ConfigSchema = z .max(1) .default(0.3) .describe( - "Boost weight: `boosted = clamp01(fused + alpha · normalized_rerank)`. Top reranker hit can lift its fused score by up to `alpha`; bottom of top_k stays roughly unchanged.", + "Per-channel rerank weight: each top-K slug gets `alpha · normalized_rerank` added to A_o weighted by `c_user` (user channel) or `c_assistant` (assistant channel). Top reranker hit can lift A_o by up to `(c_user + c_assistant) · alpha`; bottom of top_k stays roughly unchanged.", ), model: z .string() @@ -232,7 +232,7 @@ export const MemoryV2ConfigSchema = z model: DEFAULT_RERANK_MODEL, }) .describe( - "Cross-encoder rerank configuration. 
When enabled, runs a local cross-encoder over the top-K fused candidates per `simBatch(useRerank: true)` call and adds an alpha-weighted normalized boost to their fused scores.", + "Cross-encoder rerank configuration. When enabled, picks the top-K candidates by pre-rerank A_o, runs the cross-encoder once per channel (user, assistant) on that unified set, and adds an alpha-weighted normalized boost to A_o for each scored slug.", ), }) .describe( diff --git a/assistant/src/memory/__tests__/memory-v2-concept-frequency.test.ts b/assistant/src/memory/__tests__/memory-v2-concept-frequency.test.ts index ffbb0d8da29..d11d13eb80a 100644 --- a/assistant/src/memory/__tests__/memory-v2-concept-frequency.test.ts +++ b/assistant/src/memory/__tests__/memory-v2-concept-frequency.test.ts @@ -28,7 +28,6 @@ import { getDb } from "../db-connection.js"; import { initializeDb } from "../db-init.js"; import { type MemoryV2ConceptRowRecord, - type MemoryV2SkillRowRecord, recordMemoryV2ActivationLog, } from "../memory-v2-activation-log-store.js"; import { getConceptFrequencySummary } from "../memory-v2-concept-frequency.js"; @@ -38,7 +37,6 @@ import { sampleConfig } from "./fixtures/memory-v2-activation-fixtures.js"; initializeDb(); const WORKSPACE = "/tmp/memory-v2-concept-frequency-test"; -const NO_SKILLS: MemoryV2SkillRowRecord[] = []; function makeConcept( slug: string, @@ -80,7 +78,6 @@ describe("memory-v2-concept-frequency", () => { makeConcept("alice", "injected"), makeConcept("bob", "not_injected"), ], - skills: NO_SKILLS, config: sampleConfig, }); recordMemoryV2ActivationLog({ @@ -91,7 +88,6 @@ describe("memory-v2-concept-frequency", () => { makeConcept("alice", "in_context"), makeConcept("bob", "injected"), ], - skills: NO_SKILLS, config: sampleConfig, }); recordMemoryV2ActivationLog({ @@ -102,7 +98,6 @@ describe("memory-v2-concept-frequency", () => { makeConcept("alice", "injected"), makeConcept("charlie", "page_missing"), ], - skills: NO_SKILLS, config: sampleConfig, }); @@ 
-161,7 +156,6 @@ describe("memory-v2-concept-frequency", () => { turn: 1, mode: "per-turn", concepts: [makeConcept("alice", "injected")], - skills: NO_SKILLS, config: sampleConfig, }); recordMemoryV2ActivationLog({ @@ -172,7 +166,6 @@ describe("memory-v2-concept-frequency", () => { makeConcept("alice", "injected"), makeConcept("alice", "injected"), ], - skills: NO_SKILLS, config: sampleConfig, }); @@ -203,7 +196,6 @@ describe("memory-v2-concept-frequency", () => { turn: 1, mode: "per-turn", concepts: [makeConcept("alice", "injected")], - skills: NO_SKILLS, config: sampleConfig, }); // Backdate the just-written row — recordMemoryV2ActivationLog uses Date.now(). @@ -214,7 +206,6 @@ describe("memory-v2-concept-frequency", () => { turn: 2, mode: "per-turn", concepts: [makeConcept("alice", "injected")], - skills: NO_SKILLS, config: sampleConfig, }); @@ -242,7 +233,6 @@ describe("memory-v2-concept-frequency", () => { makeConcept("bob", "not_injected"), makeConcept("charlie", "page_missing"), ], - skills: NO_SKILLS, config: sampleConfig, }); @@ -268,7 +258,6 @@ describe("memory-v2-concept-frequency", () => { turn: 1, mode: "per-turn", concepts: [makeConcept("ghost", "injected")], - skills: NO_SKILLS, config: sampleConfig, }); diff --git a/assistant/src/memory/memory-v2-activation-log-store.ts b/assistant/src/memory/memory-v2-activation-log-store.ts index 3dc42509e48..d8053eedc1d 100644 --- a/assistant/src/memory/memory-v2-activation-log-store.ts +++ b/assistant/src/memory/memory-v2-activation-log-store.ts @@ -13,16 +13,20 @@ export interface MemoryV2ConceptRowRecord { simAssistant: number; simNow: number; /** - * Portion of `simUser` contributed by the cross-encoder rerank step. - * Zero when rerank is disabled or the slug fell outside the top-K - * window. Stored as a JSON field, so older log rows that pre-date - * this addition decode with `undefined`; readers should fall back to 0. 
+ * Cross-encoder rerank delta in raw rerank space (`alpha · r_norm_u`) + * for the user channel. Zero when rerank is disabled or the slug fell + * outside the unified top-K-by-pre-rerank-A_o window. Applied + * additively to A_o weighted by `c_user` — `simUser` itself is the + * raw fused score and never carries the boost. Stored as a JSON field, + * so older log rows that pre-date this addition decode with `undefined`; + * readers should fall back to 0. */ simUserRerankBoost: number; /** - * Portion of `simAssistant` contributed by the cross-encoder rerank - * step. Same semantics as `simUserRerankBoost`. The NOW channel - * intentionally bypasses rerank, so there is no `simNowRerankBoost`. + * Cross-encoder rerank delta for the assistant channel. Same semantics + * as `simUserRerankBoost`, weighted by `c_assistant` when applied to + * A_o. The NOW channel intentionally bypasses rerank, so there is no + * `simNowRerankBoost`. */ simAssistantRerankBoost: number; spreadContribution: number; diff --git a/assistant/src/memory/v2/__tests__/activation.test.ts b/assistant/src/memory/v2/__tests__/activation.test.ts index cede66b1340..928982fe094 100644 --- a/assistant/src/memory/v2/__tests__/activation.test.ts +++ b/assistant/src/memory/v2/__tests__/activation.test.ts @@ -615,6 +615,173 @@ describe("computeOwnActivation", () => { // Rerank should have been called once per rerank-enabled channel. expect(rerankState.calls).toHaveLength(2); }); + + test("rerank pool is the unified top-K by pre-rerank A_o, not per-channel fused", async () => { + // Three candidates. The per-channel fused-sim top-2s would have picked + // different sets: + // user channel: a=0.9, b=0.5, c=0.4 → per-channel top-2 = [a, b] + // assistant channel: a=0.5, b=0.4, c=0.9 → per-channel top-2 = [c, a] + // But pre-rerank A_o (c_user=c_assistant=0.5) is: + // a = 0.5·0.9 + 0.5·0.5 = 0.70 + // b = 0.5·0.5 + 0.5·0.4 = 0.45 + // c = 0.5·0.4 + 0.5·0.9 = 0.65 + // → unified top-2 = [a, c]. 
b drops out, even though it would have made + // the user-channel pool under the old per-channel selection. + stageHybridResponse([ + { slug: "a", denseScore: 0.9 }, + { slug: "b", denseScore: 0.5 }, + { slug: "c", denseScore: 0.4 }, + ]); // user + stageHybridResponse([ + { slug: "a", denseScore: 0.5 }, + { slug: "b", denseScore: 0.4 }, + { slug: "c", denseScore: 0.9 }, + ]); // assistant + stageHybridResponse([]); // now (no signal) + rerankState.scores = new Map([ + ["a", 0.5], + ["b", 0.5], + ["c", 0.5], + ]); + + const config = { + memory: { + v2: { + d: 0.0, + c_user: 0.5, + c_assistant: 0.5, + c_now: 0.0, + dense_weight: 1.0, + sparse_weight: 0.0, + rerank: { + enabled: true, + top_k: 2, + alpha: 0.3, + model: "test-model", + }, + }, + }, + } as unknown as AssistantConfig; + + await computeOwnActivation({ + candidates: new Set(["a", "b", "c"]), + priorState: null, + userText: "u", + assistantText: "a", + nowText: "", + config, + }); + + expect(rerankState.calls).toHaveLength(2); + // Both channels rerank against the same unified slug set, sorted by + // pre-rerank A_o descending. + expect(rerankState.calls[0].candidates).toEqual(["a", "c"]); + expect(rerankState.calls[1].candidates).toEqual(["a", "c"]); + }); + + test("rerank-disabled candidates outside the unified pool get zero boost", async () => { + // Two candidates, top_k=1. The lower pre-rerank A_o slug must end up + // with simUserRerankBoost=0 / simAssistantRerankBoost=0 in the breakdown. + stageHybridResponse([ + { slug: "winner", denseScore: 0.9 }, + { slug: "loser", denseScore: 0.2 }, + ]); // user + stageHybridResponse([ + { slug: "winner", denseScore: 0.9 }, + { slug: "loser", denseScore: 0.2 }, + ]); // assistant + stageHybridResponse([]); // now + // The mocked reranker hands back scores for whatever slugs it's + // called with. Stage scores for both; the assertion below is that + // the loser still receives 0 because it's never sent to the + // reranker — top_k=1 cuts it off. 
+ rerankState.scores = new Map([ + ["winner", 0.5], + ["loser", 0.5], + ]); + + const config = { + memory: { + v2: { + d: 0.0, + c_user: 0.5, + c_assistant: 0.5, + c_now: 0.0, + dense_weight: 1.0, + sparse_weight: 0.0, + rerank: { + enabled: true, + top_k: 1, + alpha: 0.3, + model: "test-model", + }, + }, + }, + } as unknown as AssistantConfig; + + const out = await computeOwnActivation({ + candidates: new Set(["winner", "loser"]), + priorState: null, + userText: "u", + assistantText: "a", + nowText: "", + config, + }); + + expect(out.breakdown.get("loser")?.simUserRerankBoost).toBe(0); + expect(out.breakdown.get("loser")?.simAssistantRerankBoost).toBe(0); + expect(out.breakdown.get("winner")?.simUserRerankBoost).toBeGreaterThan(0); + expect( + out.breakdown.get("winner")?.simAssistantRerankBoost, + ).toBeGreaterThan(0); + }); + + test("rerank boost is additive on A_o and leaves raw simUser / simAssistant untouched", async () => { + stageHybridResponse([{ slug: "a", denseScore: 0.5 }]); // user + stageHybridResponse([{ slug: "a", denseScore: 0.4 }]); // assistant + stageHybridResponse([]); // now + rerankState.scores = new Map([["a", 0.8]]); + + const config = { + memory: { + v2: { + d: 0.0, + c_user: 0.5, + c_assistant: 0.5, + c_now: 0.0, + dense_weight: 1.0, + sparse_weight: 0.0, + rerank: { + enabled: true, + top_k: 50, + alpha: 0.4, + model: "test-model", + }, + }, + }, + } as unknown as AssistantConfig; + + const out = await computeOwnActivation({ + candidates: new Set(["a"]), + priorState: null, + userText: "u", + assistantText: "a", + nowText: "", + config, + }); + + const breakdown = out.breakdown.get("a"); + // Raw fused similarities are reported untouched by rerank. + expect(breakdown?.simUser).toBeCloseTo(0.5, 6); + expect(breakdown?.simAssistant).toBeCloseTo(0.4, 6); + // Both rerank deltas are alpha · r_norm = 0.4 · 1.0 = 0.4 (single + // candidate normalises to 1.0 in each channel). 
+ expect(breakdown?.simUserRerankBoost).toBeCloseTo(0.4, 6); + expect(breakdown?.simAssistantRerankBoost).toBeCloseTo(0.4, 6); + // Final A_o = c_user·simU + c_assistant·simA + c_user·boostU + c_assistant·boostA + // = 0.5·0.5 + 0.5·0.4 + 0.5·0.4 + 0.5·0.4 = 0.25+0.20+0.20+0.20 = 0.85 + expect(out.activation.get("a")).toBeCloseTo(0.85, 6); + }); }); // --------------------------------------------------------------------------- diff --git a/assistant/src/memory/v2/__tests__/sim.test.ts b/assistant/src/memory/v2/__tests__/sim.test.ts index 8d9d1685e45..d726fe3b509 100644 --- a/assistant/src/memory/v2/__tests__/sim.test.ts +++ b/assistant/src/memory/v2/__tests__/sim.test.ts @@ -145,31 +145,6 @@ mock.module("@qdrant/js-client-rest", () => ({ QdrantClient: MockQdrantClient, })); -// Reranker mock — allows boost-mode tests to programmatically supply scores -// without spinning up the cross-encoder subprocess. -const rerankState = { - scores: null as Map | null, - shouldThrow: false, - calls: [] as Array<{ query: string; candidates: string[] }>, -}; -mock.module("../reranker.js", () => ({ - rerankCandidates: async ( - query: string, - candidates: readonly string[], - ): Promise> => { - rerankState.calls.push({ query, candidates: [...candidates] }); - if (rerankState.shouldThrow) throw new Error("rerank disabled in test"); - if (rerankState.scores === null) return new Map(); - const out = new Map(); - for (const slug of candidates) { - const v = rerankState.scores.get(slug); - if (v !== undefined) out.set(slug, v); - } - return out; - }, - _resetRerankCacheForTests: () => {}, -})); - const { simBatch, clamp01, effectiveWeights } = await import("../sim.js"); const { _resetMemoryV2QdrantForTests } = await import("../qdrant.js"); @@ -184,9 +159,6 @@ function resetState(): void { state.queryResponses.dense.length = 0; state.queryResponses.sparse.length = 0; state.queryCalls.length = 0; - rerankState.scores = null; - rerankState.shouldThrow = false; - rerankState.calls.length = 
0; // Bun's `mock.module` persists across files in the same process, so the // qdrant module's singleton may already hold a MockQdrantClient instance // from a sibling test file. Reset readiness so each test in this file @@ -541,151 +513,3 @@ describe("simBatch", () => { } }); }); - -// --------------------------------------------------------------------------- -// simBatch — cross-encoder rerank boost -// --------------------------------------------------------------------------- - -describe("simBatch with rerank boost", () => { - // dense_weight=1.0 / sparse_weight=0 so the fused score equals the dense - // input directly — keeps the boost-math arithmetic readable in assertions. - // The validator that requires the weights to sum to 1.0 only runs when the - // schema is parsed; tests cast partial objects so it never fires. - function configWithRerank(overrides: { - enabled: boolean; - top_k?: number; - alpha?: number; - }): AssistantConfig { - return { - memory: { - v2: { - dense_weight: 1.0, - sparse_weight: 0.0, - rerank: { - enabled: overrides.enabled, - top_k: overrides.top_k ?? 50, - alpha: overrides.alpha ?? 
0.3, - model: "test-model", - }, - }, - }, - } as unknown as AssistantConfig; - } - - test("boosts top-K fused scores by alpha · normalized rerank", async () => { - const config = configWithRerank({ enabled: true, top_k: 50, alpha: 0.4 }); - stageHybridResponse([ - { slug: "a", denseScore: 0.5 }, - { slug: "b", denseScore: 0.4 }, - { slug: "c", denseScore: 0.3 }, - ]); - rerankState.scores = new Map([ - ["a", 0.2], // normalised → 0.2 / 0.8 = 0.25 - ["b", 0.8], // normalised → 1.0 (max) - ["c", 0.4], // normalised → 0.5 - ]); - - const out = await simBatch("query", ["a", "b", "c"], config, { - useRerank: true, - }); - - // a: clamp01(0.5 + 0.4·0.25) = 0.6 - // b: clamp01(0.4 + 0.4·1.0) = 0.8 - // c: clamp01(0.3 + 0.4·0.5) = 0.5 - expect(out.get("a")).toBeCloseTo(0.6); - expect(out.get("b")).toBeCloseTo(0.8); - expect(out.get("c")).toBeCloseTo(0.5); - }); - - test("rerank flips ranking when its top hit was dense's #2", async () => { - const config = configWithRerank({ enabled: true, alpha: 0.5 }); - stageHybridResponse([ - { slug: "lexical-match", denseScore: 0.55 }, - { slug: "semantic-match", denseScore: 0.45 }, - ]); - rerankState.scores = new Map([ - ["lexical-match", 0.05], - ["semantic-match", 0.9], - ]); - - const out = await simBatch( - "query", - ["lexical-match", "semantic-match"], - config, - { useRerank: true }, - ); - - // lexical-match: 0.55 + 0.5 · (0.05/0.9) ≈ 0.578 - // semantic-match: 0.45 + 0.5 · 1.0 = 0.95 - expect(out.get("semantic-match")!).toBeGreaterThan( - out.get("lexical-match")!, - ); - }); - - test("only top-K candidates get reranked; tail keeps pure fused", async () => { - const config = configWithRerank({ enabled: true, top_k: 2, alpha: 0.5 }); - stageHybridResponse([ - { slug: "a", denseScore: 0.9 }, - { slug: "b", denseScore: 0.7 }, - { slug: "c", denseScore: 0.3 }, // tail — outside top_k=2 - ]); - rerankState.scores = new Map([ - ["a", 0.5], - ["b", 1.0], - ["c", 1.0], // would lift but reranker is never called for it - ]); - - 
const out = await simBatch("query", ["a", "b", "c"], config, { - useRerank: true, - }); - - expect(rerankState.calls).toHaveLength(1); - expect(rerankState.calls[0].candidates).toEqual(["a", "b"]); - expect(out.get("c")).toBeCloseTo(0.3); // unchanged - }); - - test("returns pure fused when useRerank: true but rerank.enabled: false", async () => { - const config = configWithRerank({ enabled: false }); - stageHybridResponse([{ slug: "a", denseScore: 0.5 }]); - rerankState.scores = new Map([["a", 1.0]]); - - const out = await simBatch("query", ["a"], config, { useRerank: true }); - - expect(rerankState.calls).toHaveLength(0); - expect(out.get("a")).toBeCloseTo(0.5); // no boost applied - }); - - test("returns pure fused when reranker returns empty (fail-open)", async () => { - const config = configWithRerank({ enabled: true }); - stageHybridResponse([{ slug: "a", denseScore: 0.5 }]); - // The real `rerankCandidates` swallows worker errors and returns an - // empty Map — `applyRerankBoost` short-circuits on empty. 
- rerankState.scores = new Map(); - - const out = await simBatch("query", ["a"], config, { useRerank: true }); - - expect(out.get("a")).toBeCloseTo(0.5); // no boost - }); - - test("useRerank not passed — boost path doesn't run even when enabled", async () => { - const config = configWithRerank({ enabled: true }); - stageHybridResponse([{ slug: "a", denseScore: 0.5 }]); - rerankState.scores = new Map([["a", 1.0]]); - - const out = await simBatch("query", ["a"], config); - - expect(rerankState.calls).toHaveLength(0); - expect(out.get("a")).toBeCloseTo(0.5); - }); - - test("clamps boosted score to <= 1", async () => { - const config = configWithRerank({ enabled: true, alpha: 1.0 }); - stageHybridResponse([{ slug: "a", denseScore: 0.95 }]); - rerankState.scores = new Map([["a", 0.8]]); - - const out = await simBatch("query", ["a"], config, { useRerank: true }); - - // 0.95 + 1.0 · 1.0 = 1.95 → clamped to 1.0 - expect(out.get("a")).toBe(1); - }); -}); diff --git a/assistant/src/memory/v2/activation.ts b/assistant/src/memory/v2/activation.ts index c590eeaa7f1..0ee615d092a 100644 --- a/assistant/src/memory/v2/activation.ts +++ b/assistant/src/memory/v2/activation.ts @@ -2,12 +2,15 @@ // Memory v2 — Per-turn activation update // --------------------------------------------------------------------------- // -// Implements the activation formula from §4 of the design doc: +// Implements the activation formula from §4 of the design doc plus an +// additive cross-encoder rerank boost on the unified top-K-by-A_o pool: // // A_o(n, t+1) = d · A(n, t) // + c_user · sim(User_{t+1}, n) // + c_assistant · sim(Assistant_t, n) // + c_now · sim(NOW.md, n) +// + c_user · α · r_norm(User_{t+1}, n) [n ∈ topK] +// + c_assistant · α · r_norm(Assistant_t, n) [n ∈ topK] // // A(n, t+1) = [ A_o(n) // + k · Σ_{m∈in1(n)} A_o(m) @@ -40,6 +43,7 @@ import { import { clampUnitInterval } from "../validation.js"; import type { EdgeIndex } from "./edge-index.js"; import { hybridQueryConceptPages } 
from "./qdrant.js"; +import { rerankCandidates } from "./reranker.js"; import { simBatch } from "./sim.js"; import type { ActivationState, EverInjectedEntry } from "./types.js"; @@ -164,15 +168,15 @@ interface ComputeOwnActivationParams { interface OwnActivationBreakdown { /** `d * prev(slug)` — the decayed prior-turn activation contribution. */ priorContribution: number; - /** Raw `sim(user, slug)` similarity, before `c_user` weighting. */ + /** Raw fused `sim(user, slug)`, before `c_user` weighting. */ simUser: number; - /** Raw `sim(assistant, slug)` similarity, before `c_assistant` weighting. */ + /** Raw fused `sim(assistant, slug)`, before `c_assistant` weighting. */ simAssistant: number; - /** Raw `sim(now, slug)` similarity, before `c_now` weighting. */ + /** Raw fused `sim(now, slug)`, before `c_now` weighting. */ simNow: number; - /** Cross-encoder boost folded into `simUser`. `simUser - simUserRerankBoost` recovers the pre-rerank fused score. */ + /** Rerank delta `α · r_norm_u`; 0 outside the top-K pool. Applied to `A_o` weighted by `c_user`. */ simUserRerankBoost: number; - /** Cross-encoder boost folded into `simAssistant`. NOW channel skips rerank, so there is no `simNowRerankBoost`. */ + /** Rerank delta `α · r_norm_a`; 0 outside the top-K pool. Applied to `A_o` weighted by `c_assistant`. NOW skips rerank. */ simAssistantRerankBoost: number; } @@ -185,15 +189,21 @@ interface ComputeOwnActivationResult { /** * Apply the own-activation formula - * A_o(n) = d · prev(n) + c_user · sim_u + c_assistant · sim_a + c_now · sim_n - * over the candidate set. Returns a sparse map keyed by slug; slugs whose - * computed value rounds to 0 are still included so callers can see the - * candidate set explicitly. Also returns a per-slug breakdown of the raw - * inputs (decayed prior + raw sims) so callers can render contribution - * diagnostics without re-running the math. 
+ * A_o(n) = d · prev(n) + * + c_user · sim_u + c_assistant · sim_a + c_now · sim_n + * + c_user · α · r_norm_u + c_assistant · α · r_norm_a + * over the candidate set, where the rerank terms only fire for slugs that + * land in the unified top-K-by-pre-rerank-A_o window. Returns a sparse map + * keyed by slug; slugs whose computed value rounds to 0 are still included + * so callers can see the candidate set explicitly. Also returns a per-slug + * breakdown of the raw inputs (decayed prior + raw sims + rerank deltas) so + * callers can render contribution diagnostics without re-running the math. * * The three `simBatch` calls run concurrently — they hit independent named - * vectors and embed independent query texts. + * vectors and embed independent query texts. Cross-encoder rerank then runs + * once per channel (user, assistant) over the same unified top-K (selected by + * pre-rerank A_o, not per-channel fused sim) so an entry strong in both + * channels can't double-boost itself past entries that only land in one channel. */ export async function computeOwnActivation( params: ComputeOwnActivationParams, @@ -209,41 +219,106 @@ export async function computeOwnActivation( const slugList = [...candidates]; // NOW context is structured (timestamps, current focus) — outside the - // cross-encoder's training distribution, so it stays on pure fused fusion. - const userRerankBoost = new Map(); - const assistantRerankBoost = new Map(); + // cross-encoder's training distribution, so it never participates in rerank. 
const [simUser, simAssistant, simNow] = await Promise.all([ - simBatch(userText, slugList, config, { - useRerank: true, - rerankBoost: userRerankBoost, - }), - simBatch(assistantText, slugList, config, { - useRerank: true, - rerankBoost: assistantRerankBoost, - }), + simBatch(userText, slugList, config), + simBatch(assistantText, slugList, config), simBatch(nowText, slugList, config), ]); - for (const slug of slugList) { + interface SlugInputs { + slug: string; + priorContribution: number; + simU: number; + simA: number; + simN: number; + /** Pre-rerank A_o; ranking signal for the unified rerank pool. */ + preRerank: number; + } + const inputs: SlugInputs[] = slugList.map((slug) => { const prev = priorState?.state[slug] ?? 0; const simU = simUser.get(slug) ?? 0; const simA = simAssistant.get(slug) ?? 0; const simN = simNow.get(slug) ?? 0; - const value = d * prev + c_user * simU + c_assistant * simA + c_now * simN; - activation.set(slug, clampUnitInterval(value)); - breakdown.set(slug, { - priorContribution: d * prev, - simUser: simU, - simAssistant: simA, - simNow: simN, - simUserRerankBoost: userRerankBoost.get(slug) ?? 0, - simAssistantRerankBoost: assistantRerankBoost.get(slug) ?? 0, + const priorContribution = d * prev; + return { + slug, + priorContribution, + simU, + simA, + simN, + preRerank: + priorContribution + c_user * simU + c_assistant * simA + c_now * simN, + }; + }); + + // Unified top-K by pre-rerank A_o. Both channels rerank against the **same** + // slug set, so a slug strong on user can't crowd out one strong on assistant + // by virtue of appearing in both per-channel top-Ks. 
+ let userRerankBoost: ReadonlyMap = new Map(); + let assistantRerankBoost: ReadonlyMap = new Map(); + const rerankCfg = config.memory.v2.rerank; + if (rerankCfg?.enabled) { + const topSlugs = inputs + .slice() + .sort((a, b) => b.preRerank - a.preRerank) + .slice(0, rerankCfg.top_k) + .map((e) => e.slug); + if (topSlugs.length > 0) { + const [userScores, assistantScores] = await Promise.all([ + rerankCandidates(userText, topSlugs, config), + rerankCandidates(assistantText, topSlugs, config), + ]); + userRerankBoost = normalizeRerankScores(userScores, rerankCfg.alpha); + assistantRerankBoost = normalizeRerankScores( + assistantScores, + rerankCfg.alpha, + ); + } + } + + for (const e of inputs) { + const boostU = userRerankBoost.get(e.slug) ?? 0; + const boostA = assistantRerankBoost.get(e.slug) ?? 0; + activation.set( + e.slug, + clampUnitInterval(e.preRerank + c_user * boostU + c_assistant * boostA), + ); + breakdown.set(e.slug, { + priorContribution: e.priorContribution, + simUser: e.simU, + simAssistant: e.simA, + simNow: e.simN, + simUserRerankBoost: boostU, + simAssistantRerankBoost: boostA, }); } return { activation, breakdown }; } +/** + * Per-batch normalisation: divide raw cross-encoder scores by the channel's + * own max and return `alpha · r_norm` per slug. Empty input or all-zero + * scores yield an empty Map so the channel contributes 0 boost. 
+ */ +function normalizeRerankScores( + rawScores: ReadonlyMap, + alpha: number, +): Map { + const out = new Map(); + if (rawScores.size === 0) return out; + let maxScore = 0; + for (const v of rawScores.values()) { + if (v > maxScore) maxScore = v; + } + if (maxScore === 0) return out; + for (const [slug, raw] of rawScores) { + out.set(slug, alpha * (raw / maxScore)); + } + return out; +} + // --------------------------------------------------------------------------- // Spreading activation // --------------------------------------------------------------------------- diff --git a/assistant/src/memory/v2/reranker.ts b/assistant/src/memory/v2/reranker.ts index b7638fa09aa..1cc8dd0e134 100644 --- a/assistant/src/memory/v2/reranker.ts +++ b/assistant/src/memory/v2/reranker.ts @@ -54,7 +54,7 @@ function buildPassage(slug: string, body: string): string { * Run the cross-encoder over each candidate's first-paragraph preview. * Returns raw sigmoid scores; failures (worker down, page read error) yield * an empty Map so callers can fall back to pure fused scores. Per-batch - * normalisation and boost math live in `simBatch.applyRerankBoost`. + * normalisation and boost math live in `computeOwnActivation`. 
*/ export async function rerankCandidates( query: string, diff --git a/assistant/src/memory/v2/sim.ts b/assistant/src/memory/v2/sim.ts index 3121135e40e..9cfa00b0660 100644 --- a/assistant/src/memory/v2/sim.ts +++ b/assistant/src/memory/v2/sim.ts @@ -30,7 +30,6 @@ import { applyCorrectionIfCalibrated } from "../anisotropy.js"; import { embedWithBackend } from "../embedding-backend.js"; import { clampUnitInterval } from "../validation.js"; import { hybridQueryConceptPages } from "./qdrant.js"; -import { rerankCandidates } from "./reranker.js"; import { generateBm25QueryEmbedding } from "./sparse-bm25.js"; /** @@ -147,18 +146,6 @@ export async function simBatch( text: string, candidateSlugs: readonly string[], config: AssistantConfig, - options?: { - useRerank?: boolean; - /** - * When provided alongside `useRerank: true`, the rerank step writes the - * per-slug boost delta (`boosted_fused - pre_rerank_fused`) into this map - * for slugs that fell inside the top-K window. Slugs outside top-K (or - * absent on the rerank-disabled path) leave no entry — callers should - * treat absence as 0. Used by the activation inspector to surface the - * cross-encoder contribution separately from the fused similarity. - */ - rerankBoost?: Map; - }, ): Promise> { if (candidateSlugs.length === 0) { return new Map(); @@ -205,50 +192,9 @@ export async function simBatch( scores.set(hit.slug, fuseHit(hit, maxSparse, denseWeight, sparseWeight)); } - // Cross-encoder boost on top of the fused score for the top-K candidates. - // Optional-chain on `rerank` so test configs that omit it still type-check. 
- if (options?.useRerank === true && config.memory.v2.rerank?.enabled) { - return applyRerankBoost(text, scores, config, options.rerankBoost); - } - return scores; } -async function applyRerankBoost( - query: string, - fused: Map, - config: AssistantConfig, - boostOut?: Map, -): Promise> { - const rerankCfg = config.memory.v2.rerank; - const sortedSlugs = [...fused.entries()] - .sort((a, b) => b[1] - a[1]) - .map(([slug]) => slug); - const topSlugs = sortedSlugs.slice(0, rerankCfg.top_k); - if (topSlugs.length === 0) return fused; - - const rerank = await rerankCandidates(query, topSlugs, config); - if (rerank.size === 0) return fused; - - let maxRerank = 0; - for (const v of rerank.values()) { - if (v > maxRerank) maxRerank = v; - } - if (maxRerank === 0) return fused; - - const out = new Map(fused); - for (const [slug, raw] of rerank) { - const r_norm = raw / maxRerank; - const base = fused.get(slug) ?? 0; - const boosted = clampUnitInterval(base + rerankCfg.alpha * r_norm); - out.set(slug, boosted); - // Capture the realized delta (post-clamp) so callers can render the - // cross-encoder contribution separately from the fused-only score. - boostOut?.set(slug, boosted - base); - } - return out; -} - /** * Per-batch sparse-score maximum used for normalization. Hits missing from * the sparse channel contribute 0 (handled by the `undefined` guard). 
diff --git a/clients/macos/vellum-assistant/Features/Chat/MessageInspectorMemoryV2Tab.swift b/clients/macos/vellum-assistant/Features/Chat/MessageInspectorMemoryV2Tab.swift index eaaee5dcfa2..4e47fc29c69 100644 --- a/clients/macos/vellum-assistant/Features/Chat/MessageInspectorMemoryV2Tab.swift +++ b/clients/macos/vellum-assistant/Features/Chat/MessageInspectorMemoryV2Tab.swift @@ -114,32 +114,33 @@ struct MessageInspectorMemoryV2TabModel: Equatable { label: "c_user · sim_u", value: "\(formatScaled(simUser, scale: config.cUser)) (raw \(formatActivation(simUser)))" ), + LabeledValue( + label: "c_assistant · sim_a", + value: "\(formatScaled(simAssistant, scale: config.cAssistant)) (raw \(formatActivation(simAssistant)))" + ), + LabeledValue( + label: "c_now · sim_n", + value: "\(formatScaled(simNow, scale: config.cNow)) (raw \(formatActivation(simNow)))" + ), ] - // Surface the rerank delta only when the cross-encoder actually - // contributed. We can't tell "outside top-K" from "in top-K but - // contributed 0" since the activation log stores the realized - // delta only; rendering "+0.000" everywhere when rerank is - // enabled would just be noise. + // Rerank contributes additively to A_o weighted by c_user / c_assistant + // — render as standalone rows (not nested under c_user · sim_u) so the + // sum across all visible rows equals the row's A_o. Only show when the + // cross-encoder actually contributed: the activation log stores the + // realized delta only, so "+0.000" everywhere with rerank enabled + // would just be noise. 
if simUserRerankBoost > 0 { rows.append(LabeledValue( - label: " └ rerank Δ_u", - value: "+\(formatActivation(simUserRerankBoost))" + label: "c_user · rerank Δ_u", + value: "+\(formatScaled(simUserRerankBoost, scale: config.cUser)) (raw \(formatActivation(simUserRerankBoost)))" )) } - rows.append(LabeledValue( - label: "c_assistant · sim_a", - value: "\(formatScaled(simAssistant, scale: config.cAssistant)) (raw \(formatActivation(simAssistant)))" - )) if simAssistantRerankBoost > 0 { rows.append(LabeledValue( - label: " └ rerank Δ_a", - value: "+\(formatActivation(simAssistantRerankBoost))" + label: "c_assistant · rerank Δ_a", + value: "+\(formatScaled(simAssistantRerankBoost, scale: config.cAssistant)) (raw \(formatActivation(simAssistantRerankBoost)))" )) } - rows.append(LabeledValue( - label: "c_now · sim_n", - value: "\(formatScaled(simNow, scale: config.cNow)) (raw \(formatActivation(simNow)))" - )) return rows } }