vellum-ai · siddseethepalli · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/assistant/src/config/schemas/memory-v2.ts b/assistant/src/config/schemas/memory-v2.ts
@@ -199,7 +199,7 @@ export const MemoryV2ConfigSchema = z
           .boolean()
           .default(false)
           .describe(
-            "Whether to apply cross-encoder reranking as an additive boost to the user + assistant similarity channels. Disabled by default — opt in once measured.",
+            "Whether to apply cross-encoder reranking as an additive A_o boost on the user + assistant channels. Disabled by default — opt in once measured.",
           ),
         top_k: z
           .number()
@@ -208,15 +208,15 @@ export const MemoryV2ConfigSchema = z
           .max(200)
           .default(50)
           .describe(
-            "Number of top-fused candidates per `simBatch` call to send through the reranker. Tail candidates keep their pure fused score.",
+            "Number of candidates from the top of the pre-rerank-A_o pool to send through the reranker. Tail candidates contribute zero rerank boost and keep their pure fused activation.",
           ),
         alpha: z
           .number()
           .min(0)
           .max(1)
           .default(0.3)
           .describe(
-            "Boost weight: `boosted = clamp01(fused + alpha · normalized_rerank)`. Top reranker hit can lift its fused score by up to `alpha`; bottom of top_k stays roughly unchanged.",
+            "Per-channel rerank weight: each top-K slug gets `alpha · normalized_rerank` added to A_o weighted by `c_user` (user channel) or `c_assistant` (assistant channel). Top reranker hit can lift A_o by up to `(c_user + c_assistant) · alpha`; bottom of top_k stays roughly unchanged.",
           ),
         model: z
           .string()
@@ -232,7 +232,7 @@ export const MemoryV2ConfigSchema = z
         model: DEFAULT_RERANK_MODEL,
       })
       .describe(
-        "Cross-encoder rerank configuration. When enabled, runs a local cross-encoder over the top-K fused candidates per `simBatch(useRerank: true)` call and adds an alpha-weighted normalized boost to their fused scores.",
+        "Cross-encoder rerank configuration. When enabled, picks the top-K candidates by pre-rerank A_o, runs the cross-encoder once per channel (user, assistant) on that unified set, and adds an alpha-weighted normalized boost to A_o for each scored slug.",
       ),
   })
   .describe(

@@ -28,7 +28,6 @@ import { getDb } from "../db-connection.js";
 import { initializeDb } from "../db-init.js";
 import {
   type MemoryV2ConceptRowRecord,
-  type MemoryV2SkillRowRecord,
   recordMemoryV2ActivationLog,
 } from "../memory-v2-activation-log-store.js";
 import { getConceptFrequencySummary } from "../memory-v2-concept-frequency.js";
@@ -38,7 +37,6 @@ import { sampleConfig } from "./fixtures/memory-v2-activation-fixtures.js";
 initializeDb();
 
 const WORKSPACE = "/tmp/memory-v2-concept-frequency-test";
-const NO_SKILLS: MemoryV2SkillRowRecord[] = [];
 
 function makeConcept(
   slug: string,
@@ -80,7 +78,6 @@ describe("memory-v2-concept-frequency", () => {
         makeConcept("alice", "injected"),
         makeConcept("bob", "not_injected"),
       ],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
     recordMemoryV2ActivationLog({
@@ -91,7 +88,6 @@ describe("memory-v2-concept-frequency", () => {
         makeConcept("alice", "in_context"),
         makeConcept("bob", "injected"),
       ],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
     recordMemoryV2ActivationLog({
@@ -102,7 +98,6 @@ describe("memory-v2-concept-frequency", () => {
         makeConcept("alice", "injected"),
         makeConcept("charlie", "page_missing"),
       ],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
 
@@ -161,7 +156,6 @@ describe("memory-v2-concept-frequency", () => {
       turn: 1,
       mode: "per-turn",
       concepts: [makeConcept("alice", "injected")],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
     recordMemoryV2ActivationLog({
@@ -172,7 +166,6 @@ describe("memory-v2-concept-frequency", () => {
         makeConcept("alice", "injected"),
         makeConcept("alice", "injected"),
       ],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
 
@@ -203,7 +196,6 @@ describe("memory-v2-concept-frequency", () => {
       turn: 1,
       mode: "per-turn",
       concepts: [makeConcept("alice", "injected")],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
     // Backdate the just-written row — recordMemoryV2ActivationLog uses Date.now().
@@ -214,7 +206,6 @@ describe("memory-v2-concept-frequency", () => {
       turn: 2,
       mode: "per-turn",
       concepts: [makeConcept("alice", "injected")],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
 
@@ -242,7 +233,6 @@ describe("memory-v2-concept-frequency", () => {
         makeConcept("bob", "not_injected"),
         makeConcept("charlie", "page_missing"),
       ],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
 
@@ -268,7 +258,6 @@ describe("memory-v2-concept-frequency", () => {
       turn: 1,
       mode: "per-turn",
       concepts: [makeConcept("ghost", "injected")],
-      skills: NO_SKILLS,
       config: sampleConfig,
     });
 

@@ -13,16 +13,20 @@ export interface MemoryV2ConceptRowRecord {
   simAssistant: number;
   simNow: number;
   /**
-   * Portion of `simUser` contributed by the cross-encoder rerank step.
-   * Zero when rerank is disabled or the slug fell outside the top-K
-   * window. Stored as a JSON field, so older log rows that pre-date
-   * this addition decode with `undefined`; readers should fall back to 0.
+   * Cross-encoder rerank delta in raw rerank space (`alpha · r_norm_u`)
+   * for the user channel. Zero when rerank is disabled or the slug fell
+   * outside the unified top-K-by-pre-rerank-A_o window. Applied
+   * additively to A_o weighted by `c_user` — `simUser` itself is the
+   * raw fused score and never carries the boost. Stored as a JSON field,
+   * so older log rows that pre-date this addition decode with `undefined`;
+   * readers should fall back to 0.
    */
   simUserRerankBoost: number;
   /**
-   * Portion of `simAssistant` contributed by the cross-encoder rerank
-   * step. Same semantics as `simUserRerankBoost`. The NOW channel
-   * intentionally bypasses rerank, so there is no `simNowRerankBoost`.
+   * Cross-encoder rerank delta for the assistant channel. Same semantics
+   * as `simUserRerankBoost`, weighted by `c_assistant` when applied to
+   * A_o. The NOW channel intentionally bypasses rerank, so there is no
+   * `simNowRerankBoost`.
    */
   simAssistantRerankBoost: number;
   spreadContribution: number;

@@ -615,6 +615,173 @@ describe("computeOwnActivation", () => {
     // Rerank should have been called once per rerank-enabled channel.
     expect(rerankState.calls).toHaveLength(2);
   });
+
+  test("rerank pool is the unified top-K by pre-rerank A_o, not per-channel fused", async () => {
+    // Three candidates, staged so that a per-channel fused-sim top-2 would
+    // select a different set on each channel:
+    //   user channel:      a=0.9, b=0.5, c=0.4  → per-channel top-2 = [a, b]
+    //   assistant channel: a=0.5, b=0.4, c=0.9  → per-channel top-2 = [c, a]
+    // But pre-rerank A_o (c_user=c_assistant=0.5) is:
+    //   a = 0.5·0.9 + 0.5·0.5 = 0.70
+    //   b = 0.5·0.5 + 0.5·0.4 = 0.45
+    //   c = 0.5·0.4 + 0.5·0.9 = 0.65
+    // → unified top-2 = [a, c]. b drops out, even though it would have made
+    //   the user-channel pool under the old per-channel selection.
+    stageHybridResponse([
+      { slug: "a", denseScore: 0.9 },
+      { slug: "b", denseScore: 0.5 },
+      { slug: "c", denseScore: 0.4 },
+    ]); // user channel
+    stageHybridResponse([
+      { slug: "a", denseScore: 0.5 },
+      { slug: "b", denseScore: 0.4 },
+      { slug: "c", denseScore: 0.9 },
+    ]); // assistant channel
+    stageHybridResponse([]); // NOW channel (no signal; c_now is 0 below)
+    rerankState.scores = new Map([
+      ["a", 0.5],
+      ["b", 0.5],
+      ["c", 0.5],
+    ]); // identical rerank scores — only pool membership is asserted below
+
+    const config = {
+      memory: {
+        v2: {
+          d: 0.0,
+          c_user: 0.5,
+          c_assistant: 0.5,
+          c_now: 0.0,
+          dense_weight: 1.0, // presumably dense-only fusion, so fused sim == denseScore as the arithmetic above assumes
+          sparse_weight: 0.0,
+          rerank: {
+            enabled: true,
+            top_k: 2, // pool size under test: 2 of the 3 candidates
+            alpha: 0.3,
+            model: "test-model",
+          },
+        },
+      },
+    } as unknown as AssistantConfig;
+
+    await computeOwnActivation({
+      candidates: new Set(["a", "b", "c"]),
+      priorState: null,
+      userText: "u",
+      assistantText: "a",
+      nowText: "",
+      config,
+    });
+
+    expect(rerankState.calls).toHaveLength(2); // one rerank call per channel (user, assistant)
+    // Both channels rerank against the same unified slug set, ordered by
+    // pre-rerank A_o descending (a=0.70 before c=0.65); b is never sent.
+    expect(rerankState.calls[0].candidates).toEqual(["a", "c"]);
+    expect(rerankState.calls[1].candidates).toEqual(["a", "c"]);
+  });
+
+  test("candidates outside the unified rerank pool get zero boost", async () => {
+    // Rerank is enabled, but top_k=1 with two candidates: the lower pre-rerank
+    // A_o slug must report simUserRerankBoost=0 / simAssistantRerankBoost=0.
+    stageHybridResponse([
+      { slug: "winner", denseScore: 0.9 },
+      { slug: "loser", denseScore: 0.2 },
+    ]); // user channel
+    stageHybridResponse([
+      { slug: "winner", denseScore: 0.9 },
+      { slug: "loser", denseScore: 0.2 },
+    ]); // assistant channel
+    stageHybridResponse([]); // NOW channel (no signal; c_now is 0 below)
+    // The mocked reranker hands back scores for whatever slugs it's
+    // called with. Stage scores for both; the assertion below is that
+    // the loser still receives 0 because it's never sent to the
+    // reranker — top_k=1 cuts it off.
+    rerankState.scores = new Map([
+      ["winner", 0.5],
+      ["loser", 0.5],
+    ]);
+
+    const config = {
+      memory: {
+        v2: {
+          d: 0.0,
+          c_user: 0.5,
+          c_assistant: 0.5,
+          c_now: 0.0,
+          dense_weight: 1.0,
+          sparse_weight: 0.0,
+          rerank: {
+            enabled: true, // rerank is ON; exclusion comes solely from the top_k cut-off
+            top_k: 1, // pool holds only the highest pre-rerank A_o slug
+            alpha: 0.3,
+            model: "test-model",
+          },
+        },
+      },
+    } as unknown as AssistantConfig;
+
+    const out = await computeOwnActivation({
+      candidates: new Set(["winner", "loser"]),
+      priorState: null,
+      userText: "u",
+      assistantText: "a",
+      nowText: "",
+      config,
+    });
+
+    expect(out.breakdown.get("loser")?.simUserRerankBoost).toBe(0); // outside the pool
+    expect(out.breakdown.get("loser")?.simAssistantRerankBoost).toBe(0);
+    expect(out.breakdown.get("winner")?.simUserRerankBoost).toBeGreaterThan(0); // inside the pool
+    expect(
+      out.breakdown.get("winner")?.simAssistantRerankBoost,
+    ).toBeGreaterThan(0);
+  });
+
+  test("rerank boost is additive on A_o and leaves raw simUser / simAssistant untouched", async () => {
+    stageHybridResponse([{ slug: "a", denseScore: 0.5 }]); // user channel
+    stageHybridResponse([{ slug: "a", denseScore: 0.4 }]); // assistant channel
+    stageHybridResponse([]); // NOW channel (no signal; c_now is 0 below)
+    rerankState.scores = new Map([["a", 0.8]]); // raw rerank score for the only candidate
+
+    const config = {
+      memory: {
+        v2: {
+          d: 0.0,
+          c_user: 0.5,
+          c_assistant: 0.5,
+          c_now: 0.0, // zero weight, so no NOW term appears in the expected A_o below
+          dense_weight: 1.0,
+          sparse_weight: 0.0,
+          rerank: {
+            enabled: true,
+            top_k: 50, // far larger than the single candidate, so "a" is always in the pool
+            alpha: 0.4,
+            model: "test-model",
+          },
+        },
+      },
+    } as unknown as AssistantConfig;
+
+    const out = await computeOwnActivation({
+      candidates: new Set(["a"]),
+      priorState: null,
+      userText: "u",
+      assistantText: "a",
+      nowText: "",
+      config,
+    });
+
+    const breakdown = out.breakdown.get("a");
+    // Raw fused similarities are reported as staged; the boost is not folded in.
+    expect(breakdown?.simUser).toBeCloseTo(0.5, 6);
+    expect(breakdown?.simAssistant).toBeCloseTo(0.4, 6);
+    // Both rerank deltas are alpha · r_norm = 0.4 · 1.0 = 0.4 (single
+    // candidate normalises to 1.0 in each channel).
+    expect(breakdown?.simUserRerankBoost).toBeCloseTo(0.4, 6);
+    expect(breakdown?.simAssistantRerankBoost).toBeCloseTo(0.4, 6);
+    // Final A_o = c_user·simU + c_assistant·simA + c_user·boostU + c_assistant·boostA
+    //           = 0.5·0.5 + 0.5·0.4 + 0.5·0.4 + 0.5·0.4 = 0.25+0.20+0.20+0.20 = 0.85
+    expect(out.activation.get("a")).toBeCloseTo(0.85, 6);
+  });
 });
 
 // ---------------------------------------------------------------------------