Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 226 additions & 0 deletions assistant/src/memory/v3/__tests__/retriever.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/**
* Route-assembly tests for the v3 retriever wiring in
* `handleCompareRetrievers` (`assistant/src/runtime/routes/memory-v2-routes.ts`).
*
* The compare route always includes the router retriever as comparand #1 and
* adds the v3 retriever as comparand #2 only when `config.memory.v3.enabled`.
* These tests exercise that gating end-to-end through the real handler and the
* real `runComparisonOverHistory`, with a fixture DB seeded with one logged
* router turn (mirroring `assistant/src/memory/v2/__tests__/harness-compare.test.ts`).
*
* Neither the real router nor the real v3 loop runs here — both would hit a
* provider. `../loop.js` (the v3 loop) and `../../v2/harness/router-retriever.js`
* are `mock.module`-stubbed to return deterministic selections, so the tests
* assert *which retrievers were assembled* (by the names in the report), not
* their retrieval quality. `loadConfig` is stubbed so each test controls
* `memory.v3.enabled`; `getWorkspaceDir` is stubbed to point at a nonexistent
* directory, while page-index is deliberately left unmocked and simply builds
* its index over that nonexistent workspace.
*/

import { beforeEach, describe, expect, mock, test } from "bun:test";

import type { AssistantConfig } from "../../../config/types.js";
import { getDb } from "../../db-connection.js";
import { initializeDb } from "../../db-init.js";
import type { MemoryV2ConceptRowRecord } from "../../memory-v2-activation-log-store.js";
import {
conversations,
memoryV2ActivationLogs,
messages,
} from "../../schema.js";
import type {
RetrievalInput,
RetrievalOutput,
} from "../../v2/harness/retriever.js";

// Initialize the database at module load, ahead of the mocks and seed helpers.
initializeDb();

/**
 * Build a logger stand-in: reading any property yields a fresh no-op function,
 * so every logging call made through it is silently discarded.
 */
function createSilentLogger(): Record<string, unknown> {
  const swallowEverything = { get: () => () => {} };
  return new Proxy({} as Record<string, unknown>, swallowEverything);
}

// Silence the route's logger by swapping the logger module for the stand-in.
mock.module("../../../util/logger.js", () => {
  return { getLogger: () => createSilentLogger() };
});

// Workspace path handed to the route via the stubbed `getWorkspaceDir`.
// loadNowText / page-index read workspace files; a nonexistent dir yields "".
const WORKSPACE = "/tmp/v3-retriever-nonexistent-workspace";

// Toggle read by the stubbed `loadConfig` on every call: each test sets it
// before invoking the handler.
let v3Enabled = false;

/**
 * Build the partial config returned by the stubbed `loadConfig`. Only the
 * `memory.v2` / `memory.v3` fields are populated; `memory.v3.enabled` reflects
 * the value of `v3Enabled` at call time.
 */
function buildStubConfig(): AssistantConfig {
  const memory = {
    v2: { enabled: true, router: { historical_pairs: 1 } },
    v3: { enabled: v3Enabled },
  };
  return { memory } as unknown as AssistantConfig;
}

mock.module("../../../config/loader.js", () => {
  return { loadConfig: (): AssistantConfig => buildStubConfig() };
});

mock.module("../../../util/platform.js", () => {
  const getWorkspaceDir = (): string => WORKSPACE;
  return { getWorkspaceDir };
});

// page-index is intentionally NOT mocked: it has a wide export surface
// (`invalidatePageIndex` etc.) that transitive importers in the route's
// dependency graph rely on, and `getPageIndex` over the nonexistent workspace
// returns a benign index. The retriever names are what we assert, not the
// page set, so the real (empty-ish) index is harmless here.

/**
 * Canned router selection used in place of the real router retriever (which
 * calls a provider): always selects `p1`, attributed to "router".
 */
async function stubRouterRetrieve(): Promise<RetrievalOutput> {
  return {
    selectedSlugs: ["p1"],
    sourceBySlug: new Map([["p1", "router"]]),
  };
}

// Stub the router retriever module with a factory that ignores its arguments.
mock.module("../../v2/harness/router-retriever.js", () => {
  const createRouterRetriever = () => ({
    name: "router",
    retrieve: stubRouterRetrieve,
  });
  return { createRouterRetriever };
});

/**
 * Canned v3 loop result: ignores its input and always selects `p2`,
 * attributed to "dense".
 */
async function stubRetrievalLoop(
  _input: RetrievalInput,
): Promise<RetrievalOutput> {
  return {
    selectedSlugs: ["p2"],
    sourceBySlug: new Map([["p2", "dense"]]),
  };
}

// Stub the v3 loop — the real one runs scout/filter/tree/edge/gate lanes that
// hit providers, embeddings, and the filesystem.
mock.module("../loop.js", () => {
  return { runRetrievalLoop: stubRetrievalLoop };
});

// Load the route module dynamically so it is evaluated only after the mocks
// are installed, then pull out the handler under test.
const routesModule = await import(
  "../../../runtime/routes/memory-v2-routes.js"
);
const { handleCompareRetrievers } = routesModule;

/**
 * All-zero config snapshot. It is JSON-serialized into the `configJson` column
 * of every activation log row written by `insertRouterLog`.
 */
const ZERO_CONFIG = {
d: 0,
c_user: 0,
c_assistant: 0,
c_now: 0,
k: 0,
hops: 0,
top_k: 0,
epsilon: 0,
};

/**
 * Counter used to mint a distinct `log-<n>` id for each seeded activation log;
 * incremented on every `insertRouterLog` call.
 */
let seq = 0;

/**
 * Make sure a `conversations` row with the given id exists.
 *
 * The row is inserted with zeroed timestamps; if the id is already present the
 * insert is skipped via `onConflictDoNothing`, so repeated calls are safe.
 *
 * @param id conversation id to insert if missing.
 */
function ensureConversation(id: string): void {
  const db = getDb();
  const insertion = db
    .insert(conversations)
    .values({ id, createdAt: 0, updatedAt: 0 });
  insertion.onConflictDoNothing().run();
}

/**
 * Insert one message row, creating its conversation first if needed.
 *
 * The message body is stored as a JSON-serialized array holding a single
 * `{ type: "text", text }` part.
 *
 * @param id message id.
 * @param conversationId owning conversation; ensured to exist before insert.
 * @param role value written to the `role` column.
 * @param text plain text wrapped into the serialized content.
 * @param createdAt value written to the `createdAt` column.
 */
function insertMessage(
  id: string,
  conversationId: string,
  role: string,
  text: string,
  createdAt: number,
): void {
  ensureConversation(conversationId);

  const db = getDb();
  const content = JSON.stringify([{ type: "text", text }]);
  db.insert(messages)
    .values({ id, conversationId, role, content, createdAt })
    .run();
}

/**
 * Build a concept row for a seeded activation log.
 *
 * Every numeric score is zero, `inRerankPool` is false and `source` is
 * "router"; only the slug and status vary between rows.
 *
 * @param slug concept slug for the row.
 * @param status status recorded on the row.
 * @returns the assembled concept record.
 */
function makeConcept(
  slug: string,
  status: MemoryV2ConceptRowRecord["status"],
): MemoryV2ConceptRowRecord {
  // Fields that are identical for every seeded concept. Property order is kept
  // the same as the flat literal so the serialized JSON does not change.
  const inertScores = {
    finalActivation: 0,
    ownActivation: 0,
    priorActivation: 0,
    simUser: 0,
    simAssistant: 0,
    simNow: 0,
    simUserRerankBoost: 0,
    simAssistantRerankBoost: 0,
    inRerankPool: false,
    spreadContribution: 0,
  };

  return { slug, ...inertScores, source: "router", status };
}

/**
 * Insert one router-mode activation log row, creating its conversation first
 * if needed.
 *
 * Each row gets a fresh `log-<n>` id from the module-level `seq` counter, an
 * empty skills list (`"[]"`), and `ZERO_CONFIG` as its serialized config.
 *
 * @param conversationId owning conversation; ensured to exist before insert.
 * @param messageId message the log row is attached to.
 * @param turn value written to the `turn` column.
 * @param concepts concept rows, JSON-serialized into `conceptsJson`.
 * @param createdAt value written to the `createdAt` column.
 */
function insertRouterLog(
  conversationId: string,
  messageId: string,
  turn: number,
  concepts: MemoryV2ConceptRowRecord[],
  createdAt: number,
): void {
  ensureConversation(conversationId);

  const db = getDb();
  const id = `log-${seq++}`;
  const conceptsJson = JSON.stringify(concepts);
  const configJson = JSON.stringify(ZERO_CONFIG);

  db.insert(memoryV2ActivationLogs)
    .values({
      id,
      conversationId,
      messageId,
      turn,
      mode: "router",
      conceptsJson,
      skillsJson: "[]",
      configJson,
      createdAt,
    })
    .run();
}

/**
 * Seed a single router turn in conversation `c1`: a user message (`u1`), the
 * assistant message (`a1`) that anchors turn 1, and a router log attached to
 * that anchor whose concepts are the given slugs, all marked "injected".
 *
 * @param groundTruth slugs recorded as the router's logged picks.
 */
function seedTurn(groundTruth: string[]): void {
  const conversationId = "c1";
  const anchorMessageId = "a1";

  insertMessage("u1", conversationId, "user", "hello", 10);
  insertMessage(anchorMessageId, conversationId, "assistant", "hi", 20);

  const loggedPicks: MemoryV2ConceptRowRecord[] = [];
  for (const slug of groundTruth) {
    loggedPicks.push(makeConcept(slug, "injected"));
  }
  insertRouterLog(conversationId, anchorMessageId, 1, loggedPicks, 20);
}

/**
 * Return the fixture to a clean slate between tests: delete all activation
 * logs, then all messages, and switch the `v3Enabled` toggle back off.
 * Conversation rows are left in place.
 */
function reset(): void {
  const handle = getDb();

  handle.delete(memoryV2ActivationLogs).run();
  handle.delete(messages).run();

  v3Enabled = false;
}

describe("handleCompareRetrievers v3 wiring", () => {
  /**
   * Invoke the compare handler with an empty body and return the retriever
   * names in the order they appear in the report.
   */
  async function assembledRetrieverNames(): Promise<string[]> {
    const report = await handleCompareRetrievers({ body: {} });
    return report.retrievers.map((entry) => entry.name);
  }

  beforeEach(reset);

  test("includes only router when memory.v3.enabled is false", async () => {
    seedTurn(["p1", "p2"]);

    const names = await assembledRetrieverNames();

    expect(names).toEqual(["router"]);
  });

  test("includes router and v3 when memory.v3.enabled is true", async () => {
    v3Enabled = true;
    seedTurn(["p1", "p2"]);

    const names = await assembledRetrieverNames();

    expect(names).toEqual(["router", "v3"]);
    // Router is always comparand #1; v3 joins as comparand #2.
    const [firstComparand, secondComparand] = names;
    expect(firstComparand).toBe("router");
    expect(secondComparand).toBe("v3");
  });
});
33 changes: 33 additions & 0 deletions assistant/src/memory/v3/retriever.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* v3 retriever — the multi-lane bounded-descent retrieval loop
* ({@link runRetrievalLoop}) adapted to the harness {@link Retriever}
* interface.
*
* This is the offline, zero-production-risk shadow path: the comparison harness
* replays historical oracle turns and scores v3's selection against the v2
* router's logged picks (recall@k). Nothing here runs on a live injection turn
* — the loop reads the DB handle for its hot lane but never mutates production
* state, matching the {@link Retriever} contract.
*/

import type { DrizzleDb } from "../db-connection.js";
import type {
RetrievalInput,
RetrievalOutput,
Retriever,
} from "../v2/harness/retriever.js";
import { runRetrievalLoop } from "./loop.js";

/**
 * Expose the v3 retrieval loop through the harness {@link Retriever} shape,
 * under the name "v3".
 *
 * @param db handle forwarded to {@link runRetrievalLoop} on every `retrieve`
 *   call, for the scout hot lane.
 * @returns a retriever whose `retrieve` delegates straight to the loop.
 */
export function createV3Retriever(db: DrizzleDb): Retriever {
  const retrieve = (input: RetrievalInput): Promise<RetrievalOutput> =>
    runRetrievalLoop(input, { db });

  return { name: "v3", retrieve };
}
13 changes: 12 additions & 1 deletion assistant/src/runtime/routes/memory-v2-routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
validateEdgeTargets,
} from "../../memory/v2/edge-index.js";
import { runComparisonOverHistory } from "../../memory/v2/harness/compare.js";
import type { Retriever } from "../../memory/v2/harness/retriever.js";
import { createRouterRetriever } from "../../memory/v2/harness/router-retriever.js";
import type { ComparisonReport } from "../../memory/v2/harness/runner.js";
import { computeInjectionScores } from "../../memory/v2/injection-events.js";
Expand All @@ -38,6 +39,7 @@ import {
import { ROUTER_PROMPT } from "../../memory/v2/prompts/router.js";
import { type RouterSource, runRouter } from "../../memory/v2/router.js";
import { seedV2SkillEntries } from "../../memory/v2/skill-store.js";
import { createV3Retriever } from "../../memory/v3/retriever.js";
import { getLogger } from "../../util/logger.js";
import { getWorkspaceDir } from "../../util/platform.js";
import { RouteError } from "./errors.js";
Expand Down Expand Up @@ -637,11 +639,20 @@ export async function handleCompareRetrievers({
const pageIndex = await getPageIndex(workspaceDir);
const db = getDb();

// The router is always comparand #1 (the harness self-test against its own
// logged ground truth). v3 joins as comparand #2 only when explicitly
// enabled, so the default compare surface is unchanged until v3 is switched
// on. v3 is offline-only here — the loop reads `db` but mutates nothing.
const retrievers: Retriever[] = [createRouterRetriever(db)];
if (config.memory.v3.enabled) {
retrievers.push(createV3Retriever(db));
}

return runComparisonOverHistory({
db,
workspaceDir,
config,
retrievers: [createRouterRetriever(db)],
retrievers,
ks: ks ?? DEFAULT_COMPARE_KS,
limit: limit ?? DEFAULT_COMPARE_LIMIT,
pageExists: (slug) => pageIndex.bySlug.has(slug),
Expand Down
Loading