From 806d4e86b9d42372b5ce876ccd3bbe354affc9b8 Mon Sep 17 00:00:00 2001 From: Vincent <0426vincent@gmail.com> Date: Tue, 24 Feb 2026 10:27:26 -0800 Subject: [PATCH] perf: disable LLM reranking for memory recall LLM reranking was calling Claude Haiku API on every message to re-score memory candidates, adding ~2.2s latency. The RRF merge already produces a well-ordered list from lexical, semantic, recency, and entity scores. Disabling reranking reduces memory recall from ~2.6s to ~700ms. Co-Authored-By: Claude Opus 4.6 --- assistant/src/config/defaults.ts | 2 +- assistant/src/config/schema.ts | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/assistant/src/config/defaults.ts b/assistant/src/config/defaults.ts index b7c834fd851..a288bae6bac 100644 --- a/assistant/src/config/defaults.ts +++ b/assistant/src/config/defaults.ts @@ -46,7 +46,7 @@ export const DEFAULT_CONFIG: AssistantConfig = { injectionFormat: 'markdown' as const, injectionStrategy: 'prepend_user_block' as const, reranking: { - enabled: true, + enabled: false, model: 'claude-haiku-4-5-20251001', topK: 20, }, diff --git a/assistant/src/config/schema.ts b/assistant/src/config/schema.ts index c72c40cbbce..99b1269f924 100644 --- a/assistant/src/config/schema.ts +++ b/assistant/src/config/schema.ts @@ -258,7 +258,7 @@ export const QdrantConfigSchema = z.object({ export const MemoryRerankingConfigSchema = z.object({ enabled: z .boolean({ error: 'memory.retrieval.reranking.enabled must be a boolean' }) - .default(true), + .default(false), model: z .string({ error: 'memory.retrieval.reranking.model must be a string' }) .default('claude-haiku-4-5-20251001'), @@ -384,7 +384,7 @@ export const MemoryRetrievalConfigSchema = z.object({ }) .default('prepend_user_block'), reranking: MemoryRerankingConfigSchema.default({ - enabled: true, + enabled: false, model: 'claude-haiku-4-5-20251001', topK: 20, }), @@ -628,7 +628,7 @@ export const MemoryConfigSchema = z.object({ injectionFormat: 'markdown', injectionStrategy: 'prepend_user_block', reranking: { - enabled: true, + enabled: false, model: 'claude-haiku-4-5-20251001', topK: 20, }, @@ -1203,7 +1203,7 @@ export const AssistantConfigSchema = z.object({ injectionFormat: 'markdown', injectionStrategy: 'prepend_user_block', reranking: { - enabled: true, + enabled: false, model: 'claude-haiku-4-5-20251001', topK: 20, },