abhigyanpatwari · magyargergo · Apr 27, 2026 · Apr 27, 2026
@@ -146,11 +146,9 @@ let vectorExtensionLoaded = false;
 
 /**
  * In-process cache of FTS indexes that have been ensured against the current
- * connection. Prevents repeated `CALL CREATE_FTS_INDEX` round-trips inside a
- * single CLI/MCP session — the first call to `ensureFTSIndex` for a given
- * `(tableName, indexName)` pays the LadybugDB cost (~440 ms even when the
- * index already exists on disk), subsequent calls are a Set lookup. Cleared
- * by `closeLbug` so a re-init starts fresh.
+ * writable connection. Prevents repeated `CALL CREATE_FTS_INDEX` round-trips
+ * for callers that explicitly opt into `ensureFTSIndex`. Cleared by
+ * `closeLbug` so a re-init starts fresh.
  *
  * Key format: `${tableName}:${indexName}`.
  */
@@ -1252,10 +1250,9 @@ export const createFTSIndex = async (
 /**
  * Lazy-create an FTS index, caching the fact in-process.
  *
- * Used by `queryFTS` so that `analyze` doesn't pay the ~440 ms × 5 fixed
- * LadybugDB cost up-front (it dominates analyze on small repos). Instead,
- * the cost is moved to the first `query`/`context` call in a session,
- * where it's amortised across many lookups.
+ * Kept for writable maintenance paths that need to lazily materialize an
+ * index. Read-only query paths must not call this; production analysis owns
+ * creating the configured search indexes before the database is served.
  *
  * Safe to call repeatedly — the in-process Set guarantees only the first
  * call hits LadybugDB. `closeLbug` clears the cache so re-init starts fresh.

@@ -21,6 +21,7 @@ import {
   closeLbug,
   loadCachedEmbeddings,
 } from './lbug/lbug-adapter.js';
+import { createSearchFTSIndexes } from './search/fts-indexes.js';
 import {
   getStoragePaths,
   saveMeta,
@@ -280,12 +281,9 @@ export async function runFullAnalysis(
     });
 
     // ── Phase 3: FTS (85–90%) ─────────────────────────────────────────
-    // FTS indexes are created lazily on first `query`/`context` call instead
-    // of eagerly here. On small repos / CI runners the LadybugDB
-    // CREATE_FTS_INDEX cost is ~440 ms × 5 (≈2 s) regardless of table size,
-    // which dominated `analyze` runtime and pushed Windows CI past its
-    // 30 s test budget. Lazy creation is implemented in
-    // `core/search/bm25-index.ts` via `ensureFTSIndex`.
+    progress('fts', 85, 'Creating search indexes...');
+    await createSearchFTSIndexes();
+    progress('fts', 90, 'Search indexes ready');
 
     // ── Phase 3.5: Re-insert cached embeddings ────────────────────────
     if (cachedEmbeddings.length > 0) {

@@ -3,15 +3,10 @@
  *
  * Uses LadybugDB's built-in full-text search indexes for keyword-based search.
  * Always reads from the database (no cached state to drift).
- *
- * FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
- * `lbug-adapter.ts` for the rationale. This keeps `analyze` fast (the
- * ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
- * small repos / CI runners) at the cost of paying that overhead on the
- * first `query`/`context` call in a session.
  */
 
-import { queryFTS, ensureFTSIndex } from '../lbug/lbug-adapter.js';
+import { queryFTS } from '../lbug/lbug-adapter.js';
+import { FTS_INDEXES } from './fts-schema.js';
 
 export interface BM25SearchResult {
   filePath: string;
@@ -20,104 +15,6 @@ export interface BM25SearchResult {
   nodeIds?: string[];
 }
 
-/**
- * FTS schema served by `searchFTSFromLbug`. Centralised so that both the
- * CLI/pipeline path and the MCP pool path use identical (table, index,
- * properties) tuples and the lazy-create logic stays in one place.
- */
-const FTS_INDEXES: ReadonlyArray<{
-  table: string;
-  indexName: string;
-  properties: readonly string[];
-}> = [
-  { table: 'File', indexName: 'file_fts', properties: ['name', 'content'] },
-  { table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] },
-  { table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] },
-  { table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] },
-  { table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] },
-];
-
-/**
- * Per-process cache for the MCP pool path: tracks which `(repoId, table)`
- * pairs have been ensured. The CLI/pipeline path gets its own cache inside
- * `lbug-adapter.ts` keyed by table/index, scoped to the singleton connection.
- *
- * IMPORTANT: an entry is added ONLY when the index was confirmed to exist
- * (CREATE_FTS_INDEX succeeded, or failed with `'already exists'`). Other
- * failures (transient lock errors, missing extension, etc.) leave the key
- * unset so the next query retries instead of silently caching the failure.
- *
- * Entries for a given repoId are invalidated when its pool is closed —
- * see the `addPoolCloseListener` registration in `searchFTSFromLbug`.
- */
-const ensuredPoolFTS = new Set<string>();
-
-/**
- * Drop all ensured-FTS cache entries for a given repoId.
- *
- * Called from the pool-close listener so that a pool teardown / recreation
- * forces the next `searchFTSFromLbug` call to re-issue `CREATE_FTS_INDEX`
- * against the fresh connection rather than trust stale ensure-state from a
- * previous pool lifetime.
- *
- * Exported for tests; the listener wiring is internal.
- */
-export function invalidateEnsuredFTSForRepo(repoId: string): void {
-  const prefix = `${repoId}:`;
-  for (const key of ensuredPoolFTS) {
-    if (key.startsWith(prefix)) ensuredPoolFTS.delete(key);
-  }
-}
-
-/**
- * Tracks whether we've already wired the pool-close listener for this
- * process. The pool adapter is dynamically imported, so registration
- * happens lazily on the first MCP-pool-backed FTS query.
- */
-let poolCloseListenerRegistered = false;
-function registerPoolCloseListenerOnce(
-  addPoolCloseListener: (listener: (repoId: string) => void) => void,
-): void {
-  if (poolCloseListenerRegistered) return;
-  poolCloseListenerRegistered = true;
-  addPoolCloseListener((repoId) => invalidateEnsuredFTSForRepo(repoId));
-}
-
-async function ensureFTSIndexViaExecutor(
-  executor: (cypher: string) => Promise<any[]>,
-  repoId: string,
-  table: string,
-  indexName: string,
-  properties: readonly string[],
-): Promise<void> {
-  const key = `${repoId}:${table}:${indexName}`;
-  if (ensuredPoolFTS.has(key)) return;
-  const propList = properties.map((p) => `'${p}'`).join(', ');
-  try {
-    await executor(
-      `CALL CREATE_FTS_INDEX('${table}', '${indexName}', [${propList}], stemmer := 'porter')`,
-    );
-    // Index was created successfully — safe to cache.
-    ensuredPoolFTS.add(key);
-  } catch (e: any) {
-    // 'already exists' is the happy path (index persists on disk between
-    // process invocations) — cache it. Anything else is treated as a
-    // transient failure: surface a one-time warning and leave the key
-    // unset so the NEXT query retries rather than silently using a
-    // cached failure (which previously disabled BM25 for the whole
-    // process for that repo).
-    const msg = String(e?.message ?? '');
-    if (msg.includes('already exists')) {
-      ensuredPoolFTS.add(key);
-    } else {
-      console.warn(
-        `[gitnexus] FTS index ensure failed for repo "${repoId}" table "${table}" ` +
-          `(index "${indexName}"): ${msg || e}. Will retry on next query.`,
-      );
-    }
-  }
-}
-
 /**
  * Execute a single FTS query via a custom executor (for MCP connection pool).
  * Returns the same shape as core queryFTS (from LadybugDB adapter).
@@ -169,58 +66,24 @@ export const searchFTSFromLbug = async (
   limit: number = 20,
   repoId?: string,
 ): Promise<BM25SearchResult[]> => {
-  let fileResults: any[],
-    functionResults: any[],
-    classResults: any[],
-    methodResults: any[],
-    interfaceResults: any[];
+  const resultsByIndex: any[][] = [];
 
   if (repoId) {
     // Use MCP connection pool via dynamic import
     // IMPORTANT: FTS queries run sequentially to avoid connection contention.
     // The MCP pool supports multiple connections, but FTS is best run serially.
     const poolMod = await import('../lbug/pool-adapter.js');
-    const { executeQuery, addPoolCloseListener } = poolMod;
-    // Register the pool-close listener lazily on first use so a teardown of
-    // the pool entry (LRU eviction, idle timeout, explicit close) drops the
-    // matching `ensuredPoolFTS` entries. Without this, stale ensure-state
-    // can outlive the pool that produced it.
-    registerPoolCloseListenerOnce(addPoolCloseListener);
+    const { executeQuery } = poolMod;
     const executor = (cypher: string) => executeQuery(repoId, cypher);
 
-    // Lazy-create FTS indexes on first query for this repo (analyze no longer
-    // creates them up-front, so we ensure them here). Cached per-process.
-    for (const { table, indexName, properties } of FTS_INDEXES) {
-      await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties);
+    for (const { table, indexName } of FTS_INDEXES) {
+      resultsByIndex.push(await queryFTSViaExecutor(executor, table, indexName, query, limit));
     }
-
-    fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit);
-    functionResults = await queryFTSViaExecutor(executor, 'Function', 'function_fts', query, limit);
-    classResults = await queryFTSViaExecutor(executor, 'Class', 'class_fts', query, limit);
-    methodResults = await queryFTSViaExecutor(executor, 'Method', 'method_fts', query, limit);
-    interfaceResults = await queryFTSViaExecutor(
-      executor,
-      'Interface',
-      'interface_fts',
-      query,
-      limit,
-    );
   } else {
     // Use core lbug adapter (CLI / pipeline context) — also sequential for safety.
-    // Lazy-create FTS indexes on first query (analyze no longer does it).
-    for (const { table, indexName, properties } of FTS_INDEXES) {
-      await ensureFTSIndex(table, indexName, [...properties]).catch(() => {});
+    for (const { table, indexName } of FTS_INDEXES) {
+      resultsByIndex.push(await queryFTS(table, indexName, query, limit, false).catch(() => []));
     }
-
-    fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []);
-    functionResults = await queryFTS('Function', 'function_fts', query, limit, false).catch(
-      () => [],
-    );
-    classResults = await queryFTS('Class', 'class_fts', query, limit, false).catch(() => []);
-    methodResults = await queryFTS('Method', 'method_fts', query, limit, false).catch(() => []);
-    interfaceResults = await queryFTS('Interface', 'interface_fts', query, limit, false).catch(
-      () => [],
-    );
   }
 
   // Collect all node scores per filePath to track which nodes actually matched
@@ -233,11 +96,7 @@ export const searchFTSFromLbug = async (
     }
   };
 
-  addResults(fileResults);
-  addResults(functionResults);
-  addResults(classResults);
-  addResults(methodResults);
-  addResults(interfaceResults);
+  for (const results of resultsByIndex) addResults(results);
 
   // Sum the top-3 highest-scoring nodes per file and collect their nodeIds.
   // Summing all nodes naively inflates scores for files with many mediocre

@@ -0,0 +1,8 @@
+import { createFTSIndex } from '../lbug/lbug-adapter.js';
+import { FTS_INDEXES } from './fts-schema.js';
+
+export async function createSearchFTSIndexes(): Promise<void> {
+  for (const { table, indexName, properties } of FTS_INDEXES) {
+    await createFTSIndex(table, indexName, [...properties]);
+  }
+}
@@ -0,0 +1,13 @@
+export interface FTSIndexDefinition {
+  readonly table: string;
+  readonly indexName: string;
+  readonly properties: readonly string[];
+}
+
+export const FTS_INDEXES: readonly FTSIndexDefinition[] = [
+  { table: 'File', indexName: 'file_fts', properties: ['name', 'content'] },
+  { table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] },
+  { table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] },
+  { table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] },
+  { table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] },
+];
@@ -96,15 +96,10 @@ withTestLbugDB(
       it('query tool returns results for keyword search', async () => {
         const result = await backend.callTool('query', { query: 'login' });
         expect(result).not.toHaveProperty('error');
-        // Should have some combination of processes, process_symbols, or definitions
         expect(result).toHaveProperty('processes');
         expect(result).toHaveProperty('definitions');
-        // The search should find something (FTS or graph-based)
-        const totalResults =
-          (result.processes?.length || 0) +
-          (result.process_symbols?.length || 0) +
-          (result.definitions?.length || 0);
-        expect(totalResults).toBeGreaterThanOrEqual(1);
+        expect(result.processes.map((p: any) => p.id)).toContain('proc:login-flow');
+        expect(result.process_symbols.map((s: any) => s.id)).toContain('func:login');
 
         // #553: query response carries per-phase timing metadata.
         expect(result.timing).toBeDefined();