diff --git a/gitnexus/src/core/lbug/lbug-adapter.ts b/gitnexus/src/core/lbug/lbug-adapter.ts index 63d1b3db1c..ed454e6e0d 100644 --- a/gitnexus/src/core/lbug/lbug-adapter.ts +++ b/gitnexus/src/core/lbug/lbug-adapter.ts @@ -146,11 +146,9 @@ let vectorExtensionLoaded = false; /** * In-process cache of FTS indexes that have been ensured against the current - * connection. Prevents repeated `CALL CREATE_FTS_INDEX` round-trips inside a - * single CLI/MCP session — the first call to `ensureFTSIndex` for a given - * `(tableName, indexName)` pays the LadybugDB cost (~440 ms even when the - * index already exists on disk), subsequent calls are a Set lookup. Cleared - * by `closeLbug` so a re-init starts fresh. + * writable connection. Prevents repeated `CALL CREATE_FTS_INDEX` round-trips + * for callers that explicitly opt into `ensureFTSIndex`. Cleared by + * `closeLbug` so a re-init starts fresh. * * Key format: `${tableName}:${indexName}`. */ @@ -1252,10 +1250,9 @@ export const createFTSIndex = async ( /** * Lazy-create an FTS index, caching the fact in-process. * - * Used by `queryFTS` so that `analyze` doesn't pay the ~440 ms × 5 fixed - * LadybugDB cost up-front (it dominates analyze on small repos). Instead, - * the cost is moved to the first `query`/`context` call in a session, - * where it's amortised across many lookups. + * Kept for writable maintenance paths that need to lazily materialize an + * index. Read-only query paths must not call this; production analysis owns + * creating the configured search indexes before the database is served. * * Safe to call repeatedly — the in-process Set guarantees only the first * call hits LadybugDB. `closeLbug` clears the cache so re-init starts fresh. diff --git a/gitnexus/src/core/run-analyze.ts b/gitnexus/src/core/run-analyze.ts index 6198746e6c..20d50a4ce9 100644 --- a/gitnexus/src/core/run-analyze.ts +++ b/gitnexus/src/core/run-analyze.ts @@ -21,6 +21,7 @@ import { closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js'; +import { createSearchFTSIndexes } from './search/fts-indexes.js'; import { getStoragePaths, saveMeta, @@ -280,12 +281,9 @@ export async function runFullAnalysis( }); // ── Phase 3: FTS (85–90%) ───────────────────────────────────────── - // FTS indexes are created lazily on first `query`/`context` call instead - // of eagerly here. On small repos / CI runners the LadybugDB - // CREATE_FTS_INDEX cost is ~440 ms × 5 (≈2 s) regardless of table size, - // which dominated `analyze` runtime and pushed Windows CI past its - // 30 s test budget. Lazy creation is implemented in - // `core/search/bm25-index.ts` via `ensureFTSIndex`. + progress('fts', 85, 'Creating search indexes...'); + await createSearchFTSIndexes(); + progress('fts', 90, 'Search indexes ready'); // ── Phase 3.5: Re-insert cached embeddings ──────────────────────── if (cachedEmbeddings.length > 0) { diff --git a/gitnexus/src/core/search/bm25-index.ts b/gitnexus/src/core/search/bm25-index.ts index d32bce403a..58c0343c96 100644 --- a/gitnexus/src/core/search/bm25-index.ts +++ b/gitnexus/src/core/search/bm25-index.ts @@ -3,15 +3,10 @@ * * Uses LadybugDB's built-in full-text search indexes for keyword-based search. * Always reads from the database (no cached state to drift). - * - * FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see - * `lbug-adapter.ts` for the rationale. This keeps `analyze` fast (the - * ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on - * small repos / CI runners) at the cost of paying that overhead on the - * first `query`/`context` call in a session. */ -import { queryFTS, ensureFTSIndex } from '../lbug/lbug-adapter.js'; +import { queryFTS } from '../lbug/lbug-adapter.js'; +import { FTS_INDEXES } from './fts-schema.js'; export interface BM25SearchResult { filePath: string; @@ -20,104 +15,6 @@ export interface BM25SearchResult { nodeIds?: string[]; } -/** - * FTS schema served by `searchFTSFromLbug`. Centralised so that both the - * CLI/pipeline path and the MCP pool path use identical (table, index, - * properties) tuples and the lazy-create logic stays in one place. - */ -const FTS_INDEXES: ReadonlyArray<{ - table: string; - indexName: string; - properties: readonly string[]; -}> = [ - { table: 'File', indexName: 'file_fts', properties: ['name', 'content'] }, - { table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] }, - { table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] }, - { table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] }, - { table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] }, -]; - -/** - * Per-process cache for the MCP pool path: tracks which `(repoId, table)` - * pairs have been ensured. The CLI/pipeline path gets its own cache inside - * `lbug-adapter.ts` keyed by table/index, scoped to the singleton connection. - * - * IMPORTANT: an entry is added ONLY when the index was confirmed to exist - * (CREATE_FTS_INDEX succeeded, or failed with `'already exists'`). Other - * failures (transient lock errors, missing extension, etc.) leave the key - * unset so the next query retries instead of silently caching the failure. - * - * Entries for a given repoId are invalidated when its pool is closed — - * see the `addPoolCloseListener` registration in `searchFTSFromLbug`. - */ -const ensuredPoolFTS = new Set(); - -/** - * Drop all ensured-FTS cache entries for a given repoId. - * - * Called from the pool-close listener so that a pool teardown / recreation - * forces the next `searchFTSFromLbug` call to re-issue `CREATE_FTS_INDEX` - * against the fresh connection rather than trust stale ensure-state from a - * previous pool lifetime. - * - * Exported for tests; the listener wiring is internal. - */ -export function invalidateEnsuredFTSForRepo(repoId: string): void { - const prefix = `${repoId}:`; - for (const key of ensuredPoolFTS) { - if (key.startsWith(prefix)) ensuredPoolFTS.delete(key); - } -} - -/** - * Tracks whether we've already wired the pool-close listener for this - * process. The pool adapter is dynamically imported, so registration - * happens lazily on the first MCP-pool-backed FTS query. - */ -let poolCloseListenerRegistered = false; -function registerPoolCloseListenerOnce( - addPoolCloseListener: (listener: (repoId: string) => void) => void, -): void { - if (poolCloseListenerRegistered) return; - poolCloseListenerRegistered = true; - addPoolCloseListener((repoId) => invalidateEnsuredFTSForRepo(repoId)); -} - -async function ensureFTSIndexViaExecutor( - executor: (cypher: string) => Promise, - repoId: string, - table: string, - indexName: string, - properties: readonly string[], -): Promise { - const key = `${repoId}:${table}:${indexName}`; - if (ensuredPoolFTS.has(key)) return; - const propList = properties.map((p) => `'${p}'`).join(', '); - try { - await executor( - `CALL CREATE_FTS_INDEX('${table}', '${indexName}', [${propList}], stemmer := 'porter')`, - ); - // Index was created successfully — safe to cache. - ensuredPoolFTS.add(key); - } catch (e: any) { - // 'already exists' is the happy path (index persists on disk between - // process invocations) — cache it. Anything else is treated as a - // transient failure: surface a one-time warning and leave the key - // unset so the NEXT query retries rather than silently using a - // cached failure (which previously disabled BM25 for the whole - // process for that repo). - const msg = String(e?.message ?? ''); - if (msg.includes('already exists')) { - ensuredPoolFTS.add(key); - } else { - console.warn( - `[gitnexus] FTS index ensure failed for repo "${repoId}" table "${table}" ` + - `(index "${indexName}"): ${msg || e}. Will retry on next query.`, - ); - } - } -} - /** * Execute a single FTS query via a custom executor (for MCP connection pool). * Returns the same shape as core queryFTS (from LadybugDB adapter). @@ -169,58 +66,24 @@ export const searchFTSFromLbug = async ( limit: number = 20, repoId?: string, ): Promise => { - let fileResults: any[], - functionResults: any[], - classResults: any[], - methodResults: any[], - interfaceResults: any[]; + const resultsByIndex: any[][] = []; if (repoId) { // Use MCP connection pool via dynamic import // IMPORTANT: FTS queries run sequentially to avoid connection contention. // The MCP pool supports multiple connections, but FTS is best run serially. const poolMod = await import('../lbug/pool-adapter.js'); - const { executeQuery, addPoolCloseListener } = poolMod; - // Register the pool-close listener lazily on first use so a teardown of - // the pool entry (LRU eviction, idle timeout, explicit close) drops the - // matching `ensuredPoolFTS` entries. Without this, stale ensure-state - // can outlive the pool that produced it. - registerPoolCloseListenerOnce(addPoolCloseListener); + const { executeQuery } = poolMod; const executor = (cypher: string) => executeQuery(repoId, cypher); - // Lazy-create FTS indexes on first query for this repo (analyze no longer - // creates them up-front, so we ensure them here). Cached per-process. - for (const { table, indexName, properties } of FTS_INDEXES) { - await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties); + for (const { table, indexName } of FTS_INDEXES) { + resultsByIndex.push(await queryFTSViaExecutor(executor, table, indexName, query, limit)); } - - fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit); - functionResults = await queryFTSViaExecutor(executor, 'Function', 'function_fts', query, limit); - classResults = await queryFTSViaExecutor(executor, 'Class', 'class_fts', query, limit); - methodResults = await queryFTSViaExecutor(executor, 'Method', 'method_fts', query, limit); - interfaceResults = await queryFTSViaExecutor( - executor, - 'Interface', - 'interface_fts', - query, - limit, - ); } else { // Use core lbug adapter (CLI / pipeline context) — also sequential for safety. - // Lazy-create FTS indexes on first query (analyze no longer does it). - for (const { table, indexName, properties } of FTS_INDEXES) { - await ensureFTSIndex(table, indexName, [...properties]).catch(() => {}); + for (const { table, indexName } of FTS_INDEXES) { + resultsByIndex.push(await queryFTS(table, indexName, query, limit, false).catch(() => [])); } - - fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []); - functionResults = await queryFTS('Function', 'function_fts', query, limit, false).catch( - () => [], - ); - classResults = await queryFTS('Class', 'class_fts', query, limit, false).catch(() => []); - methodResults = await queryFTS('Method', 'method_fts', query, limit, false).catch(() => []); - interfaceResults = await queryFTS('Interface', 'interface_fts', query, limit, false).catch( - () => [], - ); } // Collect all node scores per filePath to track which nodes actually matched @@ -233,11 +96,7 @@ export const searchFTSFromLbug = async ( } }; - addResults(fileResults); - addResults(functionResults); - addResults(classResults); - addResults(methodResults); - addResults(interfaceResults); + for (const results of resultsByIndex) addResults(results); // Sum the top-3 highest-scoring nodes per file and collect their nodeIds. // Summing all nodes naively inflates scores for files with many mediocre diff --git a/gitnexus/src/core/search/fts-indexes.ts b/gitnexus/src/core/search/fts-indexes.ts new file mode 100644 index 0000000000..5dfbde647c --- /dev/null +++ b/gitnexus/src/core/search/fts-indexes.ts @@ -0,0 +1,8 @@ +import { createFTSIndex } from '../lbug/lbug-adapter.js'; +import { FTS_INDEXES } from './fts-schema.js'; + +export async function createSearchFTSIndexes(): Promise { + for (const { table, indexName, properties } of FTS_INDEXES) { + await createFTSIndex(table, indexName, [...properties]); + } +} diff --git a/gitnexus/src/core/search/fts-schema.ts b/gitnexus/src/core/search/fts-schema.ts new file mode 100644 index 0000000000..404a320ede --- /dev/null +++ b/gitnexus/src/core/search/fts-schema.ts @@ -0,0 +1,13 @@ +export interface FTSIndexDefinition { + readonly table: string; + readonly indexName: string; + readonly properties: readonly string[]; +} + +export const FTS_INDEXES: readonly FTSIndexDefinition[] = [ + { table: 'File', indexName: 'file_fts', properties: ['name', 'content'] }, + { table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] }, + { table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] }, + { table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] }, + { table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] }, +]; diff --git a/gitnexus/test/integration/local-backend-calltool.test.ts b/gitnexus/test/integration/local-backend-calltool.test.ts index 27e6550cc7..fa6a088582 100644 --- a/gitnexus/test/integration/local-backend-calltool.test.ts +++ b/gitnexus/test/integration/local-backend-calltool.test.ts @@ -96,15 +96,10 @@ withTestLbugDB( it('query tool returns results for keyword search', async () => { const result = await backend.callTool('query', { query: 'login' }); expect(result).not.toHaveProperty('error'); - // Should have some combination of processes, process_symbols, or definitions expect(result).toHaveProperty('processes'); expect(result).toHaveProperty('definitions'); - // The search should find something (FTS or graph-based) - const totalResults = - (result.processes?.length || 0) + - (result.process_symbols?.length || 0) + - (result.definitions?.length || 0); - expect(totalResults).toBeGreaterThanOrEqual(1); + expect(result.processes.map((p: any) => p.id)).toContain('proc:login-flow'); + expect(result.process_symbols.map((s: any) => s.id)).toContain('func:login'); // #553: query response carries per-phase timing metadata. expect(result.timing).toBeDefined(); diff --git a/gitnexus/test/unit/bm25-search.test.ts b/gitnexus/test/unit/bm25-search.test.ts index 1317430995..9b232688fe 100644 --- a/gitnexus/test/unit/bm25-search.test.ts +++ b/gitnexus/test/unit/bm25-search.test.ts @@ -1,35 +1,46 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { - searchFTSFromLbug, - invalidateEnsuredFTSForRepo, - type BM25SearchResult, -} from '../../src/core/search/bm25-index.js'; +import { searchFTSFromLbug, type BM25SearchResult } from '../../src/core/search/bm25-index.js'; vi.mock('../../src/core/lbug/lbug-adapter.js', async (importOriginal) => { const actual = await importOriginal(); return { ...actual, queryFTS: vi.fn().mockResolvedValue([]), + createFTSIndex: vi.fn().mockResolvedValue(undefined), }; }); // Pool adapter is dynamically imported by the MCP-pool path of -// `searchFTSFromLbug`. We mock it so we can drive the executor and the -// pool-close listener without spinning up a real LadybugDB pool. -const poolCloseListeners: Array<(repoId: string) => void> = []; +// `searchFTSFromLbug`. We mock it so we can drive the executor without +// spinning up a real LadybugDB pool. const mockExecuteQuery = vi.fn(); vi.mock('../../src/core/lbug/pool-adapter.js', () => ({ executeQuery: (repoId: string, cypher: string) => mockExecuteQuery(repoId, cypher), - addPoolCloseListener: (listener: (repoId: string) => void) => { - poolCloseListeners.push(listener); - return () => { - const idx = poolCloseListeners.indexOf(listener); - if (idx !== -1) poolCloseListeners.splice(idx, 1); - }; - }, + addPoolCloseListener: vi.fn(), })); describe('BM25 search', () => { + describe('createSearchFTSIndexes', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('creates the configured indexes on the writable analysis path', async () => { + const { createFTSIndex } = await import('../../src/core/lbug/lbug-adapter.js'); + const { createSearchFTSIndexes } = await import('../../src/core/search/fts-indexes.js'); + + await createSearchFTSIndexes(); + + expect(vi.mocked(createFTSIndex).mock.calls).toEqual([ + ['File', 'file_fts', ['name', 'content']], + ['Function', 'function_fts', ['name', 'content']], + ['Class', 'class_fts', ['name', 'content']], + ['Method', 'method_fts', ['name', 'content']], + ['Interface', 'interface_fts', ['name', 'content']], + ]); + }); + }); + describe('searchFTSFromLbug', () => { it('returns empty array when LadybugDB is not initialized', async () => { // Without LadybugDB init, search should return empty (not crash) @@ -190,125 +201,50 @@ describe('BM25 search', () => { }); }); - describe('ensureFTS cache (MCP pool path)', () => { - const REPO = 'test-repo-fts-cache'; + describe('MCP pool path', () => { + const REPO = 'test-repo-readonly-fts'; beforeEach(() => { - // Clean state so cases don't bleed into each other. mockExecuteQuery.mockReset(); - invalidateEnsuredFTSForRepo(REPO); - // Suppress the surfaced warn so test output stays readable. - vi.spyOn(console, 'warn').mockImplementation(() => {}); }); - it('does NOT cache a transient CREATE_FTS_INDEX failure — second call retries', async () => { - // First call: every CREATE_FTS_INDEX fails transiently; QUERY_FTS_INDEX returns nothing. + it('queries existing FTS indexes without issuing CREATE_FTS_INDEX', async () => { mockExecuteQuery.mockImplementation(async (_repo: string, cypher: string) => { if (cypher.includes('CREATE_FTS_INDEX')) { - throw new Error('transient lock error: Could not set lock'); + throw new Error('query path must stay read-only'); } - return []; - }); - - const r1 = await searchFTSFromLbug('anything', 5, REPO); - expect(Array.isArray(r1)).toBe(true); - - const createCallsAfterFirst = mockExecuteQuery.mock.calls.filter((c) => - String(c[1]).includes('CREATE_FTS_INDEX'), - ).length; - // 5 FTS index tables — all five attempted on first call. - expect(createCallsAfterFirst).toBe(5); - // Second call: CREATE succeeds this time. The bug being fixed: if the - // first failure was cached, we'd see ZERO additional CREATE calls. - mockExecuteQuery.mockReset(); - mockExecuteQuery.mockResolvedValue([]); - - await searchFTSFromLbug('anything', 5, REPO); - - const createCallsOnRetry = mockExecuteQuery.mock.calls.filter((c) => - String(c[1]).includes('CREATE_FTS_INDEX'), - ).length; - expect(createCallsOnRetry).toBe(5); - }); - - it("treats 'already exists' as success and caches it (no retry on second call)", async () => { - mockExecuteQuery.mockImplementation(async (_repo: string, cypher: string) => { - if (cypher.includes('CREATE_FTS_INDEX')) { - throw new Error("Catalog exception: index 'file_fts' already exists"); + if (cypher.includes("QUERY_FTS_INDEX('Function'")) { + return [{ node: { filePath: 'src/auth.ts', id: 'func:login' }, score: 8 }]; } return []; }); - await searchFTSFromLbug('anything', 5, REPO); - mockExecuteQuery.mockReset(); - mockExecuteQuery.mockResolvedValue([]); + const results = await searchFTSFromLbug('login', 5, REPO); - await searchFTSFromLbug('anything', 5, REPO); - - const createCallsOnSecond = mockExecuteQuery.mock.calls.filter((c) => - String(c[1]).includes('CREATE_FTS_INDEX'), - ).length; - expect(createCallsOnSecond).toBe(0); - }); - - it('invalidateEnsuredFTSForRepo drops cached entries so next call re-issues CREATE', async () => { - // Prime the cache with successful creates. - mockExecuteQuery.mockResolvedValue([]); - await searchFTSFromLbug('anything', 5, REPO); - - mockExecuteQuery.mockReset(); - mockExecuteQuery.mockResolvedValue([]); - - // Without invalidation: no re-CREATE. - await searchFTSFromLbug('anything', 5, REPO); + expect(results).toEqual([ + { filePath: 'src/auth.ts', score: 8, rank: 1, nodeIds: ['func:login'] }, + ]); expect( - mockExecuteQuery.mock.calls.filter((c) => String(c[1]).includes('CREATE_FTS_INDEX')).length, - ).toBe(0); - - // After invalidation: next call re-issues CREATE for all 5 tables. - invalidateEnsuredFTSForRepo(REPO); - mockExecuteQuery.mockReset(); - mockExecuteQuery.mockResolvedValue([]); - await searchFTSFromLbug('anything', 5, REPO); - expect( - mockExecuteQuery.mock.calls.filter((c) => String(c[1]).includes('CREATE_FTS_INDEX')).length, - ).toBe(5); + mockExecuteQuery.mock.calls.some((c) => String(c[1]).includes('CREATE_FTS_INDEX')), + ).toBe(false); }); - it('a pool-close listener fired by the pool adapter invalidates this repo only', async () => { - const OTHER = 'other-repo'; - - mockExecuteQuery.mockResolvedValue([]); - // Prime both repos. - await searchFTSFromLbug('anything', 5, REPO); - await searchFTSFromLbug('anything', 5, OTHER); - - // Confirm at least one listener was registered by the search module. - expect(poolCloseListeners.length).toBeGreaterThanOrEqual(1); - - // Simulate the pool adapter closing REPO. - for (const l of poolCloseListeners) l(REPO); - - mockExecuteQuery.mockReset(); + it('uses the configured FTS query set on every call', async () => { mockExecuteQuery.mockResolvedValue([]); await searchFTSFromLbug('anything', 5, REPO); - const createForRepo = mockExecuteQuery.mock.calls.filter( - (c) => c[0] === REPO && String(c[1]).includes('CREATE_FTS_INDEX'), - ).length; - expect(createForRepo).toBe(5); - - // OTHER repo's cache must remain intact — no re-CREATE for it. - mockExecuteQuery.mockReset(); - mockExecuteQuery.mockResolvedValue([]); - await searchFTSFromLbug('anything', 5, OTHER); - const createForOther = mockExecuteQuery.mock.calls.filter( - (c) => c[0] === OTHER && String(c[1]).includes('CREATE_FTS_INDEX'), - ).length; - expect(createForOther).toBe(0); - invalidateEnsuredFTSForRepo(OTHER); + const queryCalls = mockExecuteQuery.mock.calls.filter((c) => + String(c[1]).includes('QUERY_FTS_INDEX'), + ); + expect(queryCalls.map((c) => String(c[1]).match(/QUERY_FTS_INDEX\('([^']+)'/)?.[1])).toEqual([ + 'File', + 'Function', + 'Class', + 'Method', + 'Interface', + ]); }); }); });