From a93af0eeb43a1869dc3df0addcf793616bc817e3 Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 05:59:02 +0000 Subject: [PATCH 1/8] fix(embeddings): create VECTOR index via conn.query, not the prepared path (#2114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `gitnexus analyze` generated embeddings but silently failed to create the LadybugDB VECTOR/HNSW index, degrading semantic search to exact-scan and recording `vectorSearch.status: exact-scan` in meta.json — even where the VECTOR extension was available and the identical `CALL CREATE_VECTOR_INDEX(...)` succeeded when run manually via `conn.query()`. Root cause: `CALL CREATE_VECTOR_INDEX(...)` compiles to multiple statements, so LadybugDB cannot run it through `conn.prepare()` ("We do not support prepare multiple statements"). The embedding pipeline's `createVectorIndex` ran it through the injected `executeQuery`, which routes to `executePrepared` -> `conn.prepare()`. The throw was swallowed (dev-only `logger.warn({ error }, ...)` — and an Error logged under the non-`err` key serializes to `{}`, the reporter's mysterious `{"error":{}}`), so analyze fell back to exact-scan. FTS index creation survived because `createFTSIndex` uses `conn.query()`; the singleton `executeQuery` was switched to the prepared path in #1655, breaking VECTOR but not FTS. The read path (`CALL QUERY_VECTOR_INDEX`) prepares fine, so semantic search itself was unaffected — only index creation. Fix: add an adapter-owned `createVectorIndex()` that runs the procedure via `conn.query()` (mirroring `createFTSIndex`), idempotent on "already exists" so incremental re-runs don't spuriously downgrade. The pipeline keeps its extension-install-policy gate, delegates creation to the adapter, and now logs failures via `{ err: error }` without the dev gate so a future degrade is visible. Both `runEmbeddingPipeline` callers use the same singleton writable connection, so the single adapter change fixes both analyze and the server path. Tests: real-`@ladybugdb/core` regression test in lbug-vector-extension.test.ts (asserts SHOW_INDEXES reports code_embedding_idx/HNSW, idempotency, and that the prepared executeQuery path rejects); updated embedding-pipeline.test.ts unit mocks to the new contract. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/core/embeddings/embedding-pipeline.ts | 48 ++++++------ gitnexus/src/core/lbug/lbug-adapter.ts | 39 ++++++++++ .../integration/lbug-vector-extension.test.ts | 75 ++++++++++++++++++- gitnexus/test/unit/embedding-pipeline.test.ts | 25 +++++-- 4 files changed, 159 insertions(+), 28 deletions(-) diff --git a/gitnexus/src/core/embeddings/embedding-pipeline.ts b/gitnexus/src/core/embeddings/embedding-pipeline.ts index e394659f4d..3f84731e80 100644 --- a/gitnexus/src/core/embeddings/embedding-pipeline.ts +++ b/gitnexus/src/core/embeddings/embedding-pipeline.ts @@ -36,13 +36,11 @@ import { } from './types.js'; import { resolveEmbeddingConfig } from './config.js'; import { rankExactEmbeddingRows, type ExactEmbeddingRow } from './exact-search.js'; +import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, STALE_HASH_SENTINEL } from '../lbug/schema.js'; import { - EMBEDDING_TABLE_NAME, - EMBEDDING_INDEX_NAME, - CREATE_VECTOR_INDEX_QUERY, - STALE_HASH_SENTINEL, -} from '../lbug/schema.js'; -import { loadVectorExtension } from '../lbug/lbug-adapter.js'; + loadVectorExtension, + createVectorIndex as createVectorIndexOnDb, +} from '../lbug/lbug-adapter.js'; import type { ExtensionInstallPolicy } from '../lbug/extension-loader.js'; import { getExactScanLimit } from '../platform/capabilities.js'; import { logger } from '../logger.js'; @@ -215,24 +213,30 @@ export const batchInsertEmbeddings = async ( }; /** - * Create the vector index for semantic search - - * Now indexes the separate CodeEmbedding table. - * Delegates extension loading to lbug-adapter's loadVectorExtension(), - * which owns the VECTOR extension lifecycle and state tracking. - + * Create the vector index for semantic search (indexes the CodeEmbedding table). + * + * Keeps the embedding-specific extension-install policy gate here + * (ensureVectorExtensionAvailable → resolveEmbeddingInstallPolicy, default + * `auto` for the analyze write path), then delegates the actual + * `CALL CREATE_VECTOR_INDEX(...)` to the adapter, which runs it through the + * unprepared `conn.query()` path. It must NOT go through the injected + * `executeQuery` (prepared `conn.prepare()`): LadybugDB cannot prepare that + * procedure and fails with "We do not support prepare multiple statements" — + * the silent degrade in #2114. */ -const createVectorIndex = async ( - executeQuery: (cypher: string) => Promise, -): Promise => { +const createVectorIndex = async (): Promise => { if (!(await ensureVectorExtensionAvailable())) return false; try { - await executeQuery(CREATE_VECTOR_INDEX_QUERY); - return true; + return await createVectorIndexOnDb(); } catch (error) { - if (isDev) { - logger.warn({ error }, 'Vector index creation warning:'); - } + // Surface this even outside dev: it silently downgrades a user-requested + // feature (semantic search) to exact scan. Log under `err` so pino's + // standard serializer captures the message/stack — logging under `error` + // serialized an Error to `{}` (the empty `{"error":{}}` reported in #2114). + logger.warn( + { err: error }, + 'Vector index creation failed; semantic search will use exact-scan fallback', + ); return false; } }; @@ -383,7 +387,7 @@ export const runEmbeddingPipeline = async ( // Ensure the vector index exists even when no new nodes need embedding. // A prior crash or first-time incremental run may have left CodeEmbedding // rows without ever reaching index creation. - const vectorIndexReady = await createVectorIndex(executeQuery); + const vectorIndexReady = await createVectorIndex(); onProgress({ phase: 'ready', @@ -544,7 +548,7 @@ export const runEmbeddingPipeline = async ( logger.info('📇 Creating vector index...'); } - const vectorIndexReady = await createVectorIndex(executeQuery); + const vectorIndexReady = await createVectorIndex(); onProgress({ phase: 'ready', diff --git a/gitnexus/src/core/lbug/lbug-adapter.ts b/gitnexus/src/core/lbug/lbug-adapter.ts index 101783bcbc..ffbbdf19a7 100644 --- a/gitnexus/src/core/lbug/lbug-adapter.ts +++ b/gitnexus/src/core/lbug/lbug-adapter.ts @@ -14,6 +14,7 @@ import { REL_TABLE_NAME, SCHEMA_QUERIES, EMBEDDING_TABLE_NAME, + CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, NodeTableName, } from './schema.js'; @@ -1938,6 +1939,44 @@ export const createFTSIndex = async ( } }; +/** + * Create the HNSW vector index on the CodeEmbedding table. + * + * MUST run via `conn.query()` (here through `queryAndDrain`), NOT through the + * prepared `executeQuery`/`conn.prepare()` path: `CALL CREATE_VECTOR_INDEX(...)` + * compiles to multiple statements, which LadybugDB cannot prepare — it fails + * with "Connection Exception: We do not support prepare multiple statements." + * Routing index creation through `executeQuery` (prepared) is exactly what + * broke vector-index creation during `analyze` (#2114; the singleton + * `executeQuery` was switched to the prepared path in #1655 while FTS index + * creation kept using `conn.query()`, which is why FTS survived and VECTOR did + * not). Mirrors `createFTSIndex` above. + * + * Returns `true` on success (or when the index already exists — idempotent so + * incremental re-runs don't spuriously downgrade to exact scan), `false` when + * the VECTOR extension is unavailable or the connection is read-only. Any other + * failure propagates so the caller can log it. + */ +export const createVectorIndex = async (): Promise => { + if (!conn) { + throw new Error('LadybugDB not initialized. Call initLbug first.'); + } + if (!(await loadVectorExtension())) { + return false; + } + try { + await queryAndDrain(conn, CREATE_VECTOR_INDEX_QUERY); + return true; + } catch (e) { + const msg = e instanceof Error ? e.message : String(e); + // Idempotent: a prior analyze already built the HNSW index. + if (msg.includes('already exists')) return true; + // Read-only DB (e.g. the MCP query pool): writable analyze owns creation. + if (isReadOnlyDbError(e)) return false; + throw e; + } +}; + /** * Lazy-create an FTS index, caching the fact in-process. * diff --git a/gitnexus/test/integration/lbug-vector-extension.test.ts b/gitnexus/test/integration/lbug-vector-extension.test.ts index 5eb966189a..2e3e97e243 100644 --- a/gitnexus/test/integration/lbug-vector-extension.test.ts +++ b/gitnexus/test/integration/lbug-vector-extension.test.ts @@ -7,7 +7,7 @@ * Follows existing lbug integration test patterns (lbug-core-adapter, * lbug-lock-retry). */ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, beforeAll, beforeEach } from 'vitest'; import { withTestLbugDB } from '../helpers/test-indexed-db.js'; withTestLbugDB('vector-extension', (handle) => { @@ -73,3 +73,76 @@ withTestLbugDB('vector-extension', (handle) => { }); }); }); + +/** + * Regression: VECTOR/HNSW index creation during analyze (#2114). + * + * `CALL CREATE_VECTOR_INDEX(...)` compiles to multiple statements, which + * LadybugDB cannot run through `conn.prepare()`. Routing it through the + * prepared `executeQuery` path (as #1655 inadvertently did when it switched the + * singleton `executeQuery` from `conn.query()` to `conn.prepare()`) makes it + * throw "We do not support prepare multiple statements", which `analyze` + * swallowed and silently downgraded to exact-scan. The fix gives the adapter a + * `createVectorIndex()` that runs the procedure via `conn.query()` (like + * `createFTSIndex`). These tests exercise the real adapter against a real + * LadybugDB so a revert to the prepared path fails loudly. + */ +withTestLbugDB('vector-index-creation', () => { + // VECTOR is platform-sensitive (skipped on win32 / unsupported platforms, + // and when it cannot be installed offline). Probe once, skip the suite if + // unavailable — mirrors the FTS-skip convention in withTestLbugDB. + let vectorAvailable = false; + let skipWarned = false; + beforeAll(async () => { + const adapter = await import('../../src/core/lbug/lbug-adapter.js'); + const { resolveAnalyzeInstallPolicy } = await import('../../src/core/lbug/extension-loader.js'); + // Mirror the analyze write path (`auto`: LOAD-first, then one bounded + // INSTALL) so this suite runs wherever analyze would have vector support. + vectorAvailable = await adapter.loadVectorExtension(undefined, { + policy: resolveAnalyzeInstallPolicy(), + }); + }); + beforeEach((ctx) => { + if (!vectorAvailable) { + if (!skipWarned) { + skipWarned = true; + console.warn( + '[withTestLbugDB(vector-index-creation)] Skipping — the LadybugDB VECTOR ' + + 'extension is unavailable (unsupported platform or could not be installed).', + ); + } + ctx.skip(); + } + }); + + describe('createVectorIndex', () => { + it('creates the HNSW index via conn.query (the prepared path cannot)', async () => { + const adapter = await import('../../src/core/lbug/lbug-adapter.js'); + + const created = await adapter.createVectorIndex(); + expect(created).toBe(true); + + const rows = await adapter.executeQuery('CALL SHOW_INDEXES() RETURN *'); + const idx = rows.find((r: any) => r.index_name === 'code_embedding_idx'); + expect(idx).toBeDefined(); + expect(idx.index_type).toBe('HNSW'); + }); + + it('is idempotent — a second call returns true so incremental re-runs do not downgrade to exact scan', async () => { + const adapter = await import('../../src/core/lbug/lbug-adapter.js'); + + await adapter.createVectorIndex(); + await expect(adapter.createVectorIndex()).resolves.toBe(true); + }); + + it('regression: the prepared executeQuery path cannot create the index (#2114 root cause)', async () => { + const adapter = await import('../../src/core/lbug/lbug-adapter.js'); + const { CREATE_VECTOR_INDEX_QUERY } = await import('../../src/core/lbug/schema.js'); + + // executeQuery -> executePrepared -> conn.prepare(): rejects the + // multi-statement CREATE_VECTOR_INDEX procedure. This is exactly why + // createVectorIndex must use conn.query() instead. + await expect(adapter.executeQuery(CREATE_VECTOR_INDEX_QUERY)).rejects.toThrow(); + }); + }); +}); diff --git a/gitnexus/test/unit/embedding-pipeline.test.ts b/gitnexus/test/unit/embedding-pipeline.test.ts index f5a9f5ae20..2a3273f9d6 100644 --- a/gitnexus/test/unit/embedding-pipeline.test.ts +++ b/gitnexus/test/unit/embedding-pipeline.test.ts @@ -182,6 +182,11 @@ describe('runEmbeddingPipeline incremental filter', () => { let queryCalls: string[]; let stmtCalls: Array<{ cypher: string; params: Array> }>; let progressUpdates: EmbeddingProgress[]; + // Spy for the adapter's createVectorIndex (the pipeline delegates index + // creation to it via conn.query — see #2114). Captured so tests can assert + // it was invoked instead of asserting CREATE_VECTOR_INDEX flowed through the + // injected (prepared) executeQuery, which it must NOT. + let vectorIndexMock: ReturnType; // Helper node const makeNode = (overrides: Partial = {}): EmbeddableNode => ({ @@ -215,9 +220,12 @@ describe('runEmbeddingPipeline incremental filter', () => { isEmbedderReady: vi.fn().mockReturnValue(true), })); - // Mock loadVectorExtension (avoids needing the native lbug module) + // Mock the adapter (avoids needing the native lbug module). The pipeline + // imports both loadVectorExtension and createVectorIndex from here. + vectorIndexMock = vi.fn().mockResolvedValue(true); vi.doMock('../../src/core/lbug/lbug-adapter.js', () => ({ loadVectorExtension: vi.fn().mockResolvedValue(true), + createVectorIndex: vectorIndexMock, })); }; @@ -347,6 +355,7 @@ describe('runEmbeddingPipeline incremental filter', () => { })); vi.doMock('../../src/core/lbug/lbug-adapter.js', () => ({ loadVectorExtension: vi.fn().mockResolvedValue(true), + createVectorIndex: vi.fn().mockResolvedValue(true), })); const executeQuery = vi.fn().mockImplementation(async (cypher: string) => { @@ -486,7 +495,7 @@ describe('runEmbeddingPipeline incremental filter', () => { const { runEmbeddingPipeline } = await import('../../src/core/embeddings/embedding-pipeline.js'); - await runEmbeddingPipeline( + const result = await runEmbeddingPipeline( executeQuery, executeWithReusedStatement, onProgress, @@ -496,9 +505,13 @@ describe('runEmbeddingPipeline incremental filter', () => { existingEmbeddings, ); - // The CREATE_VECTOR_INDEX query should have been called via executeQuery - const vectorIndexCalls = queryCalls.filter((c) => c.includes('CREATE_VECTOR_INDEX')); - expect(vectorIndexCalls.length).toBeGreaterThanOrEqual(1); + // Index creation must go through the adapter's createVectorIndex (conn.query), + // NOT the injected/prepared executeQuery — CALL CREATE_VECTOR_INDEX cannot be + // prepared (#2114). It must still run on the zero-nodes-to-embed branch. + expect(vectorIndexMock).toHaveBeenCalled(); + expect(queryCalls.some((c) => c.includes('CREATE_VECTOR_INDEX'))).toBe(false); + expect(result.vectorIndexReady).toBe(true); + expect(result.semanticMode).toBe('vector-index'); }); it('stores embeddings with exact-scan fallback when VECTOR is unavailable', async () => { @@ -546,6 +559,7 @@ describe('runEmbeddingPipeline incremental filter', () => { })); vi.doMock('../../src/core/lbug/lbug-adapter.js', () => ({ loadVectorExtension: vi.fn().mockResolvedValue(true), + createVectorIndex: vi.fn().mockResolvedValue(true), })); const node = makeNode({ @@ -600,6 +614,7 @@ describe('runEmbeddingPipeline incremental filter', () => { })); vi.doMock('../../src/core/lbug/lbug-adapter.js', () => ({ loadVectorExtension: vi.fn().mockResolvedValue(true), + createVectorIndex: vi.fn().mockResolvedValue(true), })); const node = makeNode({ From ba2df47ff9e3042b4a6b0fe2a006238b44f0777d Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:35:27 +0000 Subject: [PATCH 2/8] test(embeddings): anchor vector-index regression to the prepare error (#2114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prepared-path-rejects assertion shared a connection with the index-creation tests, so by the time it ran the index already existed and conn.prepare() failed with "index already exists" — not the multi-statement rejection the test name claims. Move it to its own fresh (index-free) withTestLbugDB suite and anchor it to /prepare multiple statements/i so it can only pass for the real #2114 reason. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../integration/lbug-vector-extension.test.ts | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/gitnexus/test/integration/lbug-vector-extension.test.ts b/gitnexus/test/integration/lbug-vector-extension.test.ts index 2e3e97e243..7f02468877 100644 --- a/gitnexus/test/integration/lbug-vector-extension.test.ts +++ b/gitnexus/test/integration/lbug-vector-extension.test.ts @@ -134,15 +134,53 @@ withTestLbugDB('vector-index-creation', () => { await adapter.createVectorIndex(); await expect(adapter.createVectorIndex()).resolves.toBe(true); }); + }); +}); - it('regression: the prepared executeQuery path cannot create the index (#2114 root cause)', async () => { - const adapter = await import('../../src/core/lbug/lbug-adapter.js'); - const { CREATE_VECTOR_INDEX_QUERY } = await import('../../src/core/lbug/schema.js'); - - // executeQuery -> executePrepared -> conn.prepare(): rejects the - // multi-statement CREATE_VECTOR_INDEX procedure. This is exactly why - // createVectorIndex must use conn.query() instead. - await expect(adapter.executeQuery(CREATE_VECTOR_INDEX_QUERY)).rejects.toThrow(); +/** + * Regression for the #2114 root cause: the prepared `executeQuery` path cannot + * create the index. This lives in its OWN suite (a fresh, index-free DB) on + * purpose — in the `vector-index-creation` suite above the index already exists + * by the time this would run, so `conn.prepare()` fails with "index already + * exists" instead of the multi-statement rejection we want to pin. With no index + * present, `CALL CREATE_VECTOR_INDEX(...)` (which compiles to multiple + * statements) is rejected by `conn.prepare()` with "We do not support prepare + * multiple statements" — the exact failure that silently downgraded analyze to + * exact-scan, and why `createVectorIndex` must use `conn.query()` instead. + */ +withTestLbugDB('vector-index-prepare-rejects', () => { + let vectorAvailable = false; + let skipWarned = false; + beforeAll(async () => { + const adapter = await import('../../src/core/lbug/lbug-adapter.js'); + const { resolveAnalyzeInstallPolicy } = await import('../../src/core/lbug/extension-loader.js'); + vectorAvailable = await adapter.loadVectorExtension(undefined, { + policy: resolveAnalyzeInstallPolicy(), }); }); + beforeEach((ctx) => { + if (!vectorAvailable) { + if (!skipWarned) { + skipWarned = true; + console.warn( + '[withTestLbugDB(vector-index-prepare-rejects)] Skipping — the LadybugDB VECTOR ' + + 'extension is unavailable (unsupported platform or could not be installed).', + ); + } + ctx.skip(); + } + }); + + it('the prepared executeQuery path rejects CREATE_VECTOR_INDEX (#2114 root cause)', async () => { + const adapter = await import('../../src/core/lbug/lbug-adapter.js'); + const { CREATE_VECTOR_INDEX_QUERY } = await import('../../src/core/lbug/schema.js'); + + // executeQuery -> executePrepared -> conn.prepare(): the multi-statement + // CREATE_VECTOR_INDEX procedure cannot be prepared. Anchored to the specific + // error so the test can only pass for the #2114 reason — not for an + // unrelated throw (e.g. a missing table or an already-existing index). + await expect(adapter.executeQuery(CREATE_VECTOR_INDEX_QUERY)).rejects.toThrow( + /prepare multiple statements/i, + ); + }); }); From a94f2e39dea0a6982837a8061b4c6503bcc3b600 Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:36:28 +0000 Subject: [PATCH 3/8] test(embeddings): mock createVectorIndex in exact-scan fallback test (#2114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exact-scan-fallback test's vi.doMock of lbug-adapter omitted createVectorIndex (the pipeline now imports it). Harmless today — the extension-unavailable path short-circuits before the adapter call — but it left a latent TypeError trap and was inconsistent with the three other adapter mock sites. Add the mock for parity. Co-Authored-By: Claude Opus 4.8 (1M context) --- gitnexus/test/unit/embedding-pipeline.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/gitnexus/test/unit/embedding-pipeline.test.ts b/gitnexus/test/unit/embedding-pipeline.test.ts index 2a3273f9d6..3fb75449ad 100644 --- a/gitnexus/test/unit/embedding-pipeline.test.ts +++ b/gitnexus/test/unit/embedding-pipeline.test.ts @@ -528,6 +528,7 @@ describe('runEmbeddingPipeline incremental filter', () => { })); vi.doMock('../../src/core/lbug/lbug-adapter.js', () => ({ loadVectorExtension: vi.fn().mockResolvedValue(false), + createVectorIndex: vi.fn().mockResolvedValue(false), })); const node = makeNode(); From 9f16039716fee82e0b1a4158e526dc03dd826280 Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:37:24 +0000 Subject: [PATCH 4/8] test(embeddings): cover the vector-index creation-failure branch (#2114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the adapter's createVectorIndex throws (e.g. a DB error during HNSW build), the pipeline wrapper must swallow it, log via { err }, and degrade to exact-scan rather than failing the whole analyze run. This branch — the secondary #2114 visibility fix — had no coverage. Asserts the pipeline does not throw and returns vectorIndexReady=false / semanticMode='exact-scan' with embeddings still persisted. Co-Authored-By: Claude Opus 4.8 (1M context) --- gitnexus/test/unit/embedding-pipeline.test.ts | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/gitnexus/test/unit/embedding-pipeline.test.ts b/gitnexus/test/unit/embedding-pipeline.test.ts index 3fb75449ad..a81bb79142 100644 --- a/gitnexus/test/unit/embedding-pipeline.test.ts +++ b/gitnexus/test/unit/embedding-pipeline.test.ts @@ -545,6 +545,41 @@ describe('runEmbeddingPipeline incremental filter', () => { expect(progressUpdates.at(-1)?.phase).toBe('ready'); }); + it('degrades to exact-scan (without throwing) when vector index creation fails', async () => { + vi.doMock('../../src/core/embeddings/embedder.js', () => ({ + initEmbedder: vi.fn().mockResolvedValue(undefined), + embedBatch: vi + .fn() + .mockImplementation((texts: string[]) => + Promise.resolve(texts.map(() => new Float32Array(384))), + ), + embedText: vi.fn().mockResolvedValue(new Float32Array(384)), + embeddingToArray: vi.fn().mockImplementation((emb: Float32Array) => Array.from(emb)), + isEmbedderReady: vi.fn().mockReturnValue(true), + })); + // VECTOR loads, but the adapter's createVectorIndex throws (e.g. a DB error + // during HNSW build). The pipeline wrapper must swallow it, log, and fall + // back to exact-scan rather than failing the whole analyze run (#2114). + vi.doMock('../../src/core/lbug/lbug-adapter.js', () => ({ + loadVectorExtension: vi.fn().mockResolvedValue(true), + createVectorIndex: vi.fn().mockRejectedValue(new Error('HNSW build failed')), + })); + + const node = makeNode(); + const executeQuery = mockExecuteQuery([node]); + const executeWithReusedStatement = mockExecuteWithReusedStatement(); + const { runEmbeddingPipeline } = + await import('../../src/core/embeddings/embedding-pipeline.js'); + + const result = await runEmbeddingPipeline(executeQuery, executeWithReusedStatement, onProgress); + + expect(result.vectorIndexReady).toBe(false); + expect(result.semanticMode).toBe('exact-scan'); + // Embeddings were still persisted and the pipeline completed normally. + expect(stmtCalls.some((call) => call.cypher.includes('CREATE'))).toBe(true); + expect(progressUpdates.at(-1)?.phase).toBe('ready'); + }); + it('does not inject preceding context when overlap is disabled', async () => { const embedBatchSpy = vi .fn() From 143db9ce213682ffd40d4317577604d77cbc1d64 Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:38:25 +0000 Subject: [PATCH 5/8] test(embeddings): assert createVectorIndex called once on zero-nodes branch (#2114) Tighten the totalNodes===0 routing test from toHaveBeenCalled() to toHaveBeenCalledTimes(1) so an accidental double-creation on the early-return branch is caught. Co-Authored-By: Claude Opus 4.8 (1M context) --- gitnexus/test/unit/embedding-pipeline.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gitnexus/test/unit/embedding-pipeline.test.ts b/gitnexus/test/unit/embedding-pipeline.test.ts index a81bb79142..91f182db2c 100644 --- a/gitnexus/test/unit/embedding-pipeline.test.ts +++ b/gitnexus/test/unit/embedding-pipeline.test.ts @@ -508,7 +508,7 @@ describe('runEmbeddingPipeline incremental filter', () => { // Index creation must go through the adapter's createVectorIndex (conn.query), // NOT the injected/prepared executeQuery — CALL CREATE_VECTOR_INDEX cannot be // prepared (#2114). It must still run on the zero-nodes-to-embed branch. - expect(vectorIndexMock).toHaveBeenCalled(); + expect(vectorIndexMock).toHaveBeenCalledTimes(1); expect(queryCalls.some((c) => c.includes('CREATE_VECTOR_INDEX'))).toBe(false); expect(result.vectorIndexReady).toBe(true); expect(result.semanticMode).toBe('vector-index'); From 896a8b8e3d933c8aff069b3c8d96a1a1a339de9e Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:41:06 +0000 Subject: [PATCH 6/8] perf(lbug): cache vector-index creation in-process like createFTSIndex (#2114) createVectorIndex re-issued CALL CREATE_VECTOR_INDEX on every call, relying on the 'already exists' error string for idempotency. Add a module-scoped vectorIndexEnsured guard (mirrors ensuredFTSIndexes): early-return true when set, set on success and on 'already exists', and reset it everywhere vectorExtensionLoaded resets so it can never go stale against a swapped or closed connection. The integration idempotency test now also asserts SHOW_INDEXES has no duplicate code_embedding_idx. Co-Authored-By: Claude Opus 4.8 (1M context) --- gitnexus/src/core/lbug/lbug-adapter.ts | 17 ++++++++++++++++- .../integration/lbug-vector-extension.test.ts | 5 +++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/gitnexus/src/core/lbug/lbug-adapter.ts b/gitnexus/src/core/lbug/lbug-adapter.ts index ffbbdf19a7..dea1a4e981 100644 --- a/gitnexus/src/core/lbug/lbug-adapter.ts +++ b/gitnexus/src/core/lbug/lbug-adapter.ts @@ -172,6 +172,11 @@ let currentDbPath: string | null = null; let currentDbReadOnly = false; let ftsLoaded = false; let vectorExtensionLoaded = false; +// In-process guard so a repeated createVectorIndex() within one connection +// lifetime skips the DB round-trip (mirrors ensuredFTSIndexes). Reset wherever +// vectorExtensionLoaded resets, so it can never stay true against a swapped or +// closed connection. +let vectorIndexEnsured = false; /** * In-process cache of FTS indexes observed against the current singleton @@ -604,6 +609,7 @@ const resetOpenConnectionState = (): void => { currentDbPath = null; ftsLoaded = false; vectorExtensionLoaded = false; + vectorIndexEnsured = false; ensuredFTSIndexes.clear(); }; @@ -691,6 +697,7 @@ export const withLbugDb = async ( currentDbPath = null; ftsLoaded = false; vectorExtensionLoaded = false; + vectorIndexEnsured = false; ensuredFTSIndexes.clear(); }); // Sleep outside the lock — no need to block others while waiting @@ -717,6 +724,7 @@ const doInitLbug = async (dbPath: string, readOnly: boolean = false) => { currentDbPath = null; ftsLoaded = false; vectorExtensionLoaded = false; + vectorIndexEnsured = false; ensuredFTSIndexes.clear(); } @@ -1672,6 +1680,7 @@ export const closeLbug = async (): Promise => { currentDbPath = null; ftsLoaded = false; vectorExtensionLoaded = false; + vectorIndexEnsured = false; ensuredFTSIndexes.clear(); }; @@ -1961,16 +1970,22 @@ export const createVectorIndex = async (): Promise => { if (!conn) { throw new Error('LadybugDB not initialized. Call initLbug first.'); } + // Already built on this connection — skip the round-trip (mirrors createFTSIndex). + if (vectorIndexEnsured) return true; if (!(await loadVectorExtension())) { return false; } try { await queryAndDrain(conn, CREATE_VECTOR_INDEX_QUERY); + vectorIndexEnsured = true; return true; } catch (e) { const msg = e instanceof Error ? e.message : String(e); // Idempotent: a prior analyze already built the HNSW index. - if (msg.includes('already exists')) return true; + if (msg.includes('already exists')) { + vectorIndexEnsured = true; + return true; + } // Read-only DB (e.g. the MCP query pool): writable analyze owns creation. if (isReadOnlyDbError(e)) return false; throw e; diff --git a/gitnexus/test/integration/lbug-vector-extension.test.ts b/gitnexus/test/integration/lbug-vector-extension.test.ts index 7f02468877..ba436f1834 100644 --- a/gitnexus/test/integration/lbug-vector-extension.test.ts +++ b/gitnexus/test/integration/lbug-vector-extension.test.ts @@ -133,6 +133,11 @@ withTestLbugDB('vector-index-creation', () => { await adapter.createVectorIndex(); await expect(adapter.createVectorIndex()).resolves.toBe(true); + + // No duplicate index created by the repeat call. + const rows = await adapter.executeQuery('CALL SHOW_INDEXES() RETURN *'); + const matches = rows.filter((r: any) => r.index_name === 'code_embedding_idx'); + expect(matches).toHaveLength(1); }); }); }); From 3fb971c692799012293df4eee6ce58a891b1d0a2 Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:43:07 +0000 Subject: [PATCH 7/8] refactor(embeddings): rename local vector-index wrapper, drop import alias (#2114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipeline-local wrapper shared the name createVectorIndex with the adapter export, forcing an `as createVectorIndexOnDb` import alias. Rename the wrapper to buildVectorIndex and import the adapter export under its real name. Internal rename only — no behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- gitnexus/src/core/embeddings/embedding-pipeline.ts | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/gitnexus/src/core/embeddings/embedding-pipeline.ts b/gitnexus/src/core/embeddings/embedding-pipeline.ts index 3f84731e80..76644f901c 100644 --- a/gitnexus/src/core/embeddings/embedding-pipeline.ts +++ b/gitnexus/src/core/embeddings/embedding-pipeline.ts @@ -37,10 +37,7 @@ import { import { resolveEmbeddingConfig } from './config.js'; import { rankExactEmbeddingRows, type ExactEmbeddingRow } from './exact-search.js'; import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, STALE_HASH_SENTINEL } from '../lbug/schema.js'; -import { - loadVectorExtension, - createVectorIndex as createVectorIndexOnDb, -} from '../lbug/lbug-adapter.js'; +import { loadVectorExtension, createVectorIndex } from '../lbug/lbug-adapter.js'; import type { ExtensionInstallPolicy } from '../lbug/extension-loader.js'; import { getExactScanLimit } from '../platform/capabilities.js'; import { logger } from '../logger.js'; @@ -224,10 +221,10 @@ export const batchInsertEmbeddings = async ( * procedure and fails with "We do not support prepare multiple statements" — * the silent degrade in #2114. */ -const createVectorIndex = async (): Promise => { +const buildVectorIndex = async (): Promise => { if (!(await ensureVectorExtensionAvailable())) return false; try { - return await createVectorIndexOnDb(); + return await createVectorIndex(); } catch (error) { // Surface this even outside dev: it silently downgrades a user-requested // feature (semantic search) to exact scan. Log under `err` so pino's @@ -387,7 +384,7 @@ export const runEmbeddingPipeline = async ( // Ensure the vector index exists even when no new nodes need embedding. // A prior crash or first-time incremental run may have left CodeEmbedding // rows without ever reaching index creation. - const vectorIndexReady = await createVectorIndex(); + const vectorIndexReady = await buildVectorIndex(); onProgress({ phase: 'ready', @@ -548,7 +545,7 @@ export const runEmbeddingPipeline = async ( logger.info('📇 Creating vector index...'); } - const vectorIndexReady = await createVectorIndex(); + const vectorIndexReady = await buildVectorIndex(); onProgress({ phase: 'ready', From 3b385420afdd1ab776ce48f093edcc48dfd81209 Mon Sep 17 00:00:00 2001 From: Gergo Magyar Date: Wed, 10 Jun 2026 06:43:46 +0000 Subject: [PATCH 8/8] docs(embeddings): explain the double VECTOR-extension gate (#2114) Note at the call site why the pipeline pre-check and the adapter's own loadVectorExtension are not redundant: the pre-check applies the embedding-specific install policy, and the adapter's second load is a no-op via the cached vectorExtensionLoaded flag (no double install). Co-Authored-By: Claude Opus 4.8 (1M context) --- gitnexus/src/core/embeddings/embedding-pipeline.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gitnexus/src/core/embeddings/embedding-pipeline.ts b/gitnexus/src/core/embeddings/embedding-pipeline.ts index 76644f901c..0405cd47c8 100644 --- a/gitnexus/src/core/embeddings/embedding-pipeline.ts +++ b/gitnexus/src/core/embeddings/embedding-pipeline.ts @@ -222,6 +222,12 @@ export const batchInsertEmbeddings = async ( * the silent degrade in #2114. */ const buildVectorIndex = async (): Promise => { + // This pre-check applies the embedding-specific install policy + // (resolveEmbeddingInstallPolicy, default `auto` for analyze) before reaching + // the adapter. The adapter's createVectorIndex() calls loadVectorExtension() + // again, but that's a no-op here: once this gate loads VECTOR the module-level + // `vectorExtensionLoaded` flag is set, so the adapter's second call + // short-circuits without re-resolving the policy — no double install. if (!(await ensureVectorExtensionAvailable())) return false; try { return await createVectorIndex();