Skip to content
Merged
53 changes: 30 additions & 23 deletions gitnexus/src/core/embeddings/embedding-pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,8 @@ import {
} from './types.js';
import { resolveEmbeddingConfig } from './config.js';
import { rankExactEmbeddingRows, type ExactEmbeddingRow } from './exact-search.js';
import {
EMBEDDING_TABLE_NAME,
EMBEDDING_INDEX_NAME,
CREATE_VECTOR_INDEX_QUERY,
STALE_HASH_SENTINEL,
} from '../lbug/schema.js';
import { loadVectorExtension } from '../lbug/lbug-adapter.js';
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, STALE_HASH_SENTINEL } from '../lbug/schema.js';
import { loadVectorExtension, createVectorIndex } from '../lbug/lbug-adapter.js';
import type { ExtensionInstallPolicy } from '../lbug/extension-loader.js';
import { getExactScanLimit } from '../platform/capabilities.js';
import { logger } from '../logger.js';
Expand Down Expand Up @@ -215,24 +210,36 @@ export const batchInsertEmbeddings = async (
};

/**
* Create the vector index for semantic search

* Now indexes the separate CodeEmbedding table.
* Delegates extension loading to lbug-adapter's loadVectorExtension(),
* which owns the VECTOR extension lifecycle and state tracking.

* Create the vector index for semantic search (indexes the CodeEmbedding table).
*
* Keeps the embedding-specific extension-install policy gate here
* (ensureVectorExtensionAvailable → resolveEmbeddingInstallPolicy, default
* `auto` for the analyze write path), then delegates the actual
* `CALL CREATE_VECTOR_INDEX(...)` to the adapter, which runs it through the
* unprepared `conn.query()` path. It must NOT go through the injected
* `executeQuery` (prepared `conn.prepare()`): LadybugDB cannot prepare that
* procedure and fails with "We do not support prepare multiple statements" —
* the silent degrade in #2114.
*/
const createVectorIndex = async (
executeQuery: (cypher: string) => Promise<any[]>,
): Promise<boolean> => {
const buildVectorIndex = async (): Promise<boolean> => {
// This pre-check applies the embedding-specific install policy
// (resolveEmbeddingInstallPolicy, default `auto` for analyze) before reaching
// the adapter. The adapter's createVectorIndex() calls loadVectorExtension()
// again, but that's a no-op here: once this gate loads VECTOR the module-level
// `vectorExtensionLoaded` flag is set, so the adapter's second call
// short-circuits without re-resolving the policy — no double install.
if (!(await ensureVectorExtensionAvailable())) return false;
try {
await executeQuery(CREATE_VECTOR_INDEX_QUERY);
return true;
return await createVectorIndex();
} catch (error) {
if (isDev) {
logger.warn({ error }, 'Vector index creation warning:');
}
// Surface this even outside dev: it silently downgrades a user-requested
// feature (semantic search) to exact scan. Log under `err` so pino's
// standard serializer captures the message/stack — logging under `error`
// serialized an Error to `{}` (the empty `{"error":{}}` reported in #2114).
logger.warn(
{ err: error },
'Vector index creation failed; semantic search will use exact-scan fallback',
);
return false;
}
};
Expand Down Expand Up @@ -383,7 +390,7 @@ export const runEmbeddingPipeline = async (
// Ensure the vector index exists even when no new nodes need embedding.
// A prior crash or first-time incremental run may have left CodeEmbedding
// rows without ever reaching index creation.
const vectorIndexReady = await createVectorIndex(executeQuery);
const vectorIndexReady = await buildVectorIndex();

onProgress({
phase: 'ready',
Expand Down Expand Up @@ -544,7 +551,7 @@ export const runEmbeddingPipeline = async (
logger.info('📇 Creating vector index...');
}

const vectorIndexReady = await createVectorIndex(executeQuery);
const vectorIndexReady = await buildVectorIndex();

onProgress({
phase: 'ready',
Expand Down
54 changes: 54 additions & 0 deletions gitnexus/src/core/lbug/lbug-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
REL_TABLE_NAME,
SCHEMA_QUERIES,
EMBEDDING_TABLE_NAME,
CREATE_VECTOR_INDEX_QUERY,
STALE_HASH_SENTINEL,
NodeTableName,
} from './schema.js';
Expand Down Expand Up @@ -171,6 +172,11 @@ let currentDbPath: string | null = null;
let currentDbReadOnly = false;
let ftsLoaded = false;
let vectorExtensionLoaded = false;
// In-process guard so a repeated createVectorIndex() within one connection
// lifetime skips the DB round-trip (mirrors ensuredFTSIndexes). Reset wherever
// vectorExtensionLoaded resets, so it can never stay true against a swapped or
// closed connection.
let vectorIndexEnsured = false;

/**
* In-process cache of FTS indexes observed against the current singleton
Expand Down Expand Up @@ -603,6 +609,7 @@ const resetOpenConnectionState = (): void => {
currentDbPath = null;
ftsLoaded = false;
vectorExtensionLoaded = false;
vectorIndexEnsured = false;
ensuredFTSIndexes.clear();
};

Expand Down Expand Up @@ -690,6 +697,7 @@ export const withLbugDb = async <T>(
currentDbPath = null;
ftsLoaded = false;
vectorExtensionLoaded = false;
vectorIndexEnsured = false;
ensuredFTSIndexes.clear();
});
// Sleep outside the lock — no need to block others while waiting
Expand All @@ -716,6 +724,7 @@ const doInitLbug = async (dbPath: string, readOnly: boolean = false) => {
currentDbPath = null;
ftsLoaded = false;
vectorExtensionLoaded = false;
vectorIndexEnsured = false;
ensuredFTSIndexes.clear();
}

Expand Down Expand Up @@ -1671,6 +1680,7 @@ export const closeLbug = async (): Promise<void> => {
currentDbPath = null;
ftsLoaded = false;
vectorExtensionLoaded = false;
vectorIndexEnsured = false;
ensuredFTSIndexes.clear();
};

Expand Down Expand Up @@ -1938,6 +1948,50 @@ export const createFTSIndex = async (
}
};

/**
 * Build the HNSW vector index over the CodeEmbedding table.
 *
 * The statement is issued through `queryAndDrain` (the unprepared
 * `conn.query()` route) and must never be sent through the prepared
 * `executeQuery` / `conn.prepare()` route. `CALL CREATE_VECTOR_INDEX(...)`
 * compiles to multiple statements, and LadybugDB refuses to prepare those with
 * "Connection Exception: We do not support prepare multiple statements."
 * That is the failure behind #2114: the singleton `executeQuery` moved to the
 * prepared path in #1655, FTS index creation stayed on `conn.query()` and kept
 * working, while vector-index creation during `analyze` broke. This function
 * follows the same shape as `createFTSIndex`.
 *
 * @returns `true` when the index was created or already exists (idempotent, so
 *   incremental re-runs are not spuriously downgraded to exact scan); `false`
 *   when the VECTOR extension cannot be loaded or the database is read-only.
 * @throws Error if no connection is open, and rethrows any failure that is
 *   neither "already exists" nor a read-only error so the caller can log it.
 */
export const createVectorIndex = async (): Promise<boolean> => {
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }

  // Index already confirmed on this connection: avoid another DB round-trip.
  if (vectorIndexEnsured) {
    return true;
  }

  const extensionReady = await loadVectorExtension();
  if (!extensionReady) {
    return false;
  }

  try {
    await queryAndDrain(conn, CREATE_VECTOR_INDEX_QUERY);
  } catch (failure) {
    const text = failure instanceof Error ? failure.message : String(failure);
    // "already exists" means an earlier analyze built the index; treat it as
    // success and fall through to record that below.
    const alreadyBuilt = text.includes('already exists');
    if (!alreadyBuilt) {
      // Read-only DB (e.g. the MCP query pool): the writable analyze run owns
      // index creation, so report "not created" instead of failing.
      if (isReadOnlyDbError(failure)) return false;
      throw failure;
    }
  }

  vectorIndexEnsured = true;
  return true;
};

/**
* Lazy-create an FTS index, caching the fact in-process.
*
Expand Down
118 changes: 117 additions & 1 deletion gitnexus/test/integration/lbug-vector-extension.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* Follows existing lbug integration test patterns (lbug-core-adapter,
* lbug-lock-retry).
*/
import { describe, it, expect } from 'vitest';
import { describe, it, expect, beforeAll, beforeEach } from 'vitest';
import { withTestLbugDB } from '../helpers/test-indexed-db.js';

withTestLbugDB('vector-extension', (handle) => {
Expand Down Expand Up @@ -73,3 +73,119 @@ withTestLbugDB('vector-extension', (handle) => {
});
});
});

/**
 * Regression coverage for #2114: building the VECTOR/HNSW index during analyze.
 *
 * `CALL CREATE_VECTOR_INDEX(...)` compiles to multiple statements, and
 * LadybugDB cannot run that through `conn.prepare()`. When #1655 switched the
 * singleton `executeQuery` from `conn.query()` to `conn.prepare()`, the call
 * started throwing "We do not support prepare multiple statements"; `analyze`
 * swallowed the error and silently fell back to exact scan. The adapter's
 * `createVectorIndex()` issues the procedure via `conn.query()` (the same way
 * `createFTSIndex` does). The tests below drive the real adapter against a real
 * LadybugDB so that a revert to the prepared path fails loudly.
 */
withTestLbugDB('vector-index-creation', () => {
  // VECTOR support depends on the platform (skipped on win32 / unsupported
  // platforms, and when it cannot be installed offline). Probe a single time
  // and skip every test when it is missing, following the FTS-skip convention
  // in withTestLbugDB.
  let vectorAvailable = false;
  let skipWarned = false;

  beforeAll(async () => {
    const adapter = await import('../../src/core/lbug/lbug-adapter.js');
    const { resolveAnalyzeInstallPolicy } = await import('../../src/core/lbug/extension-loader.js');
    // Use the analyze write-path policy (`auto`: LOAD first, then one bounded
    // INSTALL) so the suite runs wherever analyze itself would have vectors.
    const policy = resolveAnalyzeInstallPolicy();
    vectorAvailable = await adapter.loadVectorExtension(undefined, { policy });
  });

  beforeEach((ctx) => {
    if (vectorAvailable) return;
    // Emit the explanation only for the first skipped test.
    if (!skipWarned) {
      skipWarned = true;
      console.warn(
        '[withTestLbugDB(vector-index-creation)] Skipping — the LadybugDB VECTOR ' +
          'extension is unavailable (unsupported platform or could not be installed).',
      );
    }
    ctx.skip();
  });

  describe('createVectorIndex', () => {
    it('creates the HNSW index via conn.query (the prepared path cannot)', async () => {
      const adapter = await import('../../src/core/lbug/lbug-adapter.js');

      expect(await adapter.createVectorIndex()).toBe(true);

      const indexes = await adapter.executeQuery('CALL SHOW_INDEXES() RETURN *');
      const hnsw = indexes.find((row: any) => row.index_name === 'code_embedding_idx');
      expect(hnsw).toBeDefined();
      expect(hnsw.index_type).toBe('HNSW');
    });

    it('is idempotent — a second call returns true so incremental re-runs do not downgrade to exact scan', async () => {
      const adapter = await import('../../src/core/lbug/lbug-adapter.js');

      await adapter.createVectorIndex();
      await expect(adapter.createVectorIndex()).resolves.toBe(true);

      // The repeated call must not have produced a second index.
      const indexes = await adapter.executeQuery('CALL SHOW_INDEXES() RETURN *');
      const sameName = indexes.filter((row: any) => row.index_name === 'code_embedding_idx');
      expect(sameName).toHaveLength(1);
    });
  });
});

/**
 * Pins the root cause of #2114: the prepared `executeQuery` path cannot create
 * the vector index.
 *
 * This deliberately runs in a suite of its own, against a fresh database with
 * no index. Inside the `vector-index-creation` suite the index would already
 * exist by the time this ran, and `conn.prepare()` would fail with "index
 * already exists" rather than the multi-statement rejection being pinned here.
 * With no index present, `CALL CREATE_VECTOR_INDEX(...)` (which compiles to
 * multiple statements) is rejected by `conn.prepare()` with "We do not support
 * prepare multiple statements" — the failure that silently downgraded analyze
 * to exact scan, and the reason `createVectorIndex` uses `conn.query()`.
 */
withTestLbugDB('vector-index-prepare-rejects', () => {
  let vectorAvailable = false;
  let skipWarned = false;

  beforeAll(async () => {
    const adapter = await import('../../src/core/lbug/lbug-adapter.js');
    const { resolveAnalyzeInstallPolicy } = await import('../../src/core/lbug/extension-loader.js');
    const policy = resolveAnalyzeInstallPolicy();
    vectorAvailable = await adapter.loadVectorExtension(undefined, { policy });
  });

  beforeEach((ctx) => {
    if (vectorAvailable) return;
    // Emit the explanation only for the first skipped test.
    if (!skipWarned) {
      skipWarned = true;
      console.warn(
        '[withTestLbugDB(vector-index-prepare-rejects)] Skipping — the LadybugDB VECTOR ' +
          'extension is unavailable (unsupported platform or could not be installed).',
      );
    }
    ctx.skip();
  });

  it('the prepared executeQuery path rejects CREATE_VECTOR_INDEX (#2114 root cause)', async () => {
    const adapter = await import('../../src/core/lbug/lbug-adapter.js');
    const { CREATE_VECTOR_INDEX_QUERY } = await import('../../src/core/lbug/schema.js');

    // executeQuery -> executePrepared -> conn.prepare() cannot prepare the
    // multi-statement CREATE_VECTOR_INDEX procedure. The assertion is anchored
    // to that specific message so the test passes only for the #2114 reason,
    // not for some unrelated throw (a missing table, an existing index, ...).
    const attempt = adapter.executeQuery(CREATE_VECTOR_INDEX_QUERY);
    await expect(attempt).rejects.toThrow(/prepare multiple statements/i);
  });
});
Loading
Loading