From 93bac68cb998d738635845ef397b5847d6d3f3c4 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 13:12:43 +0400 Subject: [PATCH 1/8] add test for vector search in ATTACHED db --- libsql-sqlite3/test/libsql_vector_index.test | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index 951c9086b0..e42b3613f7 100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -508,3 +508,24 @@ do_test vector-errors { {vector index(search): third parameter (k) must be an integer, but float value were provided} {vector index(insert): dimensions are different: 1 != 3} }] + + +do_test vector-index-attach-setup { + forcedelete test2.db + forcedelete test2.db-journal + sqlite3 db2 test2.db + + execsql { + CREATE TABLE t_attach(e FLOAT32(3)); + CREATE INDEX t_attach_idx ON t_attach(libsql_vector_idx(e)); + INSERT INTO t_attach VALUES (vector('[1,2,3]')); + INSERT INTO t_attach VALUES (vector('[2,3,4]')); + INSERT INTO t_attach VALUES (vector('[3,4,5]')); + INSERT INTO t_attach VALUES (vector('[4,5,6]')); + } db2 +} {} + +do_execsql_test vector-index-attach-query { + ATTACH DATABASE 'test2.db' AS t; + SELECT * FROM vector_top_k('t.t_attach_idx', vector('[3,4,5]'), 4); +} {3 4 2 1} From b26a78b83c1c0577412f571b2887f4349b4cd729 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 13:13:04 +0400 Subject: [PATCH 2/8] remove db schema name from vector vtab - as it make no sense for eponymous virtual tables - see https://www.sqlite.org/vtab.html#epovtab --- libsql-sqlite3/src/vectorvtab.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/libsql-sqlite3/src/vectorvtab.c b/libsql-sqlite3/src/vectorvtab.c index 1d8d95f717..cbe6f2b6e4 100644 --- a/libsql-sqlite3/src/vectorvtab.c +++ b/libsql-sqlite3/src/vectorvtab.c @@ -33,7 +33,6 @@ typedef struct vectorVtab 
vectorVtab; struct vectorVtab { sqlite3_vtab base; /* Base class - must be first */ sqlite3 *db; /* Database connection */ - char* zDbSName; /* Database schema name */ }; typedef struct vectorVtab_cursor vectorVtab_cursor; @@ -59,7 +58,6 @@ static int vectorVtabConnect( sqlite3_vtab **ppVtab, char **pzErr ){ - char *zDbSName = NULL; vectorVtab *pVtab = NULL; int rc; /* @@ -74,21 +72,17 @@ static int vectorVtabConnect( if( pVtab == NULL ){ return SQLITE_NOMEM_BKPT; } - zDbSName = sqlite3DbStrDup(db, argv[1]); // argv[1] is the database schema name by spec (see https://www.sqlite.org/vtab.html#the_xcreate_method) - if( zDbSName == NULL ){ - sqlite3_free(pVtab); - return SQLITE_NOMEM_BKPT; - } + // > Eponymous virtual tables exist in the "main" schema only, so they will not work if prefixed with a different schema name. + // so, argv[1] always equal to "main" and we can safely ignore it + // (see https://www.sqlite.org/vtab.html#epovtab) memset(pVtab, 0, sizeof(*pVtab)); pVtab->db = db; - pVtab->zDbSName = zDbSName; *ppVtab = (sqlite3_vtab*)pVtab; return SQLITE_OK; } static int vectorVtabDisconnect(sqlite3_vtab *pVtab){ vectorVtab *pVTab = (vectorVtab*)pVtab; - sqlite3DbFree(pVTab->db, pVTab->zDbSName); sqlite3_free(pVtab); return SQLITE_OK; } @@ -155,7 +149,7 @@ static int vectorVtabFilter( pCur->rows.aIntValues = NULL; pCur->rows.ppValues = NULL; - if( vectorIndexSearch(pVTab->db, pVTab->zDbSName, argc, argv, &pCur->rows, &pCur->nReads, &pCur->nWrites, &pVTab->base.zErrMsg) != 0 ){ + if( vectorIndexSearch(pVTab->db, argc, argv, &pCur->rows, &pCur->nReads, &pCur->nWrites, &pVTab->base.zErrMsg) != 0 ){ return SQLITE_ERROR; } From 05fce74677017ca048df17ebc3f8ad62dae70d73 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 13:13:33 +0400 Subject: [PATCH 3/8] parse schema name from vector vtab index name string argument --- libsql-sqlite3/src/vectorIndex.c | 69 ++++++++++++++++++++++++----- libsql-sqlite3/src/vectorIndexInt.h | 2 +- 2 files changed, 60 
insertions(+), 11 deletions(-) diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index 45b3eeb5a9..1e3b4363b0 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -747,17 +747,25 @@ int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, cons int vectorIndexGetParameters( sqlite3 *db, + const char *zDbSName, const char *zIdxName, VectorIdxParams *pParams ) { int rc = SQLITE_OK; + assert( zDbSName != NULL ); - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + static const char *zSelectSqlTemplate = "SELECT metadata FROM \"%w\"." VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + char* zSelectSql; + zSelectSql = sqlite3_mprintf(zSelectSqlTemplate, zDbSName); + if( zSelectSql == NULL ){ + return SQLITE_NOMEM_BKPT; + } // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) // when instead of table with binary parameters rigid schema was used for index settings // we should drop this eventually - but for now we postponed this decision static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + sqlite3_free(zSelectSql); if( rc == SQLITE_OK ){ return SQLITE_OK; } @@ -940,9 +948,32 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co return CREATE_OK; } +// extracts schema and index name part if full index name is composite (e.g. 
schema_name.index_name) +// if full index name has no schema part - function returns SQLITE_OK but leaves pzIdxDbSName and pzIdxName untouched +int getIndexNameParts(sqlite3 *db, const char *zIdxFullName, char **pzIdxDbSName, char **pzIdxName) { + int nFullName, nDbSName; + const char *pDot = zIdxFullName; + while( *pDot != '.' && *pDot != '\0' ){ + pDot++; + } + if( *pDot == '\0' ){ + return SQLITE_OK; + } + assert( *pDot == '.' ); + nFullName = sqlite3Strlen30(zIdxFullName); + nDbSName = pDot - zIdxFullName; + *pzIdxDbSName = sqlite3DbStrNDup(db, zIdxFullName, nDbSName); + *pzIdxName = sqlite3DbStrNDup(db, pDot + 1, nFullName - nDbSName - 1); + if( pzIdxName == NULL || pzIdxDbSName == NULL ){ + sqlite3DbFree(db, *pzIdxName); + sqlite3DbFree(db, *pzIdxDbSName); + return SQLITE_NOMEM_BKPT; + } + return SQLITE_OK; +} + int vectorIndexSearch( sqlite3 *db, - const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, @@ -952,7 +983,11 @@ int vectorIndexSearch( ) { int type, dims, k, rc; double kDouble; - const char *zIdxName; + const char *zIdxFullName; + char *zIdxDbSNameAlloc = NULL; // allocated managed schema name string - must be freed if not null + char *zIdxNameAlloc = NULL; // allocated managed index name string - must be freed if not null + const char *zIdxDbSName = NULL; // schema name of the index (can be static in cases where explicit schema is omitted - so must not be freed) + const char *zIdxName = NULL; // index name (can be extracted with sqlite3_value_text and managed by SQLite - so must not be freed) const char *zErrMsg; Vector *pVector = NULL; DiskAnnIndex *pDiskAnn = NULL; @@ -961,8 +996,6 @@ int vectorIndexSearch( VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); - assert( zDbSName != NULL ); - if( argc != 3 ){ *pzErrMsg = sqlite3_mprintf("vector index(search): got %d parameters, expected 3", argc); rc = SQLITE_ERROR; @@ -1013,19 +1046,33 @@ int vectorIndexSearch( rc = SQLITE_ERROR; goto out; } - zIdxName = 
(const char*)sqlite3_value_text(argv[0]); - if( vectorIndexGetParameters(db, zIdxName, &idxParams) != 0 ){ + zIdxFullName = (const char*)sqlite3_value_text(argv[0]); + rc = getIndexNameParts(db, zIdxFullName, &zIdxDbSNameAlloc, &zIdxNameAlloc); + if( rc != SQLITE_OK ){ + *pzErrMsg = sqlite3_mprintf("vector index(search): failed to parse index name"); + goto out; + } + assert( (zIdxDbSNameAlloc == NULL && zIdxNameAlloc == NULL) || (zIdxDbSNameAlloc != NULL && zIdxNameAlloc != NULL) ); + if( zIdxDbSNameAlloc == NULL && zIdxNameAlloc == NULL ){ + zIdxDbSName = "main"; + zIdxName = zIdxFullName; + } else{ + zIdxDbSName = zIdxDbSNameAlloc; + zIdxName = zIdxNameAlloc; + } + + if( vectorIndexGetParameters(db, zIdxDbSName, zIdxName, &idxParams) != 0 ){ *pzErrMsg = sqlite3_mprintf("vector index(search): failed to parse vector index parameters"); rc = SQLITE_ERROR; goto out; } - pIndex = sqlite3FindIndex(db, zIdxName, zDbSName); + pIndex = sqlite3FindIndex(db, zIdxName, zIdxDbSName); if( pIndex == NULL ){ *pzErrMsg = sqlite3_mprintf("vector index(search): index not found"); rc = SQLITE_ERROR; goto out; } - rc = diskAnnOpenIndex(db, zDbSName, zIdxName, &idxParams, &pDiskAnn); + rc = diskAnnOpenIndex(db, zIdxDbSName, zIdxName, &idxParams, &pDiskAnn); if( rc != SQLITE_OK ){ *pzErrMsg = sqlite3_mprintf("vector index(search): failed to open diskann index"); goto out; @@ -1045,6 +1092,8 @@ int vectorIndexSearch( if( pVector != NULL ){ vectorFree(pVector); } + sqlite3DbFree(db, zIdxNameAlloc); + sqlite3DbFree(db, zIdxDbSNameAlloc); return rc; } @@ -1094,7 +1143,7 @@ int vectorIndexCursorInit( assert( zDbSName != NULL ); - if( vectorIndexGetParameters(db, zIndexName, &params) != 0 ){ + if( vectorIndexGetParameters(db, zDbSName, zIndexName, &params) != 0 ){ return SQLITE_ERROR; } pCursor = sqlite3DbMallocZero(db, sizeof(VectorIdxCursor)); diff --git a/libsql-sqlite3/src/vectorIndexInt.h b/libsql-sqlite3/src/vectorIndexInt.h index e65df4d515..1f3ef8c22c 100644 --- 
a/libsql-sqlite3/src/vectorIndexInt.h +++ b/libsql-sqlite3/src/vectorIndexInt.h @@ -242,7 +242,7 @@ int vectorIdxParseColumnType(const char *, int *, int *, const char **); int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, int *, int *, char **); +int vectorIndexSearch(sqlite3 *, int, sqlite3_value **, VectorOutRows *, int *, int *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *, int *, int *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); From f3db0bc8271643365f1593fa965bcf39f0600118 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 13:16:23 +0400 Subject: [PATCH 4/8] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 85 ++++++++++++++----- libsql-ffi/bundled/src/sqlite3.c | 85 ++++++++++++++----- 2 files changed, 128 insertions(+), 42 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index 82db050d36..801bf497c1 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -85652,7 +85652,7 @@ int vectorIdxParseColumnType(const char *, int *, int *, const char **); int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, int *, int *, char **); +int vectorIndexSearch(sqlite3 *, int, sqlite3_value **, VectorOutRows *, int *, int *, char **); int vectorIndexCursorInit(sqlite3 *, const char *, 
const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *, int *, int *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); @@ -215829,17 +215829,25 @@ int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, cons int vectorIndexGetParameters( sqlite3 *db, + const char *zDbSName, const char *zIdxName, VectorIdxParams *pParams ) { int rc = SQLITE_OK; + assert( zDbSName != NULL ); - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + static const char *zSelectSqlTemplate = "SELECT metadata FROM \"%w\"." VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + char* zSelectSql; + zSelectSql = sqlite3_mprintf(zSelectSqlTemplate, zDbSName); + if( zSelectSql == NULL ){ + return SQLITE_NOMEM_BKPT; + } // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) // when instead of table with binary parameters rigid schema was used for index settings // we should drop this eventually - but for now we postponed this decision static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + sqlite3_free(zSelectSql); if( rc == SQLITE_OK ){ return SQLITE_OK; } @@ -216022,9 +216030,32 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co return CREATE_OK; } +// extracts schema and index name part if full index name is composite (e.g. schema_name.index_name) +// if full index name has no schema part - function returns SQLITE_OK but leaves pzIdxDbSName and pzIdxName untouched +int getIndexNameParts(sqlite3 *db, const char *zIdxFullName, char **pzIdxDbSName, char **pzIdxName) { + int nFullName, nDbSName; + const char *pDot = zIdxFullName; + while( *pDot != '.' 
&& *pDot != '\0' ){ + pDot++; + } + if( *pDot == '\0' ){ + return SQLITE_OK; + } + assert( *pDot == '.' ); + nFullName = sqlite3Strlen30(zIdxFullName); + nDbSName = pDot - zIdxFullName; + *pzIdxDbSName = sqlite3DbStrNDup(db, zIdxFullName, nDbSName); + *pzIdxName = sqlite3DbStrNDup(db, pDot + 1, nFullName - nDbSName - 1); + if( pzIdxName == NULL || pzIdxDbSName == NULL ){ + sqlite3DbFree(db, *pzIdxName); + sqlite3DbFree(db, *pzIdxDbSName); + return SQLITE_NOMEM_BKPT; + } + return SQLITE_OK; +} + int vectorIndexSearch( sqlite3 *db, - const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, @@ -216034,7 +216065,11 @@ int vectorIndexSearch( ) { int type, dims, k, rc; double kDouble; - const char *zIdxName; + const char *zIdxFullName; + char *zIdxDbSNameAlloc = NULL; // allocated managed schema name string - must be freed if not null + char *zIdxNameAlloc = NULL; // allocated managed index name string - must be freed if not null + const char *zIdxDbSName = NULL; // schema name of the index (can be static in cases where explicit schema is omitted - so must not be freed) + const char *zIdxName = NULL; // index name (can be extracted with sqlite3_value_text and managed by SQLite - so must not be freed) const char *zErrMsg; Vector *pVector = NULL; DiskAnnIndex *pDiskAnn = NULL; @@ -216043,8 +216078,6 @@ int vectorIndexSearch( VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); - assert( zDbSName != NULL ); - if( argc != 3 ){ *pzErrMsg = sqlite3_mprintf("vector index(search): got %d parameters, expected 3", argc); rc = SQLITE_ERROR; @@ -216095,19 +216128,33 @@ int vectorIndexSearch( rc = SQLITE_ERROR; goto out; } - zIdxName = (const char*)sqlite3_value_text(argv[0]); - if( vectorIndexGetParameters(db, zIdxName, &idxParams) != 0 ){ + zIdxFullName = (const char*)sqlite3_value_text(argv[0]); + rc = getIndexNameParts(db, zIdxFullName, &zIdxDbSNameAlloc, &zIdxNameAlloc); + if( rc != SQLITE_OK ){ + *pzErrMsg = sqlite3_mprintf("vector 
index(search): failed to parse index name"); + goto out; + } + assert( (zIdxDbSNameAlloc == NULL && zIdxNameAlloc == NULL) || (zIdxDbSNameAlloc != NULL && zIdxNameAlloc != NULL) ); + if( zIdxDbSNameAlloc == NULL && zIdxNameAlloc == NULL ){ + zIdxDbSName = "main"; + zIdxName = zIdxFullName; + } else{ + zIdxDbSName = zIdxDbSNameAlloc; + zIdxName = zIdxNameAlloc; + } + + if( vectorIndexGetParameters(db, zIdxDbSName, zIdxName, &idxParams) != 0 ){ *pzErrMsg = sqlite3_mprintf("vector index(search): failed to parse vector index parameters"); rc = SQLITE_ERROR; goto out; } - pIndex = sqlite3FindIndex(db, zIdxName, zDbSName); + pIndex = sqlite3FindIndex(db, zIdxName, zIdxDbSName); if( pIndex == NULL ){ *pzErrMsg = sqlite3_mprintf("vector index(search): index not found"); rc = SQLITE_ERROR; goto out; } - rc = diskAnnOpenIndex(db, zDbSName, zIdxName, &idxParams, &pDiskAnn); + rc = diskAnnOpenIndex(db, zIdxDbSName, zIdxName, &idxParams, &pDiskAnn); if( rc != SQLITE_OK ){ *pzErrMsg = sqlite3_mprintf("vector index(search): failed to open diskann index"); goto out; @@ -216127,6 +216174,8 @@ int vectorIndexSearch( if( pVector != NULL ){ vectorFree(pVector); } + sqlite3DbFree(db, zIdxNameAlloc); + sqlite3DbFree(db, zIdxDbSNameAlloc); return rc; } @@ -216176,7 +216225,7 @@ int vectorIndexCursorInit( assert( zDbSName != NULL ); - if( vectorIndexGetParameters(db, zIndexName, &params) != 0 ){ + if( vectorIndexGetParameters(db, zDbSName, zIndexName, &params) != 0 ){ return SQLITE_ERROR; } pCursor = sqlite3DbMallocZero(db, sizeof(VectorIdxCursor)); @@ -216240,7 +216289,6 @@ typedef struct vectorVtab vectorVtab; struct vectorVtab { sqlite3_vtab base; /* Base class - must be first */ sqlite3 *db; /* Database connection */ - char* zDbSName; /* Database schema name */ }; typedef struct vectorVtab_cursor vectorVtab_cursor; @@ -216266,7 +216314,6 @@ static int vectorVtabConnect( sqlite3_vtab **ppVtab, char **pzErr ){ - char *zDbSName = NULL; vectorVtab *pVtab = NULL; int rc; /* @@ -216281,21 
+216328,17 @@ static int vectorVtabConnect( if( pVtab == NULL ){ return SQLITE_NOMEM_BKPT; } - zDbSName = sqlite3DbStrDup(db, argv[1]); // argv[1] is the database schema name by spec (see https://www.sqlite.org/vtab.html#the_xcreate_method) - if( zDbSName == NULL ){ - sqlite3_free(pVtab); - return SQLITE_NOMEM_BKPT; - } + // > Eponymous virtual tables exist in the "main" schema only, so they will not work if prefixed with a different schema name. + // so, argv[1] always equal to "main" and we can safely ignore it + // (see https://www.sqlite.org/vtab.html#epovtab) memset(pVtab, 0, sizeof(*pVtab)); pVtab->db = db; - pVtab->zDbSName = zDbSName; *ppVtab = (sqlite3_vtab*)pVtab; return SQLITE_OK; } static int vectorVtabDisconnect(sqlite3_vtab *pVtab){ vectorVtab *pVTab = (vectorVtab*)pVtab; - sqlite3DbFree(pVTab->db, pVTab->zDbSName); sqlite3_free(pVtab); return SQLITE_OK; } @@ -216362,7 +216405,7 @@ static int vectorVtabFilter( pCur->rows.aIntValues = NULL; pCur->rows.ppValues = NULL; - if( vectorIndexSearch(pVTab->db, pVTab->zDbSName, argc, argv, &pCur->rows, &pCur->nReads, &pCur->nWrites, &pVTab->base.zErrMsg) != 0 ){ + if( vectorIndexSearch(pVTab->db, argc, argv, &pCur->rows, &pCur->nReads, &pCur->nWrites, &pVTab->base.zErrMsg) != 0 ){ return SQLITE_ERROR; } diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index 82db050d36..801bf497c1 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -85652,7 +85652,7 @@ int vectorIdxParseColumnType(const char *, int *, int *, const char **); int vectorIndexCreate(Parse*, const Index*, const char *, const IdList*); int vectorIndexClear(sqlite3 *, const char *, const char *); int vectorIndexDrop(sqlite3 *, const char *, const char *); -int vectorIndexSearch(sqlite3 *, const char *, int, sqlite3_value **, VectorOutRows *, int *, int *, char **); +int vectorIndexSearch(sqlite3 *, int, sqlite3_value **, VectorOutRows *, int *, int *, char **); int 
vectorIndexCursorInit(sqlite3 *, const char *, const char *, VectorIdxCursor **); void vectorIndexCursorClose(sqlite3 *, VectorIdxCursor *, int *, int *); int vectorIndexInsert(VectorIdxCursor *, const UnpackedRecord *, char **); @@ -215829,17 +215829,25 @@ int vectorIndexTryGetParametersFromBinFormat(sqlite3 *db, const char *zSql, cons int vectorIndexGetParameters( sqlite3 *db, + const char *zDbSName, const char *zIdxName, VectorIdxParams *pParams ) { int rc = SQLITE_OK; + assert( zDbSName != NULL ); - static const char* zSelectSql = "SELECT metadata FROM " VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + static const char *zSelectSqlTemplate = "SELECT metadata FROM \"%w\"." VECTOR_INDEX_GLOBAL_META_TABLE " WHERE name = ?"; + char* zSelectSql; + zSelectSql = sqlite3_mprintf(zSelectSqlTemplate, zDbSName); + if( zSelectSql == NULL ){ + return SQLITE_NOMEM_BKPT; + } // zSelectSqlPekkaLegacy handles the case when user created DB before 04 July 2024 (https://discord.com/channels/933071162680958986/1225560924526477322/1258367912402489397) // when instead of table with binary parameters rigid schema was used for index settings // we should drop this eventually - but for now we postponed this decision static const char* zSelectSqlPekkaLegacy = "SELECT vector_type, block_size, dims, distance_ops FROM libsql_vector_index WHERE name = ?"; rc = vectorIndexTryGetParametersFromBinFormat(db, zSelectSql, zIdxName, pParams); + sqlite3_free(zSelectSql); if( rc == SQLITE_OK ){ return SQLITE_OK; } @@ -216022,9 +216030,32 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co return CREATE_OK; } +// extracts schema and index name part if full index name is composite (e.g. 
schema_name.index_name) +// if full index name has no schema part - function returns SQLITE_OK but leaves pzIdxDbSName and pzIdxName untouched +int getIndexNameParts(sqlite3 *db, const char *zIdxFullName, char **pzIdxDbSName, char **pzIdxName) { + int nFullName, nDbSName; + const char *pDot = zIdxFullName; + while( *pDot != '.' && *pDot != '\0' ){ + pDot++; + } + if( *pDot == '\0' ){ + return SQLITE_OK; + } + assert( *pDot == '.' ); + nFullName = sqlite3Strlen30(zIdxFullName); + nDbSName = pDot - zIdxFullName; + *pzIdxDbSName = sqlite3DbStrNDup(db, zIdxFullName, nDbSName); + *pzIdxName = sqlite3DbStrNDup(db, pDot + 1, nFullName - nDbSName - 1); + if( pzIdxName == NULL || pzIdxDbSName == NULL ){ + sqlite3DbFree(db, *pzIdxName); + sqlite3DbFree(db, *pzIdxDbSName); + return SQLITE_NOMEM_BKPT; + } + return SQLITE_OK; +} + int vectorIndexSearch( sqlite3 *db, - const char* zDbSName, int argc, sqlite3_value **argv, VectorOutRows *pRows, @@ -216034,7 +216065,11 @@ int vectorIndexSearch( ) { int type, dims, k, rc; double kDouble; - const char *zIdxName; + const char *zIdxFullName; + char *zIdxDbSNameAlloc = NULL; // allocated managed schema name string - must be freed if not null + char *zIdxNameAlloc = NULL; // allocated managed index name string - must be freed if not null + const char *zIdxDbSName = NULL; // schema name of the index (can be static in cases where explicit schema is omitted - so must not be freed) + const char *zIdxName = NULL; // index name (can be extracted with sqlite3_value_text and managed by SQLite - so must not be freed) const char *zErrMsg; Vector *pVector = NULL; DiskAnnIndex *pDiskAnn = NULL; @@ -216043,8 +216078,6 @@ int vectorIndexSearch( VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); - assert( zDbSName != NULL ); - if( argc != 3 ){ *pzErrMsg = sqlite3_mprintf("vector index(search): got %d parameters, expected 3", argc); rc = SQLITE_ERROR; @@ -216095,19 +216128,33 @@ int vectorIndexSearch( rc = SQLITE_ERROR; goto out; } - 
zIdxName = (const char*)sqlite3_value_text(argv[0]); - if( vectorIndexGetParameters(db, zIdxName, &idxParams) != 0 ){ + zIdxFullName = (const char*)sqlite3_value_text(argv[0]); + rc = getIndexNameParts(db, zIdxFullName, &zIdxDbSNameAlloc, &zIdxNameAlloc); + if( rc != SQLITE_OK ){ + *pzErrMsg = sqlite3_mprintf("vector index(search): failed to parse index name"); + goto out; + } + assert( (zIdxDbSNameAlloc == NULL && zIdxNameAlloc == NULL) || (zIdxDbSNameAlloc != NULL && zIdxNameAlloc != NULL) ); + if( zIdxDbSNameAlloc == NULL && zIdxNameAlloc == NULL ){ + zIdxDbSName = "main"; + zIdxName = zIdxFullName; + } else{ + zIdxDbSName = zIdxDbSNameAlloc; + zIdxName = zIdxNameAlloc; + } + + if( vectorIndexGetParameters(db, zIdxDbSName, zIdxName, &idxParams) != 0 ){ *pzErrMsg = sqlite3_mprintf("vector index(search): failed to parse vector index parameters"); rc = SQLITE_ERROR; goto out; } - pIndex = sqlite3FindIndex(db, zIdxName, zDbSName); + pIndex = sqlite3FindIndex(db, zIdxName, zIdxDbSName); if( pIndex == NULL ){ *pzErrMsg = sqlite3_mprintf("vector index(search): index not found"); rc = SQLITE_ERROR; goto out; } - rc = diskAnnOpenIndex(db, zDbSName, zIdxName, &idxParams, &pDiskAnn); + rc = diskAnnOpenIndex(db, zIdxDbSName, zIdxName, &idxParams, &pDiskAnn); if( rc != SQLITE_OK ){ *pzErrMsg = sqlite3_mprintf("vector index(search): failed to open diskann index"); goto out; @@ -216127,6 +216174,8 @@ int vectorIndexSearch( if( pVector != NULL ){ vectorFree(pVector); } + sqlite3DbFree(db, zIdxNameAlloc); + sqlite3DbFree(db, zIdxDbSNameAlloc); return rc; } @@ -216176,7 +216225,7 @@ int vectorIndexCursorInit( assert( zDbSName != NULL ); - if( vectorIndexGetParameters(db, zIndexName, &params) != 0 ){ + if( vectorIndexGetParameters(db, zDbSName, zIndexName, &params) != 0 ){ return SQLITE_ERROR; } pCursor = sqlite3DbMallocZero(db, sizeof(VectorIdxCursor)); @@ -216240,7 +216289,6 @@ typedef struct vectorVtab vectorVtab; struct vectorVtab { sqlite3_vtab base; /* Base class - must be first */ 
sqlite3 *db; /* Database connection */ - char* zDbSName; /* Database schema name */ }; typedef struct vectorVtab_cursor vectorVtab_cursor; @@ -216266,7 +216314,6 @@ static int vectorVtabConnect( sqlite3_vtab **ppVtab, char **pzErr ){ - char *zDbSName = NULL; vectorVtab *pVtab = NULL; int rc; /* @@ -216281,21 +216328,17 @@ static int vectorVtabConnect( if( pVtab == NULL ){ return SQLITE_NOMEM_BKPT; } - zDbSName = sqlite3DbStrDup(db, argv[1]); // argv[1] is the database schema name by spec (see https://www.sqlite.org/vtab.html#the_xcreate_method) - if( zDbSName == NULL ){ - sqlite3_free(pVtab); - return SQLITE_NOMEM_BKPT; - } + // > Eponymous virtual tables exist in the "main" schema only, so they will not work if prefixed with a different schema name. + // so, argv[1] always equal to "main" and we can safely ignore it + // (see https://www.sqlite.org/vtab.html#epovtab) memset(pVtab, 0, sizeof(*pVtab)); pVtab->db = db; - pVtab->zDbSName = zDbSName; *ppVtab = (sqlite3_vtab*)pVtab; return SQLITE_OK; } static int vectorVtabDisconnect(sqlite3_vtab *pVtab){ vectorVtab *pVTab = (vectorVtab*)pVtab; - sqlite3DbFree(pVTab->db, pVTab->zDbSName); sqlite3_free(pVtab); return SQLITE_OK; } @@ -216362,7 +216405,7 @@ static int vectorVtabFilter( pCur->rows.aIntValues = NULL; pCur->rows.ppValues = NULL; - if( vectorIndexSearch(pVTab->db, pVTab->zDbSName, argc, argv, &pCur->rows, &pCur->nReads, &pCur->nWrites, &pVTab->base.zErrMsg) != 0 ){ + if( vectorIndexSearch(pVTab->db, argc, argv, &pCur->rows, &pCur->nReads, &pCur->nWrites, &pVTab->base.zErrMsg) != 0 ){ return SQLITE_ERROR; } From badbbe979f218820fe2dbda6ff01806994f2f0e7 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 15:10:16 +0400 Subject: [PATCH 5/8] add test for unknown schema name --- libsql-sqlite3/test/libsql_vector_index.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index e42b3613f7..ef4d75c0b6 
100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -489,6 +489,7 @@ do_test vector-errors { sqlite3_exec db { CREATE TABLE t_mixed_t( v FLOAT32(3)); } sqlite3_exec db { INSERT INTO t_mixed_t VALUES('[1]'); } lappend ret [error_messages {CREATE INDEX t_mixed_t_idx ON t_mixed_t( libsql_vector_idx(v) )}] + lappend ret [error_messages {SELECT * FROM vector_top_k('t.t_attach_idx', vector('[3,4,5]'), 4)}] } [list {*}{ {no such table: main.t_no} {no such column: v} @@ -507,6 +508,7 @@ do_test vector-errors { {vector index(search): dimensions are different: 2 != 4} {vector index(search): third parameter (k) must be an integer, but float value were provided} {vector index(insert): dimensions are different: 1 != 3} + {vector index(search): unknown schema name 't'} }] From 26ba2d969bab80e5546ed4afdff52db48efa0dd7 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 15:13:06 +0400 Subject: [PATCH 6/8] properly lock schema in case when we work with attached DB --- libsql-sqlite3/src/vectorIndex.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index 1e3b4363b0..d41b5711b0 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -981,7 +981,7 @@ int vectorIndexSearch( int *nWrites, char **pzErrMsg ) { - int type, dims, k, rc; + int type, dims, k, rc, iDb = -1; double kDouble; const char *zIdxFullName; char *zIdxDbSNameAlloc = NULL; // allocated managed schema name string - must be freed if not null @@ -1059,6 +1059,18 @@ int vectorIndexSearch( } else{ zIdxDbSName = zIdxDbSNameAlloc; zIdxName = zIdxNameAlloc; + iDb = sqlite3FindDbName(db, zIdxDbSName); + if( iDb < 0 ){ + *pzErrMsg = sqlite3_mprintf("vector index(search): unknown schema '%s'", zIdxDbSName); + rc = SQLITE_ERROR; + goto out; + } + // we need to hold mutex to protect schema against unwanted changes + // this 
code is necessary, otherwise sqlite3SchemaMutexHeld assert will fail + if( iDb !=1 ){ + // not "main" DB which we already hold mutex for + sqlite3BtreeEnter(db->aDb[iDb].pBt); + } } if( vectorIndexGetParameters(db, zIdxDbSName, zIdxName, &idxParams) != 0 ){ @@ -1094,6 +1106,9 @@ int vectorIndexSearch( } sqlite3DbFree(db, zIdxNameAlloc); sqlite3DbFree(db, zIdxDbSNameAlloc); + if( iDb >= 0 && iDb != 1 ){ + sqlite3BtreeLeave(db->aDb[iDb].pBt); + } return rc; } From 28cf3d8529642f57c0ce1060d7e3a43040f3aad0 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 17:46:14 +0400 Subject: [PATCH 7/8] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 17 ++++++++++++++++- libsql-ffi/bundled/src/sqlite3.c | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index 801bf497c1..cf812dc9ae 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -216063,7 +216063,7 @@ int vectorIndexSearch( int *nWrites, char **pzErrMsg ) { - int type, dims, k, rc; + int type, dims, k, rc, iDb = -1; double kDouble; const char *zIdxFullName; char *zIdxDbSNameAlloc = NULL; // allocated managed schema name string - must be freed if not null @@ -216141,6 +216141,18 @@ int vectorIndexSearch( } else{ zIdxDbSName = zIdxDbSNameAlloc; zIdxName = zIdxNameAlloc; + iDb = sqlite3FindDbName(db, zIdxDbSName); + if( iDb < 0 ){ + *pzErrMsg = sqlite3_mprintf("vector index(search): unknown schema '%s'", zIdxDbSName); + rc = SQLITE_ERROR; + goto out; + } + // we need to hold mutex to protect schema against unwanted changes + // this code is necessary, otherwise sqlite3SchemaMutexHeld assert will fail + if( iDb !=1 ){ + // not "main" DB which we already hold mutex for + sqlite3BtreeEnter(db->aDb[iDb].pBt); + } } if( vectorIndexGetParameters(db, zIdxDbSName, zIdxName, 
&idxParams) != 0 ){ @@ -216176,6 +216188,9 @@ int vectorIndexSearch( } sqlite3DbFree(db, zIdxNameAlloc); sqlite3DbFree(db, zIdxDbSNameAlloc); + if( iDb >= 0 && iDb != 1 ){ + sqlite3BtreeLeave(db->aDb[iDb].pBt); + } return rc; } diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index 801bf497c1..cf812dc9ae 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -216063,7 +216063,7 @@ int vectorIndexSearch( int *nWrites, char **pzErrMsg ) { - int type, dims, k, rc; + int type, dims, k, rc, iDb = -1; double kDouble; const char *zIdxFullName; char *zIdxDbSNameAlloc = NULL; // allocated managed schema name string - must be freed if not null @@ -216141,6 +216141,18 @@ int vectorIndexSearch( } else{ zIdxDbSName = zIdxDbSNameAlloc; zIdxName = zIdxNameAlloc; + iDb = sqlite3FindDbName(db, zIdxDbSName); + if( iDb < 0 ){ + *pzErrMsg = sqlite3_mprintf("vector index(search): unknown schema '%s'", zIdxDbSName); + rc = SQLITE_ERROR; + goto out; + } + // we need to hold mutex to protect schema against unwanted changes + // this code is necessary, otherwise sqlite3SchemaMutexHeld assert will fail + if( iDb !=1 ){ + // not "main" DB which we already hold mutex for + sqlite3BtreeEnter(db->aDb[iDb].pBt); + } } if( vectorIndexGetParameters(db, zIdxDbSName, zIdxName, &idxParams) != 0 ){ @@ -216176,6 +216188,9 @@ int vectorIndexSearch( } sqlite3DbFree(db, zIdxNameAlloc); sqlite3DbFree(db, zIdxDbSNameAlloc); + if( iDb >= 0 && iDb != 1 ){ + sqlite3BtreeLeave(db->aDb[iDb].pBt); + } return rc; } From 69001c546d7320a3b556ec335ee4d06ea7376944 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 29 Oct 2024 18:15:25 +0400 Subject: [PATCH 8/8] fix test --- libsql-sqlite3/test/libsql_vector_index.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index ef4d75c0b6..c7c7a9dfa8 100644 --- 
a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -508,7 +508,7 @@ do_test vector-errors { {vector index(search): dimensions are different: 2 != 4} {vector index(search): third parameter (k) must be an integer, but float value were provided} {vector index(insert): dimensions are different: 1 != 3} - {vector index(search): unknown schema name 't'} + {vector index(search): unknown schema 't'} }]