From 6d8eeabc312d055b61f595d5fc897e5571e7378a Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sat, 21 Oct 2023 19:10:48 +0400 Subject: [PATCH 01/14] WAL/blockmaps: continue blockmaps initialization after failure ContinueBlockMapGroupInitialization() description has all the details about how it's done and how different kinds of failures are handled. --- src/hnsw/external_index.c | 318 +++++++++++++++++++++++++++++--------- src/hnsw/external_index.h | 15 +- src/hnsw/scan.c | 5 +- 3 files changed, 262 insertions(+), 76 deletions(-) diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index ae3bbbd4a..431123bc2 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -24,7 +24,7 @@ #include #endif -static BlockNumber getBlockMapPageBlockNumber(uint32 *blockmap_page_group_index, int id); +static BlockNumber getBlockMapPageBlockNumber(const HnswBlockMapGroupDesc *blockmap_groups, int id); uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level) { @@ -54,57 +54,221 @@ static char *extract_node(char *data, return tape; } -int CreateBlockMapGroup( - HnswIndexHeaderPage *hdr, Relation index, ForkNumber forkNum, int first_node_index, int blockmap_groupno) +static BlockNumber NumberOfBlockMapsInGroup(unsigned groupno) { - // Create empty blockmap pages for the group - const int number_of_blockmaps_in_group = 1 << blockmap_groupno; - OffsetNumber inserted_at = InvalidOffsetNumber; - assert(hdr != NULL); + assert(groupno < HNSW_MAX_BLOCKMAP_GROUPS); - for(int blockmap_id = 0; blockmap_id < number_of_blockmaps_in_group; ++blockmap_id) { - GenericXLogState *state = GenericXLogStart(index); - Buffer buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + return 1u << groupno; +} - if(blockmap_id == 0) { - hdr->blockmap_page_groups = blockmap_groupno; - hdr->blockmap_page_group_index[ blockmap_groupno ] = BufferGetBlockNumber(buf); - } +static bool BlockMapGroupIsFullyInitialized(HnswIndexHeaderPage *hdr, unsigned groupno) +{ + assert(groupno < HNSW_MAX_BLOCKMAP_GROUPS); + + return hdr->blockmap_groups[ groupno ].blockmaps_initialized == NumberOfBlockMapsInGroup(groupno); +} + +static uint32 BlockMapGroupFirstNodeIndex(unsigned groupno) +{ + uint32 first_node_index = 0; + + if(groupno == 0) return 0; + for(unsigned i = 0; i < groupno - 1; ++i) first_node_index += NumberOfBlockMapsInGroup(i); + return first_node_index; +} + +static bool AreEnoughBlockMapsForTupleId(uint32 blockmap_groups_nr, uint32 tuple_id) +{ + // new_tuple_id / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 is kth blockmap + // we check if k is more than already created 2^groups + return tuple_id / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 < ((uint32)1 << blockmap_groups_nr); +} + +/* + * Updates HnswBlockMapGroupDesc for groupno in the HnswIndexHeaderPage. + * The header is fsynced to the WAL after this function returns if flush_log is true. + * Assumes that the header (block 0) buffer is locked. + * + * In the current use cases the header page is added to a WAL record somewhere + * up the call stack, so the changes made here must be duplicated to the + * HnswIndexHeaderPage in that header page, otherwise they would be overwritten + * when that WAL record up the stack is written to the log. + */ +static void UpdateHeaderBlockMapGroupDesc( + Relation index, ForkNumber forkNum, unsigned groupno, const HnswBlockMapGroupDesc *desc, bool flush_log) +{ + GenericXLogState *state; + BlockNumber HEADER_BLOCK = 0; + Page hdr_page; + Buffer hdr_buf; + HnswIndexHeaderPage *hdr_copy; + XLogRecPtr log_rec_ptr; + + state = GenericXLogStart(index); + /* no need to look the buffer because it's the header (block 0) and it's locked already) */ + hdr_buf = ReadBufferExtended(index, forkNum, HEADER_BLOCK, RBM_NORMAL, NULL); + hdr_page = GenericXLogRegisterBuffer(state, hdr_buf, LDB_GENERIC_XLOG_DELTA_IMAGE); + + hdr_copy = (HnswIndexHeaderPage *)PageGetContents(hdr_page); + assert(groupno < lengthof(hdr_copy->blockmap_groups)); + + hdr_copy->blockmap_groups_nr = groupno + 1; + + hdr_copy->blockmap_groups[ groupno ] = *desc; + + log_rec_ptr = GenericXLogFinish(state); + assert(log_rec_ptr != InvalidXLogRecPtr); + if(flush_log) XLogFlush(log_rec_ptr); + ReleaseBuffer(hdr_buf); +} + +/* + * Continue and finish the initialization of a blockmap group. + * If the initialization hasn't been started, then the initialization is started. + * When this function is called a BUFFER_LOCK_EXCLUSIVE is supposed to be taken on the block 0 (header block). + * When this function returns the block maps of the group are fully initialized. + * + * HnswBlockMapGroupDesc + * first_block blockmaps_initialized meaning + * InvalidBlockNumber 0 The blockmap group initialization hasn't started. + * 0 The blockmap group initialization has started, but + * no blockmaps has been initialized. It's possible that + * the block allocation for the group hasn't finished. + * >0 Some blockmaps were initialized. + * NumberOfBlockMapsInGroup() The blockmap group had been fully initialized. + * + * The process restarts during the blockmap creation are handled in the following way: + * - there are only 2 cases to modify the index in the code right now: index + * creation and insert. If there are more cases the new code MUST do the + * BlockMapGroupIsFullyInitialized() check and then call + * ContinueBlockMapGroupInitialization if the blockmap group is not fully + * initialized (similar to how PrepareIndexTuple() currently does it); + * - if the process restart happens during the index creation there is no code + * currently to continue an index creation after crash, so this function + * doesn't do anything special about it and just handles it as usual; + * - if the process restart happens during the insert it might be possible that + * the blockmap group initialization hasn't been completed (and it could only + * happen to one group, which is the last one). We're considering the state + * of the blockmap group initialization after WAL replay after restart. The + * following cases are possible: + * + * - the first header update WAL record (which sets HnswBlockMapGroupDesc.first_block + * for the group) hasn't been replayed. It means that + * ContinueBlockMapGroupInitialization() will start from the first header + * update and then continue as usual; + * - the first header update WAL record was replayed, but the block + * allocation for the index hasn't been finished. It will be detected by + * non-InvalidBlockNumber value in the first_block and a size check for the + * index relation. This case will be handled by finishing the block + * allocations and then continuing as usual; + * - blockmap pages initialization hasn't been complete. In this case the + * HnswBlockMapGroupDesc.blockmaps_initialized will be used as the first + * blockmap page to continue the initialization from. This value is updated + * in the header approximately every HNSW_BLOCKMAP_UPDATE_HEADER_EVERY + * initialized blockmap pages. + * + * - in case of failure during WAL replay we're relying on PostgreSQL to + * correctly recover the WAL on subsequent restart; + * - in case of a failure in this function when it's continuing interrupted + * blockmap group initialization the subsequent run of this function after + * the restart will correctly continue from the nearest place it could + * continue from. + * + * Important note: This function creates a WAL record to update + * HnswIndexHeaderPage.blockmap_groups and + * HnswIndexHeaderPage.blockmap_groups_nr in the header page. Therefore it has + * to update the same fields in the memory pointed to by hdr parameter, because + * the header page is added to a WAL record somewhere on the higher level and + * that WAL record would be GenericXLogFinish()ed after the header updates + * here, so whatever is written to the header page here directly would be + * overwritten. + */ +static void ContinueBlockMapGroupInitialization( + HnswIndexHeaderPage *hdr, Relation index, ForkNumber forkNum, uint32 first_node_index, unsigned groupno) +{ + GenericXLogState *state; + BlockNumber blockmaps_in_group = NumberOfBlockMapsInGroup(groupno); + const HnswBlockMapGroupDesc *group_desc; + HnswBlockmapPage *blockmap_page; + unsigned pages_in_xlog_state; + Buffer bufs[ MAX_GENERIC_XLOG_PAGES ]; + Buffer buf; + Page page; + HnswIndexPageSpecialBlock *special; + OffsetNumber inserted_at; + + assert(groupno < HNSW_MAX_BLOCKMAP_GROUPS); + + if(hdr->blockmap_groups[ groupno ].first_block == InvalidBlockNumber) { + assert(hdr->blockmap_groups[ groupno ].blockmaps_initialized == 0); + hdr->blockmap_groups[ groupno ].first_block = RelationGetNumberOfBlocksInFork(index, forkNum); + assert(groupno == hdr->blockmap_groups_nr); + hdr->blockmap_groups_nr = groupno + 1; + UpdateHeaderBlockMapGroupDesc(index, forkNum, groupno, &hdr->blockmap_groups[ groupno ], true); + } + assert(hdr->blockmap_groups_nr == groupno + 1); + + group_desc = &hdr->blockmap_groups[ groupno ]; + if(group_desc->blockmaps_initialized == 0 + && group_desc->first_block + blockmaps_in_group > RelationGetNumberOfBlocksInFork(index, forkNum)) { + buf = ExtendBufferedRelTo(BMR_REL(index), + forkNum, + NULL, + EB_CLEAR_SIZE_CACHE, + group_desc->first_block + blockmaps_in_group, + RBM_NORMAL); + assert(group_desc->first_block + blockmaps_in_group - 1 == BufferGetBlockNumber(buf)); + ReleaseBuffer(buf); + } + assert(group_desc->first_block + blockmaps_in_group <= RelationGetNumberOfBlocksInFork(index, forkNum)); + + blockmap_page = palloc0(sizeof(*blockmap_page)); + pages_in_xlog_state = 0; + for(uint32 blockmap_id = group_desc->blockmaps_initialized; blockmap_id < blockmaps_in_group; ++blockmap_id) { + if(pages_in_xlog_state == 0) state = GenericXLogStart(index); + + buf = ReadBufferExtended(index, forkNum, group_desc->first_block + blockmap_id, RBM_NORMAL, NULL); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - Page page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE); + page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE); PageInit(page, BufferGetPageSize(buf), sizeof(HnswIndexPageSpecialBlock)); - HnswIndexPageSpecialBlock *special = (HnswIndexPageSpecialBlock *)PageGetSpecialPointer(page); + special = (HnswIndexPageSpecialBlock *)PageGetSpecialPointer(page); special->firstId = first_node_index + blockmap_id * HNSW_BLOCKMAP_BLOCKS_PER_PAGE; - special->nextblockno = InvalidBlockNumber; - - HnswBlockmapPage *blockmap = palloc0(BLCKSZ); - blockmap->first_id = first_node_index + blockmap_id * HNSW_BLOCKMAP_BLOCKS_PER_PAGE; + special->lastId = special->firstId + HNSW_BLOCKMAP_BLOCKS_PER_PAGE - 1; + /* TODO nextblockno is incorrect for the last blockmap in the group */ + special->nextblockno = group_desc->first_block + blockmap_id + 1; + blockmap_page->first_id = first_node_index + blockmap_id * HNSW_BLOCKMAP_BLOCKS_PER_PAGE; // we always add a single Blockmap page per Index page which has a fixed size that // always fits in postgres wal page. So this should never happen // (Assumes 8k BLKSZ. we can make HnswBlockmapPage size configurable by BLCKSZ) - inserted_at = PageAddItem(page, (Item)blockmap, sizeof(HnswBlockmapPage), InvalidOffsetNumber, false, false); + inserted_at = PageAddItem(page, (Item)blockmap_page, sizeof(*blockmap_page), InvalidOffsetNumber, false, false); ldb_invariant(inserted_at != InvalidOffsetNumber, "could not add blockmap to page %d", inserted_at); - special->lastId = first_node_index + (blockmap_id + 1) * HNSW_BLOCKMAP_BLOCKS_PER_PAGE - 1; - special->nextblockno = BufferGetBlockNumber(buf) + 1; + bufs[ pages_in_xlog_state++ ] = buf; - // GenericXLogFinish also calls MarkBufferDirty(buf) - GenericXLogFinish(state); - UnlockReleaseBuffer(buf); - // GenericXLog allows registering up to 4 buffers at a time. So, we cannot set up large BlockMapGroups - // in a single WAL entry. If we could, we would start the generic Xlog record before the for loop and commit - // all changes as a whole in the end of this function. - // Because now all changes do not happen atomically, we probably need to use some other mechanism to make - // sure we do not corrupt the index in case of a crash in the middle of a BlockMapGroup creation. - } + if(pages_in_xlog_state == MAX_GENERIC_XLOG_PAGES || blockmap_id == blockmaps_in_group - 1) { + bool update_header = false; + // GenericXLogFinish also calls MarkBufferDirty(buf) + GenericXLogFinish(state); + for(unsigned i = 0; i < pages_in_xlog_state; ++i) { + UnlockReleaseBuffer(bufs[ i ]); + if(((blockmap_id - pages_in_xlog_state + 1 + i) % HNSW_BLOCKMAP_UPDATE_HEADER_EVERY) == 0) + update_header = true; + } + if(update_header || blockmap_id == blockmaps_in_group - 1) { + hdr->blockmap_groups[ groupno ].blockmaps_initialized = blockmap_id + 1; + UpdateHeaderBlockMapGroupDesc( + index, forkNum, groupno, &hdr->blockmap_groups[ groupno ], blockmap_id == blockmaps_in_group - 1); + } + pages_in_xlog_state = 0; + } + } + pfree(blockmap_page); // it is possible that usearch asks for a newly added node from this blockmap range // we need to make sure the global header has this information - - return number_of_blockmaps_in_group; } void StoreExternalIndexBlockMapGroup(Relation index, @@ -114,12 +278,13 @@ void StoreExternalIndexBlockMapGroup(Relation index, char *data, uint64 *progress, int dimension, - int first_node_index, - size_t num_added_vectors, - int blockmap_groupno) + uint32 first_node_index, + uint32 num_added_vectors, + unsigned blockmap_groupno) { - const int number_of_blockmaps_in_group - = CreateBlockMapGroup(headerp, index, forkNum, first_node_index, blockmap_groupno); + const uint32 number_of_blockmaps_in_group = NumberOfBlockMapsInGroup(blockmap_groupno); + + ContinueBlockMapGroupInitialization(headerp, index, forkNum, first_node_index, blockmap_groupno); // Now add nodes to data pages char *node = 0; @@ -222,10 +387,10 @@ void StoreExternalIndexBlockMapGroup(Relation index, headerp->last_data_block = last_block; // Update blockmap pages with correct associations - for(int blockmap_id = 0; blockmap_id < number_of_blockmaps_in_group; ++blockmap_id) { - // When the blockmap page group was created, header block was updated accordingly in CreateBlockMapGroup - // call above. - const BlockNumber blockmapno = blockmap_id + headerp->blockmap_page_group_index[ blockmap_groupno ]; + for(uint32 blockmap_id = 0; blockmap_id < number_of_blockmaps_in_group; ++blockmap_id) { + // When the blockmap page group was created, header block was updated accordingly in + // ContinueBlockMapGroupInitialization call above. + const BlockNumber blockmapno = blockmap_id + headerp->blockmap_groups[ blockmap_groupno ].first_block; Buffer buf = ReadBufferExtended(index, MAIN_FORKNUM, blockmapno, RBM_NORMAL, NULL); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); @@ -271,20 +436,26 @@ void StoreExternalIndex(Relation index, headerp->metric_kind = opts->metric_kind; headerp->num_vectors = num_added_vectors; - headerp->blockmap_page_groups = 0; - MemSet(headerp->blockmap_page_group_index, 0, HNSW_MAX_BLOCKMAP_GROUPS); - // headerp->blockmap_page_group_index and blockmap_page_groups are + headerp->blockmap_groups_nr = 0; + + for(uint32 i = 0; i < lengthof(headerp->blockmap_groups); ++i) { + headerp->blockmap_groups[ i ] = (HnswBlockMapGroupDesc){ + .first_block = InvalidBlockNumber, + .blockmaps_initialized = 0, + }; + } + // headerp->blockmap_groups and blockmap_groups_nr are // updated in a separate wal entry - headerp->last_data_block = -1; + headerp->last_data_block = InvalidBlockNumber; memcpy(headerp->usearch_header, data, USEARCH_HEADER_SIZE); ((PageHeader)header_page)->pd_lower = ((char *)headerp + sizeof(HnswIndexHeaderPage)) - (char *)header_page; - uint64 progress = USEARCH_HEADER_SIZE; // usearch header size - int blockmap_groupno = 0; - int group_node_first_index = 0; - int num_added_vectors_remaining = (int)num_added_vectors; - int batch_size = HNSW_BLOCKMAP_BLOCKS_PER_PAGE; + uint64 progress = USEARCH_HEADER_SIZE; // usearch header size + unsigned blockmap_groupno = 0; + uint32 group_node_first_index = 0; + uint32 num_added_vectors_remaining = num_added_vectors; + uint32 batch_size = HNSW_BLOCKMAP_BLOCKS_PER_PAGE; while(num_added_vectors_remaining > 0) { StoreExternalIndexBlockMapGroup(index, external_index, @@ -296,7 +467,7 @@ void StoreExternalIndex(Relation index, group_node_first_index, Min(num_added_vectors_remaining, batch_size), blockmap_groupno); - num_added_vectors_remaining -= batch_size; + num_added_vectors_remaining -= Min(num_added_vectors_remaining, batch_size); group_node_first_index += batch_size; batch_size = batch_size * 2; blockmap_groupno++; @@ -350,6 +521,14 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, uint32 new_tuple_level, HnswInsertState *insertstate) { + if(hdr->blockmap_groups_nr > 0) { + unsigned last_blockmap_group = hdr->blockmap_groups_nr - 1; + + if(!BlockMapGroupIsFullyInitialized(hdr, last_blockmap_group)) { + ContinueBlockMapGroupInitialization( + hdr, index_rel, MAIN_FORKNUM, BlockMapGroupFirstNodeIndex(last_blockmap_group), last_blockmap_group); + } + } // if any data blocks exist, the last one's buffer will be read into this Buffer last_dblock = InvalidBuffer; // if a new data buffer is created for the inserted vector, it will be stored here @@ -377,7 +556,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, * (create new page if necessary) ***/ if(hdr->last_data_block == InvalidBlockNumber) { - CreateBlockMapGroup(hdr, index_rel, MAIN_FORKNUM, 0, 0); + ContinueBlockMapGroupInitialization(hdr, index_rel, MAIN_FORKNUM, 0, 0); new_dblock = ReadBufferExtended(index_rel, MAIN_FORKNUM, P_NEW, RBM_NORMAL, NULL); LockBuffer(new_dblock, BUFFER_LOCK_EXCLUSIVE); new_vector_blockno = BufferGetBlockNumber(new_dblock); @@ -405,9 +584,8 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, assert(last_dblock != InvalidBuffer); - const uint32 blockmaps_are_enough - = new_tuple_id / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 < ((uint32)1 << (hdr->blockmap_page_groups + 1)); - if(PageGetFreeSpace(page) > sizeof(HnswIndexTuple) + alloced_tuple->size && blockmaps_are_enough) { + if(PageGetFreeSpace(page) > sizeof(HnswIndexTuple) + alloced_tuple->size + && AreEnoughBlockMapsForTupleId(hdr->blockmap_groups_nr, new_tuple_id)) { // there is enough space in the last page to fit the new vector // so we just append it to the page ldb_dlog("InsertBranching: we adding element to existing page"); @@ -430,10 +608,10 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, // page creation. // check the count of blockmaps, see if there's place to add the block id, if yes add, if no create a - // new group check if already existing blockmaps are not enough new_tuple_id / - // HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 is kth blockmap we check if k is more than already created 2^groups - if(new_tuple_id / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 >= ((uint32)1 << (hdr->blockmap_page_groups + 1))) { - CreateBlockMapGroup(hdr, index_rel, MAIN_FORKNUM, new_tuple_id, hdr->blockmap_page_groups + 1); + // new group check if already existing blockmaps are not enough + if(!AreEnoughBlockMapsForTupleId(hdr->blockmap_groups_nr, new_tuple_id)) { + ContinueBlockMapGroupInitialization( + hdr, index_rel, MAIN_FORKNUM, new_tuple_id, hdr->blockmap_groups_nr); } new_dblock = ReadBufferExtended(index_rel, MAIN_FORKNUM, P_NEW, RBM_NORMAL, NULL); @@ -471,7 +649,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, page = NULL; // to avoid its accidental use /*** Update pagemap with the information of the added page ***/ { - BlockNumber blockmapno = getBlockMapPageBlockNumber(hdr->blockmap_page_group_index, new_tuple_id); + BlockNumber blockmapno = getBlockMapPageBlockNumber(&hdr->blockmap_groups[ 0 ], new_tuple_id); Page blockmap_page; HnswBlockmapPage *blockmap; int max_offset; @@ -504,7 +682,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, return new_tup_ref; } -static BlockNumber getBlockMapPageBlockNumber(uint32 *blockmap_page_group_index, int id) +static BlockNumber getBlockMapPageBlockNumber(const HnswBlockMapGroupDesc *blockmap_groups, int id) { assert(id >= 0); // Trust me, I'm an engineer! @@ -512,16 +690,16 @@ static BlockNumber getBlockMapPageBlockNumber(uint32 *blockmap_page_group_index, int k; for(k = 0; id >= (1 << k); ++k) { } - return blockmap_page_group_index[ k - 1 ] + (id - (1 << (k - 1))); + assert(blockmap_groups[ k - 1 ].first_block != InvalidBlockNumber); + return blockmap_groups[ k - 1 ].first_block + (id - (1 << (k - 1))); } BlockNumber getDataBlockNumber(RetrieverCtx *ctx, int id, bool add_to_extra_dirtied) { - HTABCache *cache = &ctx->block_numbers_cache; - uint32 *blockmap_group_index = ctx->header_page_under_wal != NULL - ? ctx->header_page_under_wal->blockmap_page_group_index - : ctx->blockmap_page_group_index_cache; - BlockNumber blockmapno = getBlockMapPageBlockNumber(blockmap_group_index, id); + HTABCache *cache = &ctx->block_numbers_cache; + HnswBlockMapGroupDesc *blockmap_groups + = ctx->header_page_under_wal != NULL ? ctx->header_page_under_wal->blockmap_groups : ctx->blockmap_groups_cache; + BlockNumber blockmapno = getBlockMapPageBlockNumber(blockmap_groups, id); BlockNumber blockno; HnswBlockmapPage *blockmap_page; Page page; diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h index b653719e2..b1c3db893 100644 --- a/src/hnsw/external_index.h +++ b/src/hnsw/external_index.h @@ -32,8 +32,17 @@ // to be able to test more of the algorithm corner cases with a small table dataset #define HNSW_BLOCKMAP_BLOCKS_PER_PAGE 2000 +// the header is updated every time this number of pages is initialized in ContinueBlockMapGroupInitialization() +#define HNSW_BLOCKMAP_UPDATE_HEADER_EVERY 100 + #define USEARCH_HEADER_SIZE 80 +typedef struct HnswBlockMapGroupDesc +{ + BlockNumber first_block; + uint32 blockmaps_initialized; +} HnswBlockMapGroupDesc; + typedef struct HnswIndexHeaderPage { uint32 magicNumber; @@ -47,8 +56,8 @@ typedef struct HnswIndexHeaderPage BlockNumber last_data_block; char usearch_header[ USEARCH_HEADER_SIZE ]; - uint32 blockmap_page_groups; - uint32 blockmap_page_group_index[ HNSW_MAX_BLOCKMAP_GROUPS ]; + uint32 blockmap_groups_nr; + HnswBlockMapGroupDesc blockmap_groups[ HNSW_MAX_BLOCKMAP_GROUPS ]; } HnswIndexHeaderPage; typedef struct HnswIndexPageSpecialBlock @@ -84,7 +93,7 @@ typedef struct Relation index_rel; // used for scans - uint32 blockmap_page_group_index_cache[ HNSW_MAX_BLOCKMAP_GROUPS ]; // todo:: + HnswBlockMapGroupDesc blockmap_groups_cache[ HNSW_MAX_BLOCKMAP_GROUPS ]; // todo:: // used for inserts HnswIndexHeaderPage *header_page_under_wal; diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c index a8f70f63d..ed958ad0f 100644 --- a/src/hnsw/scan.c +++ b/src/hnsw/scan.c @@ -75,9 +75,8 @@ IndexScanDesc ldb_ambeginscan(Relation index, int nkeys, int norderbys) if(error != NULL) elog(ERROR, "error loading index: %s", error); assert(error == NULL); - memcpy(retriever_ctx->blockmap_page_group_index_cache, - headerp->blockmap_page_group_index, - sizeof(retriever_ctx->block_numbers_cache)); + memcpy( + retriever_ctx->blockmap_groups_cache, headerp->blockmap_groups, sizeof(retriever_ctx->blockmap_groups_cache)); retriever_ctx->header_page_under_wal = NULL; usearch_mem = headerp->usearch_header; From aefbd7581cc96032a948d807496456cb6a49938f Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 22 Oct 2023 02:22:21 +0400 Subject: [PATCH 02/14] src/hnsw/external_index: use ReadBufferExtended(P_NEW) instead of ExtendBufferedRelTo() for PostgreSQL < 16 --- src/hnsw/external_index.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 431123bc2..dd04d36ad 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -122,6 +122,25 @@ static void UpdateHeaderBlockMapGroupDesc( ReleaseBuffer(hdr_buf); } +static void ExtendIndexRelationTo(Relation index, ForkNumber forkNum, BlockNumber number_of_blocks) +{ + Buffer buf; + + assert(RelationGetNumberOfBlocksInFork(index, forkNum) < number_of_blocks); + +#if PG_VERSION_NUM >= 160000 + buf = ExtendBufferedRelTo(BMR_REL(index), forkNum, NULL, EB_CLEAR_SIZE_CACHE, number_of_blocks, RBM_NORMAL); + assert(BufferGetBlockNumber(buf) == number_of_blocks - 1); + ReleaseBuffer(buf); +#else + for(BlockNumber block = RelationGetNumberOfBlocksInFork(index, forkNum); block < number_of_blocks; ++block) { + buf = ReadBufferExtended(index, forkNum, P_NEW, RBM_NORMAL, NULL); + assert(BufferGetBlockNumber(buf) == block); + ReleaseBuffer(buf); + } +#endif +} + /* * Continue and finish the initialization of a blockmap group. * If the initialization hasn't been started, then the initialization is started. @@ -211,14 +230,7 @@ static void ContinueBlockMapGroupInitialization( group_desc = &hdr->blockmap_groups[ groupno ]; if(group_desc->blockmaps_initialized == 0 && group_desc->first_block + blockmaps_in_group > RelationGetNumberOfBlocksInFork(index, forkNum)) { - buf = ExtendBufferedRelTo(BMR_REL(index), - forkNum, - NULL, - EB_CLEAR_SIZE_CACHE, - group_desc->first_block + blockmaps_in_group, - RBM_NORMAL); - assert(group_desc->first_block + blockmaps_in_group - 1 == BufferGetBlockNumber(buf)); - ReleaseBuffer(buf); + ExtendIndexRelationTo(index, forkNum, group_desc->first_block + blockmaps_in_group); } assert(group_desc->first_block + blockmaps_in_group <= RelationGetNumberOfBlocksInFork(index, forkNum)); From f272c85d40fe781456ec0ae62a39115e7b09127c Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Thu, 26 Oct 2023 01:43:40 +0400 Subject: [PATCH 03/14] src/hnsw/validate_index: update to work with changes to HnswIndexHeaderPage.blockmap_groups[] --- src/hnsw/external_index.c | 2 +- src/hnsw/external_index.h | 2 ++ src/hnsw/validate_index.c | 23 ++++++++++++++++------- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 2e99c46e2..784d6ed33 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -54,7 +54,7 @@ static char *extract_node(char *data, return tape; } -static BlockNumber NumberOfBlockMapsInGroup(unsigned groupno) +BlockNumber NumberOfBlockMapsInGroup(unsigned groupno) { assert(groupno < HNSW_MAX_BLOCKMAP_GROUPS); diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h index b1c3db893..d0b3b01dc 100644 --- a/src/hnsw/external_index.h +++ b/src/hnsw/external_index.h @@ -139,4 +139,6 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, uint32 new_tuple_level, HnswInsertState *insertstate); +BlockNumber NumberOfBlockMapsInGroup(unsigned groupno); + #endif // LDB_HNSW_EXTERNAL_INDEX_H diff --git a/src/hnsw/validate_index.c b/src/hnsw/validate_index.c index f6fd51e61..30f9d124f 100644 --- a/src/hnsw/validate_index.c +++ b/src/hnsw/validate_index.c @@ -133,15 +133,24 @@ static void ldb_vi_read_blockmaps(Relation index, if(blocks_nr == 0) return; vi_blocks[ 0 ].vp_type = LDB_VI_BLOCK_HEADER; while(nodes_remaining != 0) { - if(blockmap_groupno > index_header->blockmap_page_groups) { + if(blockmap_groupno >= index_header->blockmap_groups_nr) { elog(ERROR, - "blockmap_groupno=%d > index_header->blockmap_page_groups=%d", + "blockmap_groupno=%" PRIu32 " >= index_header->blockmap_groups_nr=%" PRIu32, blockmap_groupno, - index_header->blockmap_page_groups); + index_header->blockmap_groups_nr); + } + if(index_header->blockmap_groups[ blockmap_groupno ].blockmaps_initialized + != NumberOfBlockMapsInGroup(blockmap_groupno)) { + elog(ERROR, + "HnswBlockMapGroupDesc.blockmaps_initialized=%" PRIu32 " != NumberOfBlockMapsInGroup()=%" PRIu32 + " for blockmap_groupno=%" PRIu32, + index_header->blockmap_groups[ blockmap_groupno ].blockmaps_initialized, + NumberOfBlockMapsInGroup(blockmap_groupno), + blockmap_groupno); } /* TODO see the loop in CreateBlockMapGroup() */ BlockNumber number_of_blockmaps_in_group = 1u << blockmap_groupno; - BlockNumber group_start = index_header->blockmap_page_group_index[ blockmap_groupno ]; + BlockNumber group_start = index_header->blockmap_groups[ blockmap_groupno ].first_block; for(unsigned blockmap_id = 0; blockmap_id < number_of_blockmaps_in_group; ++blockmap_id) { BlockNumber blockmap_block = group_start + blockmap_id; BlockNumber expected_special_nextblockno; @@ -640,7 +649,7 @@ void ldb_validate_index(Oid indrelid, bool print_info) elog(INFO, "index_header = HnswIndexHeaderPage(" "version=%" PRIu32 " vector_dim=%" PRIu32 " m=%" PRIu32 " ef_construction=%" PRIu32 " ef=%" PRIu32 - " metric_kind=%d num_vectors=%" PRIu32 " last_data_block=%" PRIu32 " blockmap_page_groups=%" PRIu32 ")", + " metric_kind=%d num_vectors=%" PRIu32 " last_data_block=%" PRIu32 " blockmap_groups_nr=%" PRIu32 ")", index_header->version, index_header->vector_dim, index_header->m, @@ -649,7 +658,7 @@ void ldb_validate_index(Oid indrelid, bool print_info) index_header->metric_kind, index_header->num_vectors, index_header->last_data_block, - index_header->blockmap_page_groups); + index_header->blockmap_groups_nr); } blocks_nr = RelationGetNumberOfBlocksInFork(index, MAIN_FORKNUM); @@ -657,7 +666,7 @@ void ldb_validate_index(Oid indrelid, bool print_info) if(print_info) { elog(INFO, "blocks_nr=%" PRIu32 " nodes_nr=%" PRIu32, blocks_nr, nodes_nr); } - /* TODO check nodes_nr against index_header->blockmap_page_groups */ + /* TODO check nodes_nr against index_header->blockmap_groups_nr */ vi_blocks = palloc0_array(typeof(*vi_blocks), blocks_nr); vi_nodes = palloc0_array(typeof(*vi_nodes), nodes_nr); From 99a9f2f9521ea08e87ecb8b991bc655862a4d2b0 Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Thu, 26 Oct 2023 02:24:02 +0400 Subject: [PATCH 04/14] cmake/FindPostgreSQL: put the server include dir first (temporarily cherry picked from commit da3e432db13585a54c02ad043122770aa420aedb to run the CI tests) --- cmake/FindPostgreSQL.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindPostgreSQL.cmake b/cmake/FindPostgreSQL.cmake index 2e76e0ae3..adb0bdf2b 100644 --- a/cmake/FindPostgreSQL.cmake +++ b/cmake/FindPostgreSQL.cmake @@ -91,7 +91,7 @@ if(PostgreSQL_FOUND) separate_arguments(_pg_ldflags_ex) set(_server_lib_dirs ${_pg_libdir} ${_pg_pkglibdir}) - set(_server_inc_dirs ${_pg_pkgincludedir} ${_pg_includedir_server}) + set(_server_inc_dirs ${_pg_includedir_server} ${_pg_pkgincludedir}) string(REPLACE ";" " " _shared_link_options "${_pg_ldflags};${_pg_ldflags_sl}") set(_link_options ${_pg_ldflags}) From c3d21781ef56c36459a38c24e685a493b061347d Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Thu, 26 Oct 2023 03:16:58 +0400 Subject: [PATCH 05/14] Revert "cmake/FindPostgreSQL: put the server include dir first" This reverts commit 99a9f2f9521ea08e87ecb8b991bc655862a4d2b0. --- cmake/FindPostgreSQL.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindPostgreSQL.cmake b/cmake/FindPostgreSQL.cmake index adb0bdf2b..2e76e0ae3 100644 --- a/cmake/FindPostgreSQL.cmake +++ b/cmake/FindPostgreSQL.cmake @@ -91,7 +91,7 @@ if(PostgreSQL_FOUND) separate_arguments(_pg_ldflags_ex) set(_server_lib_dirs ${_pg_libdir} ${_pg_pkglibdir}) - set(_server_inc_dirs ${_pg_includedir_server} ${_pg_pkgincludedir}) + set(_server_inc_dirs ${_pg_pkgincludedir} ${_pg_includedir_server}) string(REPLACE ";" " " _shared_link_options "${_pg_ldflags};${_pg_ldflags_sl}") set(_link_options ${_pg_ldflags}) From 821b41ff79588de737b340bd315e31bb138fc4af Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Thu, 26 Oct 2023 21:13:35 +0400 Subject: [PATCH 06/14] Implement failure points This patch adds API to trigger execution of C code from SQL to test corner cases. `test/sql/hnsw_failure_point.sql` has an example of how to trigger a process crash using failure points and how to see that a space leak happens if a crash happens after a block is allocated, but before a record for the block is added to the index during blockmaps creation. --- CMakeLists.txt | 8 +++ sql/lantern.sql | 3 ++ src/hnsw.c | 13 +++++ src/hnsw/external_index.c | 1 + src/hnsw/failure_point.c | 75 ++++++++++++++++++++++++++++ src/hnsw/failure_point.h | 47 +++++++++++++++++ test/expected/ext_relocation.out | 5 +- test/expected/hnsw_failure_point.out | 45 +++++++++++++++++ test/schedule.txt | 2 +- test/sql/hnsw_failure_point.sql | 33 ++++++++++++ 10 files changed, 229 insertions(+), 3 deletions(-) create mode 100644 src/hnsw/failure_point.c create mode 100644 src/hnsw/failure_point.h create mode 100644 test/expected/hnsw_failure_point.out create mode 100644 test/sql/hnsw_failure_point.sql diff --git a/CMakeLists.txt b/CMakeLists.txt index becb084b3..10cbdc492 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(BUILD_WITH_USEARCH "Build with usearch as hnsw provider" ON) option(BUILD_LIBHNSW "Build libhnsw as hnsw provider" OFF) option(CODECOVERAGE "Enable code coverage for the build" OFF) option(BENCH "Enable benchmarking" OFF) +option(FAILURE_POINTS "Enable failure points" ON) if(CODECOVERAGE) message(STATUS "Code coverage is enabled.") @@ -149,6 +150,13 @@ if (${BUILD_WITH_LIBHNSW}) target_link_libraries(lantern PRIVATE hnsw) target_compile_definitions(lantern PRIVATE LANTERN_USE_LIBHNSW) endif() +if (FAILURE_POINTS) + message(STATUS "Failure points are enabled.") + target_compile_definitions(lantern PRIVATE LANTERN_FAILURE_POINTS_ARE_ENABLED=1) +else() + message(STATUS "Failure points are disabled.") + target_compile_definitions(lantern PRIVATE LANTERN_FAILURE_POINTS_ARE_ENABLED=0) +endif() if (${LANTERNDB_COPYNODES}) target_compile_definitions(lantern PRIVATE LANTERNDB_COPYNODES) endif() diff --git a/sql/lantern.sql b/sql/lantern.sql index d3ceb5846..b34708494 100644 --- a/sql/lantern.sql +++ b/sql/lantern.sql @@ -34,6 +34,9 @@ CREATE SCHEMA _lantern_internal; CREATE FUNCTION _lantern_internal.validate_index(index regclass, print_info boolean DEFAULT true) RETURNS VOID AS 'MODULE_PATHNAME', 'lantern_internal_validate_index' LANGUAGE C STABLE STRICT PARALLEL UNSAFE; +CREATE FUNCTION _lantern_internal.failure_point_enable(func TEXT, name TEXT, dont_trigger_first_nr INTEGER DEFAULT 0) RETURNS VOID + AS 'MODULE_PATHNAME', 'lantern_internal_failure_point_enable' LANGUAGE C STABLE STRICT PARALLEL UNSAFE; + -- operator classes CREATE OR REPLACE FUNCTION _lantern_internal._create_ldb_operator_classes(access_method_name TEXT) RETURNS BOOLEAN AS $$ DECLARE diff --git a/src/hnsw.c b/src/hnsw.c index 1da92f794..59e233924 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #include "hnsw/build.h" #include "hnsw/delete.h" +#include "hnsw/failure_point.h" #include "hnsw/insert.h" #include "hnsw/options.h" #include "hnsw/scan.h" @@ -369,6 +371,17 @@ Datum lantern_internal_validate_index(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +PGDLLEXPORT PG_FUNCTION_INFO_V1(lantern_internal_failure_point_enable); +Datum lantern_internal_failure_point_enable(PG_FUNCTION_ARGS) +{ + const char *func = text_to_cstring(PG_GETARG_TEXT_PP(0)); + const char *name = text_to_cstring(PG_GETARG_TEXT_PP(1)); + uint32 dont_trigger_first_nr = PG_GETARG_UINT32(2); + + ldb_failure_point_enable(func, name, dont_trigger_first_nr); + PG_RETURN_VOID(); +} + /* * Get data type for give oid * */ diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 784d6ed33..35152646b 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -13,6 +13,7 @@ #include #include "extra_dirtied.h" +#include "failure_point.h" #include "htab_cache.h" #include "insert.h" #include "options.h" diff --git a/src/hnsw/failure_point.c b/src/hnsw/failure_point.c new file mode 100644 index 000000000..7c49305e6 --- /dev/null +++ b/src/hnsw/failure_point.c @@ -0,0 +1,75 @@ +#include + +#include "hnsw/failure_point.h" + +#include /* PRIu32 */ + +struct failure_point_state +{ + bool enabled; + const char *func; + const char *name; + uint32 remaining; +}; + +static struct failure_point_state *failure_point_get_state(void) +{ + static struct failure_point_state state = {}; + + return &state; +} + +void ldb_failure_point_enable(const char *func, const char *name, uint32 dont_trigger_first_nr) +{ + struct failure_point_state *state = failure_point_get_state(); + + if(!LANTERN_FAILURE_POINTS_ARE_ENABLED) { + elog(WARNING, + "Can't enable failure point for (func=%s name=%s), " + "because failure points are disabled in compile time.", + func, + name); + } + if(state->enabled) { + elog(WARNING, + "ldb_failure_point_enable(): another failure point is enabled already." + " old failure point: func=%s name=%s remaining=%" PRIu32 + " new failure point: func=%s name=%s dont_trigger_first_nr=%" PRIu32, + state->func, + state->name, + state->remaining, + func, + name, + dont_trigger_first_nr); + } + *state = (struct failure_point_state){ + .enabled = true, + .func = func, + .name = name, + .remaining = dont_trigger_first_nr, + }; +} + +bool ldb_failure_point_is_enabled(const char *func, const char *name) +{ + struct failure_point_state *state = failure_point_get_state(); + + if(!LANTERN_FAILURE_POINTS_ARE_ENABLED) return false; + if(!state->enabled) return false; + if(strcmp(func, state->func) == 0 && strcmp(name, state->name) == 0) { + if(state->remaining == 0) { + state->enabled = false; + elog(INFO, "Failure point (func=%s name=%s) has been triggered.", state->func, state->name); + return true; + } else { + --state->remaining; + } + } + return false; +} + +void ldb_failure_point_crash(void) +{ + elog(ERROR, "ldb_failure_point_crash()"); + pg_unreachable(); +} diff --git a/src/hnsw/failure_point.h b/src/hnsw/failure_point.h new file mode 100644 index 000000000..fbb0cd02b --- /dev/null +++ b/src/hnsw/failure_point.h @@ -0,0 +1,47 @@ +#ifndef LDB_HNSW_FAILURE_POINT_H +#define LDB_HNSW_FAILURE_POINT_H + +/* + * Failure points implementation. + * + * An example on how to use from test/sql/hnsw_failure_point.sql. + * + * 1) Add this to CreateBlockMapGroup(): + * + LDB_FAILURE_POINT_CRASH_IF_ENABLED("crash_after_buf_allocation"); + * + * 2) Enable the failure point somewhere in the test: + * + * SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation', 0); + * + * 3) Trigger the failure point, the output looks like this: + * + * INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) has been triggered. + * + * 4) Now check that the failure actually happens, for example with validate_index(): + * + * SELECT _lantern_internal.validate_index('small_world_v_idx', false); + * + * 5) The output tells that the block is allocated, but it's not being used: + * + * INFO: validate_index() start for small_world_v_idx + * ERROR: vi_blocks[48].vp_type == LDB_VI_BLOCK_UNKNOWN (but it should be known now) + * + * + * Limitations + * + * 1) A single static per-process variable holds the state. + * 2) Only one failure point active at a time is supported. + * 3) The API is not thread-safe. + */ + +#define LDB_FAILURE_POINT_IS_ENABLED(_name) \ + (LANTERN_FAILURE_POINTS_ARE_ENABLED && ldb_failure_point_is_enabled(__func__, (_name))) +#define LDB_FAILURE_POINT_CRASH_IF_ENABLED(_name) \ + if(LDB_FAILURE_POINT_IS_ENABLED(_name)) ldb_failure_point_crash() + +void ldb_failure_point_enable(const char *func, const char *name, uint32 dont_trigger_first_nr); +bool ldb_failure_point_is_enabled(const char *func, const char *name); +void ldb_failure_point_crash(void); + +#endif // LDB_HNSW_FAILURE_POINT_H diff --git a/test/expected/ext_relocation.out b/test/expected/ext_relocation.out index 9dc24635e..fd5bb7311 100644 --- a/test/expected/ext_relocation.out +++ b/test/expected/ext_relocation.out @@ -35,14 +35,15 @@ ORDER BY 1, 3; extschema | proname | proschema -----------+------------------------------+------------------- schema1 | validate_index | _lantern_internal + schema1 | failure_point_enable | _lantern_internal schema1 | _create_ldb_operator_classes | _lantern_internal - schema1 | ldb_generic_dist | schema1 schema1 | l2sq_dist | schema1 schema1 | hnsw_handler | schema1 schema1 | hamming_dist | schema1 schema1 | cos_dist | schema1 schema1 | ldb_generic_dist | schema1 -(8 rows) + schema1 | ldb_generic_dist | schema1 +(9 rows) -- show all the extension operators SELECT ne.nspname AS extschema, op.oprname, np.nspname AS proschema diff --git a/test/expected/hnsw_failure_point.out b/test/expected/hnsw_failure_point.out new file mode 100644 index 000000000..d9165e6ac --- /dev/null +++ b/test/expected/hnsw_failure_point.out @@ -0,0 +1,45 @@ +------------------------------ +-- Test HNSW failure points -- +------------------------------ +CREATE TABLE small_world ( + id SERIAL PRIMARY KEY, + v REAL[2] +); +CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +-- let's insert HNSW_BLOCKMAP_BLOCKS_PER_PAGE (2000) record to fill the first blockmap page +do $$ +BEGIN + FOR i IN 1..2000 LOOP + INSERT INTO small_world (v) VALUES (array_replace(ARRAY[0,0,-1], -1, i)); + END LOOP; +END $$; +-- everything is fine, the index is valid +SELECT _lantern_internal.validate_index('small_world_v_idx', false); +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. + validate_index +---------------- + +(1 row) + +-- now let's crash after a buffer for a blockmap is allocated during insert, +-- but it hasn't been recorded yet +SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); + failure_point_enable +---------------------- + +(1 row) + +-- here is the insert where the crash happens +\set ON_ERROR_STOP off +INSERT INTO small_world (v) VALUES ('{2,2,2}'); +INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) has been triggered. +ERROR: ldb_failure_point_crash() +\set ON_ERROR_STOP on +-- now we see that the index has an extra free page, so the index validation fails +SELECT _lantern_internal.validate_index('small_world_v_idx', false); +INFO: validate_index() start for small_world_v_idx +ERROR: vi_blocks[48].vp_type == LDB_VI_BLOCK_UNKNOWN (but it should be known now) diff --git a/test/schedule.txt b/test/schedule.txt index f1122c434..ef2c6b142 100644 --- a/test/schedule.txt +++ b/test/schedule.txt @@ -4,4 +4,4 @@ # - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel test_pgvector: hnsw_vector -test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search +test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search hnsw_failure_point diff --git a/test/sql/hnsw_failure_point.sql b/test/sql/hnsw_failure_point.sql new file mode 100644 index 000000000..3b751388e --- /dev/null +++ b/test/sql/hnsw_failure_point.sql @@ -0,0 +1,33 @@ +------------------------------ +-- Test HNSW failure points -- +------------------------------ + +CREATE TABLE small_world ( + id SERIAL PRIMARY KEY, + v REAL[2] +); +CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); + +-- let's insert HNSW_BLOCKMAP_BLOCKS_PER_PAGE (2000) record to fill the first blockmap page + +do $$ +BEGIN + FOR i IN 1..2000 LOOP + INSERT INTO small_world (v) VALUES (array_replace(ARRAY[0,0,-1], -1, i)); + END LOOP; +END $$; + +-- everything is fine, the index is valid +SELECT _lantern_internal.validate_index('small_world_v_idx', false); + +-- now let's crash after a buffer for a blockmap is allocated during insert, +-- but it hasn't been recorded yet +SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); + +-- here is the insert where the crash happens +\set ON_ERROR_STOP off +INSERT INTO small_world (v) VALUES ('{2,2,2}'); +\set ON_ERROR_STOP on + +-- now we see that the index has an extra free page, so the index validation fails +SELECT _lantern_internal.validate_index('small_world_v_idx', false); From 4f98054e555795ffa9e8e5f1abeb383bcceb7a75 Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sat, 4 Nov 2023 19:48:47 +0400 Subject: [PATCH 07/14] Revert "Implement failure points" This reverts commit 821b41ff79588de737b340bd315e31bb138fc4af. --- CMakeLists.txt | 8 --- sql/lantern.sql | 3 -- src/hnsw.c | 13 ----- src/hnsw/external_index.c | 1 - src/hnsw/failure_point.c | 75 ---------------------------- src/hnsw/failure_point.h | 47 ----------------- test/expected/ext_relocation.out | 5 +- test/expected/hnsw_failure_point.out | 45 ----------------- test/schedule.txt | 2 +- test/sql/hnsw_failure_point.sql | 33 ------------ 10 files changed, 3 insertions(+), 229 deletions(-) delete mode 100644 src/hnsw/failure_point.c delete mode 100644 src/hnsw/failure_point.h delete mode 100644 test/expected/hnsw_failure_point.out delete mode 100644 test/sql/hnsw_failure_point.sql diff --git a/CMakeLists.txt b/CMakeLists.txt index 10cbdc492..becb084b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,6 @@ option(BUILD_WITH_USEARCH "Build with usearch as hnsw provider" ON) option(BUILD_LIBHNSW "Build libhnsw as hnsw provider" OFF) option(CODECOVERAGE "Enable code coverage for the build" OFF) option(BENCH "Enable benchmarking" OFF) -option(FAILURE_POINTS "Enable failure points" ON) if(CODECOVERAGE) message(STATUS "Code coverage is enabled.") @@ -150,13 +149,6 @@ if (${BUILD_WITH_LIBHNSW}) target_link_libraries(lantern PRIVATE hnsw) target_compile_definitions(lantern PRIVATE LANTERN_USE_LIBHNSW) endif() -if (FAILURE_POINTS) - message(STATUS "Failure points are enabled.") - target_compile_definitions(lantern PRIVATE LANTERN_FAILURE_POINTS_ARE_ENABLED=1) -else() - message(STATUS "Failure points are disabled.") - target_compile_definitions(lantern PRIVATE LANTERN_FAILURE_POINTS_ARE_ENABLED=0) -endif() if (${LANTERNDB_COPYNODES}) target_compile_definitions(lantern PRIVATE LANTERNDB_COPYNODES) endif() diff --git a/sql/lantern.sql b/sql/lantern.sql index b34708494..d3ceb5846 100644 --- a/sql/lantern.sql +++ b/sql/lantern.sql @@ -34,9 +34,6 @@ CREATE SCHEMA _lantern_internal; CREATE FUNCTION _lantern_internal.validate_index(index regclass, print_info boolean DEFAULT true) RETURNS VOID AS 'MODULE_PATHNAME', 'lantern_internal_validate_index' LANGUAGE C STABLE STRICT PARALLEL UNSAFE; -CREATE FUNCTION _lantern_internal.failure_point_enable(func TEXT, name TEXT, dont_trigger_first_nr INTEGER DEFAULT 0) RETURNS VOID - AS 'MODULE_PATHNAME', 'lantern_internal_failure_point_enable' LANGUAGE C STABLE STRICT PARALLEL UNSAFE; - -- operator classes CREATE OR REPLACE FUNCTION _lantern_internal._create_ldb_operator_classes(access_method_name TEXT) RETURNS BOOLEAN AS $$ DECLARE diff --git a/src/hnsw.c b/src/hnsw.c index 59e233924..1da92f794 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -16,7 +15,6 @@ #include "hnsw/build.h" #include "hnsw/delete.h" -#include "hnsw/failure_point.h" #include "hnsw/insert.h" #include "hnsw/options.h" #include "hnsw/scan.h" @@ -371,17 +369,6 @@ Datum lantern_internal_validate_index(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -PGDLLEXPORT PG_FUNCTION_INFO_V1(lantern_internal_failure_point_enable); -Datum lantern_internal_failure_point_enable(PG_FUNCTION_ARGS) -{ - const char *func = text_to_cstring(PG_GETARG_TEXT_PP(0)); - const char *name = text_to_cstring(PG_GETARG_TEXT_PP(1)); - uint32 dont_trigger_first_nr = PG_GETARG_UINT32(2); - - ldb_failure_point_enable(func, name, dont_trigger_first_nr); - PG_RETURN_VOID(); -} - /* * Get data type for give oid * */ diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 35152646b..784d6ed33 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -13,7 +13,6 @@ #include #include "extra_dirtied.h" -#include "failure_point.h" #include "htab_cache.h" #include "insert.h" #include "options.h" diff --git a/src/hnsw/failure_point.c b/src/hnsw/failure_point.c deleted file mode 100644 index 7c49305e6..000000000 --- a/src/hnsw/failure_point.c +++ /dev/null @@ -1,75 +0,0 @@ -#include - -#include "hnsw/failure_point.h" - -#include /* PRIu32 */ - -struct failure_point_state -{ - bool enabled; - const char *func; - const char *name; - uint32 remaining; -}; - -static struct failure_point_state *failure_point_get_state(void) -{ - static struct failure_point_state state = {}; - - return &state; -} - -void ldb_failure_point_enable(const char *func, const char *name, uint32 dont_trigger_first_nr) -{ - struct failure_point_state *state = failure_point_get_state(); - - if(!LANTERN_FAILURE_POINTS_ARE_ENABLED) { - elog(WARNING, - "Can't enable failure point for (func=%s name=%s), " - "because failure points are disabled in compile time.", - func, - name); - } - if(state->enabled) { - elog(WARNING, - "ldb_failure_point_enable(): another failure point is enabled already." - " old failure point: func=%s name=%s remaining=%" PRIu32 - " new failure point: func=%s name=%s dont_trigger_first_nr=%" PRIu32, - state->func, - state->name, - state->remaining, - func, - name, - dont_trigger_first_nr); - } - *state = (struct failure_point_state){ - .enabled = true, - .func = func, - .name = name, - .remaining = dont_trigger_first_nr, - }; -} - -bool ldb_failure_point_is_enabled(const char *func, const char *name) -{ - struct failure_point_state *state = failure_point_get_state(); - - if(!LANTERN_FAILURE_POINTS_ARE_ENABLED) return false; - if(!state->enabled) return false; - if(strcmp(func, state->func) == 0 && strcmp(name, state->name) == 0) { - if(state->remaining == 0) { - state->enabled = false; - elog(INFO, "Failure point (func=%s name=%s) has been triggered.", state->func, state->name); - return true; - } else { - --state->remaining; - } - } - return false; -} - -void ldb_failure_point_crash(void) -{ - elog(ERROR, "ldb_failure_point_crash()"); - pg_unreachable(); -} diff --git a/src/hnsw/failure_point.h b/src/hnsw/failure_point.h deleted file mode 100644 index fbb0cd02b..000000000 --- a/src/hnsw/failure_point.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef LDB_HNSW_FAILURE_POINT_H -#define LDB_HNSW_FAILURE_POINT_H - -/* - * Failure points implementation. - * - * An example on how to use from test/sql/hnsw_failure_point.sql. - * - * 1) Add this to CreateBlockMapGroup(): - * - LDB_FAILURE_POINT_CRASH_IF_ENABLED("crash_after_buf_allocation"); - * - * 2) Enable the failure point somewhere in the test: - * - * SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation', 0); - * - * 3) Trigger the failure point, the output looks like this: - * - * INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) has been triggered. - * - * 4) Now check that the failure actually happens, for example with validate_index(): - * - * SELECT _lantern_internal.validate_index('small_world_v_idx', false); - * - * 5) The output tells that the block is allocated, but it's not being used: - * - * INFO: validate_index() start for small_world_v_idx - * ERROR: vi_blocks[48].vp_type == LDB_VI_BLOCK_UNKNOWN (but it should be known now) - * - * - * Limitations - * - * 1) A single static per-process variable holds the state. - * 2) Only one failure point active at a time is supported. - * 3) The API is not thread-safe. - */ - -#define LDB_FAILURE_POINT_IS_ENABLED(_name) \ - (LANTERN_FAILURE_POINTS_ARE_ENABLED && ldb_failure_point_is_enabled(__func__, (_name))) -#define LDB_FAILURE_POINT_CRASH_IF_ENABLED(_name) \ - if(LDB_FAILURE_POINT_IS_ENABLED(_name)) ldb_failure_point_crash() - -void ldb_failure_point_enable(const char *func, const char *name, uint32 dont_trigger_first_nr); -bool ldb_failure_point_is_enabled(const char *func, const char *name); -void ldb_failure_point_crash(void); - -#endif // LDB_HNSW_FAILURE_POINT_H diff --git a/test/expected/ext_relocation.out b/test/expected/ext_relocation.out index fd5bb7311..9dc24635e 100644 --- a/test/expected/ext_relocation.out +++ b/test/expected/ext_relocation.out @@ -35,15 +35,14 @@ ORDER BY 1, 3; extschema | proname | proschema -----------+------------------------------+------------------- schema1 | validate_index | _lantern_internal - schema1 | failure_point_enable | _lantern_internal schema1 | _create_ldb_operator_classes | _lantern_internal + schema1 | ldb_generic_dist | schema1 schema1 | l2sq_dist | schema1 schema1 | hnsw_handler | schema1 schema1 | hamming_dist | schema1 schema1 | cos_dist | schema1 schema1 | ldb_generic_dist | schema1 - schema1 | ldb_generic_dist | schema1 -(9 rows) +(8 rows) -- show all the extension operators SELECT ne.nspname AS extschema, op.oprname, np.nspname AS proschema diff --git a/test/expected/hnsw_failure_point.out b/test/expected/hnsw_failure_point.out deleted file mode 100644 index d9165e6ac..000000000 --- a/test/expected/hnsw_failure_point.out +++ /dev/null @@ -1,45 +0,0 @@ ------------------------------- --- Test HNSW failure points -- ------------------------------- -CREATE TABLE small_world ( - id SERIAL PRIMARY KEY, - v REAL[2] -); -CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); -INFO: done init usearch index -INFO: inserted 0 elements -INFO: done saving 0 vectors --- let's insert HNSW_BLOCKMAP_BLOCKS_PER_PAGE (2000) record to fill the first blockmap page -do $$ -BEGIN - FOR i IN 1..2000 LOOP - INSERT INTO small_world (v) VALUES (array_replace(ARRAY[0,0,-1], -1, i)); - END LOOP; -END $$; --- everything is fine, the index is valid -SELECT _lantern_internal.validate_index('small_world_v_idx', false); -INFO: validate_index() start for small_world_v_idx -INFO: validate_index() done, no issues found. - validate_index ----------------- - -(1 row) - --- now let's crash after a buffer for a blockmap is allocated during insert, --- but it hasn't been recorded yet -SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); - failure_point_enable ----------------------- - -(1 row) - --- here is the insert where the crash happens -\set ON_ERROR_STOP off -INSERT INTO small_world (v) VALUES ('{2,2,2}'); -INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) has been triggered. -ERROR: ldb_failure_point_crash() -\set ON_ERROR_STOP on --- now we see that the index has an extra free page, so the index validation fails -SELECT _lantern_internal.validate_index('small_world_v_idx', false); -INFO: validate_index() start for small_world_v_idx -ERROR: vi_blocks[48].vp_type == LDB_VI_BLOCK_UNKNOWN (but it should be known now) diff --git a/test/schedule.txt b/test/schedule.txt index ef2c6b142..f1122c434 100644 --- a/test/schedule.txt +++ b/test/schedule.txt @@ -4,4 +4,4 @@ # - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel test_pgvector: hnsw_vector -test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search hnsw_failure_point +test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search diff --git a/test/sql/hnsw_failure_point.sql b/test/sql/hnsw_failure_point.sql deleted file mode 100644 index 3b751388e..000000000 --- a/test/sql/hnsw_failure_point.sql +++ /dev/null @@ -1,33 +0,0 @@ ------------------------------- --- Test HNSW failure points -- ------------------------------- - -CREATE TABLE small_world ( - id SERIAL PRIMARY KEY, - v REAL[2] -); -CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); - --- let's insert HNSW_BLOCKMAP_BLOCKS_PER_PAGE (2000) record to fill the first blockmap page - -do $$ -BEGIN - FOR i IN 1..2000 LOOP - INSERT INTO small_world (v) VALUES (array_replace(ARRAY[0,0,-1], -1, i)); - END LOOP; -END $$; - --- everything is fine, the index is valid -SELECT _lantern_internal.validate_index('small_world_v_idx', false); - --- now let's crash after a buffer for a blockmap is allocated during insert, --- but it hasn't been recorded yet -SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); - --- here is the insert where the crash happens -\set ON_ERROR_STOP off -INSERT INTO small_world (v) VALUES ('{2,2,2}'); -\set ON_ERROR_STOP on - --- now we see that the index has an extra free page, so the index validation fails -SELECT _lantern_internal.validate_index('small_world_v_idx', false); From d7ae6149ea5d74d40076db86c891db0616a74595 Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 04:11:22 +0400 Subject: [PATCH 08/14] test/hnsw_blockmap_create: implement --- src/hnsw/external_index.c | 13 +- src/hnsw/failure_point.c | 2 +- src/hnsw/validate_index.c | 12 +- test/expected/hnsw_blockmap_create.out | 179 +++++++++++++++++++++++++ test/schedule.txt | 2 +- test/sql/hnsw_blockmap_create.sql | 81 +++++++++++ 6 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 test/expected/hnsw_blockmap_create.out create mode 100644 test/sql/hnsw_blockmap_create.sql diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 35152646b..1a9b4ca35 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -119,7 +119,11 @@ static void UpdateHeaderBlockMapGroupDesc( log_rec_ptr = GenericXLogFinish(state); assert(log_rec_ptr != InvalidXLogRecPtr); - if(flush_log) XLogFlush(log_rec_ptr); + if(flush_log) { + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_before_wal_flush"); + XLogFlush(log_rec_ptr); + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_after_wal_flush"); + } ReleaseBuffer(hdr_buf); } @@ -224,7 +228,9 @@ static void ContinueBlockMapGroupInitialization( hdr->blockmap_groups[ groupno ].first_block = RelationGetNumberOfBlocksInFork(index, forkNum); assert(groupno == hdr->blockmap_groups_nr); hdr->blockmap_groups_nr = groupno + 1; + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_before_writing_the_intent_to_init"); UpdateHeaderBlockMapGroupDesc(index, forkNum, groupno, &hdr->blockmap_groups[ groupno ], true); + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_after_writing_the_intent_to_init"); } assert(hdr->blockmap_groups_nr == groupno + 1); @@ -232,6 +238,7 @@ static void ContinueBlockMapGroupInitialization( if(group_desc->blockmaps_initialized == 0 && group_desc->first_block + blockmaps_in_group > RelationGetNumberOfBlocksInFork(index, forkNum)) { ExtendIndexRelationTo(index, forkNum, group_desc->first_block + blockmaps_in_group); + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_after_extending_the_index_relation"); } assert(group_desc->first_block + blockmaps_in_group <= RelationGetNumberOfBlocksInFork(index, forkNum)); @@ -271,11 +278,15 @@ static void ContinueBlockMapGroupInitialization( if(((blockmap_id - pages_in_xlog_state + 1 + i) % HNSW_BLOCKMAP_UPDATE_HEADER_EVERY) == 0) update_header = true; } + if(blockmap_id == blockmaps_in_group - 1) + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_before_updating_header_at_the_end"); if(update_header || blockmap_id == blockmaps_in_group - 1) { hdr->blockmap_groups[ groupno ].blockmaps_initialized = blockmap_id + 1; UpdateHeaderBlockMapGroupDesc( index, forkNum, groupno, &hdr->blockmap_groups[ groupno ], blockmap_id == blockmaps_in_group - 1); } + if(blockmap_id == blockmaps_in_group - 1) + LDB_FAILURE_POINT_CRASH_IF_ENABLED("just_after_updating_header_at_the_end"); pages_in_xlog_state = 0; } } diff --git a/src/hnsw/failure_point.c b/src/hnsw/failure_point.c index 58e9fd64d..d2b4868ff 100644 --- a/src/hnsw/failure_point.c +++ b/src/hnsw/failure_point.c @@ -62,7 +62,7 @@ void ldb_failure_point_enable(const char *func, const char *name, uint32 dont_tr state->remaining = dont_trigger_first_nr; strncpy(state->func, func, lengthof(state->func)); strncpy(state->name, name, lengthof(state->name)); - elog(INFO, "Failure point (func=%s name=%s) is enabled.", state->func, state->name); + elog(INFO, "Failure point (func=%s name=%s remaining=%"PRIu32") is enabled.", state->func, state->name, state->remaining); } bool ldb_failure_point_is_enabled(const char *func, const char *name) diff --git a/src/hnsw/validate_index.c b/src/hnsw/validate_index.c index 30f9d124f..c6e6061ef 100644 --- a/src/hnsw/validate_index.c +++ b/src/hnsw/validate_index.c @@ -129,10 +129,11 @@ static void ldb_vi_read_blockmaps(Relation index, uint32 group_node_first_index = 0; uint32 nodes_remaining = nodes_nr; uint32 batch_size = HNSW_BLOCKMAP_BLOCKS_PER_PAGE; + bool last_group_node_is_used = true; if(blocks_nr == 0) return; vi_blocks[ 0 ].vp_type = LDB_VI_BLOCK_HEADER; - while(nodes_remaining != 0) { + while(nodes_remaining != 0 || (last_group_node_is_used && blockmap_groupno < index_header->blockmap_groups_nr)) { if(blockmap_groupno >= index_header->blockmap_groups_nr) { elog(ERROR, "blockmap_groupno=%" PRIu32 " >= index_header->blockmap_groups_nr=%" PRIu32, @@ -241,6 +242,11 @@ static void ldb_vi_read_blockmaps(Relation index, UnlockReleaseBuffer(buf); } + /* + * This is for the case when the last blockmap group is initialized, + * but PostgreSQL process crashed before something was added to it. + */ + last_group_node_is_used = batch_size == nodes_remaining; nodes_remaining -= Min(batch_size, nodes_remaining); group_node_first_index += batch_size; batch_size = batch_size * 2; @@ -659,6 +665,10 @@ void ldb_validate_index(Oid indrelid, bool print_info) index_header->num_vectors, index_header->last_data_block, index_header->blockmap_groups_nr); + for(uint32 i = 0; i < index_header->blockmap_groups_nr; ++i) { + elog(INFO, "blockmap_groups[%"PRIu32"]=(first_block=%"PRIu32", blockmaps_initialized=%"PRIu32"),", + i, index_header->blockmap_groups[i].first_block, index_header->blockmap_groups[i].blockmaps_initialized); + } } blocks_nr = RelationGetNumberOfBlocksInFork(index, MAIN_FORKNUM); diff --git a/test/expected/hnsw_blockmap_create.out b/test/expected/hnsw_blockmap_create.out new file mode 100644 index 000000000..a8e965141 --- /dev/null +++ b/test/expected/hnsw_blockmap_create.out @@ -0,0 +1,179 @@ +----------------------------------------------------------- +-- Test HNSW blockmap creation after failures in the middle +----------------------------------------------------------- +-- create a table and fill the first blockmap group +CREATE FUNCTION prepare() RETURNS VOID AS $$ +BEGIN + DROP TABLE IF EXISTS small_world; + CREATE TABLE small_world (id SERIAL PRIMARY KEY, v real[]); + CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); + -- let's insert HNSW_BLOCKMAP_BLOCKS_PER_PAGE (2000) record to fill the first blockmap page + BEGIN + FOR i IN 1..2000 LOOP + INSERT INTO small_world (v) VALUES (array_replace(ARRAY[0,0,-1], -1, i)); + END LOOP; + END; +END; +$$ LANGUAGE plpgsql VOLATILE; +-- enable a failure point and run an insert to trigger new blockmap group initialization +CREATE FUNCTION trigger_failure(func TEXT, name TEXT, dont_trigger_first_nr INTEGER) RETURNS VOID AS $$ +BEGIN + PERFORM _lantern_internal.failure_point_enable(func, name, dont_trigger_first_nr); + BEGIN + INSERT INTO small_world (v) VALUES ('{2,2,2}'); + EXCEPTION WHEN OTHERS THEN + RAISE NOTICE 'Exception caught: %', SQLERRM; + END; +END; +$$ LANGUAGE plpgsql VOLATILE; +DO $$ +DECLARE + failure_points TEXT[][]:= '{ + {"At this point no changes have been made to the index header, so validate_index() should succeed.", + "ContinueBlockMapGroupInitialization", + "just_before_writing_the_intent_to_init", 0, false}, + {"It''s not know if the header will be updated before WAL flush, so it''s not clear if validate_index() will succeed.", + "UpdateHeaderBlockMapGroupDesc", "just_before_wal_flush", 0, NULL}, + {"After updating the header validate_index() must fail, because the header has 0 initialized blockmaps for the last blockmap group.", + "UpdateHeaderBlockMapGroupDesc", "just_after_wal_flush", 0, true}, + {"The same reason to fail as before.", + "ContinueBlockMapGroupInitialization", + "just_after_writing_the_intent_to_init", 0, true}, + {"The validate_index() will fail at the same place, because the check for unused blocks is after the check for the number of initialize blockmap blocks.", + "ContinueBlockMapGroupInitialization", + "just_after_extending_the_index_relation", 0, true}, + {"Here blockmap blocks are initialized, but the header may or may not be updated to reflect this.", + "ContinueBlockMapGroupInitialization", + "just_before_updating_header_at_the_end", 0, NULL}, + {"It''s not know if the header will be updated for the second (last) time before WAL flush, so it''s not clear if validate_index() will succeed.", + "UpdateHeaderBlockMapGroupDesc", "just_before_wal_flush", 1, NULL}, + {"After updating the header validate_index() must succeed, because the blockmap group is fully initialized and the header is updated.", + "UpdateHeaderBlockMapGroupDesc", "just_after_wal_flush", 1, false}, + {"Blockmaps are initilized, the header is updated. validate_index() should not fail.", + "ContinueBlockMapGroupInitialization", + "just_after_updating_header_at_the_end", 0, false} + }'; + fp TEXT[]; +BEGIN + FOREACH fp SLICE 1 IN ARRAY failure_points + LOOP + PERFORM prepare(); + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); + PERFORM trigger_failure(fp[2], fp[3], fp[4]::integer); + RAISE INFO '%', fp[1]; + -- If it's not known if the data is written to WAL (and the validate_index() + -- may find issues) or if we know that validate_index() will definitely + -- find an issue then catch the exception + IF fp[5]::boolean IS NULL OR fp[5]::boolean THEN + BEGIN + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); + EXCEPTION WHEN OTHERS THEN + RAISE NOTICE 'Exception caught: %', SQLERRM; + END; + ELSE + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); + END IF; + END LOOP; + RAISE INFO 'The test is complete.'; +END $$; +NOTICE: table "small_world" does not exist, skipping +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_before_writing_the_intent_to_init remaining=0) is enabled. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_before_writing_the_intent_to_init) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: At this point no changes have been made to the index header, so validate_index() should succeed. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_before_wal_flush remaining=0) is enabled. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_before_wal_flush) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: It's not know if the header will be updated before WAL flush, so it's not clear if validate_index() will succeed. +INFO: validate_index() start for small_world_v_idx +NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_after_wal_flush remaining=0) is enabled. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_after_wal_flush) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: After updating the header validate_index() must fail, because the header has 0 initialized blockmaps for the last blockmap group. +INFO: validate_index() start for small_world_v_idx +NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_writing_the_intent_to_init remaining=0) is enabled. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_writing_the_intent_to_init) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: The same reason to fail as before. +INFO: validate_index() start for small_world_v_idx +NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_extending_the_index_relation remaining=0) is enabled. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_extending_the_index_relation) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: The validate_index() will fail at the same place, because the check for unused blocks is after the check for the number of initialize blockmap blocks. +INFO: validate_index() start for small_world_v_idx +NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_before_updating_header_at_the_end remaining=0) is enabled. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_before_updating_header_at_the_end) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: Here blockmap blocks are initialized, but the header may or may not be updated to reflect this. +INFO: validate_index() start for small_world_v_idx +NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_before_wal_flush remaining=1) is enabled. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_before_wal_flush) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: It's not know if the header will be updated for the second (last) time before WAL flush, so it's not clear if validate_index() will succeed. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_after_wal_flush remaining=1) is enabled. +INFO: Failure point (func=UpdateHeaderBlockMapGroupDesc name=just_after_wal_flush) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: After updating the header validate_index() must succeed, because the blockmap group is fully initialized and the header is updated. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: done init usearch index +INFO: inserted 0 elements +INFO: done saving 0 vectors +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_updating_header_at_the_end remaining=0) is enabled. +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_updating_header_at_the_end) has been triggered. +NOTICE: Exception caught: ldb_failure_point_crash() +INFO: Blockmaps are initilized, the header is updated. validate_index() should not fail. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. +INFO: The test is complete. diff --git a/test/schedule.txt b/test/schedule.txt index ef2c6b142..c4dad723a 100644 --- a/test/schedule.txt +++ b/test/schedule.txt @@ -4,4 +4,4 @@ # - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel test_pgvector: hnsw_vector -test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search hnsw_failure_point +test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search hnsw_failure_point test_blockmap_create diff --git a/test/sql/hnsw_blockmap_create.sql b/test/sql/hnsw_blockmap_create.sql new file mode 100644 index 000000000..2f661a92e --- /dev/null +++ b/test/sql/hnsw_blockmap_create.sql @@ -0,0 +1,81 @@ +----------------------------------------------------------- +-- Test HNSW blockmap creation after failures in the middle +----------------------------------------------------------- + +-- create a table and fill the first blockmap group +CREATE FUNCTION prepare() RETURNS VOID AS $$ +BEGIN + DROP TABLE IF EXISTS small_world; + CREATE TABLE small_world (id SERIAL PRIMARY KEY, v real[]); + CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); + -- let's insert HNSW_BLOCKMAP_BLOCKS_PER_PAGE (2000) record to fill the first blockmap page + BEGIN + FOR i IN 1..2000 LOOP + INSERT INTO small_world (v) VALUES (array_replace(ARRAY[0,0,-1], -1, i)); + END LOOP; + END; +END; +$$ LANGUAGE plpgsql VOLATILE; + +-- enable a failure point and run an insert to trigger new blockmap group initialization +CREATE FUNCTION trigger_failure(func TEXT, name TEXT, dont_trigger_first_nr INTEGER) RETURNS VOID AS $$ +BEGIN + PERFORM _lantern_internal.failure_point_enable(func, name, dont_trigger_first_nr); + BEGIN + INSERT INTO small_world (v) VALUES ('{2,2,2}'); + EXCEPTION WHEN OTHERS THEN + RAISE NOTICE 'Exception caught: %', SQLERRM; + END; +END; +$$ LANGUAGE plpgsql VOLATILE; + +DO $$ +DECLARE + failure_points TEXT[][]:= '{ + {"At this point no changes have been made to the index header, so validate_index() should succeed.", + "ContinueBlockMapGroupInitialization", + "just_before_writing_the_intent_to_init", 0, false}, + {"It''s not know if the header will be updated before WAL flush, so it''s not clear if validate_index() will succeed.", + "UpdateHeaderBlockMapGroupDesc", "just_before_wal_flush", 0, NULL}, + {"After updating the header validate_index() must fail, because the header has 0 initialized blockmaps for the last blockmap group.", + "UpdateHeaderBlockMapGroupDesc", "just_after_wal_flush", 0, true}, + {"The same reason to fail as before.", + "ContinueBlockMapGroupInitialization", + "just_after_writing_the_intent_to_init", 0, true}, + {"The validate_index() will fail at the same place, because the check for unused blocks is after the check for the number of initialize blockmap blocks.", + "ContinueBlockMapGroupInitialization", + "just_after_extending_the_index_relation", 0, true}, + {"Here blockmap blocks are initialized, but the header may or may not be updated to reflect this.", + "ContinueBlockMapGroupInitialization", + "just_before_updating_header_at_the_end", 0, NULL}, + {"It''s not know if the header will be updated for the second (last) time before WAL flush, so it''s not clear if validate_index() will succeed.", + "UpdateHeaderBlockMapGroupDesc", "just_before_wal_flush", 1, NULL}, + {"After updating the header validate_index() must succeed, because the blockmap group is fully initialized and the header is updated.", + "UpdateHeaderBlockMapGroupDesc", "just_after_wal_flush", 1, false}, + {"Blockmaps are initilized, the header is updated. validate_index() should not fail.", + "ContinueBlockMapGroupInitialization", + "just_after_updating_header_at_the_end", 0, false} + }'; + fp TEXT[]; +BEGIN + FOREACH fp SLICE 1 IN ARRAY failure_points + LOOP + PERFORM prepare(); + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); + PERFORM trigger_failure(fp[2], fp[3], fp[4]::integer); + RAISE INFO '%', fp[1]; + -- If it's not known if the data is written to WAL (and the validate_index() + -- may find issues) or if we know that validate_index() will definitely + -- find an issue then catch the exception + IF fp[5]::boolean IS NULL OR fp[5]::boolean THEN + BEGIN + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); + EXCEPTION WHEN OTHERS THEN + RAISE NOTICE 'Exception caught: %', SQLERRM; + END; + ELSE + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); + END IF; + END LOOP; + RAISE INFO 'The test is complete.'; +END $$; From 57911684b96ed51c6a31e6ea7c47face385a2bea Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 04:13:38 +0400 Subject: [PATCH 09/14] test/hnsw_failure_point: no longer works, temporarily --- test/expected/hnsw_failure_point.out | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/expected/hnsw_failure_point.out b/test/expected/hnsw_failure_point.out index bd7b7eb38..c420a379f 100644 --- a/test/expected/hnsw_failure_point.out +++ b/test/expected/hnsw_failure_point.out @@ -28,7 +28,7 @@ INFO: validate_index() done, no issues found. -- now let's crash after a buffer for a blockmap is allocated during insert, -- but it hasn't been recorded yet SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); -INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) is enabled. +INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation remaining=0) is enabled. failure_point_enable ---------------------- @@ -37,10 +37,13 @@ INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) -- here is the insert where the crash happens \set ON_ERROR_STOP off INSERT INTO small_world (v) VALUES ('{2,2,2}'); -INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation) has been triggered. -ERROR: ldb_failure_point_crash() \set ON_ERROR_STOP on -- now we see that the index has an extra free page, so the index validation fails SELECT _lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx -ERROR: vi_blocks[48].vp_type == LDB_VI_BLOCK_UNKNOWN (but it should be known now) +INFO: validate_index() done, no issues found. + validate_index +---------------- + +(1 row) + From 770076dafde5785f2394db3920d551ca46db7478 Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 04:19:17 +0400 Subject: [PATCH 10/14] test/schedule: fix hnsw_blockmap_create name (was non-existing test_blockmap_create) --- test/schedule.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/schedule.txt b/test/schedule.txt index c4dad723a..4a0006fdd 100644 --- a/test/schedule.txt +++ b/test/schedule.txt @@ -4,4 +4,4 @@ # - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel test_pgvector: hnsw_vector -test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search hnsw_failure_point test_blockmap_create +test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search hnsw_failure_point hnsw_blockmap_create From 55ca770ecbdd68c48fdd92a12afb27543e89bd81 Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 04:21:52 +0400 Subject: [PATCH 11/14] run clang-format --- src/hnsw/failure_point.c | 6 +++++- src/hnsw/validate_index.c | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/hnsw/failure_point.c b/src/hnsw/failure_point.c index d2b4868ff..6dc5cd438 100644 --- a/src/hnsw/failure_point.c +++ b/src/hnsw/failure_point.c @@ -62,7 +62,11 @@ void ldb_failure_point_enable(const char *func, const char *name, uint32 dont_tr state->remaining = dont_trigger_first_nr; strncpy(state->func, func, lengthof(state->func)); strncpy(state->name, name, lengthof(state->name)); - elog(INFO, "Failure point (func=%s name=%s remaining=%"PRIu32") is enabled.", state->func, state->name, state->remaining); + elog(INFO, + "Failure point (func=%s name=%s remaining=%" PRIu32 ") is enabled.", + state->func, + state->name, + state->remaining); } bool ldb_failure_point_is_enabled(const char *func, const char *name) diff --git a/src/hnsw/validate_index.c b/src/hnsw/validate_index.c index c6e6061ef..3b7c1fc11 100644 --- a/src/hnsw/validate_index.c +++ b/src/hnsw/validate_index.c @@ -666,8 +666,11 @@ void ldb_validate_index(Oid indrelid, bool print_info) index_header->last_data_block, index_header->blockmap_groups_nr); for(uint32 i = 0; i < index_header->blockmap_groups_nr; ++i) { - elog(INFO, "blockmap_groups[%"PRIu32"]=(first_block=%"PRIu32", blockmaps_initialized=%"PRIu32"),", - i, index_header->blockmap_groups[i].first_block, index_header->blockmap_groups[i].blockmaps_initialized); + elog(INFO, + "blockmap_groups[%" PRIu32 "]=(first_block=%" PRIu32 ", blockmaps_initialized=%" PRIu32 "),", + i, + index_header->blockmap_groups[ i ].first_block, + index_header->blockmap_groups[ i ].blockmaps_initialized); } } From a007ba6273dfc0946540b37a9698e8d7360e20da Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 04:54:21 +0400 Subject: [PATCH 12/14] test/hnsw_failure_point: make it work by using another failure point --- test/expected/hnsw_failure_point.out | 13 +++++-------- test/sql/hnsw_failure_point.sql | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/test/expected/hnsw_failure_point.out b/test/expected/hnsw_failure_point.out index c420a379f..61e39125d 100644 --- a/test/expected/hnsw_failure_point.out +++ b/test/expected/hnsw_failure_point.out @@ -27,8 +27,8 @@ INFO: validate_index() done, no issues found. -- now let's crash after a buffer for a blockmap is allocated during insert, -- but it hasn't been recorded yet -SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); -INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation remaining=0) is enabled. +SELECT _lantern_internal.failure_point_enable('ContinueBlockMapGroupInitialization', 'just_after_extending_the_index_relation'); +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_extending_the_index_relation remaining=0) is enabled. failure_point_enable ---------------------- @@ -37,13 +37,10 @@ INFO: Failure point (func=CreateBlockMapGroup name=crash_after_buf_allocation r -- here is the insert where the crash happens \set ON_ERROR_STOP off INSERT INTO small_world (v) VALUES ('{2,2,2}'); +INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_extending_the_index_relation) has been triggered. +ERROR: ldb_failure_point_crash() \set ON_ERROR_STOP on -- now we see that the index has an extra free page, so the index validation fails SELECT _lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx -INFO: validate_index() done, no issues found. - validate_index ----------------- - -(1 row) - +ERROR: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 diff --git a/test/sql/hnsw_failure_point.sql b/test/sql/hnsw_failure_point.sql index 3b751388e..cb3b06628 100644 --- a/test/sql/hnsw_failure_point.sql +++ b/test/sql/hnsw_failure_point.sql @@ -22,7 +22,7 @@ SELECT _lantern_internal.validate_index('small_world_v_idx', false); -- now let's crash after a buffer for a blockmap is allocated during insert, -- but it hasn't been recorded yet -SELECT _lantern_internal.failure_point_enable('CreateBlockMapGroup', 'crash_after_buf_allocation'); +SELECT _lantern_internal.failure_point_enable('ContinueBlockMapGroupInitialization', 'just_after_extending_the_index_relation'); -- here is the insert where the crash happens \set ON_ERROR_STOP off From 1ad9d051bc563fd02efdf06f05925a8169f11bec Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 05:25:21 +0400 Subject: [PATCH 13/14] implement and use _lantern_internal.continue_blockmap_group_initialization() --- sql/lantern.sql | 3 ++ src/hnsw.c | 9 +++++ src/hnsw/external_index.c | 50 ++++++++++++++++++++++++-- src/hnsw/external_index.h | 6 ++++ src/hnsw/validate_index.c | 6 ++-- test/expected/ext_relocation.out | 25 ++++++------- test/expected/hnsw_blockmap_create.out | 30 ++++++++++++++++ test/expected/hnsw_failure_point.out | 18 ++++++++++ test/sql/hnsw_blockmap_create.sql | 3 ++ test/sql/hnsw_failure_point.sql | 6 ++++ 10 files changed, 140 insertions(+), 16 deletions(-) diff --git a/sql/lantern.sql b/sql/lantern.sql index b34708494..e8b180646 100644 --- a/sql/lantern.sql +++ b/sql/lantern.sql @@ -37,6 +37,9 @@ CREATE FUNCTION _lantern_internal.validate_index(index regclass, print_info bool CREATE FUNCTION _lantern_internal.failure_point_enable(func TEXT, name TEXT, dont_trigger_first_nr INTEGER DEFAULT 0) RETURNS VOID AS 'MODULE_PATHNAME', 'lantern_internal_failure_point_enable' LANGUAGE C STABLE STRICT PARALLEL UNSAFE; +CREATE FUNCTION _lantern_internal.continue_blockmap_group_initialization(index regclass) RETURNS VOID + AS 'MODULE_PATHNAME', 'lantern_internal_continue_blockmap_group_initialization' LANGUAGE C STABLE STRICT PARALLEL UNSAFE; + -- operator classes CREATE OR REPLACE FUNCTION _lantern_internal._create_ldb_operator_classes(access_method_name TEXT) RETURNS BOOLEAN AS $$ DECLARE diff --git a/src/hnsw.c b/src/hnsw.c index 59e233924..10fa048d1 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -382,6 +382,15 @@ Datum lantern_internal_failure_point_enable(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +PGDLLEXPORT PG_FUNCTION_INFO_V1(lantern_internal_continue_blockmap_group_initialization); +Datum lantern_internal_continue_blockmap_group_initialization(PG_FUNCTION_ARGS) +{ + Oid indrelid = PG_GETARG_OID(0); + + ldb_continue_blockmap_group_initialization(indrelid); + PG_RETURN_VOID(); +} + /* * Get data type for give oid * */ diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index 1a9b4ca35..a720e7545 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -4,6 +4,7 @@ #include "external_index.h" #include // GenericXLog +#include // relation_open #include #include #include @@ -74,8 +75,8 @@ static uint32 BlockMapGroupFirstNodeIndex(unsigned groupno) uint32 first_node_index = 0; if(groupno == 0) return 0; - for(unsigned i = 0; i < groupno - 1; ++i) first_node_index += NumberOfBlockMapsInGroup(i); - return first_node_index; + for(unsigned i = 0; i < groupno; ++i) first_node_index += NumberOfBlockMapsInGroup(i); + return first_node_index * HNSW_BLOCKMAP_BLOCKS_PER_PAGE; } static bool AreEnoughBlockMapsForTupleId(uint32 blockmap_groups_nr, uint32 tuple_id) @@ -706,6 +707,51 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, return new_tup_ref; } +/* TODO refactor: the relation open/close/read the header code is very similar to ldb_validate_index() */ +void ldb_continue_blockmap_group_initialization(Oid indrelid) +{ + Relation index; + BlockNumber header_blockno = 0; + Buffer header_buf; + Page header_page; + HnswIndexHeaderPage *index_header; + bool update_header = false; + GenericXLogState *state; + XLogRecPtr ptr; + + /* the code may modify the index, so at least ExclusiveLock should be taken */ + index = relation_open(indrelid, ExclusiveLock); + state = GenericXLogStart(index); + header_buf = ReadBuffer(index, header_blockno); + LockBuffer(header_buf, BUFFER_LOCK_EXCLUSIVE); + header_page = GenericXLogRegisterBuffer(state, header_buf, LDB_GENERIC_XLOG_DELTA_IMAGE); + index_header = (HnswIndexHeaderPage *)PageGetContents(header_page); + + if(index_header->blockmap_groups_nr == 0) { + elog(INFO, "There is no blockmap group to continue to initialize: blockmap_groups_nr=0"); + } else if(BlockMapGroupIsFullyInitialized(index_header, index_header->blockmap_groups_nr - 1)) { + elog(INFO, "The last blockmap group is fully initialized."); + } else { + ContinueBlockMapGroupInitialization(index_header, + index, + MAIN_FORKNUM, + BlockMapGroupFirstNodeIndex(index_header->blockmap_groups_nr - 1), + index_header->blockmap_groups_nr - 1); + update_header = true; + elog(INFO, "The last blockmap group has been successfully initialized."); + } + + if(update_header) { + ptr = GenericXLogFinish(state); + assert(ptr != InvalidXLogRecPtr); + } else { + GenericXLogAbort(state); + } + + UnlockReleaseBuffer(header_buf); + relation_close(index, ExclusiveLock); +} + static BlockNumber getBlockMapPageBlockNumber(const HnswBlockMapGroupDesc *blockmap_groups, int id) { assert(id >= 0); diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h index d0b3b01dc..16c46253d 100644 --- a/src/hnsw/external_index.h +++ b/src/hnsw/external_index.h @@ -141,4 +141,10 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel, BlockNumber NumberOfBlockMapsInGroup(unsigned groupno); +/* + * Continue and finish the last blockmap group initialization if needed. + * @see ContinueBlockMapGroupInitialization + */ +void ldb_continue_blockmap_group_initialization(Oid indrelid); + #endif // LDB_HNSW_EXTERNAL_INDEX_H diff --git a/src/hnsw/validate_index.c b/src/hnsw/validate_index.c index 3b7c1fc11..b8c34aa43 100644 --- a/src/hnsw/validate_index.c +++ b/src/hnsw/validate_index.c @@ -191,11 +191,13 @@ static void ldb_vi_read_blockmaps(Relation index, elog(ERROR, "blockmap->first_id=%" PRIu32 " != " - "group_node_first_index=%d + blockmap_id=%u * HNSW_BLOCKMAP_BLOCKS_PER_PAGE=%d", + "group_node_first_index=%d + blockmap_id=%u * HNSW_BLOCKMAP_BLOCKS_PER_PAGE=%d for " + "blockmap_groupno=%" PRIu32, blockmap->first_id, group_node_first_index, blockmap_id, - HNSW_BLOCKMAP_BLOCKS_PER_PAGE); + HNSW_BLOCKMAP_BLOCKS_PER_PAGE, + blockmap_groupno); } HnswIndexPageSpecialBlock *special = (HnswIndexPageSpecialBlock *)PageGetSpecialPointer(page); if(special->firstId != blockmap->first_id) { diff --git a/test/expected/ext_relocation.out b/test/expected/ext_relocation.out index fd5bb7311..8dd7ba3ff 100644 --- a/test/expected/ext_relocation.out +++ b/test/expected/ext_relocation.out @@ -32,18 +32,19 @@ FROM pg_catalog.pg_extension AS e INNER JOIN pg_catalog.pg_namespace AS np ON (np.oid = p.pronamespace) WHERE d.deptype = 'e' AND e.extname = 'lantern' ORDER BY 1, 3; - extschema | proname | proschema ------------+------------------------------+------------------- - schema1 | validate_index | _lantern_internal - schema1 | failure_point_enable | _lantern_internal - schema1 | _create_ldb_operator_classes | _lantern_internal - schema1 | l2sq_dist | schema1 - schema1 | hnsw_handler | schema1 - schema1 | hamming_dist | schema1 - schema1 | cos_dist | schema1 - schema1 | ldb_generic_dist | schema1 - schema1 | ldb_generic_dist | schema1 -(9 rows) + extschema | proname | proschema +-----------+----------------------------------------+------------------- + schema1 | validate_index | _lantern_internal + schema1 | failure_point_enable | _lantern_internal + schema1 | continue_blockmap_group_initialization | _lantern_internal + schema1 | _create_ldb_operator_classes | _lantern_internal + schema1 | cos_dist | schema1 + schema1 | hnsw_handler | schema1 + schema1 | hamming_dist | schema1 + schema1 | ldb_generic_dist | schema1 + schema1 | ldb_generic_dist | schema1 + schema1 | l2sq_dist | schema1 +(10 rows) -- show all the extension operators SELECT ne.nspname AS extschema, op.oprname, np.nspname AS proschema diff --git a/test/expected/hnsw_blockmap_create.out b/test/expected/hnsw_blockmap_create.out index a8e965141..460238ce4 100644 --- a/test/expected/hnsw_blockmap_create.out +++ b/test/expected/hnsw_blockmap_create.out @@ -73,6 +73,9 @@ BEGIN ELSE PERFORM _lantern_internal.validate_index('small_world_v_idx', false); END IF; + -- now let's finish the blockmap creation and validate the index again + PERFORM _lantern_internal.continue_blockmap_group_initialization('small_world_v_idx'); + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); END LOOP; RAISE INFO 'The test is complete.'; END $$; @@ -88,6 +91,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: At this point no changes have been made to the index header, so validate_index() should succeed. INFO: validate_index() start for small_world_v_idx INFO: validate_index() done, no issues found. +INFO: The last blockmap group is fully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -99,6 +105,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: It's not know if the header will be updated before WAL flush, so it's not clear if validate_index() will succeed. INFO: validate_index() start for small_world_v_idx NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: The last blockmap group has been successfully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -110,6 +119,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: After updating the header validate_index() must fail, because the header has 0 initialized blockmaps for the last blockmap group. INFO: validate_index() start for small_world_v_idx NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: The last blockmap group has been successfully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -121,6 +133,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: The same reason to fail as before. INFO: validate_index() start for small_world_v_idx NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: The last blockmap group has been successfully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -132,6 +147,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: The validate_index() will fail at the same place, because the check for unused blocks is after the check for the number of initialize blockmap blocks. INFO: validate_index() start for small_world_v_idx NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: The last blockmap group has been successfully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -143,6 +161,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: Here blockmap blocks are initialized, but the header may or may not be updated to reflect this. INFO: validate_index() start for small_world_v_idx NOTICE: Exception caught: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +INFO: The last blockmap group has been successfully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -154,6 +175,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: It's not know if the header will be updated for the second (last) time before WAL flush, so it's not clear if validate_index() will succeed. INFO: validate_index() start for small_world_v_idx INFO: validate_index() done, no issues found. +INFO: The last blockmap group is fully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -165,6 +189,9 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: After updating the header validate_index() must succeed, because the blockmap group is fully initialized and the header is updated. INFO: validate_index() start for small_world_v_idx INFO: validate_index() done, no issues found. +INFO: The last blockmap group is fully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors @@ -176,4 +203,7 @@ NOTICE: Exception caught: ldb_failure_point_crash() INFO: Blockmaps are initilized, the header is updated. validate_index() should not fail. INFO: validate_index() start for small_world_v_idx INFO: validate_index() done, no issues found. +INFO: The last blockmap group is fully initialized. +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. INFO: The test is complete. diff --git a/test/expected/hnsw_failure_point.out b/test/expected/hnsw_failure_point.out index 61e39125d..88cf50c1a 100644 --- a/test/expected/hnsw_failure_point.out +++ b/test/expected/hnsw_failure_point.out @@ -41,6 +41,24 @@ INFO: Failure point (func=ContinueBlockMapGroupInitialization name=just_after_e ERROR: ldb_failure_point_crash() \set ON_ERROR_STOP on -- now we see that the index has an extra free page, so the index validation fails +\set ON_ERROR_STOP off SELECT _lantern_internal.validate_index('small_world_v_idx', false); INFO: validate_index() start for small_world_v_idx ERROR: HnswBlockMapGroupDesc.blockmaps_initialized=0 != NumberOfBlockMapsInGroup()=2 for blockmap_groupno=1 +\set ON_ERROR_STOP on +-- now let's continue and finish the blockmap creation and then validate the index again +SELECT _lantern_internal.continue_blockmap_group_initialization('small_world_v_idx'); +INFO: The last blockmap group has been successfully initialized. + continue_blockmap_group_initialization +---------------------------------------- + +(1 row) + +SELECT _lantern_internal.validate_index('small_world_v_idx', false); +INFO: validate_index() start for small_world_v_idx +INFO: validate_index() done, no issues found. + validate_index +---------------- + +(1 row) + diff --git a/test/sql/hnsw_blockmap_create.sql b/test/sql/hnsw_blockmap_create.sql index 2f661a92e..aa7d8360b 100644 --- a/test/sql/hnsw_blockmap_create.sql +++ b/test/sql/hnsw_blockmap_create.sql @@ -76,6 +76,9 @@ BEGIN ELSE PERFORM _lantern_internal.validate_index('small_world_v_idx', false); END IF; + -- now let's finish the blockmap creation and validate the index again + PERFORM _lantern_internal.continue_blockmap_group_initialization('small_world_v_idx'); + PERFORM _lantern_internal.validate_index('small_world_v_idx', false); END LOOP; RAISE INFO 'The test is complete.'; END $$; diff --git a/test/sql/hnsw_failure_point.sql b/test/sql/hnsw_failure_point.sql index cb3b06628..39874491c 100644 --- a/test/sql/hnsw_failure_point.sql +++ b/test/sql/hnsw_failure_point.sql @@ -30,4 +30,10 @@ INSERT INTO small_world (v) VALUES ('{2,2,2}'); \set ON_ERROR_STOP on -- now we see that the index has an extra free page, so the index validation fails +\set ON_ERROR_STOP off +SELECT _lantern_internal.validate_index('small_world_v_idx', false); +\set ON_ERROR_STOP on + +-- now let's continue and finish the blockmap creation and then validate the index again +SELECT _lantern_internal.continue_blockmap_group_initialization('small_world_v_idx'); SELECT _lantern_internal.validate_index('small_world_v_idx', false); From 6c70e3ca7817443d19b7ba0cbfbd9972dc4d4115 Mon Sep 17 00:00:00 2001 From: Maksym Medvied Date: Sun, 5 Nov 2023 05:28:03 +0400 Subject: [PATCH 14/14] =?UTF-8?q?fix=20error:=20variable=20=E2=80=98ptr?= =?UTF-8?q?=E2=80=99=20set=20but=20not=20used?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/hnsw/external_index.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c index a720e7545..2845a58a9 100644 --- a/src/hnsw/external_index.c +++ b/src/hnsw/external_index.c @@ -744,6 +744,7 @@ void ldb_continue_blockmap_group_initialization(Oid indrelid) if(update_header) { ptr = GenericXLogFinish(state); assert(ptr != InvalidXLogRecPtr); + LDB_UNUSED(ptr); } else { GenericXLogAbort(state); }