From d60c3986a9905685b55319a7bc4ee1ceca90aad3 Mon Sep 17 00:00:00 2001 From: Yuming-Xu <56633988+Yuming-Xu@users.noreply.github.com> Date: Tue, 19 Dec 2023 07:32:59 +0800 Subject: [PATCH] SPFresh implementation based on SPTAG (#405) * Initial commit * init from update version * merged code * backup * continue * update the initial work version * fix ExtraDynamicSearcher * fix vectorTranslateMap * fix compiling issues * fix compling * fix compiling * add merge * add merge re-assign * TODO: add steady state test * init steady state test * Init SPDKIO * optimize split * fix align_round * modify steady state: delete after search * modify trace * need to figure out disk usage * modify steadystate * need to debug load index * fix delete by metadata * add spfresh exe * finish benchmark * steady state complete * add KVTest * fix compiling errors * Init SPDKIO * optimize split * add KVTest * fix compiling errors * change update vector to partital read * modify merge * fix build * mem impl * fix correctness * Refactor code * add spdk as module * add isal-l_crypto as submodule * initial spdk integration * build queue infra * spdk working * add reference bdev.json * static spfresh search done * need to debug split * debug for GC/ReAssign * finish debug for GC/ReAssign * Add build guide for SPDK version * remove rocksdb * add timeout support for SPDK * modify code for timeout support * fix timeout bug * fix writeblocks segmentation fault * fix bug: SPDK init fail * fix timeout bugs * batch ReadBlocks * add batchSize option * add timeout support for batched I/O * Refine Index * debug refine * modify for stress test * change update from metadata to mapping * modify * add IOStatistics for SPDK * change ssd size * modify for stress test * fix ssd usage bug * fix io limit * modify ssd size for stress test * for io stat * modify for io * fix for stress test * temporarily remove rebuild * fix BKT bug * merge changes from Index.h BKTIndex.cpp and RelativeNeighborhoodGraph.h * add lock 
for mapping * fix merge bug * Improve README.md * add loading exclude head * modify length * fix code with AE repo * add checkpoint & move rocksdb to thirdparty * fix zstd dependency * fix compiling * fix running on SPDK * enable rocksdb, need to debug update * fix update * add SPDK & Rocksdb checkpoint function * pass test for checkpoint, todo: add persistentbuffer * add wal logic --------- Co-authored-by: MaggieQi Co-authored-by: cheqi Co-authored-by: Hengyu Liang Co-authored-by: Ziyue Yang Co-authored-by: Hengyu Liang --- .gitmodules | 9 + AnnService/CMakeLists.txt | 37 +- AnnService/CoreLibrary.vcxproj | 11 +- AnnService/CoreLibrary.vcxproj.filters | 17 +- AnnService/inc/Core/BKT/Index.h | 3 +- AnnService/inc/Core/Common.h | 14 + AnnService/inc/Core/Common/Dataset.h | 332 ++- AnnService/inc/Core/Common/FineGrainedLock.h | 26 + AnnService/inc/Core/Common/OPQQuantizer.h | 2 +- .../inc/Core/Common/PostingSizeRecord.h | 110 + AnnService/inc/Core/Common/VersionLabel.h | 126 ++ .../inc/Core/SPANN/ExtraDynamicSearcher.h | 1782 +++++++++++++++++ .../inc/Core/SPANN/ExtraRocksDBController.h | 332 +++ .../inc/Core/SPANN/ExtraSPDKController.h | 483 +++++ ...lGraphSearcher.h => ExtraStaticSearcher.h} | 51 +- AnnService/inc/Core/SPANN/IExtraSearcher.h | 121 +- AnnService/inc/Core/SPANN/Index.h | 121 +- AnnService/inc/Core/SPANN/Options.h | 62 +- .../inc/Core/SPANN/ParameterDefinitionList.h | 84 +- AnnService/inc/Core/SPANN/PersistentBuffer.h | 56 + AnnService/inc/Core/VectorIndex.h | 3 + AnnService/inc/Helper/KeyValueIO.h | 54 + AnnService/inc/Helper/ThreadPool.h | 8 + AnnService/inc/Quantizer/Training.h | 2 +- AnnService/inc/SPFresh/SPFresh.h | 1252 ++++++++++++ AnnService/inc/SSDServing/SSDIndex.h | 3 +- AnnService/packages.config | 2 + AnnService/src/Core/BKT/BKTIndex.cpp | 53 +- AnnService/src/Core/KDT/KDTIndex.cpp | 2 +- .../src/Core/SPANN/ExtraSPDKController.cpp | 478 +++++ AnnService/src/Core/SPANN/SPANNIndex.cpp | 265 ++- AnnService/src/SPFresh/main.cpp | 33 + 
CMakeLists.txt | 7 +- LICENSE | 2 +- README.md | 39 + Test/CMakeLists.txt | 2 +- Test/Test.vcxproj | 1 + Test/Test.vcxproj.filters | 6 + Test/src/KVTest.cpp | 97 + ThirdParty/RocksDB | 1 + ThirdParty/isal-l_crypto | 1 + ThirdParty/spdk | 1 + bdev.json | 17 + 43 files changed, 5977 insertions(+), 131 deletions(-) create mode 100644 AnnService/inc/Core/Common/PostingSizeRecord.h create mode 100644 AnnService/inc/Core/Common/VersionLabel.h create mode 100644 AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h create mode 100644 AnnService/inc/Core/SPANN/ExtraRocksDBController.h create mode 100644 AnnService/inc/Core/SPANN/ExtraSPDKController.h rename AnnService/inc/Core/SPANN/{ExtraFullGraphSearcher.h => ExtraStaticSearcher.h} (96%) create mode 100644 AnnService/inc/Core/SPANN/PersistentBuffer.h create mode 100644 AnnService/inc/Helper/KeyValueIO.h create mode 100644 AnnService/inc/SPFresh/SPFresh.h create mode 100644 AnnService/src/Core/SPANN/ExtraSPDKController.cpp create mode 100644 AnnService/src/SPFresh/main.cpp create mode 100644 Test/src/KVTest.cpp create mode 160000 ThirdParty/RocksDB create mode 160000 ThirdParty/isal-l_crypto create mode 160000 ThirdParty/spdk create mode 100644 bdev.json diff --git a/.gitmodules b/.gitmodules index a6fa563c..40da5edf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,12 @@ path = ThirdParty/zstd url = https://github.com/facebook/zstd branch = release +[submodule "ThirdParty/spdk"] + path = ThirdParty/spdk + url = https://github.com/spdk/spdk +[submodule "ThirdParty/isal-l_crypto"] + path = ThirdParty/isal-l_crypto + url = https://github.com/intel/isa-l_crypto +[submodule "ThirdParty/RocksDB"] + path = ThirdParty/RocksDB + url = git@github.com:facebook/rocksdb.git diff --git a/AnnService/CMakeLists.txt b/AnnService/CMakeLists.txt index de5f588e..4e24a6bc 100644 --- a/AnnService/CMakeLists.txt +++ b/AnnService/CMakeLists.txt @@ -3,9 +3,16 @@ set(AnnService ${PROJECT_SOURCE_DIR}/AnnService) set(Zstd 
${PROJECT_SOURCE_DIR}/ThirdParty/zstd) +set(Spdk ${PROJECT_SOURCE_DIR}/ThirdParty/spdk/build) +set(Dpdk ${PROJECT_SOURCE_DIR}/ThirdParty/spdk/dpdk/build) +set(IsalLCrypto ${PROJECT_SOURCE_DIR}/ThirdParty/isal-l_crypto/.libs/libisal_crypto.a) +set(SpdkLibPrefix ${Spdk}/lib/libspdk_) +set(DpdkLibPrefix ${Dpdk}/lib/librte_) +set(SPDK_LIBRARIES -Wl,--whole-archive ${SpdkLibPrefix}bdev_nvme.a ${SpdkLibPrefix}bdev.a ${SpdkLibPrefix}nvme.a ${SpdkLibPrefix}vfio_user.a ${SpdkLibPrefix}sock.a ${SpdkLibPrefix}dma.a ${SpdkLibPrefix}notify.a ${SpdkLibPrefix}accel.a ${SpdkLibPrefix}event_bdev.a ${SpdkLibPrefix}event_accel.a ${SpdkLibPrefix}vmd.a ${SpdkLibPrefix}event_vmd.a ${SpdkLibPrefix}event_sock.a ${SpdkLibPrefix}event_iobuf.a ${SpdkLibPrefix}event.a ${SpdkLibPrefix}env_dpdk.a ${SpdkLibPrefix}log.a ${SpdkLibPrefix}thread.a ${SpdkLibPrefix}rpc.a ${SpdkLibPrefix}init.a ${SpdkLibPrefix}jsonrpc.a ${SpdkLibPrefix}json.a ${SpdkLibPrefix}trace.a ${SpdkLibPrefix}util.a ${DpdkLibPrefix}mempool.a ${DpdkLibPrefix}mempool_ring.a ${DpdkLibPrefix}eal.a ${DpdkLibPrefix}ring.a ${DpdkLibPrefix}telemetry.a ${DpdkLibPrefix}bus_pci.a ${DpdkLibPrefix}kvargs.a ${DpdkLibPrefix}pci.a -Wl,--no-whole-archive dl rt isal ${IsalLCrypto} uuid) include_directories(${AnnService}) include_directories(${Zstd}/lib) +include_directories(${Spdk}/include) file(GLOB_RECURSE HDR_FILES ${AnnService}/inc/Core/*.h ${AnnService}/inc/Helper/*.h) file(GLOB_RECURSE SRC_FILES ${AnnService}/src/Core/*.cpp ${AnnService}/src/Helper/*.cpp) @@ -37,10 +44,28 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") target_compile_options(DistanceUtils PRIVATE -mavx2 -mavx -msse -msse2 -mavx512f -mavx512bw -mavx512dq -fPIC) endif() +find_package(RocksDB CONFIG) +if((DEFINED RocksDB_DIR) AND RocksDB_DIR) + list(APPEND RocksDB_LIBRARIES RocksDB::rocksdb) + find_package(uring) + set(uring_LIBRARIES "") + message (STATUS "Found RocksDB ${RocksDB_VERSION}") + message (STATUS "RocksDB: ${RocksDB_DIR}") +else() + set(RocksDB_LIBRARIES "") + 
set(uring_LIBRARIES "") + message (FATAL_ERROR "Could not find RocksDB!") +endif() + +find_package(TBB REQUIRED) +if (TBB_FOUND) + message (STATUS "Found TBB") +endif() + add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES}) -target_link_libraries (SPTAGLib DistanceUtils libzstd_shared ${NUMA_LIBRARY}) +target_link_libraries (SPTAGLib DistanceUtils ${RocksDB_LIBRARIES} ${uring_LIBRARIES} libzstd_shared ${NUMA_LIBRARY} tbb ${SPDK_LIBRARIES}) add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES}) -target_link_libraries (SPTAGLibStatic DistanceUtils libzstd_static ${NUMA_LIBRARY_STATIC}) +target_link_libraries (SPTAGLibStatic DistanceUtils ${RocksDB_LIBRARIES} ${uring_LIBRARIES} libzstd_static ${NUMA_LIBRARY_STATIC} tbb ${SPDK_LIBRARIES}) if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") target_compile_options(SPTAGLibStatic PRIVATE -fPIC) endif() @@ -88,9 +113,15 @@ endif() file(GLOB_RECURSE SSD_SERVING_HDR_FILES ${AnnService}/inc/SSDServing/*.h) file(GLOB_RECURSE SSD_SERVING_FILES ${AnnService}/src/SSDServing/*.cpp) +file(GLOB_RECURSE SPFRESH_HDR_FILES ${AnnService}/inc/SPFresh/*.h) +file(GLOB_RECURSE SPFRESH_FILES ${AnnService}/src/SPFresh/*.cpp) + add_executable(ssdserving ${SSD_SERVING_HDR_FILES} ${SSD_SERVING_FILES}) -target_link_libraries(ssdserving SPTAGLibStatic ${Boost_LIBRARIES}) +add_executable(spfresh ${SPFRESH_HDR_FILES} ${SPFRESH_FILES}) +target_link_libraries(ssdserving SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES}) +target_link_libraries(spfresh SPTAGLibStatic ${Boost_LIBRARIES} ${RocksDB_LIBRARIES}) target_compile_definitions(ssdserving PRIVATE _exe) +target_compile_definitions(spfresh PRIVATE _exe) # for Test add_library(ssdservingLib ${SSD_SERVING_HDR_FILES} ${SSD_SERVING_FILES}) diff --git a/AnnService/CoreLibrary.vcxproj b/AnnService/CoreLibrary.vcxproj index 78295d08..eb4c56f2 100644 --- a/AnnService/CoreLibrary.vcxproj +++ b/AnnService/CoreLibrary.vcxproj @@ -149,6 +149,7 @@ + @@ -167,7 +168,10 @@ - + + + + @@ -182,6 +186,7 @@ + @@ 
-229,11 +234,15 @@ + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + \ No newline at end of file diff --git a/AnnService/CoreLibrary.vcxproj.filters b/AnnService/CoreLibrary.vcxproj.filters index d79d57a5..9f617827 100644 --- a/AnnService/CoreLibrary.vcxproj.filters +++ b/AnnService/CoreLibrary.vcxproj.filters @@ -196,7 +196,7 @@ Header Files\Core\SPANN - + Header Files\Core\SPANN @@ -220,6 +220,21 @@ Header Files\Core\Common + + Header Files\Core\SPANN + + + Header Files\Core\SPANN + + + Header Files\Helper + + + Header Files\Core\Common + + + Header Files\Core\SPANN + diff --git a/AnnService/inc/Core/BKT/Index.h b/AnnService/inc/Core/BKT/Index.h index c8c18f2e..e263daca 100644 --- a/AnnService/inc/Core/BKT/Index.h +++ b/AnnService/inc/Core/BKT/Index.h @@ -178,6 +178,8 @@ namespace SPTAG ErrorCode RefineSearchIndex(QueryResult &p_query, bool p_searchDeleted = false) const; ErrorCode SearchTree(QueryResult &p_query) const; ErrorCode AddIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, std::shared_ptr p_metadataSet, bool p_withMetaIndex = false, bool p_normalized = false); + ErrorCode AddIndexIdx(SizeType begin, SizeType end); + ErrorCode AddIndexId(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, int& beginHead, int& endHead); ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum); ErrorCode DeleteIndex(const SizeType& p_id); @@ -209,7 +211,6 @@ namespace SPTAG private: void SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, bool p_searchDeleted, bool p_searchDuplicated, std::function filterFunc = nullptr) const; - template &, SizeType, float), bool(*checkFilter)(const std::shared_ptr&, SizeType, std::function)> void Search(COMMON::QueryResultSet& p_query, COMMON::WorkSpace& p_space, std::function filterFunc) 
const; }; diff --git a/AnnService/inc/Core/Common.h b/AnnService/inc/Core/Common.h index afa8f1dc..8775d406 100644 --- a/AnnService/inc/Core/Common.h +++ b/AnnService/inc/Core/Common.h @@ -67,6 +67,7 @@ inline T max(T a, T b) { #define InterlockedExchange8(a,b) __sync_lock_test_and_set(a, b) #define Sleep(a) usleep(a * 1000) #define strtok_s(a, b, c) strtok_r(a, b, c) +#define ALIGN_ROUND(size) ((size) + 31) / 32 * 32 #else @@ -98,6 +99,17 @@ inline bool fileexists(const TCHAR* path) { #define min(a,b) (((a) < (b)) ? (a) : (b)) #endif +FORCEINLINE +char +InterlockedCompareExchange( + _Inout_ _Interlocked_operand_ char volatile* Destination, + _In_ char Exchange, + _In_ char Comperand +) +{ + return (char)_InterlockedCompareExchange8(Destination, Exchange, Comperand); +} + #endif namespace SPTAG @@ -114,6 +126,8 @@ namespace SPTAG #define PAGE_FREE(ptr) ::operator delete(ptr, (std::align_val_t)512) #endif +#define ALIGN_ROUND(size) ((size) + 31) / 32 * 32 + typedef std::int32_t SizeType; typedef std::int32_t DimensionType; diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h index a2183cf8..810a41d1 100644 --- a/AnnService/inc/Core/Common/Dataset.h +++ b/AnnService/inc/Core/Common/Dataset.h @@ -4,15 +4,12 @@ #ifndef _SPTAG_COMMON_DATASET_H_ #define _SPTAG_COMMON_DATASET_H_ -#include "inc/Helper/Logging.h" -#include -#include - namespace SPTAG { namespace COMMON { // structure to save Data and Graph + /* template class Dataset { @@ -31,7 +28,7 @@ namespace SPTAG public: Dataset() {} - Dataset(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, T* data_ = nullptr, bool shareOwnership_ = true) + Dataset(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, const void* data_ = nullptr, bool shareOwnership_ = true) { Initialize(rows_, cols_, rowsInBlock_, capacity_, data_, shareOwnership_); } @@ -41,11 +38,17 @@ namespace SPTAG for (T* ptr : incBlocks) ALIGN_FREE(ptr); 
incBlocks.clear(); } - void Initialize(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, T* data_ = nullptr, bool shareOwnership_ = true) + void Initialize(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, const void* data_ = nullptr, bool shareOwnership_ = true) { + if (data != nullptr) { + if (ownData) ALIGN_FREE(data); + for (T* ptr : incBlocks) ALIGN_FREE(ptr); + incBlocks.clear(); + } + rows = rows_; cols = cols_; - data = data_; + data = (T*)data_; if (data_ == nullptr || !shareOwnership_) { ownData = true; @@ -77,20 +80,11 @@ namespace SPTAG inline const T* At(SizeType index) const { - if (index < R() && index >= 0) - { - if (index >= rows) { - SizeType incIndex = index - rows; - return incBlocks[incIndex >> rowsInBlockEx] + ((size_t)(incIndex & rowsInBlock)) * cols; - } - return data + ((size_t)index) * cols; - } - else - { - std::ostringstream oss; - oss << "Index out of range in Dataset. Index: " << index << " Size: " << R(); - throw std::out_of_range(oss.str()); + if (index >= rows) { + SizeType incIndex = index - rows; + return incBlocks[incIndex >> rowsInBlockEx] + ((size_t)(incIndex & rowsInBlock)) * cols; } + return data + ((size_t)index) * cols; } T* operator[](SizeType index) @@ -103,7 +97,7 @@ namespace SPTAG return At(index); } - ErrorCode AddBatch(const T* pData, SizeType num) + ErrorCode AddBatch(SizeType num, const T* pData = nullptr) { if (R() > maxRows - num) return ErrorCode::MemoryOverFlow; @@ -111,38 +105,20 @@ namespace SPTAG while (written < num) { SizeType curBlockIdx = ((incRows + written) >> rowsInBlockEx); if (curBlockIdx >= (SizeType)incBlocks.size()) { - T* newBlock = (T*)ALIGN_ALLOC(((size_t)rowsInBlock + 1) * cols * sizeof(T)); + T* newBlock = (T*)ALIGN_ALLOC(sizeof(T) * (rowsInBlock + 1) * cols); if (newBlock == nullptr) return ErrorCode::MemoryOverFlow; + std::memset(newBlock, -1, sizeof(T) * (rowsInBlock + 1) * cols); incBlocks.push_back(newBlock); } SizeType 
curBlockPos = ((incRows + written) & rowsInBlock); SizeType toWrite = min(rowsInBlock + 1 - curBlockPos, num - written); - std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T)); + if (pData != nullptr) std::memcpy(incBlocks[curBlockIdx] + ((size_t)curBlockPos) * cols, pData + ((size_t)written) * cols, ((size_t)toWrite) * cols * sizeof(T)); written += toWrite; } incRows += written; return ErrorCode::Success; } - ErrorCode AddBatch(SizeType num) - { - if (R() > maxRows - num) return ErrorCode::MemoryOverFlow; - - SizeType written = 0; - while (written < num) { - SizeType curBlockIdx = (incRows + written) >> rowsInBlockEx; - if (curBlockIdx >= (SizeType)incBlocks.size()) { - T* newBlock = (T*)ALIGN_ALLOC(sizeof(T) * (rowsInBlock + 1) * cols); - if (newBlock == nullptr) return ErrorCode::MemoryOverFlow; - std::memset(newBlock, -1, sizeof(T) * (rowsInBlock + 1) * cols); - incBlocks.push_back(newBlock); - } - written += min(rowsInBlock + 1 - ((incRows + written) & rowsInBlock), num - written); - } - incRows += written; - return ErrorCode::Success; - } - ErrorCode Save(std::shared_ptr p_out) const { SizeType CR = R(); @@ -203,12 +179,12 @@ namespace SPTAG return ErrorCode::Success; } - ErrorCode Refine(const std::vector& indices, Dataset& p_data) const + ErrorCode Refine(const std::vector& indices, Dataset& data) const { SizeType R = (SizeType)(indices.size()); - p_data.Initialize(R, cols, rowsInBlock + 1, static_cast(incBlocks.capacity() * (rowsInBlock + 1))); + data.Initialize(R, cols, rowsInBlock + 1, static_cast(incBlocks.capacity() * (rowsInBlock + 1))); for (SizeType i = 0; i < R; i++) { - std::memcpy((void*)p_data.At(i), (void*)this->At(indices[i]), sizeof(T) * cols); + std::memcpy((void*)data.At(i), (void*)this->At(indices[i]), sizeof(T) * cols); } return ErrorCode::Success; } @@ -234,6 +210,272 @@ namespace SPTAG return Refine(indices, ptr); } }; + */ + template + class Dataset + 
{ + private: + std::string name = "Data"; + SizeType rows = 0; + DimensionType cols = 1; + char* data = nullptr; + bool ownData = false; + SizeType incRows = 0; + SizeType maxRows; + SizeType rowsInBlock; + SizeType rowsInBlockEx; + std::shared_ptr> incBlocks; + + DimensionType colStart = 0; + DimensionType mycols = 0; + + public: + Dataset() {} + + Dataset(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, const void* data_ = nullptr, bool shareOwnership_ = true, std::shared_ptr> incBlocks_ = nullptr, int colStart_ = 0, int rowEnd_ = -1) + { + Initialize(rows_, cols_, rowsInBlock_, capacity_, data_, shareOwnership_, incBlocks_, colStart_, rowEnd_); + } + ~Dataset() + { + if (ownData) ALIGN_FREE(data); + for (char* ptr : *incBlocks) ALIGN_FREE(ptr); + incBlocks->clear(); + } + + void Initialize(SizeType rows_, DimensionType cols_, SizeType rowsInBlock_, SizeType capacity_, const void* data_ = nullptr, bool shareOwnership_ = true, std::shared_ptr> incBlocks_ = nullptr, int colStart_ = 0, int rowEnd_ = -1) + { + if (data != nullptr) { + if (ownData) ALIGN_FREE(data); + for (char* ptr : *incBlocks) ALIGN_FREE(ptr); + incBlocks->clear(); + } + + rows = rows_; + if (rowEnd_ >= colStart_) cols = rowEnd_; + else cols = cols_ * sizeof(T); + data = (char*)data_; + if (data_ == nullptr || !shareOwnership_) + { + ownData = true; + data = (char*)ALIGN_ALLOC(((size_t)rows) * cols); + if (data_ != nullptr) memcpy(data, data_, ((size_t)rows) * cols); + else std::memset(data, -1, ((size_t)rows) * cols); + } + maxRows = capacity_; + rowsInBlockEx = static_cast(ceil(log2(rowsInBlock_))); + rowsInBlock = (1 << rowsInBlockEx) - 1; + incBlocks = incBlocks_; + if (incBlocks == nullptr) incBlocks.reset(new std::vector()); + incBlocks->reserve((static_cast(capacity_) + rowsInBlock) >> rowsInBlockEx); + + colStart = colStart_; + mycols = cols_; + } + + bool IsReady() const { return data != nullptr; } + + void SetName(const std::string& name_) { name = name_; } 
+ const std::string& Name() const { return name; } + + void SetR(SizeType R_) + { + if (R_ >= rows) + incRows = R_ - rows; + else + { + rows = R_; + incRows = 0; + } + } + + inline SizeType R() const { return rows + incRows; } + inline const DimensionType& C() const { return mycols; } + inline std::uint64_t BufferSize() const { return sizeof(SizeType) + sizeof(DimensionType) + sizeof(T) * R() * C(); } + +#define GETITEM(index) \ + if (index >= rows) { \ + SizeType incIndex = index - rows; \ + return (T*)((*incBlocks)[incIndex >> rowsInBlockEx] + ((size_t)(incIndex & rowsInBlock)) * cols + colStart); \ + } \ + return (T*)(data + ((size_t)index) * cols + colStart); \ + + inline const T* At(SizeType index) const + { + GETITEM(index) + } + + inline T* At(SizeType index) + { + GETITEM(index) + } + + inline T* operator[](SizeType index) + { + GETITEM(index) + } + + inline const T* operator[](SizeType index) const + { + GETITEM(index) + } + +#undef GETITEM + + ErrorCode AddBatch(SizeType num, const T* pData = nullptr) + { + if (colStart != 0) return ErrorCode::Success; + if (R() > maxRows - num) return ErrorCode::MemoryOverFlow; + + SizeType written = 0; + while (written < num) { + SizeType curBlockIdx = ((incRows + written) >> rowsInBlockEx); + if (curBlockIdx >= (SizeType)(incBlocks->size())) { + char* newBlock = (char*)ALIGN_ALLOC(((size_t)rowsInBlock + 1) * cols); + if (newBlock == nullptr) return ErrorCode::MemoryOverFlow; + std::memset(newBlock, -1, ((size_t)rowsInBlock + 1) * cols); + incBlocks->push_back(newBlock); + } + SizeType curBlockPos = ((incRows + written) & rowsInBlock); + SizeType toWrite = min(rowsInBlock + 1 - curBlockPos, num - written); + if (pData) { + for (int i = 0; i < toWrite; i++) { + std::memcpy((*incBlocks)[curBlockIdx] + ((size_t)curBlockPos + i) * cols + colStart, pData + ((size_t)written + i) * mycols, mycols * sizeof(T)); + } + } + written += toWrite; + } + incRows += written; + return ErrorCode::Success; + } + + ErrorCode 
Save(std::shared_ptr p_out) const + { + SizeType CR = R(); + IOBINARY(p_out, WriteBinary, sizeof(SizeType), (char*)&CR); + IOBINARY(p_out, WriteBinary, sizeof(DimensionType), (char*)&mycols); + for (SizeType i = 0; i < CR; i++) { + IOBINARY(p_out, WriteBinary, sizeof(T) * mycols, (char*)At(i)); + } + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s (%d,%d) Finish!\n", name.c_str(), CR, mycols); + return ErrorCode::Success; + } + + ErrorCode Save(std::string sDataPointsFileName) const + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", name.c_str(), sDataPointsFileName.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile; + return Save(ptr); + } + + ErrorCode Load(std::shared_ptr pInput, SizeType blockSize, SizeType capacity) + { + IOBINARY(pInput, ReadBinary, sizeof(SizeType), (char*)&(rows)); + IOBINARY(pInput, ReadBinary, sizeof(DimensionType), (char*)&mycols); + + if (data == nullptr) Initialize(rows, mycols, blockSize, capacity); + + for (SizeType i = 0; i < rows; i++) { + IOBINARY(pInput, ReadBinary, sizeof(T) * mycols, (char*)At(i)); + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), rows, mycols); + return ErrorCode::Success; + } + + ErrorCode Load(std::string sDataPointsFileName, SizeType blockSize, SizeType capacity) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", name.c_str(), sDataPointsFileName.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile; + return Load(ptr, blockSize, capacity); + } + + // Functions for loading models from memory mapped files + ErrorCode Load(char* pDataPointsMemFile, SizeType blockSize, SizeType capacity) + { + SizeType R; + DimensionType C; + R = *((SizeType*)pDataPointsMemFile); + pDataPointsMemFile += 
sizeof(SizeType); + + C = *((DimensionType*)pDataPointsMemFile); + pDataPointsMemFile += sizeof(DimensionType); + + Initialize(R, C, blockSize, capacity, (char*)pDataPointsMemFile); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", name.c_str(), R, C); + return ErrorCode::Success; + } + + ErrorCode Refine(const std::vector& indices, COMMON::Dataset& dataset) const + { + SizeType newrows = (SizeType)(indices.size()); + if (dataset.data == nullptr) dataset.Initialize(newrows, mycols, rowsInBlock + 1, static_cast(incBlocks->capacity() * (rowsInBlock + 1))); + + for (SizeType i = 0; i < newrows; i++) { + std::memcpy((void*)dataset.At(i), (void*)At(indices[i]), sizeof(T) * mycols); + } + return ErrorCode::Success; + } + + virtual ErrorCode Refine(const std::vector& indices, std::shared_ptr output) const + { + SizeType newrows = (SizeType)(indices.size()); + IOBINARY(output, WriteBinary, sizeof(SizeType), (char*)&newrows); + IOBINARY(output, WriteBinary, sizeof(DimensionType), (char*)&mycols); + + for (SizeType i = 0; i < newrows; i++) { + IOBINARY(output, WriteBinary, sizeof(T) * mycols, (char*)At(indices[i])); + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Refine %s (%d,%d) Finish!\n", name.c_str(), newrows, C()); + return ErrorCode::Success; + } + + virtual ErrorCode Refine(const std::vector& indices, std::string sDataPointsFileName) const + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Refine %s To %s\n", name.c_str(), sDataPointsFileName.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(sDataPointsFileName.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile; + return Refine(indices, ptr); + } + }; + + template + ErrorCode LoadOptDatasets(std::shared_ptr pVectorsInput, std::shared_ptr pGraphInput, + Dataset& pVectors, Dataset& pGraph, DimensionType pNeighborhoodSize, + SizeType blockSize, SizeType capacity) { + SizeType VR, GR; + DimensionType VC, GC; + IOBINARY(pVectorsInput, 
ReadBinary, sizeof(SizeType), (char*)&VR); + IOBINARY(pVectorsInput, ReadBinary, sizeof(DimensionType), (char*)&VC); + DimensionType totalC = ALIGN_ROUND(sizeof(T) * VC + sizeof(SizeType) * pNeighborhoodSize); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "OPT TotalC: %d\n", totalC); + char* data = (char*)ALIGN_ALLOC(((size_t)totalC) * VR); + std::shared_ptr> incBlocks(new std::vector()); + + pVectors.Initialize(VR, VC, blockSize, capacity, data, true, incBlocks, 0, totalC); + pVectors.SetName("Opt" + pVectors.Name()); + for (SizeType i = 0; i < VR; i++) { + IOBINARY(pVectorsInput, ReadBinary, sizeof(T) * VC, (char*)(pVectors.At(i))); + } + + IOBINARY(pGraphInput, ReadBinary, sizeof(SizeType), (char*)&GR); + IOBINARY(pGraphInput, ReadBinary, sizeof(DimensionType), (char*)&GC); + if (GR != VR || GC != pNeighborhoodSize) return ErrorCode::DiskIOFail; + + pGraph.Initialize(GR, GC, blockSize, capacity, data, false, incBlocks, sizeof(T) * VC, totalC); + pGraph.SetName("Opt" + pGraph.Name()); + for (SizeType i = 0; i < VR; i++) { + IOBINARY(pGraphInput, ReadBinary, sizeof(SizeType) * GC, (char*)(pGraph.At(i))); + } + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", pVectors.Name().c_str(), pVectors.R(), pVectors.C()); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s (%d,%d) Finish!\n", pGraph.Name().c_str(), pGraph.R(), pGraph.C()); + + return ErrorCode::Success; + } } } diff --git a/AnnService/inc/Core/Common/FineGrainedLock.h b/AnnService/inc/Core/Common/FineGrainedLock.h index 98659dab..61f3fda0 100644 --- a/AnnService/inc/Core/Common/FineGrainedLock.h +++ b/AnnService/inc/Core/Common/FineGrainedLock.h @@ -38,6 +38,32 @@ namespace SPTAG return ((unsigned)(idx * 99991) + _rotl(idx, 2) + 101) & PoolSize; } }; + + class FineGrainedRWLock { + public: + FineGrainedRWLock() { + m_locks.reset(new std::shared_timed_mutex[PoolSize + 1]); + } + ~FineGrainedRWLock() {} + + std::shared_timed_mutex& operator[](SizeType idx) { + unsigned index = 
hash_func((unsigned)idx); + return m_locks[index]; + } + + const std::shared_timed_mutex& operator[](SizeType idx) const { + unsigned index = hash_func((unsigned)idx); + return m_locks[index]; + } + + inline unsigned hash_func(unsigned idx) const + { + return ((unsigned)(idx * 99991) + _rotl(idx, 2) + 101) & PoolSize; + } + private: + static const int PoolSize = 32767; + std::unique_ptr m_locks; + }; } } diff --git a/AnnService/inc/Core/Common/OPQQuantizer.h b/AnnService/inc/Core/Common/OPQQuantizer.h index 250a29f7..533fb3ea 100644 --- a/AnnService/inc/Core/Common/OPQQuantizer.h +++ b/AnnService/inc/Core/Common/OPQQuantizer.h @@ -207,4 +207,4 @@ namespace SPTAG } } -#endif _SPTAG_COMMON_OPQQUANTIZER_H_ +#endif // _SPTAG_COMMON_OPQQUANTIZER_H_ diff --git a/AnnService/inc/Core/Common/PostingSizeRecord.h b/AnnService/inc/Core/Common/PostingSizeRecord.h new file mode 100644 index 00000000..8577a64f --- /dev/null +++ b/AnnService/inc/Core/Common/PostingSizeRecord.h @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#ifndef _SPTAG_COMMON_POSTINGSIZERECORD_H_ +#define _SPTAG_COMMON_POSTINGSIZERECORD_H_ + +#include +#include "Dataset.h" + +namespace SPTAG +{ + namespace COMMON + { + class PostingSizeRecord + { + private: + Dataset m_data; + + public: + PostingSizeRecord() + { + m_data.SetName("PostingSizeRecord"); + } + + void Initialize(SizeType size, SizeType blockSize, SizeType capacity) + { + m_data.Initialize(size, 1, blockSize, capacity); + } + + inline int GetSize(const SizeType& headID) + { + return *m_data[headID]; + } + + inline bool UpdateSize(const SizeType& headID, int newSize) + { + while (true) { + int oldSize = GetSize(headID); + if (InterlockedCompareExchange((unsigned*)m_data[headID], (unsigned)newSize, (unsigned)oldSize) == oldSize) { + return true; + } + } + } + + inline bool IncSize(const SizeType& headID, int appendNum) + { + while (true) { + int oldSize = GetSize(headID); + int newSize = oldSize + appendNum; + if (InterlockedCompareExchange((unsigned*)m_data[headID], (unsigned)newSize, (unsigned)oldSize) == oldSize) { + return true; + } + } + } + + inline SizeType GetPostingNum() + { + return m_data.R(); + } + + inline ErrorCode Save(std::shared_ptr output) + { + return m_data.Save(output); + } + + inline ErrorCode Save(const std::string& filename) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", m_data.Name().c_str(), filename.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile; + return Save(ptr); + } + + inline ErrorCode Load(std::shared_ptr input, SizeType blockSize, SizeType capacity) + { + return m_data.Load(input, blockSize, capacity); + } + + inline ErrorCode Load(const std::string& filename, SizeType blockSize, SizeType capacity) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", m_data.Name().c_str(), filename.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || 
!ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile; + return Load(ptr, blockSize, capacity); + } + + inline ErrorCode Load(char* pmemoryFile, SizeType blockSize, SizeType capacity) + { + return m_data.Load(pmemoryFile + sizeof(SizeType), blockSize, capacity); + } + + inline ErrorCode AddBatch(SizeType num) + { + return m_data.AddBatch(num); + } + + inline std::uint64_t BufferSize() const + { + return m_data.BufferSize() + sizeof(SizeType); + } + + inline void SetR(SizeType num) + { + m_data.SetR(num); + } + }; + } +} + +#endif // _SPTAG_COMMON_LABELSET_H_ diff --git a/AnnService/inc/Core/Common/VersionLabel.h b/AnnService/inc/Core/Common/VersionLabel.h new file mode 100644 index 00000000..2ff2166c --- /dev/null +++ b/AnnService/inc/Core/Common/VersionLabel.h @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef _SPTAG_COMMON_VERSIONLABEL_H_ +#define _SPTAG_COMMON_VERSIONLABEL_H_ + +#include +#include "Dataset.h" + +namespace SPTAG +{ + namespace COMMON + { + class VersionLabel + { + private: + std::atomic m_deleted; + Dataset m_data; + + public: + VersionLabel() + { + m_deleted = 0; + m_data.SetName("versionLabelID"); + } + + void Initialize(SizeType size, SizeType blockSize, SizeType capacity) + { + m_data.Initialize(size, 1, blockSize, capacity); + } + + inline size_t Count() const { return m_data.R() - m_deleted.load(); } + + inline size_t GetDeleteCount() const { return m_deleted.load();} + + inline bool Deleted(const SizeType& key) const + { + return *m_data[key] == 0xfe; + } + + inline bool Delete(const SizeType& key) + { + uint8_t oldvalue = (uint8_t)InterlockedExchange8((char*)(m_data[key]), (char)0xfe); + if (oldvalue == 0xfe) return false; + m_deleted++; + return true; + } + + inline uint8_t GetVersion(const SizeType& key) + { + return *m_data[key]; + } + + inline bool IncVersion(const SizeType& key, uint8_t* newVersion) + { + while 
(true) { + if (Deleted(key)) return false; + uint8_t oldVersion = GetVersion(key); + *newVersion = (oldVersion+1) & 0x7f; + if (((uint8_t)InterlockedCompareExchange((char*)m_data[key], (char)*newVersion, (char)oldVersion)) == oldVersion) { + return true; + } + } + } + + inline SizeType GetVectorNum() + { + return m_data.R(); + } + + inline ErrorCode Save(std::shared_ptr output) + { + SizeType deleted = m_deleted.load(); + IOBINARY(output, WriteBinary, sizeof(SizeType), (char*)&deleted); + return m_data.Save(output); + } + + inline ErrorCode Save(const std::string& filename) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save %s To %s\n", m_data.Name().c_str(), filename.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile; + return Save(ptr); + } + + inline ErrorCode Load(std::shared_ptr input, SizeType blockSize, SizeType capacity) + { + SizeType deleted; + IOBINARY(input, ReadBinary, sizeof(SizeType), (char*)&deleted); + m_deleted = deleted; + return m_data.Load(input, blockSize, capacity); + } + + inline ErrorCode Load(const std::string& filename, SizeType blockSize, SizeType capacity) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load %s From %s\n", m_data.Name().c_str(), filename.c_str()); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile; + return Load(ptr, blockSize, capacity); + } + + inline ErrorCode Load(char* pmemoryFile, SizeType blockSize, SizeType capacity) + { + m_deleted = *((SizeType*)pmemoryFile); + return m_data.Load(pmemoryFile + sizeof(SizeType), blockSize, capacity); + } + + inline ErrorCode AddBatch(SizeType num) + { + return m_data.AddBatch(num); + } + + inline std::uint64_t BufferSize() const + { + return m_data.BufferSize() + sizeof(SizeType); + } + + inline void SetR(SizeType num) + { + m_data.SetR(num); + } + }; + } +} + 
+#endif // _SPTAG_COMMON_LABELSET_H_ diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h new file mode 100644 index 00000000..7618e050 --- /dev/null +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -0,0 +1,1782 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef _SPTAG_SPANN_EXTRADYNAMICSEARCHER_H_ +#define _SPTAG_SPANN_EXTRADYNAMICSEARCHER_H_ + +#include "inc/Helper/VectorSetReader.h" +#include "inc/Helper/AsyncFileReader.h" +#include "IExtraSearcher.h" +#include "ExtraStaticSearcher.h" +#include "inc/Core/Common/TruthSet.h" +#include "inc/Helper/KeyValueIO.h" +#include "inc/Core/Common/FineGrainedLock.h" +#include "PersistentBuffer.h" +#include "inc/Core/Common/PostingSizeRecord.h" +#include "ExtraSPDKController.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef ROCKSDB +#include "ExtraRocksDBController.h" +#endif + +// enable rocksdb io_uring +extern "C" bool RocksDbIOUringEnable() { return true; } + +namespace SPTAG::SPANN { + template + class ExtraDynamicSearcher : public IExtraSearcher + { + class MergeAsyncJob : public Helper::ThreadPool::Job + { + private: + VectorIndex* m_index; + ExtraDynamicSearcher* m_extraIndex; + SizeType headID; + bool disableReassign; + std::function m_callback; + public: + MergeAsyncJob(VectorIndex* headIndex, ExtraDynamicSearcher* extraIndex, SizeType headID, bool disableReassign, std::function p_callback) + : m_index(headIndex), m_extraIndex(extraIndex), headID(headID), disableReassign(disableReassign), m_callback(std::move(p_callback)) {} + + ~MergeAsyncJob() {} + + inline void exec(IAbortOperation* p_abort) override { + m_extraIndex->MergePostings(m_index, headID, !disableReassign); + if (m_callback != nullptr) { + m_callback(); + } + } + }; + + class SplitAsyncJob : public Helper::ThreadPool::Job + { + private: + VectorIndex* m_index; + 
ExtraDynamicSearcher* m_extraIndex; + SizeType headID; + bool disableReassign; + std::function m_callback; + public: + SplitAsyncJob(VectorIndex* headIndex, ExtraDynamicSearcher* extraIndex, SizeType headID, bool disableReassign, std::function p_callback) + : m_index(headIndex), m_extraIndex(extraIndex), headID(headID), disableReassign(disableReassign), m_callback(std::move(p_callback)) {} + + ~SplitAsyncJob() {} + + inline void exec(IAbortOperation* p_abort) override { + m_extraIndex->Split(m_index, headID, !disableReassign); + if (m_callback != nullptr) { + m_callback(); + } + } + }; + + class ReassignAsyncJob : public Helper::ThreadPool::Job + { + private: + VectorIndex* m_index; + ExtraDynamicSearcher* m_extraIndex; + std::shared_ptr vectorInfo; + SizeType HeadPrev; + std::function m_callback; + public: + ReassignAsyncJob(VectorIndex* headIndex, ExtraDynamicSearcher* extraIndex, + std::shared_ptr vectorInfo, SizeType HeadPrev, std::function p_callback) + : m_index(headIndex), m_extraIndex(extraIndex), vectorInfo(std::move(vectorInfo)), HeadPrev(HeadPrev), m_callback(std::move(p_callback)) {} + + ~ReassignAsyncJob() {} + + void exec(IAbortOperation* p_abort) override { + m_extraIndex->Reassign(m_index, vectorInfo, HeadPrev); + if (m_callback != nullptr) { + m_callback(); + } + } + }; + + class SPDKThreadPool : public Helper::ThreadPool + { + public: + void initSPDK(int numberOfThreads, ExtraDynamicSearcher* extraIndex) + { + m_abort.SetAbort(false); + for (int i = 0; i < numberOfThreads; i++) + { + m_threads.emplace_back([this, extraIndex] { + extraIndex->Initialize(); + Job *j; + while (get(j)) + { + try + { + currentJobs++; + j->exec(&m_abort); + currentJobs--; + } + catch (std::exception& e) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "ThreadPool: exception in %s %s\n", typeid(*j).name(), e.what()); + } + + delete j; + } + extraIndex->ExitBlockController(); + }); + } + } + }; + + private: + std::shared_ptr db; + + COMMON::VersionLabel* m_versionMap; + 
Options* m_opt; + + std::mutex m_dataAddLock; + + std::mutex m_mergeLock; + + COMMON::FineGrainedRWLock m_rwLocks; + + COMMON::PostingSizeRecord m_postingSizes; + + std::shared_ptr m_splitThreadPool; + std::shared_ptr m_reassignThreadPool; + + IndexStats m_stat; + + std::shared_ptr m_wal; + + // tbb::concurrent_hash_map m_splitList; + + std::mutex m_runningLock; + std::unordered_setm_splitList; + + tbb::concurrent_hash_map m_mergeList; + + public: + ExtraDynamicSearcher(const char* dbPath, int dim, int postingBlockLimit, bool useDirectIO, float searchLatencyHardLimit, int mergeThreshold, bool useSPDK = false, int batchSize = 64, int bufferLength = 3, bool recovery = false) { + if (useSPDK) { + db.reset(new SPDKIO(dbPath, 1024 * 1024, MaxSize, postingBlockLimit + bufferLength, 1024, batchSize, recovery)); + m_postingSizeLimit = postingBlockLimit * PageSize / (sizeof(ValueType) * dim + sizeof(int) + sizeof(uint8_t)); + } else { +#ifdef ROCKSDB + db.reset(new RocksDBIO(dbPath, useDirectIO, false, recovery)); + m_postingSizeLimit = postingBlockLimit; +#endif + } + m_metaDataSize = sizeof(int) + sizeof(uint8_t); + m_vectorInfoSize = dim * sizeof(ValueType) + m_metaDataSize; + m_hardLatencyLimit = std::chrono::microseconds((int)searchLatencyHardLimit * 1000); + m_mergeThreshold = mergeThreshold; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Posting size limit: %d, search limit: %f, merge threshold: %d\n", m_postingSizeLimit, searchLatencyHardLimit, m_mergeThreshold); + } + + ~ExtraDynamicSearcher() {} + + //headCandidates: search data structrue for "vid" vector + //headID: the head vector that stands for vid + bool IsAssumptionBroken(VectorIndex* p_index, SizeType headID, QueryResult& headCandidates, SizeType vid) + { + p_index->SearchIndex(headCandidates); + int replicaCount = 0; + BasicResult* queryResults = headCandidates.GetResults(); + std::vector selections(static_cast(m_opt->m_replicaCount)); + for (int i = 0; i < headCandidates.GetResultNum() && replicaCount < 
m_opt->m_replicaCount; ++i) { + if (queryResults[i].VID == -1) { + break; + } + // RNG Check. + bool rngAccpeted = true; + for (int j = 0; j < replicaCount; ++j) { + float nnDist = p_index->ComputeDistance( + p_index->GetSample(queryResults[i].VID), + p_index->GetSample(selections[j].node)); + if (nnDist < queryResults[i].Dist) { + rngAccpeted = false; + break; + } + } + if (!rngAccpeted) + continue; + + selections[replicaCount].node = queryResults[i].VID; + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "head:%d\n", queryResults[i].VID); + if (selections[replicaCount].node == headID) return false; + ++replicaCount; + } + return true; + } + + //Measure that in "headID" posting list, how many vectors break their assumption + int QuantifyAssumptionBroken(VectorIndex* p_index, SizeType headID, std::string& postingList, SizeType SplitHead, std::vector& newHeads, std::set& brokenID, int topK = 0, float ratio = 1.0) + { + int assumptionBrokenNum = 0; + int postVectorNum = postingList.size() / m_vectorInfoSize; + uint8_t* postingP = reinterpret_cast(&postingList.front()); + float minDist; + float maxDist; + float avgDist = 0; + std::vector distanceSet; + //#pragma omp parallel for num_threads(32) + for (int j = 0; j < postVectorNum; j++) { + uint8_t* vectorId = postingP + j * m_vectorInfoSize; + SizeType vid = *(reinterpret_cast(vectorId)); + uint8_t version = *(reinterpret_cast(vectorId + sizeof(int))); + float_t dist = p_index->ComputeDistance(reinterpret_cast(vectorId + m_metaDataSize), p_index->GetSample(headID)); + // if (dist < Epsilon) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "head found: vid: %d, head: %d\n", vid, headID); + avgDist += dist; + distanceSet.push_back(dist); + if (m_versionMap->Deleted(vid) || m_versionMap->GetVersion(vid) != version) continue; + COMMON::QueryResultSet headCandidates(reinterpret_cast(vectorId + m_metaDataSize), 64); + if (brokenID.find(vid) == brokenID.end() && IsAssumptionBroken(headID, headCandidates, vid)) { + /* + float_t headDist = 
p_index->ComputeDistance(headCandidates.GetTarget(), p_index->GetSample(SplitHead)); + float_t newHeadDist_1 = p_index->ComputeDistance(headCandidates.GetTarget(), p_index->GetSample(newHeads[0])); + float_t newHeadDist_2 = p_index->ComputeDistance(headCandidates.GetTarget(), p_index->GetSample(newHeads[1])); + + float_t splitDist = p_index->ComputeDistance(p_index->GetSample(SplitHead), p_index->GetSample(headID)); + + float_t headToNewHeadDist_1 = p_index->ComputeDistance(p_index->GetSample(headID), p_index->GetSample(newHeads[0])); + float_t headToNewHeadDist_2 = p_index->ComputeDistance(p_index->GetSample(headID), p_index->GetSample(newHeads[1])); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "broken vid to head distance: %f, to split head distance: %f\n", dist, headDist); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "broken vid to new head 1 distance: %f, to new head 2 distance: %f\n", newHeadDist_1, newHeadDist_2); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "head to spilit head distance: %f\n", splitDist); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "head to new head 1 distance: %f, to new head 2 distance: %f\n", headToNewHeadDist_1, headToNewHeadDist_2); + */ + assumptionBrokenNum++; + brokenID.insert(vid); + } + } + + if (assumptionBrokenNum != 0) { + std::sort(distanceSet.begin(), distanceSet.end()); + minDist = distanceSet[1]; + maxDist = distanceSet.back(); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "distance: min: %f, max: %f, avg: %f, 50th: %f\n", minDist, maxDist, avgDist/postVectorNum, distanceSet[distanceSet.size() * 0.5]); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "assumption broken num: %d\n", assumptionBrokenNum); + float_t splitDist = p_index->ComputeDistance(p_index->GetSample(SplitHead), p_index->GetSample(headID)); + + float_t headToNewHeadDist_1 = p_index->ComputeDistance(p_index->GetSample(headID), p_index->GetSample(newHeads[0])); + float_t headToNewHeadDist_2 = p_index->ComputeDistance(p_index->GetSample(headID), 
p_index->GetSample(newHeads[1])); + + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "head to spilt head distance: %f/%d/%.2f\n", splitDist, topK, ratio); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "head to new head 1 distance: %f, to new head 2 distance: %f\n", headToNewHeadDist_1, headToNewHeadDist_2); + } + + return assumptionBrokenNum; + } + + int QuantifySplitCaseA(std::vector& newHeads, std::vector& postingLists, SizeType SplitHead, int split_order, std::set& brokenID) + { + int assumptionBrokenNum = 0; + assumptionBrokenNum += QuantifyAssumptionBroken(newHeads[0], postingLists[0], SplitHead, newHeads, brokenID); + assumptionBrokenNum += QuantifyAssumptionBroken(newHeads[1], postingLists[1], SplitHead, newHeads, brokenID); + int vectorNum = (postingLists[0].size() + postingLists[1].size()) / m_vectorInfoSize; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "After Split%d, Top0 nearby posting lists, caseA : %d/%d\n", split_order, assumptionBrokenNum, vectorNum); + return assumptionBrokenNum; + } + + //Measure that around "headID", how many vectors break their assumption + //"headID" is the head vector before split + void QuantifySplitCaseB(VectorIndex* p_index, SizeType headID, std::vector& newHeads, SizeType SplitHead, int split_order, int assumptionBrokenNum_top0, std::set& brokenID) + { + COMMON::QueryResultSet nearbyHeads(reinterpret_cast(p_index->GetSample(headID)), 64); + std::vector postingLists; + p_index->SearchIndex(nearbyHeads); + std::string postingList; + BasicResult* queryResults = nearbyHeads.GetResults(); + int topk = 8; + int assumptionBrokenNum = assumptionBrokenNum_top0; + int assumptionBrokenNum_topK = assumptionBrokenNum_top0; + int i; + int containedHead = 0; + if (assumptionBrokenNum_top0 != 0) containedHead++; + int vectorNum = 0; + float furthestDist = 0; + for (i = 0; i < nearbyHeads.GetResultNum(); i++) { + if (queryResults[i].VID == -1) { + break; + } + furthestDist = queryResults[i].Dist; + if (i == topk) { + 
SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "After Split%d, Top%d nearby posting lists, caseB : %d in %d/%d\n", split_order, i, assumptionBrokenNum, containedHead, vectorNum); + topk *= 2; + } + if (queryResults[i].VID == newHeads[0] || queryResults[i].VID == newHeads[1]) continue; + db->Get(queryResults[i].VID, &postingList); + vectorNum += postingList.size() / m_vectorInfoSize; + int tempNum = QuantifyAssumptionBroken(queryResults[i].VID, postingList, SplitHead, newHeads, brokenID, i, queryResults[i].Dist / queryResults[1].Dist); + assumptionBrokenNum += tempNum; + if (tempNum != 0) containedHead++; + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "After Split%d, Top%d nearby posting lists, caseB : %d in %d/%d\n", split_order, i, assumptionBrokenNum, containedHead, vectorNum); + } + + void QuantifySplit(SizeType headID, std::vector& postingLists, std::vector& newHeads, SizeType SplitHead, int split_order) + { + std::set brokenID; + brokenID.clear(); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split Quantify: %d, head1:%d, head2:%d\n", split_order, newHeads[0], newHeads[1]); + int assumptionBrokenNum = QuantifySplitCaseA(newHeads, postingLists, SplitHead, split_order, brokenID); + QuantifySplitCaseB(headID, newHeads, SplitHead, split_order, assumptionBrokenNum, brokenID); + } + + bool CheckIsNeedReassign(VectorIndex* p_index, std::vector& newHeads, ValueType* data, SizeType splitHead, float_t headToSplitHeadDist, float_t currentHeadDist, bool isInSplitHead, SizeType currentHead) + { + + float_t splitHeadDist = p_index->ComputeDistance(data, p_index->GetSample(splitHead)); + + if (isInSplitHead) { + if (splitHeadDist >= currentHeadDist) return false; + } + else { + float_t newHeadDist_1 = p_index->ComputeDistance(data, p_index->GetSample(newHeads[0])); + float_t newHeadDist_2 = p_index->ComputeDistance(data, p_index->GetSample(newHeads[1])); + if (splitHeadDist <= newHeadDist_1 && splitHeadDist <= newHeadDist_2) return false; + if (currentHeadDist <= newHeadDist_1 && 
currentHeadDist <= newHeadDist_2) return false; + } + return true; + } + + inline void Serialize(char* ptr, SizeType VID, std::uint8_t version, const void* vector) { + memcpy(ptr, &VID, sizeof(VID)); + memcpy(ptr + sizeof(VID), &version, sizeof(version)); + memcpy(ptr + m_metaDataSize, vector, m_vectorInfoSize - m_metaDataSize); + } + + void CalculatePostingDistribution(VectorIndex* p_index) + { + if (m_opt->m_inPlace) return; + int top = m_postingSizeLimit / 10 + 1; + int page = m_opt->m_postingPageLimit + 1; + std::vector lengthDistribution(top, 0); + std::vector sizeDistribution(page + 2, 0); + int deletedHead = 0; + for (int i = 0; i < p_index->GetNumSamples(); i++) { + if (!p_index->ContainSample(i)) deletedHead++; + lengthDistribution[m_postingSizes.GetSize(i) / 10]++; + int size = m_postingSizes.GetSize(i) * m_vectorInfoSize; + if (size < PageSize) { + if (size < 512) sizeDistribution[0]++; + else if (size < 1024) sizeDistribution[1]++; + else sizeDistribution[2]++; + } + else { + sizeDistribution[size / PageSize + 2]++; + } + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Posting Length (Vector Num):\n"); + for (int i = 0; i < top; ++i) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d ~ %d: %d, \n", i * 10, (i + 1) * 10 - 1, lengthDistribution[i]); + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Posting Length (Data Size):\n"); + for (int i = 0; i < page + 2; ++i) + { + if (i <= 2) { + if (i == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "0 ~ 512 B: %d, \n", sizeDistribution[0] - deletedHead); + else if (i == 1) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "512 B ~ 1 KB: %d, \n", sizeDistribution[1]); + else SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "1 KB ~ 4 KB: %d, \n", sizeDistribution[2]); + } + else + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d ~ %d KB: %d, \n", (i - 2) * 4, (i - 1) * 4, sizeDistribution[i]); + } + } + + // TODO + void RefineIndex(std::shared_ptr& p_reader, + std::shared_ptr p_index) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Begin 
PreReassign\n"); + std::atomic_bool doneReassign; + doneReassign = false; + // p_index->UpdateIndex(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Into PreReassign Loop\n"); + while (!doneReassign) { + auto preReassignTimeBegin = std::chrono::high_resolution_clock::now(); + doneReassign = true; + std::vector threads; + std::atomic_int nextPostingID(0); + int currentPostingNum = p_index->GetNumSamples(); + int limit = m_postingSizeLimit * m_opt->m_preReassignRatio; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Batch PreReassign, Current PostingNum: %d, Current Limit: %d\n", currentPostingNum, limit); + auto func = [&]() + { + int index = 0; + Initialize(); + while (true) + { + index = nextPostingID.fetch_add(1); + if (index < currentPostingNum) + { + if ((index & ((1 << 14) - 1)) == 0) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Sent %.2lf%%...\n", index * 100.0 / currentPostingNum); + } + if (m_postingSizes.GetSize(index) >= limit) + { + doneReassign = false; + Split(p_index.get(), index, false, true); + } + } + else + { + ExitBlockController(); + return; + } + } + }; + for (int j = 0; j < m_opt->m_iSSDNumberOfThreads; j++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + auto preReassignTimeEnd = std::chrono::high_resolution_clock::now(); + double elapsedSeconds = std::chrono::duration_cast(preReassignTimeEnd - preReassignTimeBegin).count(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "rebuild cost: %.2lf s\n", elapsedSeconds); + + //p_index->SaveIndex(m_opt->m_indexDirectory + FolderSep + m_opt->m_headIndexFolder); + //LOG(Helper::LogLevel::LL_Info, "SPFresh: ReWriting SSD Info\n"); + //m_postingSizes.Save(m_opt->m_ssdInfoFile); + + // for (int i = 0; i < p_index->GetNumSamples(); i++) { + // db->Delete(i); + // } + // ForceCompaction(); + p_index->SaveIndex(m_opt->m_indexDirectory + FolderSep + m_opt->m_headIndexFolder); + BuildIndex(p_reader, p_index, *m_opt, *m_versionMap); + // ForceCompaction(); + 
CalculatePostingDistribution(p_index.get()); + + // p_index->SaveIndex(m_opt->m_indexDirectory + FolderSep + m_opt->m_headIndexFolder); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: ReWriting SSD Info\n"); + m_postingSizes.Save(m_opt->m_ssdInfoFile); + } + } + + ErrorCode Split(VectorIndex* p_index, const SizeType headID, bool reassign = false, bool preReassign = false) + { + auto splitBegin = std::chrono::high_resolution_clock::now(); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "into split: %d\n", headID); + std::vector newHeadsID; + std::vector newPostingLists; + double elapsedMSeconds; + { + std::unique_lock lock(m_rwLocks[headID]); + + std::string postingList; + auto splitGetBegin = std::chrono::high_resolution_clock::now(); + if (db->Get(headID, &postingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split fail to get oversized postings\n"); + exit(0); + } + auto splitGetEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(splitGetEnd - splitGetBegin).count(); + m_stat.m_getCost += elapsedMSeconds; + // reinterpret postingList to vectors and IDs + auto* postingP = reinterpret_cast(&postingList.front()); + SizeType postVectorNum = (SizeType)(postingList.size() / m_vectorInfoSize); + + COMMON::Dataset smallSample(postVectorNum, m_opt->m_dim, p_index->m_iDataBlockSize, p_index->m_iDataCapacity, (ValueType*)postingP, true, nullptr, m_metaDataSize, m_vectorInfoSize); + //COMMON::Dataset smallSample(0, m_opt->m_dim, p_index->m_iDataBlockSize, p_index->m_iDataCapacity); // smallSample[i] -> VID + //std::vector localIndicesInsert(postVectorNum); // smallSample[i] = j <-> localindices[j] = i + //std::vector localIndicesInsertVersion(postVectorNum); + std::vector localIndices(postVectorNum); + int index = 0; + uint8_t* vectorId = postingP; + for (int j = 0; j < postVectorNum; j++, vectorId += m_vectorInfoSize) + { + //LOG(Helper::LogLevel::LL_Info, "vector index/total:id: %d/%d:%d\n", j, 
m_postingSizes[headID].load(), *(reinterpret_cast(vectorId))); + uint8_t version = *(vectorId + sizeof(int)); + int VID = *((int*)(vectorId)); + if (m_versionMap->Deleted(VID) || m_versionMap->GetVersion(VID) != version) continue; + + //localIndicesInsert[index] = VID; + //localIndicesInsertVersion[index] = version; + //smallSample.AddBatch(1, (ValueType*)(vectorId + m_metaDataSize)); + localIndices[index] = j; + index++; + } + // double gcEndTime = sw.getElapsedMs(); + // m_splitGcCost += gcEndTime; + if (m_opt->m_inPlace || (!preReassign && index < m_postingSizeLimit)) + { + char* ptr = (char*)(postingList.c_str()); + for (int j = 0; j < index; j++, ptr += m_vectorInfoSize) + { + if (j == localIndices[j]) continue; + memcpy(ptr, postingList.c_str() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); + //Serialize(ptr, localIndicesInsert[j], localIndicesInsertVersion[j], smallSample[j]); + } + postingList.resize(index * m_vectorInfoSize); + m_postingSizes.UpdateSize(headID, index); + if (db->Put(headID, postingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split Fail to write back postings\n"); + exit(0); + } + m_stat.m_garbageNum++; + auto GCEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(GCEnd - splitBegin).count(); + m_stat.m_garbageCost += elapsedMSeconds; + { + std::lock_guard tmplock(m_runningLock); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"erase: %d\n", headID); + m_splitList.erase(headID); + } + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "GC triggered: %d, new length: %d\n", headID, index); + return ErrorCode::Success; + } + //LOG(Helper::LogLevel::LL_Info, "Resize\n"); + localIndices.resize(index); + + auto clusterBegin = std::chrono::high_resolution_clock::now(); + // k = 2, maybe we can change the split number, now it is fixed + SPTAG::COMMON::KmeansArgs args(2, smallSample.C(), (SizeType)localIndices.size(), 1, p_index->GetDistCalcMethod()); + 
std::shuffle(localIndices.begin(), localIndices.end(), std::mt19937(std::random_device()())); + + int numClusters = SPTAG::COMMON::KmeansClustering(smallSample, localIndices, 0, (SizeType)localIndices.size(), args, 1000, 100.0F, false, nullptr); + + auto clusterEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(clusterEnd - clusterBegin).count(); + m_stat.m_clusteringCost += elapsedMSeconds; + // int numClusters = ClusteringSPFresh(smallSample, localIndices, 0, localIndices.size(), args, 10, false, m_opt->m_virtualHead); + // exit(0); + if (numClusters <= 1) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Cluserting Failed (The same vector), Only Keep one\n"); + std::string newpostingList(1 * m_vectorInfoSize, '\0'); + char* ptr = (char*)(newpostingList.c_str()); + for (int j = 0; j < 1; j++, ptr += m_vectorInfoSize) + { + memcpy(ptr, postingList.c_str() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); + //Serialize(ptr, localIndicesInsert[j], localIndicesInsertVersion[j], smallSample[j]); + } + m_postingSizes.UpdateSize(headID, 1); + if (db->Put(headID, newpostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split fail to override postings cut to limit\n"); + exit(0); + } + { + std::lock_guard tmplock(m_runningLock); + m_splitList.erase(headID); + } + return ErrorCode::Success; + } + + long long newHeadVID = -1; + int first = 0; + bool theSameHead = false; + newPostingLists.resize(2); + for (int k = 0; k < 2; k++) { + if (args.counts[k] == 0) continue; + + newPostingLists[k].resize(args.counts[k] * m_vectorInfoSize); + char* ptr = (char*)(newPostingLists[k].c_str()); + for (int j = 0; j < args.counts[k]; j++, ptr += m_vectorInfoSize) + { + memcpy(ptr, postingList.c_str() + localIndices[first + j] * m_vectorInfoSize, m_vectorInfoSize); + //Serialize(ptr, localIndicesInsert[localIndices[first + j]], localIndicesInsertVersion[localIndices[first + j]], smallSample[localIndices[first + 
j]]); + } + if (!theSameHead && p_index->ComputeDistance(args.centers + k * args._D, p_index->GetSample(headID)) < Epsilon) { + newHeadsID.push_back(headID); + newHeadVID = headID; + theSameHead = true; + auto splitPutBegin = std::chrono::high_resolution_clock::now(); + if (!preReassign && db->Put(newHeadVID, newPostingLists[k]) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Fail to override postings\n"); + exit(0); + } + auto splitPutEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(splitPutEnd - splitPutBegin).count(); + m_stat.m_putCost += elapsedMSeconds; + m_stat.m_theSameHeadNum++; + } + else { + int begin, end = 0; + p_index->AddIndexId(args.centers + k * args._D, 1, m_opt->m_dim, begin, end); + newHeadVID = begin; + newHeadsID.push_back(begin); + auto splitPutBegin = std::chrono::high_resolution_clock::now(); + if (!preReassign && db->Put(newHeadVID, newPostingLists[k]) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Fail to add new postings\n"); + exit(0); + } + auto splitPutEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(splitPutEnd - splitPutBegin).count(); + m_stat.m_putCost += elapsedMSeconds; + auto updateHeadBegin = std::chrono::high_resolution_clock::now(); + p_index->AddIndexIdx(begin, end); + auto updateHeadEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(updateHeadEnd - updateHeadBegin).count(); + m_stat.m_updateHeadCost += elapsedMSeconds; + + std::lock_guard tmplock(m_dataAddLock); + if (m_postingSizes.AddBatch(1) == ErrorCode::MemoryOverFlow) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "MemoryOverFlow: NnewHeadVID: %d, Map Size:%d\n", newHeadVID, m_postingSizes.BufferSize()); + exit(1); + } + } + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head id: %d split into : %d, length: %d\n", headID, newHeadVID, args.counts[k]); + first += args.counts[k]; + 
m_postingSizes.UpdateSize(newHeadVID, args.counts[k]); + } + if (!theSameHead) { + p_index->DeleteIndex(headID); + m_postingSizes.UpdateSize(headID, 0); + } + } + { + std::lock_guard tmplock(m_runningLock); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"erase: %d\n", headID); + m_splitList.erase(headID); + } + m_stat.m_splitNum++; + if (reassign) { + auto reassignScanBegin = std::chrono::high_resolution_clock::now(); + + CollectReAssign(p_index, headID, newPostingLists, newHeadsID); + + auto reassignScanEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(reassignScanEnd - reassignScanBegin).count(); + + m_stat.m_reassignScanCost += elapsedMSeconds; + } + auto splitEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(splitEnd - splitBegin).count(); + m_stat.m_splitCost += elapsedMSeconds; + return ErrorCode::Success; + } + + ErrorCode MergePostings(VectorIndex* p_index, SizeType headID, bool reassign = false) + { + { + if (!m_mergeLock.try_lock()) { + auto* curJob = new MergeAsyncJob(p_index, this, headID, reassign, nullptr); + m_splitThreadPool->add(curJob); + return ErrorCode::Success; + } + std::unique_lock lock(m_rwLocks[headID]); + + if (!p_index->ContainSample(headID)) { + m_mergeLock.unlock(); + return ErrorCode::Success; + } + + std::string mergedPostingList; + std::set vectorIdSet; + + std::string currentPostingList; + if (db->Get(headID, ¤tPostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Fail to get to be merged postings: %d\n", headID); + exit(0); + } + + auto* postingP = reinterpret_cast(¤tPostingList.front()); + size_t postVectorNum = currentPostingList.size() / m_vectorInfoSize; + int currentLength = 0; + uint8_t* vectorId = postingP; + for (int j = 0; j < postVectorNum; j++, vectorId += m_vectorInfoSize) + { + int VID = *((int*)(vectorId)); + uint8_t version = *(vectorId + sizeof(int)); + if (m_versionMap->Deleted(VID) || 
m_versionMap->GetVersion(VID) != version) continue; + vectorIdSet.insert(VID); + mergedPostingList += currentPostingList.substr(j * m_vectorInfoSize, m_vectorInfoSize); + currentLength++; + } + int totalLength = currentLength; + + if (currentLength > m_mergeThreshold) + { + m_postingSizes.UpdateSize(headID, currentLength); + if (db->Put(headID, mergedPostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Merge Fail to write back postings\n"); + exit(0); + } + m_mergeList.erase(headID); + m_mergeLock.unlock(); + return ErrorCode::Success; + } + + QueryResult queryResults(p_index->GetSample(headID), m_opt->m_internalResultNum, false); + p_index->SearchIndex(queryResults); + + std::string nextPostingList; + + for (int i = 1; i < queryResults.GetResultNum(); ++i) + { + BasicResult* queryResult = queryResults.GetResult(i); + int nextLength = m_postingSizes.GetSize(queryResult->VID); + tbb::concurrent_hash_map::const_accessor headIDAccessor; + if (currentLength + nextLength < m_postingSizeLimit && !m_mergeList.find(headIDAccessor, queryResult->VID)) + { + { + std::unique_lock anotherLock(m_rwLocks[queryResult->VID], std::defer_lock); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Locked: %d, to be lock: %d\n", headID, queryResult->VID); + if (m_rwLocks.hash_func(queryResult->VID) != m_rwLocks.hash_func(headID)) anotherLock.lock(); + if (!p_index->ContainSample(queryResult->VID)) continue; + if (db->Get(queryResult->VID, &nextPostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Fail to get to be merged postings: %d\n", queryResult->VID); + exit(0); + } + + postingP = reinterpret_cast(&nextPostingList.front()); + postVectorNum = nextPostingList.size() / m_vectorInfoSize; + nextLength = 0; + vectorId = postingP; + for (int j = 0; j < postVectorNum; j++, vectorId += m_vectorInfoSize) + { + int VID = *((int*)(vectorId)); + uint8_t version = *(vectorId + sizeof(int)); + if (m_versionMap->Deleted(VID) || 
m_versionMap->GetVersion(VID) != version) continue; + if (vectorIdSet.find(VID) == vectorIdSet.end()) { + mergedPostingList += nextPostingList.substr(j * m_vectorInfoSize, m_vectorInfoSize); + totalLength++; + } + nextLength++; + } + if (currentLength > nextLength) + { + p_index->DeleteIndex(queryResult->VID); + if (db->Put(headID, mergedPostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split fail to override postings after merge\n"); + exit(0); + } + m_postingSizes.UpdateSize(queryResult->VID, 0); + m_postingSizes.UpdateSize(headID, totalLength); + } else + { + p_index->DeleteIndex(headID); + if (db->Put(queryResult->VID, mergedPostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split fail to override postings after merge\n"); + exit(0); + } + m_postingSizes.UpdateSize(queryResult->VID, totalLength); + m_postingSizes.UpdateSize(headID, 0); + } + if (m_rwLocks.hash_func(queryResult->VID) != m_rwLocks.hash_func(headID)) anotherLock.unlock(); + } + + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Release: %d, Release: %d\n", headID, queryResult->VID); + lock.unlock(); + m_mergeLock.unlock(); + + if (reassign) + { + /* ReAssign */ + if (currentLength > nextLength) + { + /* ReAssign queryResult->VID*/ + postingP = reinterpret_cast(&nextPostingList.front()); + for (int j = 0; j < nextLength; j++) { + uint8_t* vectorId = postingP + j * m_vectorInfoSize; + // SizeType vid = *(reinterpret_cast(vectorId)); + ValueType* vector = reinterpret_cast(vectorId + m_metaDataSize); + float origin_dist = p_index->ComputeDistance(p_index->GetSample(queryResult->VID), vector); + float current_dist = p_index->ComputeDistance(p_index->GetSample(headID), vector); + if (current_dist > origin_dist) + ReassignAsync(p_index, std::make_shared((char*)vectorId, m_vectorInfoSize), headID); + } + } else + { + /* ReAssign headID*/ + postingP = reinterpret_cast(¤tPostingList.front()); + for (int j = 0; j < currentLength; j++) { + uint8_t* 
vectorId = postingP + j * m_vectorInfoSize; + // SizeType vid = *(reinterpret_cast(vectorId)); + ValueType* vector = reinterpret_cast(vectorId + m_metaDataSize); + float origin_dist = p_index->ComputeDistance(p_index->GetSample(headID), vector); + float current_dist = p_index->ComputeDistance(p_index->GetSample(queryResult->VID), vector); + if (current_dist > origin_dist) + ReassignAsync(p_index, std::make_shared((char*)vectorId, m_vectorInfoSize), queryResult->VID); + } + } + } + + m_mergeList.erase(headID); + m_stat.m_mergeNum++; + + return ErrorCode::Success; + } + } + m_postingSizes.UpdateSize(headID, currentLength); + if (db->Put(headID, mergedPostingList) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Merge Fail to write back postings\n"); + exit(0); + } + m_mergeList.erase(headID); + m_mergeLock.unlock(); + } + return ErrorCode::Success; + } + + inline void SplitAsync(VectorIndex* p_index, SizeType headID, std::function p_callback = nullptr) + { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Into SplitAsync, current headID: %d, size: %d\n", headID, m_postingSizes.GetSize(headID)); + // tbb::concurrent_hash_map::const_accessor headIDAccessor; + // if (m_splitList.find(headIDAccessor, headID)) { + // return; + // } + // tbb::concurrent_hash_map::value_type workPair(headID, headID); + // m_splitList.insert(workPair); + { + std::lock_guard tmplock(m_runningLock); + + if (m_splitList.find(headID) != m_splitList.end()) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Already in queue\n"); + return; + } + m_splitList.insert(headID); + } + + auto* curJob = new SplitAsyncJob(p_index, this, headID, m_opt->m_disableReassign, p_callback); + m_splitThreadPool->add(curJob); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Add to thread pool\n"); + } + + inline void MergeAsync(VectorIndex* p_index, SizeType headID, std::function p_callback = nullptr) + { + tbb::concurrent_hash_map::const_accessor headIDAccessor; + if (m_mergeList.find(headIDAccessor, headID)) 
{ + return; + } + tbb::concurrent_hash_map::value_type workPair(headID, headID); + m_mergeList.insert(workPair); + + auto* curJob = new MergeAsyncJob(p_index, this, headID, m_opt->m_disableReassign, p_callback); + m_splitThreadPool->add(curJob); + } + + inline void ReassignAsync(VectorIndex* p_index, std::shared_ptr vectorInfo, SizeType HeadPrev, std::function p_callback = nullptr) + { + auto* curJob = new ReassignAsyncJob(p_index, this, std::move(vectorInfo), HeadPrev, p_callback); + m_splitThreadPool->add(curJob); + } + + ErrorCode CollectReAssign(VectorIndex* p_index, SizeType headID, std::vector& postingLists, std::vector& newHeadsID) { + auto headVector = reinterpret_cast(p_index->GetSample(headID)); + std::vector newHeadsDist; + std::set reAssignVectorsTopK; + newHeadsDist.push_back(p_index->ComputeDistance(p_index->GetSample(headID), p_index->GetSample(newHeadsID[0]))); + newHeadsDist.push_back(p_index->ComputeDistance(p_index->GetSample(headID), p_index->GetSample(newHeadsID[1]))); + for (int i = 0; i < postingLists.size(); i++) { + auto& postingList = postingLists[i]; + size_t postVectorNum = postingList.size() / m_vectorInfoSize; + auto* postingP = reinterpret_cast(&postingList.front()); + for (int j = 0; j < postVectorNum; j++) { + uint8_t* vectorId = postingP + j * m_vectorInfoSize; + SizeType vid = *(reinterpret_cast(vectorId)); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "VID: %d, Head: %d\n", vid, newHeadsID[i]); + uint8_t version = *(reinterpret_cast(vectorId + sizeof(int))); + ValueType* vector = reinterpret_cast(vectorId + m_metaDataSize); + if (reAssignVectorsTopK.find(vid) == reAssignVectorsTopK.end() && !m_versionMap->Deleted(vid) && m_versionMap->GetVersion(vid) == version) { + m_stat.m_reAssignScanNum++; + float dist = p_index->ComputeDistance(p_index->GetSample(newHeadsID[i]), vector); + if (CheckIsNeedReassign(p_index, newHeadsID, vector, headID, newHeadsDist[i], dist, true, newHeadsID[i])) { + ReassignAsync(p_index, 
std::make_shared((char*)vectorId, m_vectorInfoSize), newHeadsID[i]); + reAssignVectorsTopK.insert(vid); + } + } + } + } + if (m_opt->m_reassignK > 0) { + std::vector HeadPrevTopK; + newHeadsDist.clear(); + newHeadsDist.resize(0); + postingLists.clear(); + postingLists.resize(0); + COMMON::QueryResultSet nearbyHeads(headVector, m_opt->m_reassignK); + p_index->SearchIndex(nearbyHeads); + BasicResult* queryResults = nearbyHeads.GetResults(); + for (int i = 0; i < nearbyHeads.GetResultNum(); i++) { + auto vid = queryResults[i].VID; + if (vid == -1) break; + + if (find(newHeadsID.begin(), newHeadsID.end(), vid) == newHeadsID.end()) { + HeadPrevTopK.push_back(vid); + newHeadsID.push_back(vid); + newHeadsDist.push_back(queryResults[i].Dist); + } + } + auto reassignScanIOBegin = std::chrono::high_resolution_clock::now(); + if (db->MultiGet(HeadPrevTopK, &postingLists) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReAssign can't get all the near postings\n"); + exit(0); + } + auto reassignScanIOEnd = std::chrono::high_resolution_clock::now(); + auto elapsedMSeconds = std::chrono::duration_cast(reassignScanIOEnd - reassignScanIOBegin).count(); + m_stat.m_reassignScanIOCost += elapsedMSeconds; + + for (int i = 0; i < postingLists.size(); i++) { + auto& postingList = postingLists[i]; + size_t postVectorNum = postingList.size() / m_vectorInfoSize; + auto* postingP = reinterpret_cast(&postingList.front()); + for (int j = 0; j < postVectorNum; j++) { + uint8_t* vectorId = postingP + j * m_vectorInfoSize; + SizeType vid = *(reinterpret_cast(vectorId)); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d: VID: %d, Head: %d, size:%d/%d\n", i, vid, HeadPrevTopK[i], postingLists.size(), HeadPrevTopK.size()); + uint8_t version = *(reinterpret_cast(vectorId + sizeof(int))); + ValueType* vector = reinterpret_cast(vectorId + m_metaDataSize); + if (reAssignVectorsTopK.find(vid) == reAssignVectorsTopK.end() && !m_versionMap->Deleted(vid) && m_versionMap->GetVersion(vid) 
== version) { + m_stat.m_reAssignScanNum++; + float dist = p_index->ComputeDistance(p_index->GetSample(HeadPrevTopK[i]), vector); + if (CheckIsNeedReassign(p_index, newHeadsID, vector, headID, newHeadsDist[i], dist, false, HeadPrevTopK[i])) { + ReassignAsync(p_index, std::make_shared((char*)vectorId, m_vectorInfoSize), HeadPrevTopK[i]); + reAssignVectorsTopK.insert(vid); + } + } + } + } + } + // exit(1); + return ErrorCode::Success; + } + + bool RNGSelection(std::vector& selections, ValueType* queryVector, VectorIndex* p_index, SizeType p_fullID, int& replicaCount, int checkHeadID = -1) + { + QueryResult queryResults(queryVector, m_opt->m_internalResultNum, false); + p_index->SearchIndex(queryResults); + + replicaCount = 0; + for (int i = 0; i < queryResults.GetResultNum() && replicaCount < m_opt->m_replicaCount; ++i) + { + BasicResult* queryResult = queryResults.GetResult(i); + if (queryResult->VID == -1) { + break; + } + // RNG Check. + bool rngAccpeted = true; + for (int j = 0; j < replicaCount; ++j) + { + float nnDist = p_index->ComputeDistance(p_index->GetSample(queryResult->VID), + p_index->GetSample(selections[j].node)); + if (m_opt->m_rngFactor * nnDist <= queryResult->Dist) + { + rngAccpeted = false; + break; + } + } + if (!rngAccpeted) continue; + selections[replicaCount].node = queryResult->VID; + selections[replicaCount].tonode = p_fullID; + selections[replicaCount].distance = queryResult->Dist; + if (selections[replicaCount].node == checkHeadID) { + return false; + } + ++replicaCount; + } + return true; + } + + ErrorCode Append(VectorIndex* p_index, SizeType headID, int appendNum, std::string& appendPosting, int reassignThreshold = 0) + { + auto appendBegin = std::chrono::high_resolution_clock::now(); + if (appendPosting.empty()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Error! 
empty append posting!\n"); + } + + if (appendNum == 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Error!, headID :%d, appendNum:%d\n", headID, appendNum); + } + + checkDeleted: + if (!p_index->ContainSample(headID)) { + for (int i = 0; i < appendNum; i++) + { + uint32_t idx = i * m_vectorInfoSize; + SizeType VID = *(int*)(&appendPosting[idx]); + uint8_t version = *(uint8_t*)(&appendPosting[idx + sizeof(int)]); + auto vectorInfo = std::make_shared(appendPosting.c_str() + idx, m_vectorInfoSize); + if (m_versionMap->GetVersion(VID) == version) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head Miss To ReAssign: VID: %d, current version: %d\n", *(int*)(&appendPosting[idx]), version); + m_stat.m_headMiss++; + ReassignAsync(p_index, vectorInfo, headID); + } + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head Miss Do Not To ReAssign: VID: %d, version: %d, current version: %d\n", *(int*)(&appendPosting[idx]), m_versionMap->GetVersion(*(int*)(&appendPosting[idx])), version); + } + return ErrorCode::Undefined; + } + double appendIOSeconds = 0; + { + //std::shared_lock lock(m_rwLocks[headID]); //ROCKSDB + std::unique_lock lock(m_rwLocks[headID]); //SPDK + if (!p_index->ContainSample(headID)) { + goto checkDeleted; + } + auto appendIOBegin = std::chrono::high_resolution_clock::now(); + if (db->Merge(headID, appendPosting) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed! 
Posting Size:%d, limit: %d\n", m_postingSizes.GetSize(headID), m_postingSizeLimit); + GetDBStats(); + exit(1); + } + auto appendIOEnd = std::chrono::high_resolution_clock::now(); + appendIOSeconds = std::chrono::duration_cast(appendIOEnd - appendIOBegin).count(); + m_postingSizes.IncSize(headID, appendNum); + } + if (m_postingSizes.GetSize(headID) > (m_postingSizeLimit + reassignThreshold)) { + // SizeType VID = *(int*)(&appendPosting[0]); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split Triggered by inserting VID: %d, reAssign: %d\n", VID, reassignThreshold); + // GetDBStats(); + // if (m_postingSizes.GetSize(headID) > 120) { + // GetDBStats(); + // exit(1); + // } + if (!reassignThreshold) SplitAsync(p_index, headID); + else Split(p_index, headID, !m_opt->m_disableReassign); + // SplitAsync(p_index, headID); + } + auto appendEnd = std::chrono::high_resolution_clock::now(); + double elapsedMSeconds = std::chrono::duration_cast(appendEnd - appendBegin).count(); + if (!reassignThreshold) { + m_stat.m_appendTaskNum++; + m_stat.m_appendIOCost += appendIOSeconds; + m_stat.m_appendCost += elapsedMSeconds; + } + // } else { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReAssign Append To: %d\n", headID); + // } + return ErrorCode::Success; + } + + void Reassign(VectorIndex* p_index, std::shared_ptr vectorInfo, SizeType HeadPrev) + { + SizeType VID = *((SizeType*)vectorInfo->c_str()); + uint8_t version = *((uint8_t*)(vectorInfo->c_str() + sizeof(VID))); + // return; + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReassignID: %d, version: %d, current version: %d, HeadPrev: %d\n", VID, version, m_versionMap->GetVersion(VID), HeadPrev); + if (m_versionMap->Deleted(VID) || m_versionMap->GetVersion(VID) != version) { + return; + } + auto reassignBegin = std::chrono::high_resolution_clock::now(); + + m_stat.m_reAssignNum++; + + auto selectBegin = std::chrono::high_resolution_clock::now(); + std::vector selections(static_cast(m_opt->m_replicaCount)); + int replicaCount; + bool 
isNeedReassign = RNGSelection(selections, (ValueType*)(vectorInfo->c_str() + m_metaDataSize), p_index, VID, replicaCount, HeadPrev); + auto selectEnd = std::chrono::high_resolution_clock::now(); + auto elapsedMSeconds = std::chrono::duration_cast(selectEnd - selectBegin).count(); + m_stat.m_selectCost += elapsedMSeconds; + + auto reassignAppendBegin = std::chrono::high_resolution_clock::now(); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Need ReAssign\n"); + if (isNeedReassign && m_versionMap->GetVersion(VID) == version) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Update Version: VID: %d, version: %d, current version: %d\n", VID, version, m_versionMap.GetVersion(VID)); + m_versionMap->IncVersion(VID, &version); + (*vectorInfo)[sizeof(VID)] = version; + + //LOG(Helper::LogLevel::LL_Info, "Reassign: oldVID:%d, replicaCount:%d, candidateNum:%d, dist0:%f\n", oldVID, replicaCount, i, selections[0].distance); + for (int i = 0; i < replicaCount && m_versionMap->GetVersion(VID) == version; i++) { + //LOG(Helper::LogLevel::LL_Info, "Reassign: headID :%d, oldVID:%d, newVID:%d, posting length: %d, dist: %f, string size: %d\n", headID, oldVID, VID, m_postingSizes[headID].load(), selections[i].distance, newPart.size()); + if (ErrorCode::Undefined == Append(p_index, selections[i].node, 1, *vectorInfo, 3)) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head Miss: VID: %d, current version: %d, another re-assign\n", VID, version); + break; + } + } + } + auto reassignAppendEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(reassignAppendEnd - reassignAppendBegin).count(); + m_stat.m_reAssignAppendCost += elapsedMSeconds; + + auto reassignEnd = std::chrono::high_resolution_clock::now(); + elapsedMSeconds = std::chrono::duration_cast(reassignEnd - reassignBegin).count(); + m_stat.m_reAssignCost += elapsedMSeconds; + } + + bool LoadIndex(Options& p_opt, COMMON::VersionLabel& p_versionMap, std::shared_ptr m_vectorTranslateMap, 
std::shared_ptr m_index) override { + m_versionMap = &p_versionMap; + m_opt = &p_opt; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "DataBlockSize: %d, Capacity: %d\n", m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + + if (m_opt->m_recovery) { + std::string p_persistenMap = m_opt->m_persistentBufferPath + "_versionMap"; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: Loading version map\n"); + m_versionMap->Load(p_persistenMap, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: Loading posting size\n"); + std::string p_persistenRecord = m_opt->m_persistentBufferPath + "_postingSizeRecord"; + m_postingSizes.Load(p_persistenRecord, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: Current vector num: %d.\n", m_versionMap->Count()); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery:Current posting num: %d.\n", m_postingSizes.GetPostingNum()); + } + else if (!m_opt->m_useSPDK) { + m_versionMap->Load(m_opt->m_deleteIDFile, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + m_postingSizes.Load(m_opt->m_ssdInfoFile, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Current vector num: %d.\n", m_versionMap->Count()); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Current posting num: %d.\n", m_postingSizes.GetPostingNum()); + } else if (m_opt->m_useSPDK) { + m_versionMap->Initialize(m_opt->m_vectorSize, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Copying data from static to SPDK\n"); + std::shared_ptr storeExtraSearcher; + storeExtraSearcher.reset(new ExtraStaticSearcher()); + if (!storeExtraSearcher->LoadIndex(*m_opt, *m_versionMap, m_vectorTranslateMap, m_index)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Initialize Error\n"); + exit(1); + } + int totalPostingNum = m_index->GetNumSamples(); + + 
m_postingSizes.Initialize((SizeType)(totalPostingNum), m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); + + std::vector threads; + std::atomic_size_t vectorsSent(0); + + auto func = [&]() + { + Initialize(); + size_t index = 0; + while (true) + { + index = vectorsSent.fetch_add(1); + if (index < totalPostingNum) + { + + if ((index & ((1 << 14) - 1)) == 0) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Copy to SPDK: Sent %.2lf%%...\n", index * 100.0 / totalPostingNum); + } + std::string tempPosting; + storeExtraSearcher->GetWritePosting(index, tempPosting); + int vectorNum = (int)(tempPosting.size() / (m_vectorInfoSize - sizeof(uint8_t))); + + if (vectorNum > m_postingSizeLimit) vectorNum = m_postingSizeLimit; + auto* postingP = reinterpret_cast(&tempPosting.front()); + std::string newPosting(m_vectorInfoSize * vectorNum , '\0'); + char* ptr = (char*)(newPosting.c_str()); + for (int j = 0; j < vectorNum; ++j, ptr += m_vectorInfoSize) { + char* vectorInfo = postingP + j * (m_vectorInfoSize - sizeof(uint8_t)); + int VID = *(reinterpret_cast(vectorInfo)); + uint8_t version = m_versionMap->GetVersion(VID); + memcpy(ptr, &VID, sizeof(int)); + memcpy(ptr + sizeof(int), &version, sizeof(uint8_t)); + memcpy(ptr + sizeof(int) + sizeof(uint8_t), vectorInfo + sizeof(int), m_vectorInfoSize - sizeof(uint8_t) - sizeof(int)); + } + if (m_opt->m_excludehead) { + auto VIDTrans = static_cast((m_vectorTranslateMap.get())[index]); + uint8_t version = m_versionMap->GetVersion(VIDTrans); + std::string appendPosting(m_vectorInfoSize, '\0'); + char* ptr = (char*)(appendPosting.c_str()); + memcpy(ptr, &VIDTrans, sizeof(VIDTrans)); + memcpy(ptr + sizeof(VIDTrans), &version, sizeof(version)); + memcpy(ptr + sizeof(int) + sizeof(uint8_t), m_index->GetSample(index), m_vectorInfoSize - sizeof(int) + sizeof(uint8_t)); + newPosting = appendPosting + newPosting; + } + GetWritePosting(index, newPosting, true); + } + else + { + ExitBlockController(); + return; + } + } + }; + for (int j = 0; j 
< m_opt->m_iSSDNumberOfThreads; j++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + } + + if (m_opt->m_update) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: initialize thread pools, append: %d, reassign %d\n", m_opt->m_appendThreadNum, m_opt->m_reassignThreadNum); + m_splitThreadPool = std::make_shared(); + m_splitThreadPool->initSPDK(m_opt->m_appendThreadNum, this); + m_reassignThreadPool = std::make_shared(); + m_reassignThreadPool->initSPDK(m_opt->m_reassignThreadNum, this); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: finish initialization\n"); + + if (m_opt->m_enableWAL) { + std::string p_persistenWAL = m_opt->m_persistentBufferPath + "_WAL"; + std::shared_ptr pdb; + pdb.reset(new RocksDBIO(p_persistenWAL.c_str(), false, false)); + m_wal.reset(new PersistentBuffer(pdb)); + } + } + + /** recover the previous WAL **/ + if (m_opt->m_recovery && m_opt->m_enableWAL) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: WAL\n"); + std::string assignment; + int countAssignment = 0; + if (!m_wal->StartToScan(assignment)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: No log\n"); + return true; + } + do { + countAssignment++; + if (countAssignment % 10000 == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Process %d logs\n", countAssignment); + char* ptr = (char*)(assignment.c_str()); + SizeType VID = *(reinterpret_cast(ptr)); + if (assignment.size() == m_vectorInfoSize) { + if (VID >= m_versionMap->GetVectorNum()) { + if (m_versionMap->AddBatch(VID - m_versionMap->GetVectorNum() + 1) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "MemoryOverFlow: VID: %d, Map Size:%d\n", VID, m_versionMap->BufferSize()); + exit(1); + } + } + std::shared_ptr vectorSet; + vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)ptr + sizeof(SizeType) + sizeof(uint8_t), sizeof(ValueType) * 1 * m_opt->m_dim, false), + GetEnumValueType(), m_opt->m_dim, 1)); + AddIndex(vectorSet, m_index, VID); + } else { + 
m_versionMap->Delete(VID); + } + } while (m_wal->NextToScan(assignment)); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: No more to repeat, wait for rebalance\n"); + while(!AllFinished()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + } + return true; + } + + virtual void SearchIndex(ExtraWorkSpace* p_exWorkSpace, + QueryResult& p_queryResults, + std::shared_ptr p_index, + SearchStats* p_stats, std::set* truth, std::map>* found) override + { + auto exStart = std::chrono::high_resolution_clock::now(); + + // const auto postingListCount = static_cast(p_exWorkSpace->m_postingIDs.size()); + + p_exWorkSpace->m_deduper.clear(); + + auto exSetUpEnd = std::chrono::high_resolution_clock::now(); + + p_stats->m_exSetUpLatency = ((double)std::chrono::duration_cast(exSetUpEnd - exStart).count()) / 1000; + + COMMON::QueryResultSet& queryResults = *((COMMON::QueryResultSet*) & p_queryResults); + + int diskRead = 0; + int diskIO = 0; + int listElements = 0; + + double compLatency = 0; + double readLatency = 0; + + std::vector postingLists; + + std::chrono::microseconds remainLimit = m_hardLatencyLimit - std::chrono::microseconds((int)p_stats->m_totalLatency); + + auto readStart = std::chrono::high_resolution_clock::now(); + db->MultiGet(p_exWorkSpace->m_postingIDs, &postingLists, remainLimit); + auto readEnd = std::chrono::high_resolution_clock::now(); + + for (uint32_t pi = 0; pi < postingLists.size(); ++pi) { + diskIO += ((postingLists[pi].size() + PageSize - 1) >> PageSizeEx); + } + + readLatency += ((double)std::chrono::duration_cast(readEnd - readStart).count()); + + for (uint32_t pi = 0; pi < postingLists.size(); ++pi) { + auto curPostingID = p_exWorkSpace->m_postingIDs[pi]; + std::string& postingList = postingLists[pi]; + + int vectorNum = (int)(postingList.size() / m_vectorInfoSize); + + int realNum = vectorNum; + + diskRead += (int)(postingList.size()); + listElements += vectorNum; + + auto compStart = 
std::chrono::high_resolution_clock::now(); + for (int i = 0; i < vectorNum; i++) { + char* vectorInfo = (char*)postingList.data() + i * m_vectorInfoSize; + int vectorID = *(reinterpret_cast(vectorInfo)); + if (m_versionMap->Deleted(vectorID)) { + realNum--; + listElements--; + continue; + } + if(p_exWorkSpace->m_deduper.CheckAndSet(vectorID)) { + listElements--; + continue; + } + auto distance2leaf = p_index->ComputeDistance(queryResults.GetQuantizedTarget(), vectorInfo + m_metaDataSize); + queryResults.AddPoint(vectorID, distance2leaf); + } + auto compEnd = std::chrono::high_resolution_clock::now(); + if (realNum <= m_mergeThreshold && !m_opt->m_inPlace) MergeAsync(p_index.get(), curPostingID); + + compLatency += ((double)std::chrono::duration_cast(compEnd - compStart).count()); + + if (truth) { + for (int i = 0; i < vectorNum; ++i) { + char* vectorInfo = (char*)postingList.data() + i * m_vectorInfoSize; + int vectorID = *(reinterpret_cast(vectorInfo)); + if (truth->count(vectorID) != 0) + (*found)[curPostingID].insert(vectorID); + } + } + } + + if (p_stats) + { + p_stats->m_compLatency = compLatency / 1000; + p_stats->m_diskReadLatency = readLatency / 1000; + p_stats->m_totalListElementsCount = listElements; + p_stats->m_diskIOCount = diskIO; + p_stats->m_diskAccessCount = diskRead / 1024; + } + } + + bool BuildIndex(std::shared_ptr& p_reader, std::shared_ptr p_headIndex, Options& p_opt, COMMON::VersionLabel& p_versionMap, SizeType upperBound = -1) override { + m_versionMap = &p_versionMap; + m_opt = &p_opt; + + int numThreads = m_opt->m_iSSDNumberOfThreads; + int candidateNum = m_opt->m_internalResultNum; + std::unordered_set headVectorIDS; + if (m_opt->m_headIDFile.empty()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Not found VectorIDTranslate!\n"); + return false; + } + + if (fileexists((m_opt->m_indexDirectory + FolderSep + m_opt->m_headIDFile).c_str())) + { + auto ptr = SPTAG::f_createIO(); + if (ptr == nullptr || !ptr->Initialize((m_opt->m_indexDirectory 
+ FolderSep + m_opt->m_headIDFile).c_str(), std::ios::binary | std::ios::in)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "failed open VectorIDTranslate: %s\n", m_opt->m_headIDFile.c_str()); + return false; + } + + std::uint64_t vid; + while (ptr->ReadBinary(sizeof(vid), reinterpret_cast(&vid)) == sizeof(vid)) + { + headVectorIDS.insert(static_cast(vid)); + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loaded %u Vector IDs\n", static_cast(headVectorIDS.size())); + } + + SizeType fullCount = 0; + { + auto fullVectors = p_reader->GetVectorSet(); + fullCount = fullVectors->Count(); + m_vectorInfoSize = fullVectors->PerVectorDataSize() + m_metaDataSize; + } + if (upperBound > 0) fullCount = upperBound; + + // m_metaDataSize = sizeof(int) + sizeof(uint8_t) + sizeof(float); + m_metaDataSize = sizeof(int) + sizeof(uint8_t); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Build SSD Index.\n"); + + Selection selections(static_cast(fullCount) * m_opt->m_replicaCount, m_opt->m_tmpdir); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Full vector count:%d Edge bytes:%llu selection size:%zu, capacity size:%zu\n", fullCount, sizeof(Edge), selections.m_selections.size(), selections.m_selections.capacity()); + std::vector replicaCount(fullCount); + std::vector postingListSize(p_headIndex->GetNumSamples()); + for (auto& pls : postingListSize) pls = 0; + std::unordered_set emptySet; + SizeType batchSize = (fullCount + m_opt->m_batches - 1) / m_opt->m_batches; + + auto t1 = std::chrono::high_resolution_clock::now(); + if (p_opt.m_batches > 1) + { + if (selections.SaveBatch() != ErrorCode::Success) + { + return false; + } + } + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Preparation done, start candidate searching.\n"); + SizeType sampleSize = m_opt->m_samples; + std::vector samples(sampleSize, 0); + for (int i = 0; i < m_opt->m_batches; i++) { + SizeType start = i * batchSize; + SizeType end = min(start + batchSize, fullCount); + auto fullVectors = p_reader->GetVectorSet(start, end); + if 
(m_opt->m_distCalcMethod == DistCalcMethod::Cosine && !p_reader->IsNormalized()) fullVectors->Normalize(m_opt->m_iSSDNumberOfThreads); + + if (p_opt.m_batches > 1) { + if (selections.LoadBatch(static_cast(start) * p_opt.m_replicaCount, static_cast(end) * p_opt.m_replicaCount) != ErrorCode::Success) + { + return false; + } + emptySet.clear(); + for (auto vid : headVectorIDS) { + if (vid >= start && vid < end) emptySet.insert(vid - start); + } + } + else { + emptySet = headVectorIDS; + } + + int sampleNum = 0; + for (int j = start; j < end && sampleNum < sampleSize; j++) + { + if (headVectorIDS.count(j) == 0) samples[sampleNum++] = j - start; + } + + float acc = 0; +// #pragma omp parallel for schedule(dynamic) +// for (int j = 0; j < sampleNum; j++) +// { +// COMMON::Utils::atomic_float_add(&acc, COMMON::TruthSet::CalculateRecall(p_headIndex.get(), fullVectors->GetVector(samples[j]), candidateNum)); +// } + acc = acc / sampleNum; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Batch %d vector(%d,%d) loaded with %d vectors (%zu) HeadIndex acc @%d:%f.\n", i, start, end, fullVectors->Count(), selections.m_selections.size(), candidateNum, acc); + + p_headIndex->ApproximateRNG(fullVectors, emptySet, candidateNum, selections.m_selections.data(), m_opt->m_replicaCount, numThreads, m_opt->m_gpuSSDNumTrees, m_opt->m_gpuSSDLeafSize, m_opt->m_rngFactor, m_opt->m_numGPUs); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Batch %d finished!\n", i); + + for (SizeType j = start; j < end; j++) { + replicaCount[j] = 0; + size_t vecOffset = j * (size_t)m_opt->m_replicaCount; + if (headVectorIDS.count(j) == 0) { + for (int resNum = 0; resNum < m_opt->m_replicaCount && selections[vecOffset + resNum].node != INT_MAX; resNum++) { + ++postingListSize[selections[vecOffset + resNum].node]; + selections[vecOffset + resNum].tonode = j; + ++replicaCount[j]; + } + } + } + + if (p_opt.m_batches > 1) + { + if (selections.SaveBatch() != ErrorCode::Success) + { + return false; + } + } + } + } + auto t2 = 
std::chrono::high_resolution_clock::now(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Searching replicas ended. Search Time: %.2lf mins\n", ((double)std::chrono::duration_cast(t2 - t1).count()) / 60.0); + + if (p_opt.m_batches > 1) + { + if (selections.LoadBatch(0, static_cast(fullCount) * p_opt.m_replicaCount) != ErrorCode::Success) + { + return false; + } + } + + // Sort results either in CPU or GPU + VectorIndex::SortSelections(&selections.m_selections); + + auto t3 = std::chrono::high_resolution_clock::now(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Time to sort selections:%.2lf sec.\n", ((double)std::chrono::duration_cast(t3 - t2).count()) + ((double)std::chrono::duration_cast(t3 - t2).count()) / 1000); + + auto postingSizeLimit = m_postingSizeLimit; + if (m_opt->m_postingPageLimit > 0) + { + postingSizeLimit = static_cast(m_opt->m_postingPageLimit * PageSize / m_vectorInfoSize); + } + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Posting size limit: %d\n", postingSizeLimit); + + + { + std::vector replicaCountDist(m_opt->m_replicaCount + 1, 0); + for (int i = 0; i < replicaCount.size(); ++i) + { + if (headVectorIDS.count(i) > 0) continue; + ++replicaCountDist[replicaCount[i]]; + } + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Before Posting Cut:\n"); + for (int i = 0; i < replicaCountDist.size(); ++i) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Replica Count Dist: %d, %d\n", i, replicaCountDist[i]); + } + } + + #pragma omp parallel for schedule(dynamic) + for (int i = 0; i < postingListSize.size(); ++i) + { + if (postingListSize[i] <= postingSizeLimit) continue; + + std::size_t selectIdx = std::lower_bound(selections.m_selections.begin(), selections.m_selections.end(), i, Selection::g_edgeComparer) - selections.m_selections.begin(); + + for (size_t dropID = postingSizeLimit; dropID < postingListSize[i]; ++dropID) + { + int tonode = selections.m_selections[selectIdx + dropID].tonode; + --replicaCount[tonode]; + } + postingListSize[i] = postingSizeLimit; + } + 
{ + std::vector replicaCountDist(m_opt->m_replicaCount + 1, 0); + for (int i = 0; i < replicaCount.size(); ++i) + { + ++replicaCountDist[replicaCount[i]]; + } + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "After Posting Cut:\n"); + for (int i = 0; i < replicaCountDist.size(); ++i) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Replica Count Dist: %d, %d\n", i, replicaCountDist[i]); + } + } + + // if (m_opt->m_outputEmptyReplicaID) + // { + // std::vector replicaCountDist(m_opt->m_replicaCount + 1, 0); + // auto ptr = SPTAG::f_createIO(); + // if (ptr == nullptr || !ptr->Initialize("EmptyReplicaID.bin", std::ios::binary | std::ios::out)) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to create EmptyReplicaID.bin!\n"); + // return false; + // } + // for (int i = 0; i < replicaCount.size(); ++i) + // { + // if (headVectorIDS.count(i) > 0) continue; + + // ++replicaCountDist[replicaCount[i]]; + + // if (replicaCount[i] < 2) + // { + // long long vid = i; + // if (ptr->WriteBinary(sizeof(vid), reinterpret_cast(&vid)) != sizeof(vid)) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failt to write EmptyReplicaID.bin!"); + // return false; + // } + // } + // } + + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "After Posting Cut:\n"); + // for (int i = 0; i < replicaCountDist.size(); ++i) + // { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Replica Count Dist: %d, %d\n", i, replicaCountDist[i]); + // } + // } + + + auto t4 = std::chrono::high_resolution_clock::now(); + SPTAGLIB_LOG(SPTAG::Helper::LogLevel::LL_Info, "Time to perform posting cut:%.2lf sec.\n", ((double)std::chrono::duration_cast(t4 - t3).count()) + ((double)std::chrono::duration_cast(t4 - t3).count()) / 1000); + + auto fullVectors = p_reader->GetVectorSet(); + if (m_opt->m_distCalcMethod == DistCalcMethod::Cosine && !p_reader->IsNormalized()) fullVectors->Normalize(m_opt->m_iSSDNumberOfThreads); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: initialize versionMap\n"); + 
m_versionMap->Initialize(fullCount, p_headIndex->m_iDataBlockSize, p_headIndex->m_iDataCapacity); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: Writing values to DB\n"); + + std::vector postingListSize_int(postingListSize.begin(), postingListSize.end()); + + WriteDownAllPostingToDB(postingListSize_int, selections, fullVectors); + + m_postingSizes.Initialize((SizeType)(postingListSize.size()), p_headIndex->m_iDataBlockSize, p_headIndex->m_iDataCapacity); + for (int i = 0; i < postingListSize.size(); i++) { + m_postingSizes.UpdateSize(i, postingListSize[i]); + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: Writing SSD Info\n"); + m_postingSizes.Save(m_opt->m_ssdInfoFile); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: save versionMap\n"); + m_versionMap->Save(m_opt->m_deleteIDFile); + + auto t5 = std::chrono::high_resolution_clock::now(); + double elapsedSeconds = std::chrono::duration_cast(t5 - t1).count(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Total used time: %.2lf minutes (about %.2lf hours).\n", elapsedSeconds / 60.0, elapsedSeconds / 3600.0); + return true; + } + + void WriteDownAllPostingToDB(const std::vector& p_postingListSizes, Selection& p_postingSelections, std::shared_ptr p_fullVectors) { + // #pragma omp parallel for num_threads(10) + std::vector threads; + std::atomic_size_t vectorsSent(0); + auto func = [&]() + { + Initialize(); + size_t index = 0; + while (true) + { + index = vectorsSent.fetch_add(1); + if (index < p_postingListSizes.size()) { + std::string postinglist(m_vectorInfoSize * p_postingListSizes[index], '\0'); + char* ptr = (char*)postinglist.c_str(); + std::size_t selectIdx = p_postingSelections.lower_bound(index); + for (int j = 0; j < p_postingListSizes[index]; ++j) { + if (p_postingSelections[selectIdx].node != index) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Selection ID NOT MATCH\n"); + exit(1); + } + SizeType fullID = p_postingSelections[selectIdx++].tonode; + // if (id == 0) 
SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ID: %d\n", fullID); + uint8_t version = m_versionMap->GetVersion(fullID); + // First Vector ID, then version, then Vector + Serialize(ptr, fullID, version, p_fullVectors->GetVector(fullID)); + ptr += m_vectorInfoSize; + } + db->Put(index, postinglist); + } + else + { + ExitBlockController(); + return; + } + } + }; + + for (int j = 0; j < 20; j++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + } + + ErrorCode AddIndex(std::shared_ptr& p_vectorSet, + std::shared_ptr p_index, SizeType begin) override { + + for (int v = 0; v < p_vectorSet->Count(); v++) { + SizeType VID = begin + v; + std::vector selections(static_cast(m_opt->m_replicaCount)); + int replicaCount; + RNGSelection(selections, (ValueType*)(p_vectorSet->GetVector(v)), p_index.get(), VID, replicaCount); + + uint8_t version = m_versionMap->GetVersion(VID); + std::string appendPosting(m_vectorInfoSize, '\0'); + Serialize((char*)(appendPosting.c_str()), VID, version, p_vectorSet->GetVector(v)); + if (m_opt->m_enableWAL) { + m_wal->PutAssignment(appendPosting); + } + for (int i = 0; i < replicaCount; i++) + { + // AppendAsync(selections[i].node, 1, appendPosting_ptr); + Append(p_index.get(), selections[i].node, 1, appendPosting); + } + } + return ErrorCode::Success; + } + + ErrorCode DeleteIndex(SizeType p_id) override { + if (m_opt->m_enableWAL) { + std::string assignment(sizeof(SizeType), '\0'); + memcpy((char*)assignment.c_str(), &p_id, sizeof(SizeType)); + m_wal->PutAssignment(assignment); + } + if (m_versionMap->Delete(p_id)) return ErrorCode::Success; + return ErrorCode::VectorNotFound; + } + + SizeType SearchVector(std::shared_ptr& p_vectorSet, + std::shared_ptr p_index, int testNum = 64, SizeType VID = -1) override { + + QueryResult queryResults(p_vectorSet->GetVector(0), testNum, false); + p_index->SearchIndex(queryResults); + + std::set checked; + std::string postingList; + for (int i = 0; i < queryResults.GetResultNum(); 
++i) + { + db->Get(queryResults.GetResult(i)->VID, &postingList); + int vectorNum = (int)(postingList.size() / m_vectorInfoSize); + + for (int j = 0; j < vectorNum; j++) { + char* vectorInfo = (char* )postingList.data() + j * m_vectorInfoSize; + int vectorID = *(reinterpret_cast(vectorInfo)); + if(checked.find(vectorID) != checked.end() || m_versionMap->Deleted(vectorID)) { + continue; + } + checked.insert(vectorID); + if (VID != -1 && VID == vectorID) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Find %d in %dth posting\n", VID, i); + auto distance2leaf = p_index->ComputeDistance(queryResults.GetQuantizedTarget(), vectorInfo + m_metaDataSize); + if (distance2leaf < 1e-6) return vectorID; + } + } + return -1; + } + + void ForceGC(VectorIndex* p_index) override { + for (int i = 0; i < p_index->GetNumSamples(); i++) { + if (!p_index->ContainSample(i)) continue; + Split(p_index, i, false); + } + } + + bool AllFinished() { return m_splitThreadPool->allClear() && m_reassignThreadPool->allClear(); } + void ForceCompaction() override { db->ForceCompaction(); } + void GetDBStats() override { + db->GetStat(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "remain splitJobs: %d, reassignJobs: %d, running split: %d, running reassign: %d\n", m_splitThreadPool->jobsize(), m_reassignThreadPool->jobsize(), m_splitThreadPool->runningJobs(), m_reassignThreadPool->runningJobs()); + } + + void GetIndexStats(int finishedInsert, bool cost, bool reset) override { m_stat.PrintStat(finishedInsert, cost, reset); } + + bool CheckValidPosting(SizeType postingID) override { + return m_postingSizes.GetSize(postingID) > 0; + } + + bool Initialize() override { + return db->Initialize(); + } + + bool ExitBlockController() override { + return db->ExitBlockController(); + } + + void GetWritePosting(SizeType pid, std::string& posting, bool write = false) override { + if (write) { + db->Put(pid, posting); + m_postingSizes.UpdateSize(pid, posting.size() / m_vectorInfoSize); + // 
SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "PostingSize: %d\n", m_postingSizes.GetSize(pid)); + // exit(1); + } else { + db->Get(pid, &posting); + } + } + + void Checkpoint(std::string prefix) override { + /**flush SPTAG, versionMap, block mapping, block pool**/ + std::string p_persistenMap = prefix + "_versionMap"; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Saving version map\n"); + m_versionMap->Save(p_persistenMap); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Saving posting size\n"); + std::string p_persistenRecord = prefix + "_postingSizeRecord"; + m_postingSizes.Save(p_persistenRecord); + db->Checkpoint(prefix); + if (m_opt->m_enableWAL) { + /** delete all the previous record **/ + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Checkpoint done, delete previous record\n"); + m_wal->ClearPreviousRecord(); + } + } + + ErrorCode GetPostingDebug(ExtraWorkSpace* p_exWorkSpace, std::shared_ptr p_index, SizeType vid, std::vector& VIDs, std::shared_ptr& vecs) { + std::string posting; + db->Get(vid, &posting); + int vectorNum = (int)(posting.size() / m_vectorInfoSize); + int vectorNum_real = vectorNum; + for (int j = 0; j < vectorNum; j++) { + char* vectorInfo = (char*)posting.data() + j * m_vectorInfoSize; + int vectorID = *(reinterpret_cast(vectorInfo)); + uint8_t version = *(reinterpret_cast(vectorInfo + sizeof(int))); + if(m_versionMap->GetVersion(vectorID) != version) { + vectorNum_real--; + } + + } + VIDs.resize(vectorNum_real); + ByteArray vector_array = ByteArray::Alloc(sizeof(ValueType) * vectorNum_real * m_opt->m_dim); + vecs.reset(new BasicVectorSet(vector_array, GetEnumValueType(), m_opt->m_dim, vectorNum_real)); + + for (int j = 0, i = 0; j < vectorNum; j++) { + char* vectorInfo = (char*)posting.data() + j * m_vectorInfoSize; + int vectorID = *(reinterpret_cast(vectorInfo)); + uint8_t version = *(reinterpret_cast(vectorInfo + sizeof(int))); + if(m_versionMap->GetVersion(vectorID) != version) { + continue; + } + VIDs[i] = vectorID; + auto outVec = vecs->GetVector(i); 
+                memcpy(outVec, (void*)(vectorInfo + sizeof(int) + sizeof(uint8_t)), sizeof(ValueType) * m_opt->m_dim);
+                i++;
+            }
+            return ErrorCode::Success;
+        }
+
+    private:
+
+        int m_metaDataSize = 0;
+
+        int m_vectorInfoSize = 0;
+
+        int m_postingSizeLimit = INT_MAX;
+
+        std::chrono::microseconds m_hardLatencyLimit = std::chrono::microseconds(2000);
+
+        int m_mergeThreshold = 10;
+    };
+} // namespace SPTAG
+#endif // _SPTAG_SPANN_EXTRADYNAMICSEARCHER_H_
diff --git a/AnnService/inc/Core/SPANN/ExtraRocksDBController.h b/AnnService/inc/Core/SPANN/ExtraRocksDBController.h
new file mode 100644
index 00000000..ef1cb9c2
--- /dev/null
+++ b/AnnService/inc/Core/SPANN/ExtraRocksDBController.h
@@ -0,0 +1,332 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifndef _SPTAG_SPANN_EXTRAROCKSDBCONTROLLER_H_
+#define _SPTAG_SPANN_EXTRAROCKSDBCONTROLLER_H_
+
+#include "inc/Helper/KeyValueIO.h"
+#include "inc/Helper/StringConvert.h"
+
+#include "rocksdb/db.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/rate_limiter.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/options.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/table.h"
+#include "rocksdb/utilities/checkpoint.h"
+
+#include
+#include
+#include
+#include
+
+namespace SPTAG::SPANN
+{
+    class RocksDBIO : public Helper::KeyValueIO
+    {
+        // Merge operator that appends operands to the stored value: the merged value is
+        // the existing bytes (if any) followed by every operand in list order.
+        class AnnMergeOperator : public rocksdb::MergeOperator
+        {
+        public:
+            // Builds merge_out->new_value as existing_value + operand_list[0] + operand_list[1] + ...
+            // Always returns true.
+            bool FullMergeV2(const rocksdb::MergeOperator::MergeOperationInput& merge_in,
+                rocksdb::MergeOperator::MergeOperationOutput* merge_out) const override
+            {
+                // existing_value is null when the key has no base value yet (e.g. a Merge that
+                // lands on a missing or deleted key); treat that as an empty prefix instead of
+                // dereferencing a null pointer.
+                const rocksdb::Slice* existing = merge_in.existing_value;
+                const size_t existingSize = (existing != nullptr) ? existing->size() : 0;
+
+                size_t length = existingSize;
+                for (const rocksdb::Slice& s : merge_in.operand_list) {
+                    length += s.size();
+                }
+                (merge_out->new_value).resize(length);
+                if (existingSize != 0) {
+                    memcpy(&(merge_out->new_value)[0], existing->data(), existingSize);
+                }
+                // Operands are copied right after the existing bytes.
+                size_t start = existingSize;
+                for (const rocksdb::Slice& s : merge_in.operand_list) {
+
memcpy((char*)((merge_out->new_value).c_str() + start), s.data(), s.size()); + start += s.size(); + } + return true; + } + + bool PartialMergeMulti(const rocksdb::Slice& key, + const std::deque& operand_list, + std::string* new_value, rocksdb::Logger* logger) const override + { + size_t length = 0; + for (const rocksdb::Slice& s : operand_list) { + length += s.size(); + } + new_value->resize(length); + size_t start = 0; + for (const rocksdb::Slice& s : operand_list) { + memcpy((char*)(new_value->c_str() + start), s.data(), s.size()); + start += s.size(); + } + return true; + } + + const char* Name() const override { + return "AnnMergeOperator"; + } + }; + + public: + RocksDBIO(const char* filePath, bool usdDirectIO, bool wal = false, bool recovery = false) { + dbPath = std::string(filePath); + //dbOptions.statistics = rocksdb::CreateDBStatistics(); + dbOptions.create_if_missing = true; + if (!wal) { + dbOptions.IncreaseParallelism(); + dbOptions.OptimizeLevelStyleCompaction(); + dbOptions.merge_operator.reset(new AnnMergeOperator); + // dbOptions.statistics = rocksdb::CreateDBStatistics(); + + // SST file size options + dbOptions.target_file_size_base = 128UL * 1024 * 1024; + dbOptions.target_file_size_multiplier = 2; + dbOptions.max_bytes_for_level_base = 16 * 1024UL * 1024 * 1024; + dbOptions.max_bytes_for_level_multiplier = 4; + dbOptions.max_subcompactions = 16; + dbOptions.num_levels = 4; + dbOptions.level0_file_num_compaction_trigger = 1; + dbOptions.level_compaction_dynamic_level_bytes = false; + dbOptions.write_buffer_size = 16UL * 1024 * 1024; + + // rate limiter options + // dbOptions.rate_limiter.reset(rocksdb::NewGenericRateLimiter(100UL << 20)); + + // blob options + dbOptions.enable_blob_files = true; + dbOptions.min_blob_size = 64; + dbOptions.blob_file_size = 8UL << 30; + dbOptions.blob_compression_type = rocksdb::CompressionType::kNoCompression; + dbOptions.enable_blob_garbage_collection = true; + dbOptions.compaction_pri = 
rocksdb::CompactionPri::kRoundRobin; + dbOptions.blob_garbage_collection_age_cutoff = 0.4; + // dbOptions.blob_garbage_collection_force_threshold = 0.5; + // dbOptions.blob_cache = rocksdb::NewLRUCache(5UL << 30); + // dbOptions.prepopulate_blob_cache = rocksdb::PrepopulateBlobCache::kFlushOnly; + + // dbOptions.env; + // dbOptions.sst_file_manager = std::shared_ptr(rocksdb::NewSstFileManager(dbOptions.env)); + // dbOptions.sst_file_manager->SetStatisticsPtr(dbOptions.statistics); + + // compression options + // dbOptions.compression = rocksdb::CompressionType::kLZ4Compression; + // dbOptions.bottommost_compression = rocksdb::CompressionType::kZSTD; + + // block cache options + rocksdb::BlockBasedTableOptions table_options; + // table_options.block_cache = rocksdb::NewSimCache(rocksdb::NewLRUCache(1UL << 30), (8UL << 30), -1); + table_options.block_cache = rocksdb::NewLRUCache(3UL << 30); + // table_options.no_block_cache = true; + + // filter options + table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + table_options.optimize_filters_for_memory = true; + + dbOptions.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); + } + + if (usdDirectIO) { + dbOptions.use_direct_io_for_flush_and_compaction = true; + dbOptions.use_direct_reads = true; + } + + auto s = rocksdb::DB::Open(dbOptions, dbPath, &db); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: New Rocksdb: %s\n", filePath); + if (s != rocksdb::Status::OK()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "\e[0;31mRocksdb Open Error\e[0m: %s\n", s.getState()); + } + } + + ~RocksDBIO() override { + /* + std::string stats; + db->GetProperty("rocksdb.stats", &stats); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "RocksDB Status: %s\n%s", dbPath.c_str(),stats.c_str()); + */ + + if (db) { + ShutDown(); + } + } + + void ShutDown() override { + db->Close(); + //DestroyDB(dbPath, dbOptions); + delete db; + db = nullptr; + } + + ErrorCode Get(const std::string& key, std::string* 
value) override {
+            auto s = db->Get(rocksdb::ReadOptions(), key, value);
+            if (s == rocksdb::Status::OK()) {
+                return ErrorCode::Success;
+            }
+            else {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "\e[0;31mError in Get\e[0m: %s, key: %d\n", s.getState(), *((SizeType*)(key.data())));
+                return ErrorCode::Fail;
+            }
+        }
+
+        // Convenience overload: encodes the integer key as its raw bytes and forwards
+        // to the string-keyed Get.
+        ErrorCode Get(SizeType key, std::string* value) override {
+            std::string k((char*)&key, sizeof(SizeType));
+            return Get(k, value);
+        }
+
+        // Batched lookup. Appends one value per key to *values, in key order.
+        // Returns ErrorCode::Fail at the first key whose status is not OK; values
+        // appended for earlier keys are left in *values. `timeout` is accepted for
+        // interface compatibility but is not used by this backend.
+        ErrorCode MultiGet(const std::vector<std::string>& keys, std::vector<std::string>* values, const std::chrono::microseconds &timeout = std::chrono::microseconds::max()) {
+            const size_t num_keys = keys.size();
+
+            // Vectors own the scratch arrays, so every return path releases them. The
+            // previous manual delete[] freed `statuses` before the error log read it.
+            std::vector<rocksdb::Slice> slice_keys;
+            slice_keys.reserve(num_keys);
+            for (const auto& key : keys) {
+                slice_keys.emplace_back(key);
+            }
+            std::vector<rocksdb::PinnableSlice> slice_values(num_keys);
+            std::vector<rocksdb::Status> statuses(num_keys);
+
+            db->MultiGet(rocksdb::ReadOptions(), db->DefaultColumnFamily(),
+                num_keys, slice_keys.data(), slice_values.data(), statuses.data());
+
+            values->reserve(values->size() + num_keys);
+            for (size_t i = 0; i < num_keys; i++) {
+                if (statuses[i] != rocksdb::Status::OK()) {
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "\e[0;31mError in MultiGet\e[0m: %s, key: %d\n", statuses[i].getState(), *((SizeType*)(keys[i].data())));
+                    return ErrorCode::Fail;
+                }
+                values->push_back(slice_values[i].ToString());
+            }
+
+            return ErrorCode::Success;
+        }
+
+        // Integer-key overload: encodes each key as its raw bytes and forwards to the
+        // string-keyed MultiGet.
+        ErrorCode MultiGet(const std::vector<SizeType>& keys, std::vector<std::string>* values, const std::chrono::microseconds &timeout = std::chrono::microseconds::max()) {
+            std::vector<std::string> str_keys;
+            str_keys.reserve(keys.size());
+
+            for (const auto& key : keys) {
+                str_keys.emplace_back((char*)(&key), sizeof(SizeType));
+            }
+
+            return MultiGet(str_keys, values, timeout);
+        }
+
+        ErrorCode Put(const std::string& key, const std::string& value) override {
+            auto s = db->Put(rocksdb::WriteOptions(), key, value);
+            if
(s == rocksdb::Status::OK()) { + return ErrorCode::Success; + } + else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "\e[0;31mError in Put\e[0m: %s, key: %d\n", s.getState(), *((SizeType*)(key.data()))); + return ErrorCode::Fail; + } + } + + ErrorCode Put(SizeType key, const std::string& value) override { + std::string k((char*)&key, sizeof(SizeType)); + return Put(k, value); + } + + ErrorCode Merge(SizeType key, const std::string& value) { + if (value.empty()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Error! empty append posting!\n"); + } + std::string k((char*)&key, sizeof(SizeType)); + auto s = db->Merge(rocksdb::WriteOptions(), k, value); + if (s == rocksdb::Status::OK()) { + return ErrorCode::Success; + } + else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "\e[0;31mError in Merge\e[0m: %s, key: %d\n", s.getState(), key); + return ErrorCode::Fail; + } + } + + ErrorCode StartToScan(SizeType& key, std::string* value) { + it = db->NewIterator(rocksdb::ReadOptions()); + it->SeekToFirst(); + if (!it->Valid()) return ErrorCode::Fail; + key = *((SizeType*)(it->key().ToString().c_str())); + *value = it->value().ToString(); + return ErrorCode::Success; + } + + ErrorCode NextToScan(SizeType& key, std::string* value) { + it->Next(); + if (!it->Valid()) return ErrorCode::Fail; + key = *((SizeType*)(it->key().ToString().c_str())); + *value = it->value().ToString(); + return ErrorCode::Success; + } + + ErrorCode Delete(SizeType key) override { + std::string k((char*)&key, sizeof(SizeType)); + auto s = db->Delete(rocksdb::WriteOptions(), k); + if (s == rocksdb::Status::OK()) { + return ErrorCode::Success; + } + else { + return ErrorCode::Fail; + } + } + + ErrorCode DeleteRange(SizeType start, SizeType end) override { + std::string string_start((char*)&start, sizeof(SizeType)); + rocksdb::Slice slice_start = rocksdb::Slice(string_start); + std::string string_end((char*)&end, sizeof(SizeType)); + rocksdb::Slice slice_end = rocksdb::Slice(string_end); + auto s = 
db->DeleteRange(rocksdb::WriteOptions(), db->DefaultColumnFamily(), slice_start, slice_end);
+            if (s == rocksdb::Status::OK()) {
+                return ErrorCode::Success;
+            }
+            else {
+                return ErrorCode::Fail;
+            }
+        }
+
+        // Runs a manual compaction over the whole key range (null begin/end) and logs
+        // an error if RocksDB reports a non-OK status.
+        void ForceCompaction() {
+            /*
+            std::string stats;
+            db->GetProperty("rocksdb.stats", &stats);
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "RocksDB Status:\n%s", stats.c_str());
+            */
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start Compaction\n");
+            auto s = db->CompactRange(rocksdb::CompactRangeOptions(), nullptr, nullptr);
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Finish Compaction\n");
+
+            if (s != rocksdb::Status::OK()) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "\e[0;31mRocksdb Compact Error\e[0m: %s\n", s.getState());
+            }
+            /*
+            db->GetProperty("rocksdb.stats", &stats);
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "RocksDB Status:\n%s", stats.c_str());
+            */
+        }
+
+        // Logs the RocksDB statistics object when one is attached to dbOptions;
+        // otherwise logs that statistics are not enabled.
+        void GetStat() {
+            if (dbOptions.statistics != nullptr)
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "RocksDB statistics:\n %s\n", dbOptions.statistics->ToString().c_str());
+            else
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "DB statistics not set!\n");
+        }
+
+        // Writes a RocksDB checkpoint of the open database to "<prefix>_rocksdb".
+        // Returns ErrorCode::Fail (and logs the RocksDB status) if the checkpoint
+        // helper cannot be created or the checkpoint itself fails; previously both
+        // statuses were ignored and the helper object was leaked.
+        ErrorCode Checkpoint(std::string prefix) {
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "RocksDB: checkpoint\n");
+            rocksdb::Checkpoint* checkpoint_ptr = nullptr;
+            auto s = rocksdb::Checkpoint::Create(db, &checkpoint_ptr);
+            if (!s.ok() || checkpoint_ptr == nullptr) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "RocksDB: fail to create checkpoint object: %s\n", s.ToString().c_str());
+                delete checkpoint_ptr;
+                return ErrorCode::Fail;
+            }
+            std::string filename = prefix + "_rocksdb";
+            s = checkpoint_ptr->CreateCheckpoint(filename);
+            // The helper is only needed for this one call; release it on every path.
+            delete checkpoint_ptr;
+            if (!s.ok()) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "RocksDB: fail to write checkpoint %s: %s\n", filename.c_str(), s.ToString().c_str());
+                return ErrorCode::Fail;
+            }
+            return ErrorCode::Success;
+        }
+
+    private:
+        std::string dbPath;
+        rocksdb::DB* db{};
+        rocksdb::Options dbOptions;
+        // Scan cursor used by StartToScan/NextToScan; null until a scan is started.
+        rocksdb::Iterator* it{};
+    };
+}
+#endif // _SPTAG_SPANN_EXTRAROCKSDBCONTROLLER_H_
diff --git a/AnnService/inc/Core/SPANN/ExtraSPDKController.h b/AnnService/inc/Core/SPANN/ExtraSPDKController.h
new file mode 100644
index 00000000..9f09a839
--- /dev/null
+++ b/AnnService/inc/Core/SPANN/ExtraSPDKController.h
@@ -0,0 +1,483 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +#ifndef _SPTAG_SPANN_EXTRASPDKCONTROLLER_H_ +#define _SPTAG_SPANN_EXTRASPDKCONTROLLER_H_ + +#include "inc/Helper/KeyValueIO.h" +#include "inc/Core/Common/Dataset.h" +#include "inc/Core/VectorIndex.h" +#include "inc/Helper/ThreadPool.h" +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "spdk/env.h" +#include "spdk/event.h" +#include "spdk/log.h" +#include "spdk/thread.h" +#include "spdk/bdev.h" +} + +namespace SPTAG::SPANN +{ + typedef std::int64_t AddressType; + class SPDKIO : public Helper::KeyValueIO + { + class BlockController { + private: + static constexpr const char* kUseMemImplEnv = "SPFRESH_SPDK_USE_MEM_IMPL"; + static constexpr AddressType kMemImplMaxNumBlocks = (1ULL << 30) >> PageSizeEx; // 1GB + static constexpr const char* kUseSsdImplEnv = "SPFRESH_SPDK_USE_SSD_IMPL"; + static constexpr AddressType kSsdImplMaxNumBlocks = (1ULL << 40) >> PageSizeEx; // 1T + // static constexpr AddressType kSsdImplMaxNumBlocks = 1700*1024*256; // 1.7T + static constexpr const char* kSpdkConfEnv = "SPFRESH_SPDK_CONF"; + static constexpr const char* kSpdkBdevNameEnv = "SPFRESH_SPDK_BDEV"; + static constexpr const char* kSpdkIoDepth = "SPFRESH_SPDK_IO_DEPTH"; + static constexpr int kSsdSpdkDefaultIoDepth = 1024; + + tbb::concurrent_queue m_blockAddresses; + tbb::concurrent_queue m_blockAddresses_reserve; + + bool m_useSsdImpl = false; + const char* m_ssdSpdkBdevName = nullptr; + pthread_t m_ssdSpdkTid; + volatile bool m_ssdSpdkThreadStartFailed = false; + volatile bool m_ssdSpdkThreadReady = false; + volatile bool m_ssdSpdkThreadExiting = false; + struct spdk_bdev *m_ssdSpdkBdev = nullptr; + struct spdk_bdev_desc *m_ssdSpdkBdevDesc = nullptr; + struct spdk_io_channel *m_ssdSpdkBdevIoChannel = nullptr; + + int m_ssdSpdkIoDepth = kSsdSpdkDefaultIoDepth; + struct SubIoRequest { + tbb::concurrent_queue* completed_sub_io_requests; + void* app_buff; + void* dma_buff; + AddressType real_size; + AddressType offset; + bool is_read; + 
BlockController* ctrl; + int posting_id; + }; + tbb::concurrent_queue m_submittedSubIoRequests; + struct IoContext { + std::vector sub_io_requests; + std::vector free_sub_io_requests; + tbb::concurrent_queue completed_sub_io_requests; + int in_flight = 0; + }; + static thread_local struct IoContext m_currIoContext; + + static int m_ssdInflight; + + bool m_useMemImpl = false; + static std::unique_ptr m_memBuffer; + + std::mutex m_initMutex; + int m_numInitCalled = 0; + + int m_batchSize; + static int m_ioCompleteCount; + int m_preIOCompleteCount = 0; + std::chrono::time_point m_preTime = std::chrono::high_resolution_clock::now(); + + static void* InitializeSpdk(void* args); + + static void SpdkStart(void* args); + + static void SpdkIoLoop(void *arg); + + static void SpdkBdevEventCallback(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx); + + static void SpdkBdevIoCallback(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); + + static void SpdkStop(void* args); + public: + bool Initialize(int batchSize); + + // get p_size blocks from front, and fill in p_data array + bool GetBlocks(AddressType* p_data, int p_size); + + // release p_size blocks, put them at the end of the queue + bool ReleaseBlocks(AddressType* p_data, int p_size); + + // read a posting list. p_data[0] is the total data size, + // p_data[1], p_data[2], ..., p_data[((p_data[0] + PageSize - 1) >> PageSizeEx)] are the addresses of the blocks + // concat all the block contents together into p_value string. + bool ReadBlocks(AddressType* p_data, std::string* p_value, const std::chrono::microseconds &timeout = std::chrono::microseconds::max()); + + // parallel read a list of posting lists. 
+            bool ReadBlocks(std::vector<AddressType*>& p_data, std::vector<std::string>* p_values, const std::chrono::microseconds &timeout = std::chrono::microseconds::max());
+
+            // write p_value into p_size blocks start from p_data
+            bool WriteBlocks(AddressType* p_data, int p_size, const std::string& p_value);
+
+            bool IOStatistics();
+
+            bool ShutDown();
+
+            // Number of block addresses currently in the free queue. Uses unsafe_size(),
+            // so the value is only a snapshot if other threads touch the queue.
+            int RemainBlocks() {
+                return m_blockAddresses.unsafe_size();
+            }
+
+            // Persists the free-block pool to "<prefix>_blockpool": first moves every
+            // address from the reserve queue back into the free queue, then writes the
+            // block count followed by each free address.
+            // Returns FailedCreateFile if the output file cannot be opened; IOBINARY
+            // handles write failures (project macro, defined elsewhere).
+            ErrorCode Checkpoint(std::string prefix) {
+                std::string filename = prefix + "_blockpool";
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPDK: saving block pool\n");
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Reload reserved blocks!\n");
+                AddressType currBlockAddress = 0;
+                // Drain until the reserve queue is empty. The old loop compared a rising
+                // counter against the shrinking unsafe_size(), so it stopped after about
+                // half the entries, and it pushed a stale address when try_pop failed.
+                while (m_blockAddresses_reserve.try_pop(currBlockAddress)) {
+                    m_blockAddresses.push(currBlockAddress);
+                }
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Reload Finish!\n");
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save blockpool To %s\n", filename.c_str());
+                auto ptr = f_createIO();
+                if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
+                int blocks = RemainBlocks();
+                IOBINARY(ptr, WriteBinary, sizeof(SizeType), (char*)&blocks);
+                for (auto it = m_blockAddresses.unsafe_begin(); it != m_blockAddresses.unsafe_end(); it++) {
+                    IOBINARY(ptr, WriteBinary, sizeof(AddressType), (char*)&(*it));
+                }
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save Finish!\n");
+                return ErrorCode::Success;
+            }
+
+            ErrorCode Recovery(std::string prefix, int batchSize) {
+                std::lock_guard lock(m_initMutex);
+                m_numInitCalled++;
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPDK Recovery: Loading block pool\n");
+                std::string filename = prefix + "_blockpool";
+                auto ptr = f_createIO();
+                if (ptr == nullptr || !ptr->Initialize(filename.c_str(), std::ios::binary | std::ios::in)) {
+                    return ErrorCode::FailedCreateFile;
+                }
+                int blocks;
+                IOBINARY(ptr, ReadBinary, sizeof(SizeType),
(char*)&blocks); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPDK Recovery: Reading %d blocks to pool\n", blocks); + AddressType currBlockAddress = 0; + for (int i = 0; i < blocks; i++) { + IOBINARY(ptr, ReadBinary, sizeof(AddressType), (char*)&(currBlockAddress)); + m_blockAddresses.push(currBlockAddress); + } + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPDK Recovery: Initializing SPDK\n"); + const char* useMemImplEnvStr = getenv(kUseMemImplEnv); + m_useMemImpl = useMemImplEnvStr && !strcmp(useMemImplEnvStr, "1"); + const char* useSsdImplEnvStr = getenv(kUseSsdImplEnv); + m_useSsdImpl = useSsdImplEnvStr && !strcmp(useSsdImplEnvStr, "1"); + if (m_useMemImpl) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"SPDK: Not support mem controller!\n"); + exit(0); + } else if (m_useSsdImpl) { + if (m_numInitCalled == 1) { + m_batchSize = batchSize; + pthread_create(&m_ssdSpdkTid, NULL, &InitializeSpdk, this); + while (!m_ssdSpdkThreadReady && !m_ssdSpdkThreadStartFailed); + if (m_ssdSpdkThreadStartFailed) { + fprintf(stderr, "SPDKIO::BlockController::Initialize failed\n"); + return ErrorCode::Fail; + } + } + // Create sub I/O request pool + m_currIoContext.sub_io_requests.resize(m_ssdSpdkIoDepth); + m_currIoContext.in_flight = 0; + uint32_t buf_align; + buf_align = spdk_bdev_get_buf_align(m_ssdSpdkBdev); + for (auto &sr : m_currIoContext.sub_io_requests) { + sr.completed_sub_io_requests = &(m_currIoContext.completed_sub_io_requests); + sr.app_buff = nullptr; + sr.dma_buff = spdk_dma_zmalloc(PageSize, buf_align, NULL); + sr.ctrl = this; + m_currIoContext.free_sub_io_requests.push_back(&sr); + } + return ErrorCode::Success; + } else { + fprintf(stderr, "SPDKIO::BlockController::Initialize failed\n"); + return ErrorCode::Fail; + } + return ErrorCode::Success; + } + }; + + class CompactionJob : public Helper::ThreadPool::Job + { + private: + SPDKIO* m_spdkIO; + + public: + CompactionJob(SPDKIO* spdkIO): m_spdkIO(spdkIO) {} + + ~CompactionJob() {} + + inline void 
exec(IAbortOperation* p_abort) override { + m_spdkIO->ForceCompaction(); + } + }; + + public: + SPDKIO(const char* filePath, SizeType blockSize, SizeType capacity, SizeType postingBlocks, SizeType bufferSize = 1024, int batchSize = 64, bool recovery = false, int compactionThreads = 1) + { + m_mappingPath = std::string(filePath); + m_blockLimit = postingBlocks + 1; + m_bufferLimit = bufferSize; + if (recovery) { + m_mappingPath += "_blockmapping"; + Load(m_mappingPath, blockSize, capacity); + } else if (fileexists(m_mappingPath.c_str())) { + Load(m_mappingPath, blockSize, capacity); + } else { + m_pBlockMapping.Initialize(0, 1, blockSize, capacity); + } + for (int i = 0; i < bufferSize; i++) { + m_buffer.push((uintptr_t)(new AddressType[m_blockLimit])); + } + m_compactionThreadPool = std::make_shared(); + m_compactionThreadPool->init(compactionThreads); + if (recovery) { + if (m_pBlockController.Recovery(std::string(filePath), batchSize) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to Recover SPDK!\n"); + exit(0); + } + } else if (!m_pBlockController.Initialize(batchSize)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to Initialize SPDK!\n"); + exit(0); + } + m_shutdownCalled = false; + } + + ~SPDKIO() { + ShutDown(); + } + + void ShutDown() override { + if (m_shutdownCalled) { + return; + } + if (!m_mappingPath.empty()) Save(m_mappingPath); + for (int i = 0; i < m_pBlockMapping.R(); i++) { + if (At(i) != 0xffffffffffffffff) delete[]((AddressType*)At(i)); + } + while (!m_buffer.empty()) { + uintptr_t ptr; + if (m_buffer.try_pop(ptr)) delete[]((AddressType*)ptr); + } + m_pBlockController.ShutDown(); + m_shutdownCalled = true; + } + + inline uintptr_t& At(SizeType key) { + return *(m_pBlockMapping[key]); + } + + ErrorCode Get(SizeType key, std::string* value) override { + if (key >= m_pBlockMapping.R()) return ErrorCode::Fail; + + if (m_pBlockController.ReadBlocks((AddressType*)At(key), value)) return ErrorCode::Success; + return 
ErrorCode::Fail; + } + + ErrorCode MultiGet(const std::vector& keys, std::vector* values, const std::chrono::microseconds &timeout = std::chrono::microseconds::max()) { + std::vector blocks; + for (SizeType key : keys) { + if (key < m_pBlockMapping.R()) blocks.push_back((AddressType*)At(key)); + else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to read key:%d total key number:%d\n", key, m_pBlockMapping.R()); + } + } + if (m_pBlockController.ReadBlocks(blocks, values, timeout)) return ErrorCode::Success; + return ErrorCode::Fail; + } + + ErrorCode Put(SizeType key, const std::string& value) override { + int blocks = ((value.size() + PageSize - 1) >> PageSizeEx); + if (blocks >= m_blockLimit) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failt to put key:%d value:%lld since value too long!\n", key, value.size()); + return ErrorCode::Fail; + } + int delta = key + 1 - m_pBlockMapping.R(); + if (delta > 0) { + { + std::lock_guard lock(m_updateMutex); + m_pBlockMapping.AddBatch(delta); + } + } + if (At(key) == 0xffffffffffffffff) { + if (m_buffer.unsafe_size() > m_bufferLimit) { + uintptr_t tmpblocks; + while (!m_buffer.try_pop(tmpblocks)); + At(key) = tmpblocks; + } + else { + At(key) = (uintptr_t)(new AddressType[m_blockLimit]); + } + memset((AddressType*)At(key), -1, sizeof(AddressType) * m_blockLimit); + } + int64_t* postingSize = (int64_t*)At(key); + if (*postingSize < 0) { + m_pBlockController.GetBlocks(postingSize + 1, blocks); + m_pBlockController.WriteBlocks(postingSize + 1, blocks, value); + *postingSize = value.size(); + } + else { + uintptr_t tmpblocks; + while (!m_buffer.try_pop(tmpblocks)); + m_pBlockController.GetBlocks((AddressType*)tmpblocks + 1, blocks); + m_pBlockController.WriteBlocks((AddressType*)tmpblocks + 1, blocks, value); + *((int64_t*)tmpblocks) = value.size(); + + m_pBlockController.ReleaseBlocks(postingSize + 1, (*postingSize + PageSize -1) >> PageSizeEx); + while (InterlockedCompareExchange(&At(key), tmpblocks, 
(uintptr_t)postingSize) != (uintptr_t)postingSize) {
+                    postingSize = (int64_t*)At(key);
+                }
+                m_buffer.push((uintptr_t)postingSize);
+            }
+            return ErrorCode::Success;
+        }
+
+        /// Append `value` to the posting stored under `key`.
+        ///
+        /// Each mapping slot points at an address table: entry 0 is the posting size in
+        /// bytes, the following entries are block addresses. When the last block is only
+        /// partially filled it is read back, extended with `value` and rewritten through a
+        /// fresh table taken from m_buffer, which is then swapped into the slot; otherwise
+        /// new blocks are appended to the existing table in place.
+        ///
+        /// @return ErrorCode::Fail when the key is out of range, holds no posting, or the
+        ///         merged posting would not fit in m_blockLimit table entries.
+        ErrorCode Merge(SizeType key, const std::string& value) override {
+            if (key < 0 || key >= m_pBlockMapping.R()) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Key range error: key: %d, mapping size: %d\n", key, m_pBlockMapping.R());
+                return ErrorCode::Fail;
+            }
+
+            // Delete() leaves an all-ones tombstone in the slot; that value is not a pointer
+            // and must not be dereferenced. Rows reloaded by Load() for deleted keys are
+            // real tables filled with 0xff, i.e. they carry a negative size.
+            int64_t* postingSize = (At(key) == 0xffffffffffffffff) ? nullptr : (int64_t*)At(key);
+            if (postingSize == nullptr || *postingSize < 0) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to merge key:%d since it has no posting!\n", key);
+                return ErrorCode::Fail;
+            }
+
+            auto newSize = *postingSize + value.size();
+            int newblocks = ((newSize + PageSize - 1) >> PageSizeEx);
+            if (newblocks >= m_blockLimit) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to merge key:%d value:%lld since value too long!\n", key, (long long)newSize);
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Origin Size: %lld, merge size: %lld\n", (long long)*postingSize, (long long)value.size());
+                return ErrorCode::Fail;
+            }
+
+            auto sizeInPage = (*postingSize) % PageSize;
+            int oldblocks = (*postingSize >> PageSizeEx);   // number of completely filled blocks
+            int allocblocks = newblocks - oldblocks;
+            if (sizeInPage != 0) {
+                // Read the partially filled tail block and prepend it to the new data.
+                std::string newValue;
+                AddressType readreq[] = { sizeInPage, *(postingSize + 1 + oldblocks) };
+                m_pBlockController.ReadBlocks(readreq, &newValue);
+                newValue += value;
+
+                // Build the replacement table in a spare buffer (spins until one is free).
+                uintptr_t tmpblocks;
+                while (!m_buffer.try_pop(tmpblocks));
+                memcpy((AddressType*)tmpblocks, postingSize, sizeof(AddressType) * (oldblocks + 1));
+                m_pBlockController.GetBlocks((AddressType*)tmpblocks + 1 + oldblocks, allocblocks);
+                m_pBlockController.WriteBlocks((AddressType*)tmpblocks + 1 + oldblocks, allocblocks, newValue);
+                *((int64_t*)tmpblocks) = newSize;
+
+                // Give back the old tail block, publish the new table, recycle the old one.
+                m_pBlockController.ReleaseBlocks(postingSize + 1 + oldblocks, 1);
+                while (InterlockedCompareExchange(&At(key), tmpblocks, (uintptr_t)postingSize) != (uintptr_t)postingSize) {
+                    postingSize = (int64_t*)At(key);
+                }
+                m_buffer.push((uintptr_t)postingSize);
+            }
+            else {
+                // The posting ends on a block boundary: just append new blocks in place.
+                m_pBlockController.GetBlocks(postingSize + 1 + oldblocks, allocblocks);
+                m_pBlockController.WriteBlocks(postingSize + 1 + oldblocks, allocblocks, value);
+                *postingSize = newSize;
+            }
+            return ErrorCode::Success;
+        }
+
+        /// Remove the posting stored under `key`, releasing its blocks and recycling its
+        /// address table, and leave an all-ones tombstone in the mapping slot.
+        ///
+        /// @return ErrorCode::Fail when the key is out of range or already holds no posting.
+        ErrorCode Delete(SizeType key) override {
+            if (key < 0 || key >= m_pBlockMapping.R()) return ErrorCode::Fail;
+            // A second Delete() on the same key would otherwise dereference the tombstone.
+            if (At(key) == 0xffffffffffffffff) return ErrorCode::Fail;
+            int64_t* postingSize = (int64_t*)At(key);
+            if (*postingSize < 0) return ErrorCode::Fail;
+
+            int blocks = ((*postingSize + PageSize - 1) >> PageSizeEx);
+            m_pBlockController.ReleaseBlocks(postingSize + 1, blocks);
+            m_buffer.push((uintptr_t)postingSize);
+            At(key) = 0xffffffffffffffff;
+            return ErrorCode::Success;
+        }
+
+        /// Persist the block mapping to m_mappingPath.
+        void ForceCompaction() override {
+            Save(m_mappingPath);
+        }
+
+        /// Log the number of free blocks, their size in GB, and the controller's I/O statistics.
+        void GetStat() override {
+            int remainBlocks = m_pBlockController.RemainBlocks();
+            // blocks * 2^PageSizeEx bytes / 2^30 bytes per GB, without rounding down to
+            // multiples of 4 GB first.
+            int remainGB = remainBlocks >> (30 - PageSizeEx);
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Remain %d blocks, totally %d GB\n", remainBlocks, remainGB);
+            m_pBlockController.IOStatistics();
+        }
+
+        /// Load the block mapping written by Save().
+        ///
+        /// The file starts with the row count and the number of table entries per row;
+        /// m_blockLimit is raised to the stored entry count when that is larger. Every row
+        /// gets a freshly allocated table of m_blockLimit entries, of which the stored
+        /// entries are read from the file.
+        ErrorCode Load(std::string path, SizeType blockSize, SizeType capacity) {
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load mapping From %s\n", path.c_str());
+            auto ptr = f_createIO();
+            if (ptr == nullptr || !ptr->Initialize(path.c_str(), std::ios::binary | std::ios::in)) return ErrorCode::FailedOpenFile;
+
+            SizeType CR, mycols;
+            IOBINARY(ptr, ReadBinary, sizeof(SizeType), (char*)&CR);
+            IOBINARY(ptr, ReadBinary, sizeof(SizeType), (char*)&mycols);
+            if (mycols > m_blockLimit) m_blockLimit = mycols;
+
+            m_pBlockMapping.Initialize(CR, 1, blockSize, capacity);
+            for (int i = 0; i < CR; i++) {
+                At(i) = (uintptr_t)(new AddressType[m_blockLimit]);
+                IOBINARY(ptr, ReadBinary, sizeof(AddressType) * mycols, (char*)At(i));
+            }
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load mapping (%d,%d) Finish!\n", CR, mycols);
+            return ErrorCode::Success;
+        }
+
+        /// Write the block mapping to `path`: row count, m_blockLimit, then one table of
+        /// m_blockLimit entries per row. Tombstoned rows are written as all-ones tables.
+        ErrorCode Save(std::string path) {
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save mapping To %s\n", path.c_str());
+            auto ptr = f_createIO();
+            if (ptr == nullptr || !ptr->Initialize(path.c_str(), std::ios::binary | std::ios::out)) return ErrorCode::FailedCreateFile;
+
+            SizeType CR = m_pBlockMapping.R();
+            IOBINARY(ptr, WriteBinary, sizeof(SizeType), (char*)&CR);
+            IOBINARY(ptr, WriteBinary, sizeof(SizeType), (char*)&m_blockLimit);
+            std::vector empty(m_blockLimit, 0xffffffffffffffff);
+            for (int i = 0; i < CR; i++) {
+                if (At(i) == 0xffffffffffffffff) {
+                    IOBINARY(ptr, WriteBinary, sizeof(AddressType) * m_blockLimit, (char*)(empty.data()));
+                }
+                else {
+                    int64_t* postingSize = (int64_t*)At(i);
+                    IOBINARY(ptr, WriteBinary, sizeof(AddressType) * m_blockLimit, (char*)postingSize);
+                }
+            }
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Save mapping (%d,%d) Finish!\n", CR, m_blockLimit);
+            return ErrorCode::Success;
+        }
+
+        /// Initialize the block controller for the calling thread.
+        bool Initialize(bool debug = false) override {
+            if (debug) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Initialize SPDK for new threads\n");
+            return m_pBlockController.Initialize(64);
+        }
+
+        /// Shut the block controller down for the calling thread.
+        bool ExitBlockController(bool debug = false) override {
+            if (debug) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Exit SPDK for thread\n");
+            return m_pBlockController.ShutDown();
+        }
+
+        /// Save the block mapping to "<prefix>_blockmapping", then checkpoint the block
+        /// controller. A failed mapping save is reported instead of being ignored.
+        ErrorCode Checkpoint(std::string prefix) override {
+            std::string filename = prefix + "_blockmapping";
+            SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPDK: saving block mapping\n");
+            ErrorCode ret = Save(filename);
+            if (ret != ErrorCode::Success) return ret;
+            return m_pBlockController.Checkpoint(prefix);
+        }
+
+    private:
+        std::string m_mappingPath;
+        SizeType m_blockLimit;
+        COMMON::Dataset m_pBlockMapping;
+        SizeType m_bufferLimit;
+        tbb::concurrent_queue m_buffer;
+
+        //tbb::concurrent_hash_map *m_pCurrentCache, *m_pNextCache;
+        std::shared_ptr m_compactionThreadPool;
+        BlockController m_pBlockController;
+
+        bool m_shutdownCalled;
+        std::mutex m_updateMutex;
+    };
+}
+#endif // _SPTAG_SPANN_EXTRASPDKCONTROLLER_H_
diff --git a/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h b/AnnService/inc/Core/SPANN/ExtraStaticSearcher.h
similarity index 96%
rename from AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h
rename to AnnService/inc/Core/SPANN/ExtraStaticSearcher.h
index 5acd324f..c7285484 100644
---
a/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraStaticSearcher.h @@ -1,8 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#ifndef _SPTAG_SPANN_EXTRASEARCHER_H_ -#define _SPTAG_SPANN_EXTRASEARCHER_H_ +#ifndef _SPTAG_SPANN_EXTRASTATICSEARCHER_H_ +#define _SPTAG_SPANN_EXTRASTATICSEARCHER_H_ #include "inc/Helper/VectorSetReader.h" #include "inc/Helper/AsyncFileReader.h" @@ -113,10 +113,10 @@ namespace SPTAG } \ template - class ExtraFullGraphSearcher : public IExtraSearcher + class ExtraStaticSearcher : public IExtraSearcher { public: - ExtraFullGraphSearcher() + ExtraStaticSearcher() { m_enableDeltaEncoding = false; m_enablePostingListRearrange = false; @@ -124,11 +124,11 @@ namespace SPTAG m_enableDictTraining = true; } - virtual ~ExtraFullGraphSearcher() + virtual ~ExtraStaticSearcher() { } - virtual bool LoadIndex(Options& p_opt) { + virtual bool LoadIndex(Options& p_opt, COMMON::VersionLabel& p_versionMap, std::shared_ptr m_vectorTranslateMap, std::shared_ptr m_index) { m_extraFullGraphFile = p_opt.m_indexDirectory + FolderSep + p_opt.m_ssdIndex; std::string curFile = m_extraFullGraphFile; p_opt.m_searchPostingPageLimit = max(p_opt.m_searchPostingPageLimit, static_cast((p_opt.m_postingVectorLimit * (p_opt.m_dim * sizeof(ValueType) + sizeof(int)) + PageSize - 1) / PageSize)); @@ -176,10 +176,10 @@ namespace SPTAG m_enableDataCompression = p_opt.m_enableDataCompression; m_enableDictTraining = p_opt.m_enableDictTraining; - if (m_enablePostingListRearrange) m_parsePosting = &ExtraFullGraphSearcher::ParsePostingListRearrange; - else m_parsePosting = &ExtraFullGraphSearcher::ParsePostingList; - if (m_enableDeltaEncoding) m_parseEncoding = &ExtraFullGraphSearcher::ParseDeltaEncoding; - else m_parseEncoding = &ExtraFullGraphSearcher::ParseEncoding; + if (m_enablePostingListRearrange) m_parsePosting = &ExtraStaticSearcher::ParsePostingListRearrange; + else m_parsePosting = 
&ExtraStaticSearcher::ParsePostingList;
+            if (m_enableDeltaEncoding) m_parseEncoding = &ExtraStaticSearcher::ParseDeltaEncoding;
+            else m_parseEncoding = &ExtraStaticSearcher::ParseEncoding;

             m_listPerFile = static_cast((m_totalListCount + m_indexFiles.size() - 1) / m_indexFiles.size());

@@ -401,7 +401,7 @@ namespace SPTAG
                 return postingListFullData;
             }

-            bool BuildIndex(std::shared_ptr& p_reader, std::shared_ptr p_headIndex, Options& p_opt) {
+            bool BuildIndex(std::shared_ptr& p_reader, std::shared_ptr p_headIndex, Options& p_opt, COMMON::VersionLabel& p_versionMap, SizeType upperBound = -1) {
                 std::string outputFile = p_opt.m_indexDirectory + FolderSep + p_opt.m_ssdIndex;
                 if (outputFile.empty())
                 {
@@ -417,6 +417,7 @@ namespace SPTAG
                     return false;
                 }

+
                 if (fileexists((p_opt.m_indexDirectory + FolderSep + p_opt.m_headIDFile).c_str())) {
                     auto ptr = SPTAG::f_createIO();
                     if (ptr == nullptr || !ptr->Initialize((p_opt.m_indexDirectory + FolderSep + p_opt.m_headIDFile).c_str(), std::ios::binary | std::ios::in)) {
@@ -439,6 +440,7 @@ namespace SPTAG
                     fullCount = fullVectors->Count();
                     vectorInfoSize = fullVectors->PerVectorDataSize() + sizeof(int);
                 }
+                if (upperBound > 0) fullCount = upperBound;

                 Selection selections(static_cast(fullCount) * p_opt.m_replicaCount, p_opt.m_tmpdir);
                 SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Full vector count:%d Edge bytes:%llu selection size:%zu, capacity size:%zu\n", fullCount, sizeof(Edge), selections.m_selections.size(), selections.m_selections.capacity());
@@ -1384,6 +1386,27 @@ namespace SPTAG
                 SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Time to write results:%.2lf sec.\n", ((double)std::chrono::duration_cast(t2 - t1).count()) + ((double)std::chrono::duration_cast(t2 - t1).count()) / 1000);
             }

+            /// Read the stored bytes of posting list `pid` into `posting`.
+            ///
+            /// The list's pages (listPageCount << PageSizeEx bytes at listOffset) are read
+            /// from its index file; the listEleCount * m_vectorInfoSize payload bytes that
+            /// start pageOffset bytes into that region are then moved to the front and
+            /// `posting` is trimmed to the payload.
+            ///
+            /// Writing is not supported by the static searcher: `write == true` logs an
+            /// error and terminates the process. A short read throws std::runtime_error.
+            void GetWritePosting(SizeType pid, std::string& posting, bool write = false) override {
+                if (write) {
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Unsupport write\n");
+                    exit(1);
+                }
+                ListInfo* listInfo = &(m_listInfos[pid]);
+                size_t totalBytes = (static_cast(listInfo->listPageCount) << PageSizeEx);
+                size_t realBytes = listInfo->listEleCount * m_vectorInfoSize;
+                posting.resize(totalBytes);
+                int fileid = m_oneContext? 0: pid / m_listPerFile;
+                Helper::DiskIO* indexFile = m_indexFiles[fileid].get();
+                auto numRead = indexFile->ReadBinary(totalBytes, (char*)posting.data(), listInfo->listOffset);
+                if (numRead != totalBytes) {
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "File %s read bytes, expected: %zu, acutal: %llu.\n", m_extraFullGraphFile.c_str(), totalBytes, numRead);
+                    throw std::runtime_error("File read mismatch");
+                }
+                // Source and destination live in the same buffer and overlap whenever
+                // pageOffset < realBytes, so memmove is required (memcpy would be undefined).
+                char* ptr = (char*)(posting.c_str());
+                memmove(ptr, ptr + listInfo->pageOffset, realBytes);
+                posting.resize(realBytes);
+            }
+
         private:

             std::string m_extraFullGraphFile;
@@ -1398,8 +1421,8 @@ namespace SPTAG
             bool m_enableDataCompression;
             bool m_enableDictTraining;

-            void (ExtraFullGraphSearcher::*m_parsePosting)(uint64_t&, uint64_t&, int, int);
-            void (ExtraFullGraphSearcher::*m_parseEncoding)(std::shared_ptr&, ListInfo*, ValueType*);
+            void (ExtraStaticSearcher::*m_parsePosting)(uint64_t&, uint64_t&, int, int);
+            void (ExtraStaticSearcher::*m_parseEncoding)(std::shared_ptr&, ListInfo*, ValueType*);

             int m_vectorInfoSize = 0;
             int m_iDataDimension = 0;
@@ -1411,4 +1434,4 @@ namespace SPTAG
     } // namespace SPANN
 } // namespace SPTAG

-#endif // _SPTAG_SPANN_EXTRASEARCHER_H_
+#endif // _SPTAG_SPANN_EXTRASTATICSEARCHER_H_
diff --git a/AnnService/inc/Core/SPANN/IExtraSearcher.h b/AnnService/inc/Core/SPANN/IExtraSearcher.h
index 6bf4eeb0..0b2710f3 100644
--- a/AnnService/inc/Core/SPANN/IExtraSearcher.h
+++ b/AnnService/inc/Core/SPANN/IExtraSearcher.h
@@ -7,12 +7,14 @@
 #include "Options.h"

 #include "inc/Core/VectorIndex.h"
+#include "inc/Core/Common/VersionLabel.h"
 #include "inc/Helper/AsyncFileReader.h"

 #include
 #include
 #include
 #include
+#include

 namespace SPTAG {
     namespace SPANN {
@@ -62,11 +64,94 @@ namespace SPTAG {
             double m_sleepLatency;

+            double m_compLatency;
+
+            double
m_diskReadLatency;
+
+            double m_exSetUpLatency;
+
             std::chrono::steady_clock::time_point m_searchRequestTime;

             int m_threadID;
         };

+        /// Counters and accumulated costs of the background update work
+        /// (append, split, reassign, merge, garbage collection).
+        struct IndexStats {
+            std::atomic_uint32_t m_headMiss{ 0 };
+            uint32_t m_appendTaskNum{ 0 };
+            uint32_t m_splitNum{ 0 };
+            uint32_t m_theSameHeadNum{ 0 };
+            uint32_t m_reAssignNum{ 0 };
+            uint32_t m_garbageNum{ 0 };
+            uint64_t m_reAssignScanNum{ 0 };
+            uint32_t m_mergeNum{ 0 };
+
+            //Split
+            double m_splitCost{ 0 };
+            double m_getCost{ 0 };
+            double m_putCost{ 0 };
+            double m_clusteringCost{ 0 };
+            double m_updateHeadCost{ 0 };
+            double m_reassignScanCost{ 0 };
+            double m_reassignScanIOCost{ 0 };
+
+            // Append
+            double m_appendCost{ 0 };
+            double m_appendIOCost{ 0 };
+
+            // reAssign
+            double m_reAssignCost{ 0 };
+            double m_selectCost{ 0 };
+            double m_reAssignAppendCost{ 0 };
+
+            // GC
+            double m_garbageCost{ 0 };
+
+            /// Log the counters.
+            /// @param finishedInsert number of insertions reported in the summary line
+            /// @param cost           also log total and per-operation costs
+            /// @param reset          zero every counter and cost after logging
+            void PrintStat(int finishedInsert, bool cost = false, bool reset = false) {
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "After %d insertion, head vectors split %d times, head missing %d times, same head %d times, reassign %d times, reassign scan %llu times, garbage collection %d times, merge %d times\n",
+                    finishedInsert, m_splitNum, m_headMiss.load(), m_theSameHeadNum, m_reAssignNum, (unsigned long long)m_reAssignScanNum, m_garbageNum, m_mergeNum);
+
+                if (cost) {
+                    // Average cost per operation; reports 0 instead of nan/inf when nothing was counted.
+                    auto perOp = [](double total, uint32_t count) { return count == 0 ? 0.0 : total / count; };
+
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "AppendTaskNum: %d, TotalCost: %.3lf us, PerCost: %.3lf us\n", m_appendTaskNum, m_appendCost, perOp(m_appendCost, m_appendTaskNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "AppendTaskNum: %d, AppendIO TotalCost: %.3lf us, PerCost: %.3lf us\n", m_appendTaskNum, m_appendIOCost, perOp(m_appendIOCost, m_appendTaskNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, TotalCost: %.3lf ms, PerCost: %.3lf ms\n", m_splitNum, m_splitCost, perOp(m_splitCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, Read TotalCost: %.3lf us, PerCost: %.3lf us\n", m_splitNum, m_getCost, perOp(m_getCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, Clustering TotalCost: %.3lf us, PerCost: %.3lf us\n", m_splitNum, m_clusteringCost, perOp(m_clusteringCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, UpdateHead TotalCost: %.3lf ms, PerCost: %.3lf ms\n", m_splitNum, m_updateHeadCost, perOp(m_updateHeadCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, Write TotalCost: %.3lf us, PerCost: %.3lf us\n", m_splitNum, m_putCost, perOp(m_putCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, ReassignScan TotalCost: %.3lf ms, PerCost: %.3lf ms\n", m_splitNum, m_reassignScanCost, perOp(m_reassignScanCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SplitNum: %d, ReassignScanIO TotalCost: %.3lf us, PerCost: %.3lf us\n", m_splitNum, m_reassignScanIOCost, perOp(m_reassignScanIOCost, m_splitNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "GCNum: %d, TotalCost: %.3lf us, PerCost: %.3lf us\n", m_garbageNum, m_garbageCost, perOp(m_garbageCost, m_garbageNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReassignNum: %d, TotalCost: %.3lf us, PerCost: %.3lf us\n", m_reAssignNum, m_reAssignCost, perOp(m_reAssignCost, m_reAssignNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReassignNum: %d, Select TotalCost: %.3lf us, PerCost: %.3lf us\n", m_reAssignNum, m_selectCost, perOp(m_selectCost, m_reAssignNum));
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ReassignNum: %d, ReassignAppend TotalCost: %.3lf us, PerCost: %.3lf us\n", m_reAssignNum, m_reAssignAppendCost, perOp(m_reAssignAppendCost, m_reAssignNum));
+                }
+
+                if (reset) {
+                    m_splitNum = 0;
+                    m_headMiss = 0;
+                    m_theSameHeadNum = 0;
+                    m_reAssignNum = 0;
+                    m_reAssignScanNum = 0;
+                    m_mergeNum = 0;
+                    m_garbageNum = 0;
+                    m_appendTaskNum = 0;
+                    m_splitCost = 0;
+                    m_clusteringCost = 0;
+                    m_garbageCost = 0;
+                    m_updateHeadCost = 0;
+                    m_getCost = 0;
+                    m_putCost = 0;
+                    m_reassignScanCost = 0;
+                    m_reassignScanIOCost = 0;
+                    m_appendCost = 0;
+                    m_appendIOCost = 0;
+                    m_reAssignCost = 0;
+                    m_selectCost = 0;
+                    m_reAssignAppendCost = 0;
+                }
+            }
+        };
+
         template
         class PageBuffer
         {
@@ -188,28 +273,48 @@ namespace SPTAG {
             {
             }

             virtual ~IExtraSearcher()
             {
             }

-            virtual bool LoadIndex(Options& p_options) = 0;
+            virtual bool LoadIndex(Options& p_options, COMMON::VersionLabel& p_versionMap, std::shared_ptr m_vectorTranslateMap, std::shared_ptr m_index) = 0;

             virtual void SearchIndex(ExtraWorkSpace* p_exWorkSpace,
                 QueryResult& p_queryResults,
                 std::shared_ptr p_index,
-                SearchStats* p_stats,
-                std::set* truth = nullptr,
-                std::map>* found = nullptr) = 0;
+                SearchStats* p_stats, std::set* truth = nullptr, std::map>* found = nullptr) = 0;

             virtual bool BuildIndex(std::shared_ptr& p_reader,
                 std::shared_ptr p_index,
-                Options& p_opt) = 0;
+                Options& p_opt, COMMON::VersionLabel& p_versionMap, SizeType upperBound = -1) = 0;
+
+            virtual ErrorCode GetPostingDebug(ExtraWorkSpace* p_exWorkSpace, std::shared_ptr p_index, SizeType vid, std::vector& VIDs, std::shared_ptr& vecs) = 0;
+
+            virtual void RefineIndex(std::shared_ptr& p_reader,
+                std::shared_ptr p_index) { return; }
+            virtual ErrorCode AddIndex(std::shared_ptr& p_vectorSet,
+                std::shared_ptr p_index, SizeType p_begin) { return ErrorCode::Undefined; }
+            virtual ErrorCode DeleteIndex(SizeType p_id) { return ErrorCode::Undefined; }
+
+            virtual bool AllFinished() { return false; }
+            virtual void GetDBStats() { return; }
+            virtual void GetIndexStats(int finishedInsert, bool cost, bool reset) { return; }
+            virtual void ForceCompaction() { return; }

             virtual bool CheckValidPosting(SizeType postingID) = 0;
+            virtual SizeType SearchVector(std::shared_ptr& p_vectorSet,
+                std::shared_ptr p_index, int testNum = 64, SizeType VID = -1) { return -1; }
+            virtual void ForceGC(VectorIndex* p_index) { return; }

-            virtual ErrorCode GetPostingDebug(ExtraWorkSpace* p_exWorkSpace, std::shared_ptr p_index, SizeType vid, std::vector& VIDs, std::shared_ptr& vecs) = 0;
+            virtual void GetWritePosting(SizeType pid, std::string& posting, bool write
= false) { return; } + + virtual bool Initialize() { return false; } + + virtual bool ExitBlockController() { return false; } + + virtual void Checkpoint(std::string prefix) { return; } }; } // SPANN } // SPTAG -#endif // _SPTAG_SPANN_IEXTRASEARCHER_H_ \ No newline at end of file +#endif // _SPTAG_SPANN_IEXTRASEARCHER_H_ diff --git a/AnnService/inc/Core/SPANN/Index.h b/AnnService/inc/Core/SPANN/Index.h index 0c2541c2..b56e497a 100644 --- a/AnnService/inc/Core/SPANN/Index.h +++ b/AnnService/inc/Core/SPANN/Index.h @@ -47,12 +47,21 @@ namespace SPTAG std::unordered_map m_headParameters; std::shared_ptr m_extraSearcher; + std::unique_ptr> m_workSpaceFactory; Options m_options; std::function m_fComputeDistance; int m_iBaseSquare; - std::unique_ptr> m_workSpaceFactory; + + std::mutex m_dataAddLock; + + std::shared_timed_mutex m_checkPointLock; + + COMMON::VersionLabel m_versionMap; + + public: + static thread_local std::shared_ptr m_workspace; public: Index() @@ -68,9 +77,10 @@ namespace SPTAG inline std::shared_ptr GetDiskIndex() { return m_extraSearcher; } inline Options* GetOptions() { return &m_options; } - inline SizeType GetNumSamples() const { return m_options.m_vectorSize; } + inline SizeType GetNumSamples() const { return m_versionMap.Count(); } inline DimensionType GetFeatureDim() const { return m_pQuantizer ? 
m_pQuantizer->ReconstructDim() : m_index->GetFeatureDim(); } - + inline SizeType GetValueSize() const { return m_options.m_dim * sizeof(T); } + inline int GetCurrMaxCheck() const { return m_options.m_maxCheck; } inline int GetNumThreads() const { return m_options.m_iSSDNumberOfThreads; } inline DistCalcMethod GetDistCalcMethod() const { return m_options.m_distCalcMethod; } @@ -78,7 +88,7 @@ namespace SPTAG inline VectorValueType GetVectorValueType() const { return GetEnumValueType(); } void SetQuantizer(std::shared_ptr quantizer); - + inline float AccurateDistance(const void* pX, const void* pY) const { if (m_options.m_distCalcMethod == DistCalcMethod::L2) return m_fComputeDistance((const T*)pX, (const T*)pY, m_options.m_dim); @@ -130,16 +140,17 @@ namespace SPTAG std::string GetParameter(const char* p_param, const char* p_section = nullptr) const; inline const void* GetSample(const SizeType idx) const { return nullptr; } - inline SizeType GetNumDeleted() const { return 0; } + inline SizeType GetNumDeleted() const { return m_versionMap.GetDeleteCount(); } inline bool NeedRefine() const { return false; } - ErrorCode RefineSearchIndex(QueryResult &p_query, bool p_searchDeleted = false) const { return ErrorCode::Undefined; } ErrorCode SearchTree(QueryResult& p_query) const { return ErrorCode::Undefined; } - ErrorCode AddIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, std::shared_ptr p_metadataSet, bool p_withMetaIndex = false, bool p_normalized = false) { return ErrorCode::Undefined; } - ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum) { return ErrorCode::Undefined; } - ErrorCode DeleteIndex(const SizeType& p_id) { return ErrorCode::Undefined; } + ErrorCode AddIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, std::shared_ptr p_metadataSet, bool p_withMetaIndex = false, bool p_normalized = false); + ErrorCode DeleteIndex(const SizeType& p_id); + + ErrorCode DeleteIndex(const void* p_vectors, SizeType 
p_vectorNum);
             ErrorCode RefineIndex(const std::vector>& p_indexStreams, IAbortOperation* p_abort) { return ErrorCode::Undefined; }
             ErrorCode RefineIndex(std::shared_ptr& p_newIndex) { return ErrorCode::Undefined; }
+
             ErrorCode SetWorkSpaceFactory(std::unique_ptr> up_workSpaceFactory)
             {
                 SPTAG::COMMON::IWorkSpaceFactory* raw_generic_ptr = up_workSpaceFactory.release();
@@ -174,7 +185,7 @@ namespace SPTAG
             }

             ErrorCode GetPostingDebug(SizeType vid, std::vector& VIDs, std::shared_ptr& vecs);
-            
+
         private:
             bool CheckHeadIndexType();
             void SelectHeadAdjustOptions(int p_vectorCount);
@@ -185,6 +196,96 @@ namespace SPTAG
             bool SelectHeadInternal(std::shared_ptr& p_reader);

             ErrorCode BuildIndexInternal(std::shared_ptr& p_reader);
+
+        public:
+            /// True when the extra searcher reports no pending background work.
+            /// Always true when neither the KV nor the SPDK backend is enabled or no
+            /// extra searcher is attached.
+            bool AllFinished() { if ((m_options.m_useKV || m_options.m_useSPDK) && m_extraSearcher != nullptr) return m_extraSearcher->AllFinished(); return true; }
+
+            /// Log backend statistics (KV/SPDK only) and the current/deleted vector counts.
+            void GetDBStat() {
+                if ((m_options.m_useKV || m_options.m_useSPDK) && m_extraSearcher != nullptr) m_extraSearcher->GetDBStats();
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Current Vector Num: %d, Deleted: %d .\n", GetNumSamples(), GetNumDeleted());
+            }
+
+            /// Forward to the extra searcher's GetIndexStats (KV/SPDK only).
+            void GetIndexStat(int finishedInsert, bool cost, bool reset) { if ((m_options.m_useKV || m_options.m_useSPDK) && m_extraSearcher != nullptr) m_extraSearcher->GetIndexStats(finishedInsert, cost, reset); }
+
+            /// Forward to the extra searcher's ForceCompaction (KV only).
+            void ForceCompaction() { if (m_options.m_useKV && m_extraSearcher != nullptr) m_extraSearcher->ForceCompaction(); }
+
+            /// Set the m_inPlace option.
+            void StopMerge() { m_options.m_inPlace = true; }
+
+            /// Clear the m_inPlace option.
+            void OpenMerge() { m_options.m_inPlace = false; }
+
+            /// Forward to the extra searcher's ForceGC with the head index.
+            void ForceGC() { if (m_extraSearcher != nullptr) m_extraSearcher->ForceGC(m_index.get()); }
+
+            /// Forward to the extra searcher's Initialize; false when none is attached.
+            bool Initialize() { return m_extraSearcher != nullptr && m_extraSearcher->Initialize(); }
+
+            /// Forward to the extra searcher's ExitBlockController; false when none is attached.
+            bool ExitBlockController() { return m_extraSearcher != nullptr && m_extraSearcher->ExitBlockController(); }
+
+            /// Take the checkpoint lock exclusively, wait until AllFinished() is true, then
+            /// save the head index to "<m_persistentBufferPath>_headIndex" and checkpoint the
+            /// extra searcher under the same prefix.
+            void Checkpoint() {
+                if (m_extraSearcher == nullptr || m_index == nullptr) {
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Checkpoint skipped: index is not loaded\n");
+                    return;
+                }
+
+                /** Lock & wait until all jobs done **/
+
+                /** Lock **/
+
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Locking Index\n");
+                std::unique_lock lock(m_checkPointLock);
+
+                /** Wait **/
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Waiting for index update complete\n");
+                while(!AllFinished())
+                {
+                    std::this_thread::sleep_for(std::chrono::milliseconds(20));
+                }
+
+                /** Flush the checkpoint file: SPTAG states, block pool states, block mapping states **/
+                std::string filename = m_options.m_persistentBufferPath + "_headIndex";
+                // Flush SPTAG
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Saving in-memory index\n");
+                if (m_index->SaveIndex(filename) != ErrorCode::Success) {
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to save in-memory index to %s\n", filename.c_str());
+                }
+                // Flush block pool states & block mapping states
+                SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Saving storage states\n");
+                m_extraSearcher->Checkpoint(m_options.m_persistentBufferPath);
+            }
+
+            /// Insert `p_vectorNum` vectors of dimension `p_dimension` through the extra
+            /// searcher (KV or SPDK backend only).
+            ///
+            /// @param p_data       contiguous vector data; copied and normalized for cosine
+            ///                     distance, otherwise wrapped without taking ownership
+            /// @param VID          output array of at least `p_vectorNum` entries that
+            ///                     receives the assigned, consecutive vector ids
+            /// @return Fail when no updatable backend is attached, EmptyData for null/empty
+            ///         input, DimensionSizeMismatch, EmptyIndex when the version map is
+            ///         empty, otherwise the extra searcher's AddIndex result. Terminates the
+            ///         process when the version map cannot grow.
+            ErrorCode AddIndexSPFresh(const void *p_data, SizeType p_vectorNum, DimensionType p_dimension, SizeType* VID) {
+                if ((!m_options.m_useKV &&!m_options.m_useSPDK) || m_extraSearcher == nullptr) {
+                    SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Only Support KV Extra Update\n");
+                    return ErrorCode::Fail;
+                }
+
+                // VID is written unconditionally below, so it has to be a valid array too.
+                if (p_data == nullptr || VID == nullptr || p_vectorNum <= 0 || p_dimension == 0) return ErrorCode::EmptyData;
+                if (p_dimension != GetFeatureDim()) return ErrorCode::DimensionSizeMismatch;
+
+                // Shared with other writers; Checkpoint() takes this lock exclusively.
+                std::shared_lock checkpointGuard(m_checkPointLock);
+
+                SizeType begin;
+                {
+                    std::lock_guard addGuard(m_dataAddLock);
+
+                    begin = m_versionMap.GetVectorNum();
+
+                    if (begin == 0) { return ErrorCode::EmptyIndex; }
+
+                    if (m_versionMap.AddBatch(p_vectorNum) != ErrorCode::Success) {
+                        SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "MemoryOverFlow: VID: %d, Map Size:%d\n", begin, m_versionMap.BufferSize());
+                        exit(1);
+                    }
+                }
+                for (int i = 0; i < p_vectorNum; i++) VID[i] = begin + i;
+
+                std::shared_ptr vectorSet;
+                if (m_options.m_distCalcMethod == DistCalcMethod::Cosine) {
+                    ByteArray arr = ByteArray::Alloc(sizeof(T) * p_vectorNum * p_dimension);
+                    memcpy(arr.Data(), p_data, sizeof(T) * p_vectorNum * p_dimension);
+                    vectorSet.reset(new BasicVectorSet(arr, GetEnumValueType(), p_dimension, p_vectorNum));
+                    int base = COMMON::Utils::GetBase();
+                    for (SizeType i = 0; i < p_vectorNum; i++) {
+                        COMMON::Utils::Normalize((T*)(vectorSet->GetVector(i)), p_dimension, base);
+                    }
+                }
+                else {
+                    vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)p_data, sizeof(T) * p_vectorNum * p_dimension, false),
+                        GetEnumValueType(), p_dimension, p_vectorNum));
+                }
+
+                return m_extraSearcher->AddIndex(vectorSet, m_index, begin);
+            }
         };
     } // namespace SPANN
 } // namespace SPTAG
diff --git a/AnnService/inc/Core/SPANN/Options.h b/AnnService/inc/Core/SPANN/Options.h
index 2a275b7c..ab47836c 100644
--- a/AnnService/inc/Core/SPANN/Options.h
+++ b/AnnService/inc/Core/SPANN/Options.h
@@ -43,6 +43,8 @@ namespace SPTAG {
             bool m_deleteHeadVectors;
             int m_ssdIndexFileNum;
             std::string m_quantizerFilePath;
+            int m_datasetRowsInBlock;
+            int m_datasetCapacity;

             // Section 2: for selecting head
             bool m_selectHead;
@@ -68,9 +70,6 @@ namespace SPTAG {
             bool m_recursiveCheckSmallCluster;
             bool m_printSizeCount;
             std::string m_selectType;
-            // Dataset constructor args
-            int m_datasetRowsInBlock;
-            int m_datasetCapacity;

             // Section 3: for build head
             bool m_buildHead;
@@ -98,6 +97,16 @@ namespace SPTAG {
             int m_samples;
             bool m_excludehead;
             int m_postingVectorLimit;
+            std::string m_fullDeletedIDFile;
+            bool m_useKV;
+            bool m_useSPDK;
+            std::string m_KVPath;
+            std::string m_spdkMappingPath;
+            std::string m_ssdInfoFile;
+            bool m_useDirectIO;
+            bool m_preReassign;
+            float m_preReassignRatio;
+            bool m_enableWAL;

             // GPU building
             int m_gpuSSDNumTrees;
@@ -123,6 +132,53 @@ namespace SPTAG {
             bool m_enableADC;
             int m_iotimeout;

+            int m_searchThreadNum;
+
+            // Calculating
+            std::string m_truthFilePrefix;
+            bool m_calTruth;
+            bool m_calAllTruth;
+            int m_searchTimes;
+            int m_minInternalResultNum;
+            int m_stepInternalResultNum;
+            int m_maxInternalResultNum;
+            bool m_onlySearchFinalBatch;
+
+            // Updating
+            bool m_disableReassign;
+            bool m_searchDuringUpdate;
+            int m_reassignK;
+            bool m_recovery;
+
+            // Updating(SPFresh Update Test)
+            bool m_update;
+            bool m_inPlace;
+            bool m_outOfPlace;
+            float m_latencyLimit;
+            int m_step;
+            int m_insertThreadNum;
+            int m_endVectorNum;
+            std::string
m_persistentBufferPath; + int m_appendThreadNum; + int m_reassignThreadNum; + int m_batch; + std::string m_fullVectorPath; + + // Steady State Update + std::string m_updateFilePrefix; + std::string m_updateMappingPrefix; + int m_days; + int m_deleteQPS; + int m_sampling; + bool m_showUpdateProgress; + int m_mergeThreshold; + bool m_loadAllVectors; + bool m_steadyState; + int m_spdkBatchSize; + bool m_stressTest; + int m_bufferLength; + + Options() { #define DefineBasicParameter(VarName, VarType, DefaultValue, RepresentStr) \ VarName = DefaultValue; \ diff --git a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h index 5e516931..12afabb0 100644 --- a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h +++ b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h @@ -32,7 +32,8 @@ DefineBasicParameter(m_ssdIndex, std::string, std::string("SPTAGFullList.bin"), DefineBasicParameter(m_deleteHeadVectors, bool, false, "DeleteHeadVectors") DefineBasicParameter(m_ssdIndexFileNum, int, 1, "SSDIndexFileNum") DefineBasicParameter(m_quantizerFilePath, std::string, std::string(), "QuantizerFilePath") - +DefineBasicParameter(m_datasetRowsInBlock, int, 1024 * 1024, "DataBlockSize") +DefineBasicParameter(m_datasetCapacity, int, SPTAG::MaxSize, "DataCapacity") #endif #ifdef DefineSelectHeadParameter @@ -61,10 +62,6 @@ DefineSelectHeadParameter(m_headVectorCount, int, 0, "Count") DefineSelectHeadParameter(m_recursiveCheckSmallCluster, bool, true, "RecursiveCheckSmallCluster") DefineSelectHeadParameter(m_printSizeCount, bool, true, "PrintSizeCount") DefineSelectHeadParameter(m_selectType, std::string, "BKT", "SelectHeadType") - -DefineSelectHeadParameter(m_datasetRowsInBlock, int, 1024 * 1024, "DataBlockSize") -DefineSelectHeadParameter(m_datasetCapacity, int, SPTAG::MaxSize, "DataCapacity") - #endif #ifdef DefineBuildHeadParameter @@ -96,6 +93,18 @@ DefineSSDParameter(m_rngFactor, float, 1.0f, "RNGFactor") 
DefineSSDParameter(m_samples, int, 100, "RecallTestSampleNumber") DefineSSDParameter(m_excludehead, bool, true, "ExcludeHead") DefineSSDParameter(m_postingVectorLimit, int, 118, "PostingVectorLimit") +DefineSSDParameter(m_fullDeletedIDFile, std::string, std::string("fulldeleted"), "FullDeletedIDFile") +DefineSSDParameter(m_useKV, bool, false, "UseKV") +DefineSSDParameter(m_useSPDK, bool, false, "UseSPDK") +DefineSSDParameter(m_spdkBatchSize, int, 64, "SpdkBatchSize") +DefineSSDParameter(m_KVPath, std::string, std::string(""), "KVPath") +DefineSSDParameter(m_spdkMappingPath, std::string, std::string(""), "SpdkMappingPath") +DefineSSDParameter(m_ssdInfoFile, std::string, std::string(""), "SsdInfoFile") +DefineSSDParameter(m_useDirectIO, bool, false, "UseDirectIO") +DefineSSDParameter(m_preReassign, bool, false, "PreReassign") +DefineSSDParameter(m_preReassignRatio, float, 0.7f, "PreReassignRatio") +DefineSSDParameter(m_bufferLength, int, 3, "BufferLength") +DefineSSDParameter(m_enableWAL, bool, false, "EnableWAL") // GPU Building DefineSSDParameter(m_gpuSSDNumTrees, int, 100, "GPUSSDNumTrees") @@ -121,4 +130,69 @@ DefineSSDParameter(m_recall_analysis, bool, false, "RecallAnalysis") DefineSSDParameter(m_debugBuildInternalResultNum, int, 64, "DebugBuildInternalResultNum") DefineSSDParameter(m_iotimeout, int, 30, "IOTimeout") +// Calculating +// TruthFilePrefix +DefineSSDParameter(m_truthFilePrefix, std::string, std::string(""), "TruthFilePrefix") +// CalTruth +DefineSSDParameter(m_calTruth, bool, true, "CalTruth") +DefineSSDParameter(m_onlySearchFinalBatch, bool, false, "OnlySearchFinalBatch") +// Search multiple times for stable result +DefineSSDParameter(m_searchTimes, int, 1, "SearchTimes") +// Frontend search threadnum +DefineSSDParameter(m_searchThreadNum, int, 16, "SearchThreadNum") +// Show tradeoff of latency and acurracy +DefineSSDParameter(m_minInternalResultNum, int, -1, "MinInternalResultNum") +DefineSSDParameter(m_stepInternalResultNum, int, -1, 
"StepInternalResultNum") +DefineSSDParameter(m_maxInternalResultNum, int, -1, "MaxInternalResultNum") + +// Updating(SPFresh Update Test) +// For update mode: current only update +DefineSSDParameter(m_update, bool, false, "Update") +// For Test Mode +DefineSSDParameter(m_inPlace, bool, false, "InPlace") +DefineSSDParameter(m_outOfPlace, bool, false, "OutOfPlace") +// latency limit +DefineSSDParameter(m_latencyLimit, float, 2.0, "LatencyLimit") +// Update batch size +DefineSSDParameter(m_step, int, 0, "Step") +// Frontend update threadnum +DefineSSDParameter(m_insertThreadNum, int, 16, "InsertThreadNum") +// Update limit +DefineSSDParameter(m_endVectorNum, int, -1, "EndVectorNum") +// Persistent buffer path +DefineSSDParameter(m_persistentBufferPath, std::string, std::string(""), "PersistentBufferPath") +// Background append threadnum +DefineSSDParameter(m_appendThreadNum, int, 16, "AppendThreadNum") +// Background reassign threadnum +DefineSSDParameter(m_reassignThreadNum, int, 16, "ReassignThreadNum") +// Background process batch size +DefineSSDParameter(m_batch, int, 1000, "Batch") +// Total Vector Path +DefineSSDParameter(m_fullVectorPath, std::string, std::string(""), "FullVectorPath") +// Steady State: update trace +DefineSSDParameter(m_updateFilePrefix, std::string, std::string(""), "UpdateFilePrefix") +// Steady State: update mapping +DefineSSDParameter(m_updateMappingPrefix, std::string, std::string(""), "UpdateMappingPrefix") +// Steady State: days +DefineSSDParameter(m_days, int, 0, "Days") +// Steady State: deleteQPS +DefineSSDParameter(m_deleteQPS, int, -1, "DeleteQPS") +// Steady State: sampling +DefineSSDParameter(m_sampling, int, -1, "Sampling") +// Steady State: showUpdateProgress +DefineSSDParameter(m_showUpdateProgress, bool, true, "ShowUpdateProgress") +// Steady State: Merge Threshold +DefineSSDParameter(m_mergeThreshold, int, 10, "MergeThreshold") +// Steady State: showUpdateProgress +DefineSSDParameter(m_loadAllVectors, bool, false, 
"LoadAllVectors")
+// Steady State: steady state
+DefineSSDParameter(m_steadyState, bool, false, "SteadyState")
+// Steady State: stress test
+DefineSSDParameter(m_stressTest, bool, false, "StressTest")
+
+// SPANN
+DefineSSDParameter(m_disableReassign, bool, false, "DisableReassign")
+DefineSSDParameter(m_searchDuringUpdate, bool, false, "SearchDuringUpdate")
+DefineSSDParameter(m_reassignK, int, 0, "ReassignK")
+DefineSSDParameter(m_recovery, bool, false, "Recovery")
 #endif
diff --git a/AnnService/inc/Core/SPANN/PersistentBuffer.h b/AnnService/inc/Core/SPANN/PersistentBuffer.h
new file mode 100644
index 00000000..4361543a
--- /dev/null
+++ b/AnnService/inc/Core/SPANN/PersistentBuffer.h
@@ -0,0 +1,56 @@
+#ifndef _SPTAG_SPANN_PERSISTENTBUFFER_H_
+#define _SPTAG_SPANN_PERSISTENTBUFFER_H_
+
+#include "inc/Helper/KeyValueIO.h"
+#include <atomic>
+#include <memory>
+#include <string>
+
+namespace SPTAG {
+    namespace SPANN {
+        // concurrently safe with RocksDBIO
+        /// Append-only log of assignment records kept in a key-value store.
+        /// Records are stored under consecutive integer ids handed out by an atomic counter.
+        class PersistentBuffer
+        {
+        public:
+            PersistentBuffer(std::shared_ptr db) : db(db), _size(0) { }
+
+            ~PersistentBuffer() {}
+
+            /// Reserve and return the next record id.
+            inline int GetNewAssignmentID() { return _size++; }
+
+            /// Store `assignment` under a newly reserved id and return that id.
+            inline int PutAssignment(std::string& assignment)
+            {
+                int assignmentID = GetNewAssignmentID();
+                db->Put(assignmentID, assignment);
+                return assignmentID;
+            }
+
+            /// Begin scanning the stored records; on success `assignment` receives the
+            /// record and the id counter is moved just past the key that was returned.
+            inline bool StartToScan(std::string& assignment)
+            {
+                SizeType newSize = 0;
+                if (db->StartToScan(newSize, &assignment) != ErrorCode::Success) return false;
+                _size = newSize+1;
+                return true;
+            }
+
+            /// Continue the scan started by StartToScan; same contract.
+            inline bool NextToScan(std::string& assignment)
+            {
+                SizeType newSize = 0;
+                if (db->NextToScan(newSize, &assignment) != ErrorCode::Success) return false;
+                _size = newSize+1;
+                return true;
+            }
+
+            /// Delete every record id handed out so far and restart numbering at 0.
+            inline void ClearPreviousRecord()
+            {
+                db->DeleteRange(0, _size.load());
+                _size = 0;
+            }
+
+            /// Shut the underlying store down; always returns 0.
+            inline int StopPB()
+            {
+                db->ShutDown();
+                return 0;
+            }
+
+        private:
+            std::shared_ptr db;
+            std::atomic_int _size;
+        };
+    }
+}
+
+#endif // _SPTAG_SPANN_PERSISTENTBUFFER_H_
diff --git a/AnnService/inc/Core/VectorIndex.h b/AnnService/inc/Core/VectorIndex.h
index d2702bbe..7dea5812 100644
---
a/AnnService/inc/Core/VectorIndex.h
+++ b/AnnService/inc/Core/VectorIndex.h
@@ -32,6 +32,9 @@ class VectorIndex
     virtual ErrorCode BuildIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, bool p_normalized = false, bool p_shareOwnership = false) = 0;

     virtual ErrorCode AddIndex(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, std::shared_ptr p_metadataSet, bool p_withMetaIndex = false, bool p_normalized = false) = 0;
+    virtual ErrorCode AddIndexId(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, int& beginHead, int& endHead) { return ErrorCode::Undefined; }
+    virtual ErrorCode AddIndexIdx(SizeType begin, SizeType end) { return ErrorCode::Undefined; }
+
     virtual ErrorCode DeleteIndex(const void* p_vectors, SizeType p_vectorNum) = 0;

diff --git a/AnnService/inc/Helper/KeyValueIO.h b/AnnService/inc/Helper/KeyValueIO.h
new file mode 100644
index 00000000..e2296d6e
--- /dev/null
+++ b/AnnService/inc/Helper/KeyValueIO.h
@@ -0,0 +1,54 @@
+#ifndef _SPTAG_HELPER_KEYVALUEIO_H_
+#define _SPTAG_HELPER_KEYVALUEIO_H_
+
+#include "inc/Core/Common.h"
+#include <chrono>
+#include <string>
+#include <vector>
+
+namespace SPTAG
+{
+    namespace Helper
+    {
+        /// Abstract key-value store interface.
+        ///
+        /// The integer-keyed Get/MultiGet/Put/Merge/Delete and ShutDown are pure virtual.
+        /// Every other operation has a default that does nothing or reports
+        /// ErrorCode::Undefined / false, so a backend only overrides what it supports.
+        class KeyValueIO {
+        public:
+            KeyValueIO() {}
+
+            virtual ~KeyValueIO() {}
+
+            virtual void ShutDown() = 0;
+
+            virtual ErrorCode Get(const std::string& key, std::string* value) { return ErrorCode::Undefined; }
+
+            virtual ErrorCode Get(SizeType key, std::string* value) = 0;
+
+            virtual ErrorCode MultiGet(const std::vector& keys, std::vector* values, const std::chrono::microseconds &timeout = std::chrono::microseconds::max()) { return ErrorCode::Undefined; }
+
+            virtual ErrorCode MultiGet(const std::vector& keys, std::vector* values, const std::chrono::microseconds &timeout = std::chrono::microseconds::max()) = 0;
+
+            virtual ErrorCode Put(const std::string& key, const std::string& value) { return ErrorCode::Undefined; }
+
+            virtual ErrorCode Put(SizeType key, const std::string& value) = 0;
+
+            virtual ErrorCode Merge(SizeType key, const std::string& value) = 0;
+
+            virtual ErrorCode Delete(SizeType key) = 0;
+
+            virtual ErrorCode DeleteRange(SizeType start, SizeType end) {return ErrorCode::Undefined;}
+
+            virtual void ForceCompaction() {}
+
+            virtual void GetStat() {}
+
+            virtual bool Initialize(bool debug = false) { return false; }
+
+            virtual bool ExitBlockController(bool debug = false) { return false; }
+
+            virtual ErrorCode Checkpoint(std::string prefix) {return ErrorCode::Undefined;}
+
+            virtual ErrorCode StartToScan(SizeType& key, std::string* value) {return ErrorCode::Undefined;}
+
+            virtual ErrorCode NextToScan(SizeType& key, std::string* value) {return ErrorCode::Undefined;}
+        };
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/AnnService/inc/Helper/ThreadPool.h b/AnnService/inc/Helper/ThreadPool.h
index b4fd1460..b706a978 100644
--- a/AnnService/inc/Helper/ThreadPool.h
+++ b/AnnService/inc/Helper/ThreadPool.h
@@ -4,6 +4,7 @@
 #ifndef _SPTAG_HELPER_THREADPOOL_H_
 #define _SPTAG_HELPER_THREADPOOL_H_

+#include <atomic>
 #include
 #include
 #include
@@ -57,7 +58,9 @@ namespace SPTAG
                             {
                                 try 
                                 {
+                                    currentJobs++;
                                     j->exec(&m_abort);
+                                    currentJobs--;
                                 }
                                 catch (std::exception& e) {
+                                    // exec() threw after the increment above: undo it here, otherwise
+                                    // the count never returns to zero and allClear() stays false forever.
+                                    currentJobs--;
                                     SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "ThreadPool: exception in %s %s\n", typeid(*j).name(), e.what());
@@ -95,7 +98,12 @@ namespace SPTAG
                 return m_jobs.size();
             }

+            /// Number of jobs currently inside exec().
+            inline uint32_t runningJobs() { return currentJobs; }
+
+            /// True when no job is executing and the queue is empty.
+            inline bool allClear() { return currentJobs == 0 && jobsize() == 0; }
+
         protected:
+            std::atomic_uint32_t currentJobs{ 0 };
             std::queue m_jobs;
             Abort m_abort;
             std::mutex m_lock;
diff --git a/AnnService/inc/Quantizer/Training.h b/AnnService/inc/Quantizer/Training.h
index 718dbbb8..3ab4b242 100644
--- a/AnnService/inc/Quantizer/Training.h
+++ b/AnnService/inc/Quantizer/Training.h
@@ -120,4 +120,4 @@ std::unique_ptr TrainPQQuantizer(std::shared_ptr options,
     }

     return codebooks;
-}
\ No newline at end of file
+}
diff --git a/AnnService/inc/SPFresh/SPFresh.h
/// Elapsed time from `start` to `end` in milliseconds.
/// The difference is truncated to whole microseconds before conversion.
/// Declared inline because this definition lives in a header.
inline double getMsInterval(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end) {
    const auto elapsedUs = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
    return (elapsedUs * 1.0) / 1000.0;
}

/// Elapsed time from `start` to `end` in seconds.
/// The difference is truncated to whole milliseconds before conversion.
inline double getSecInterval(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end) {
    const auto elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    return (elapsedMs * 1.0) / 1000.0;
}

/// Elapsed time from `start` to `end` in minutes.
/// The difference is truncated to whole seconds before conversion.
inline double getMinInterval(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end) {
    const auto elapsedSec = std::chrono::duration_cast<std::chrono::seconds>(end - start).count();
    return (elapsedSec * 1.0) / 60.0;
}
std::chrono::steady_clock::now(); + } + }; + + template + void OutputResult(const std::string& p_output, std::vector& p_results, int p_resultNum) + { + if (!p_output.empty()) + { + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(p_output.c_str(), std::ios::binary | std::ios::out)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed create file: %s\n", p_output.c_str()); + exit(1); + } + int32_t i32Val = static_cast(p_results.size()); + if (ptr->WriteBinary(sizeof(i32Val), reinterpret_cast(&i32Val)) != sizeof(i32Val)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to write result file!\n"); + exit(1); + } + i32Val = p_resultNum; + if (ptr->WriteBinary(sizeof(i32Val), reinterpret_cast(&i32Val)) != sizeof(i32Val)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to write result file!\n"); + exit(1); + } + + float fVal = 0; + for (size_t i = 0; i < p_results.size(); ++i) + { + for (int j = 0; j < p_resultNum; ++j) + { + i32Val = p_results[i].GetResult(j)->VID; + if (ptr->WriteBinary(sizeof(i32Val), reinterpret_cast(&i32Val)) != sizeof(i32Val)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to write result file!\n"); + exit(1); + } + + fVal = p_results[i].GetResult(j)->Dist; + if (ptr->WriteBinary(sizeof(fVal), reinterpret_cast(&fVal)) != sizeof(fVal)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to write result file!\n"); + exit(1); + } + } + } + } + } + + void ShowMemoryStatus(std::shared_ptr vectorSet, double second) + { + int tSize = 0, resident = 0, share = 0; + std::ifstream buffer("/proc/self/statm"); + buffer >> tSize >> resident >> share; + buffer.close(); +#ifndef _MSC_VER + long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages +#else + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + long page_size_kb = sysInfo.dwPageSize / 1024; +#endif + long rss = resident * page_size_kb; + long vector_size; + if (vectorSet != nullptr) + vector_size = vectorSet->PerVectorDataSize() * 
(vectorSet->Count() / 1024); + else + vector_size = 0; + long vector_size_mb = vector_size / 1024; + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Current time: %.0lf. RSS : %ld MB, Vector Set Size : %ld MB, True Size: %ld MB\n", second, rss / 1024, vector_size_mb, rss / 1024 - vector_size_mb); + } + + template + void PrintPercentiles(const std::vector& p_values, std::function p_get, const char* p_format, bool reverse=false) + { + double sum = 0; + std::vector collects; + collects.reserve(p_values.size()); + for (const auto& v : p_values) + { + T tmp = p_get(v); + sum += tmp; + collects.push_back(tmp); + } + if (reverse) { + std::sort(collects.begin(), collects.end(), std::greater()); + } + else { + std::sort(collects.begin(), collects.end()); + } + if (reverse) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Avg\t50tiles\t90tiles\t95tiles\t99tiles\t99.9tiles\tMin\n"); + } + else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Avg\t50tiles\t90tiles\t95tiles\t99tiles\t99.9tiles\tMax\n"); + } + + std::string formatStr("%.3lf"); + for (int i = 1; i < 7; ++i) + { + formatStr += '\t'; + formatStr += p_format; + } + + formatStr += '\n'; + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + formatStr.c_str(), + sum / collects.size(), + collects[static_cast(collects.size() * 0.50)], + collects[static_cast(collects.size() * 0.90)], + collects[static_cast(collects.size() * 0.95)], + collects[static_cast(collects.size() * 0.99)], + collects[static_cast(collects.size() * 0.999)], + collects[static_cast(collects.size() - 1)]); + } + + template + static float CalculateRecallSPFresh(VectorIndex* index, std::vector& results, const std::vector>& truth, int K, int truthK, std::shared_ptr querySet, std::shared_ptr vectorSet, SizeType NumQuerys, std::ofstream* log = nullptr, bool debug = false) + { + float meanrecall = 0, minrecall = MaxDist, maxrecall = 0, stdrecall = 0; + std::vector thisrecall(NumQuerys, 0); + std::unique_ptr visited(new bool[K]); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start 
Calculating Recall\n"); + for (SizeType i = 0; i < NumQuerys; i++) + { + memset(visited.get(), 0, K * sizeof(bool)); + for (SizeType id : truth[i]) + { + for (int j = 0; j < K; j++) + { + if (visited[j] || results[i].GetResult(j)->VID < 0) continue; + if (results[i].GetResult(j)->VID == id) + { + thisrecall[i] += 1; + visited[j] = true; + break; + } else if (vectorSet != nullptr) { + float dist = results[i].GetResult(j)->Dist; + float truthDist = COMMON::DistanceUtils::ComputeDistance((const T*)querySet->GetVector(i), (const T*)vectorSet->GetVector(id), vectorSet->Dimension(), index->GetDistCalcMethod()); + if (index->GetDistCalcMethod() == SPTAG::DistCalcMethod::Cosine && fabs(dist - truthDist) < Epsilon) { + thisrecall[i] += 1; + visited[j] = true; + break; + } + else if (index->GetDistCalcMethod() == SPTAG::DistCalcMethod::L2 && fabs(dist - truthDist) < Epsilon * (dist + Epsilon)) { + thisrecall[i] += 1; + visited[j] = true; + break; + } + } + } + } + thisrecall[i] /= truthK; + meanrecall += thisrecall[i]; + if (thisrecall[i] < minrecall) minrecall = thisrecall[i]; + if (thisrecall[i] > maxrecall) maxrecall = thisrecall[i]; + + if (debug) { + std::string ll("recall:" + std::to_string(thisrecall[i]) + "\ngroundtruth:"); + std::vector truthvec; + for (SizeType id : truth[i]) { + float truthDist = 0.0; + if (vectorSet != nullptr) { + truthDist = COMMON::DistanceUtils::ComputeDistance((const T*)querySet->GetVector(i), (const T*)vectorSet->GetVector(id), querySet->Dimension(), index->GetDistCalcMethod()); + } + truthvec.emplace_back(id, truthDist); + } + std::sort(truthvec.begin(), truthvec.end()); + for (int j = 0; j < truthvec.size(); j++) + ll += std::to_string(truthvec[j].node) + "@" + std::to_string(truthvec[j].distance) + ","; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%s\n", ll.c_str()); + ll = "ann:"; + for (int j = 0; j < K; j++) + ll += std::to_string(results[i].GetResult(j)->VID) + "@" + std::to_string(results[i].GetResult(j)->Dist) + ","; + 
SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%s\n", ll.c_str()); + } + } + meanrecall /= NumQuerys; + for (SizeType i = 0; i < NumQuerys; i++) + { + stdrecall += (thisrecall[i] - meanrecall) * (thisrecall[i] - meanrecall); + } + stdrecall = std::sqrt(stdrecall / NumQuerys); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "stdrecall: %.6lf, maxrecall: %.2lf, minrecall: %.2lf\n", stdrecall, maxrecall, minrecall); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nRecall Distribution:\n"); + PrintPercentiles(thisrecall, + [](const float recall) -> float + { + return recall; + }, + "%.3lf", true); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recall%d@%d: %f\n", K, truthK, meanrecall); + + if (log) (*log) << meanrecall << " " << stdrecall << " " << minrecall << " " << maxrecall << std::endl; + return meanrecall; + } + + template + double SearchSequential(SPANN::Index* p_index, + int p_numThreads, + std::vector& p_results, + std::vector& p_stats, + int p_maxQueryCount, int p_internalResultNum) + { + int numQueries = min(static_cast(p_results.size()), p_maxQueryCount); + + std::atomic_size_t queriesSent(0); + + std::vector threads; + + StopWSPFresh sw; + + auto func = [&]() + { + p_index->Initialize(); + StopWSPFresh threadws; + size_t index = 0; + while (true) + { + index = queriesSent.fetch_add(1); + if (index < numQueries) + { + double startTime = threadws.getElapsedMs(); + p_index->GetMemoryIndex()->SearchIndex(p_results[index]); + double endTime = threadws.getElapsedMs(); + + p_stats[index].m_totalLatency = endTime - startTime; + + p_index->SearchDiskIndex(p_results[index], &(p_stats[index])); + double exEndTime = threadws.getElapsedMs(); + + p_stats[index].m_exLatency = exEndTime - endTime; + p_stats[index].m_totalLatency = p_stats[index].m_totalSearchLatency = exEndTime - startTime; + } + else + { + p_index->ExitBlockController(); + return; + } + } + }; + for (int i = 0; i < p_numThreads; i++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + 
+ auto sendingCost = sw.getElapsedSec(); + + return numQueries / sendingCost; + } + + template + void PrintStats(std::vector& stats) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nEx Elements Count:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_totalListElementsCount; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nHead Latency Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_totalSearchLatency - ss.m_exLatency; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nSetup Latency Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_exSetUpLatency; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nComp Latency Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_compLatency; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nSPDK Latency Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_diskReadLatency; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nEx Latency Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_exLatency; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nTotal Latency Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> double + { + return ss.m_totalSearchLatency; + }, + "%.3lf"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nTotal Disk Page Access Distribution(KB):\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> int + { + return ss.m_diskAccessCount; + }, + "%4d"); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nTotal Disk IO Distribution:\n"); + PrintPercentiles(stats, + [](const SPANN::SearchStats& ss) -> int + { + return ss.m_diskIOCount; + }, + "%4d"); + + 
SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\n"); + } + + void ResetStats(std::vector& totalStats) + { + for (int i = 0; i < totalStats.size(); i++) + { + totalStats[i].m_totalListElementsCount = 0; + totalStats[i].m_exLatency = 0; + totalStats[i].m_totalSearchLatency = 0; + totalStats[i].m_diskAccessCount = 0; + totalStats[i].m_diskIOCount = 0; + totalStats[i].m_compLatency = 0; + totalStats[i].m_diskReadLatency = 0; + totalStats[i].m_exSetUpLatency = 0; + } + } + + void AddStats(std::vector& totalStats, std::vector& addedStats) + { + for (int i = 0; i < totalStats.size(); i++) + { + totalStats[i].m_totalListElementsCount += addedStats[i].m_totalListElementsCount; + totalStats[i].m_exLatency += addedStats[i].m_exLatency; + totalStats[i].m_totalSearchLatency += addedStats[i].m_totalSearchLatency; + totalStats[i].m_diskAccessCount += addedStats[i].m_diskAccessCount; + totalStats[i].m_diskIOCount += addedStats[i].m_diskIOCount; + totalStats[i].m_compLatency += addedStats[i].m_compLatency; + totalStats[i].m_diskReadLatency += addedStats[i].m_diskReadLatency; + totalStats[i].m_exSetUpLatency += addedStats[i].m_exSetUpLatency; + } + } + + void AvgStats(std::vector& totalStats, int avgStatsNum) + { + for (int i = 0; i < totalStats.size(); i++) + { + totalStats[i].m_totalListElementsCount /= avgStatsNum; + totalStats[i].m_exLatency /= avgStatsNum; + totalStats[i].m_totalSearchLatency /= avgStatsNum; + totalStats[i].m_diskAccessCount /= avgStatsNum; + totalStats[i].m_diskIOCount /= avgStatsNum; + totalStats[i].m_compLatency /= avgStatsNum; + totalStats[i].m_diskReadLatency /= avgStatsNum; + totalStats[i].m_exSetUpLatency /= avgStatsNum; + } + } + + std::string convertFloatToString(const float value, const int precision = 0) + { + std::stringstream stream{}; + stream< LoadVectorSet(SPANN::Options& p_opts, int numThreads) + { + std::shared_ptr vectorSet; + if (p_opts.m_loadAllVectors) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start loading VectorSet...\n"); + if 
(!p_opts.m_fullVectorPath.empty() && fileexists(p_opts.m_fullVectorPath.c_str())) { + std::shared_ptr vectorOptions(new Helper::ReaderOptions(p_opts.m_valueType, p_opts.m_dim, p_opts.m_vectorType, p_opts.m_vectorDelimiter)); + auto vectorReader = Helper::VectorSetReader::CreateInstance(vectorOptions); + if (ErrorCode::Success == vectorReader->LoadFile(p_opts.m_fullVectorPath)) + { + vectorSet = vectorReader->GetVectorSet(); + if (p_opts.m_distCalcMethod == DistCalcMethod::Cosine) vectorSet->Normalize(numThreads); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "\nLoad VectorSet(%d,%d).\n", vectorSet->Count(), vectorSet->Dimension()); + } + } + } else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Reduce memory usage\n"); + } + return vectorSet; + } + + std::shared_ptr LoadUpdateVectors(SPANN::Options& p_opts, std::vector& insertSet, SizeType updateSize) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load Update Vectors\n"); + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(p_opts.m_fullVectorPath.c_str(), std::ios::binary | std::ios::in)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read file %s.\n", p_opts.m_fullVectorPath.c_str()); + throw std::runtime_error("Failed read file"); + } + + SizeType row; + DimensionType col; + if (ptr->ReadBinary(sizeof(SizeType), (char*)&row) != sizeof(SizeType)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read VectorSet!\n"); + throw std::runtime_error("Failed read file"); + } + if (ptr->ReadBinary(sizeof(DimensionType), (char*)&col) != sizeof(DimensionType)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read VectorSet!\n"); + throw std::runtime_error("Failed read file"); + } + + std::uint64_t totalRecordVectorBytes = ((std::uint64_t)GetValueTypeSize(p_opts.m_valueType)) * updateSize * col; + ByteArray vectorSet; + if (totalRecordVectorBytes > 0) { + vectorSet = ByteArray::Alloc(totalRecordVectorBytes); + char* vecBuf = reinterpret_cast(vectorSet.Data()); + std::uint64_t readSize = 
((std::uint64_t)GetValueTypeSize(p_opts.m_valueType)) * col; + for (int i = 0; i < updateSize; i++) { + std::uint64_t offset = ((std::uint64_t)GetValueTypeSize(p_opts.m_valueType)) * insertSet[i] * col + sizeof(SizeType) + sizeof(DimensionType); + if (ptr->ReadBinary(readSize, vecBuf + i*readSize, offset) != readSize) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read VectorSet!\n"); + throw std::runtime_error("Failed read file"); + } + } + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Load Vector(%d,%d)\n",updateSize, col); + return std::make_shared(vectorSet, + p_opts.m_valueType, + col, + updateSize); + + } + + std::shared_ptr LoadQuerySet(SPANN::Options& p_opts) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start loading QuerySet...\n"); + std::shared_ptr queryOptions(new Helper::ReaderOptions(p_opts.m_valueType, p_opts.m_dim, p_opts.m_queryType, p_opts.m_queryDelimiter)); + auto queryReader = Helper::VectorSetReader::CreateInstance(queryOptions); + if (ErrorCode::Success != queryReader->LoadFile(p_opts.m_queryPath)) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read query file.\n"); + exit(1); + } + return queryReader->GetVectorSet(); + } + + void LoadTruth(SPANN::Options& p_opts, std::vector>& truth, int numQueries, std::string truthfilename, int truthK) + { + auto ptr = f_createIO(); + if (p_opts.m_update) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start loading TruthFile...: %s\n", truthfilename.c_str()); + + if (ptr == nullptr || !ptr->Initialize(truthfilename.c_str(), std::ios::in | std::ios::binary)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed open truth file: %s\n", truthfilename.c_str()); + exit(1); + } + } else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start loading TruthFile...: %s\n", p_opts.m_truthPath.c_str()); + + if (ptr == nullptr || !ptr->Initialize(p_opts.m_truthPath.c_str(), std::ios::in | std::ios::binary)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed open truth file: %s\n", 
truthfilename.c_str()); + exit(1); + } + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "K: %d, TruthResultNum: %d\n", p_opts.m_resultNum, p_opts.m_truthResultNum); + COMMON::TruthSet::LoadTruth(ptr, truth, numQueries, p_opts.m_truthResultNum, p_opts.m_resultNum, p_opts.m_truthType); + char tmp[4]; + if (ptr->ReadBinary(4, tmp) == 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Truth number is larger than query number(%d)!\n", numQueries); + } + } + + template + void StableSearch(SPANN::Index* p_index, + int numThreads, + std::shared_ptr querySet, + std::shared_ptr vectorSet, + int avgStatsNum, + int queryCountLimit, + int internalResultNum, + std::string& truthFileName, + SPANN::Options& p_opts, + double second = 0, + bool showStatus = true) + { + if (avgStatsNum == 0) return; + int numQueries = querySet->Count(); + + std::vector results(numQueries, QueryResult(NULL, internalResultNum, false)); + + if (showStatus) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Searching: numThread: %d, numQueries: %d, searchTimes: %d.\n", numThreads, numQueries, avgStatsNum); + std::vector stats(numQueries); + std::vector TotalStats(numQueries); + ResetStats(TotalStats); + double totalQPS = 0; + for (int i = 0; i < avgStatsNum; i++) + { + for (int j = 0; j < numQueries; ++j) + { + results[j].SetTarget(reinterpret_cast(querySet->GetVector(j))); + results[j].Reset(); + } + totalQPS += SearchSequential(p_index, numThreads, results, stats, queryCountLimit, internalResultNum); + //PrintStats(stats); + AddStats(TotalStats, stats); + } + if (showStatus) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Current time: %.0lf, Searching Times: %d, AvgQPS: %.2lf.\n", second, avgStatsNum, totalQPS/avgStatsNum); + + AvgStats(TotalStats, avgStatsNum); + + if (showStatus) PrintStats(TotalStats); + + if (p_opts.m_calTruth) + { + if (p_opts.m_searchResult.empty()) { + std::vector> truth; + int K = p_opts.m_resultNum; + int truthK = p_opts.m_resultNum; + // float MRR, recall; + LoadTruth(p_opts, truth, numQueries, 
truthFileName, truthK); + CalculateRecallSPFresh((p_index->GetMemoryIndex()).get(), results, truth, K, truthK, querySet, vectorSet, numQueries); + // recall = COMMON::TruthSet::CalculateRecall((p_index->GetMemoryIndex()).get(), results, truth, K, truthK, querySet, vectorSet, numQueries, nullptr, false, &MRR); + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recall%d@%d: %f MRR@%d: %f\n", truthK, K, recall, K, MRR); + } else { + OutputResult(p_opts.m_searchResult + std::to_string(second), results, p_opts.m_resultNum); + } + } + } + + void LoadUpdateMapping(std::string fileName, std::vector& reverseIndices) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading %s\n", fileName.c_str()); + + int vectorNum; + + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(fileName.c_str(), std::ios::in | std::ios::binary)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed open trace file: %s\n", fileName.c_str()); + exit(1); + } + + if (ptr->ReadBinary(4, (char *)&vectorNum) != 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "vector Size Error!\n"); + } + + reverseIndices.clear(); + reverseIndices.resize(vectorNum); + + if (ptr->ReadBinary(vectorNum * 4, (char*)reverseIndices.data()) != vectorNum * 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "update mapping Error!\n"); + exit(1); + } + } + + void LoadUpdateTrace(std::string fileName, SizeType& updateSize, std::vector& insertSet, std::vector& deleteSet) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading %s\n", fileName.c_str()); + + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(fileName.c_str(), std::ios::in | std::ios::binary)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed open trace file: %s\n", fileName.c_str()); + exit(1); + } + + int tempSize; + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading Size\n"); + + if (ptr->ReadBinary(4, (char *)&tempSize) != 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Update Size Error!\n"); + } + + updateSize = tempSize; + + 
deleteSet.clear(); + deleteSet.resize(updateSize); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading deleteSet\n"); + + if (ptr->ReadBinary(updateSize * 4, (char*)deleteSet.data()) != updateSize * 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Delete Set Error!\n"); + exit(1); + } + + insertSet.clear(); + insertSet.resize(updateSize); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading insertSet\n"); + + if (ptr->ReadBinary(updateSize * 4, (char*)insertSet.data()) != updateSize * 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Insert Set Error!\n"); + exit(1); + } + } + + void LoadUpdateTraceStressTest(std::string fileName, SizeType& updateSize, std::vector& insertSet) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading %s\n", fileName.c_str()); + + auto ptr = f_createIO(); + if (ptr == nullptr || !ptr->Initialize(fileName.c_str(), std::ios::in | std::ios::binary)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed open trace file: %s\n", fileName.c_str()); + exit(1); + } + + int tempSize; + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading Size\n"); + + if (ptr->ReadBinary(4, (char *)&tempSize) != 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Update Size Error!\n"); + } + + updateSize = tempSize; + + insertSet.clear(); + insertSet.resize(updateSize); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading insertSet\n"); + + if (ptr->ReadBinary(updateSize * 4, (char*)insertSet.data()) != updateSize * 4) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Insert Set Error!\n"); + exit(1); + } + } + + template + void InsertVectorsBySet(SPANN::Index* p_index, + int insertThreads, + std::shared_ptr vectorSet, + std::vector& insertSet, + std::vector& mapping, + int updateSize, + SPANN::Options& p_opts) + { + StopWSPFresh sw; + std::vector threads; + std::vector latency_vector(updateSize); + + std::atomic_size_t vectorsSent(0); + + auto func = [&]() + { + p_index->Initialize(); + size_t index = 0; + while (true) + { + index = vectorsSent.fetch_add(1); + if (index 
< updateSize) + { + if ((index & ((1 << 14) - 1)) == 0 && p_opts.m_showUpdateProgress) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Insert: Sent %.2lf%%...\n", index * 100.0 / updateSize); + } + // std::vector meta; + // std::vector metaoffset; + // std::string a = std::to_string(insertSet[index]); + // metaoffset.push_back((std::uint64_t)meta.size()); + // for (size_t j = 0; j < a.length(); j++) + // meta.push_back(a[j]); + // metaoffset.push_back((std::uint64_t)meta.size()); + // std::shared_ptr metaset(new SPTAG::MemMetadataSet( + // SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false), + // SPTAG::ByteArray((std::uint8_t*)metaoffset.data(), metaoffset.size() * sizeof(std::uint64_t), false), + // 1)); + if (p_opts.m_stressTest) p_index->DeleteIndex(mapping[insertSet[index]]); + auto insertBegin = std::chrono::high_resolution_clock::now(); + if (p_opts.m_loadAllVectors) + p_index->AddIndexSPFresh(vectorSet->GetVector(insertSet[index]), 1, p_opts.m_dim, &mapping[insertSet[index]]); + else + p_index->AddIndexSPFresh(vectorSet->GetVector(index), 1, p_opts.m_dim, &mapping[insertSet[index]]); + auto insertEnd = std::chrono::high_resolution_clock::now(); + latency_vector[index] = std::chrono::duration_cast(insertEnd - insertBegin).count(); + } + else + { + p_index->ExitBlockController(); + return; + } + } + }; + for (int j = 0; j < insertThreads; j++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + + double sendingCost = sw.getElapsedSec(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + "Insert: Finish sending in %.3lf seconds, sending throughput is %.2lf , insertion count %u.\n", + sendingCost, + updateSize / sendingCost, + static_cast(updateSize)); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Insert: During Update\n"); + + while(!p_index->AllFinished()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + double syncingCost = sw.getElapsedSec(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + 
"Insert: Finish syncing in %.3lf seconds, actuall throughput is %.2lf, insertion count %u.\n", + syncingCost, + updateSize / syncingCost, + static_cast(updateSize)); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Insert Latency Distribution:\n"); + PrintPercentiles(latency_vector, + [](const double& ss) -> double + { + return ss; + }, + "%.3lf"); + } + + template + void DeleteVectorsBySet(SPANN::Index* p_index, + int deleteThreads, + std::shared_ptr vectorSet, + std::vector& deleteSet, + std::vector& mapping, + int updateSize, + SPANN::Options& p_opts, + int batch) + { + int avgQPS = p_opts.m_deleteQPS / deleteThreads; + if (p_opts.m_deleteQPS == -1) avgQPS = -1; + std::vector latency_vector(updateSize); + std::vector threads; + StopWSPFresh sw; + std::atomic_size_t vectorsSent(0); + auto func = [&]() + { + int deleteCount = 0; + while (true) + { + deleteCount++; + size_t index = 0; + index = vectorsSent.fetch_add(1); + if (index < updateSize) + { + if ((index & ((1 << 14) - 1)) == 0 && p_opts.m_showUpdateProgress) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Delete: Sent %.2lf%%...\n", index * 100.0 / updateSize); + } + auto deleteBegin = std::chrono::high_resolution_clock::now(); + p_index->DeleteIndex(mapping[deleteSet[index]]); + // p_index->DeleteIndex(vectorSet->GetVector(deleteSet[index]), deleteSet[index]); + // std::vector meta; + // std::string a = std::to_string(deleteSet[index]); + // for (size_t j = 0; j < a.length(); j++) + // meta.push_back(a[j]); + // ByteArray metarr = SPTAG::ByteArray((std::uint8_t*)meta.data(), meta.size() * sizeof(char), false); + + // if (p_index->VectorIndex::DeleteIndex(metarr) == ErrorCode::VectorNotFound) { + // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"VID meta no found: %d\n", deleteSet[index]); + // exit(1); + // } + + auto deleteEnd = std::chrono::high_resolution_clock::now(); + latency_vector[index] = std::chrono::duration_cast(deleteEnd - deleteBegin).count(); + if (avgQPS != -1 && deleteCount == avgQPS) { + 
std::this_thread::sleep_for(std::chrono::seconds(1)); + deleteCount = 0; + } + } + else + { + return; + } + } + }; + for (int j = 0; j < deleteThreads; j++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + double sendingCost = sw.getElapsedSec(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + "Delete: Finish sending in %.3lf seconds, sending throughput is %.2lf , deletion count %u.\n", + sendingCost, + updateSize / sendingCost, + static_cast(updateSize)); + } + + template + void SteadyStateSPFresh(SPANN::Index* p_index) + { + SPANN::Options& p_opts = *(p_index->GetOptions()); + int days = p_opts.m_days; + if (days == 0) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Need to input update days\n"); + exit(1); + } + StopWSPFresh sw; + + int numThreads = p_opts.m_searchThreadNum; + int internalResultNum = p_opts.m_searchInternalResultNum; + int searchTimes = p_opts.m_searchTimes; + + auto vectorSet = LoadVectorSet(p_opts, numThreads); + + auto querySet = LoadQuerySet(p_opts); + + int curCount = p_index->GetNumSamples(); + + bool calTruthOrigin = p_opts.m_calTruth; + + p_index->ForceCompaction(); + + p_index->GetDBStat(); + + if (!p_opts.m_onlySearchFinalBatch) { + if (p_opts.m_maxInternalResultNum != -1) + { + for (int iterInternalResultNum = p_opts.m_minInternalResultNum; iterInternalResultNum <= p_opts.m_maxInternalResultNum; iterInternalResultNum += p_opts.m_stepInternalResultNum) + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, iterInternalResultNum, p_opts.m_truthPath, p_opts, sw.getElapsedSec()); + } + } + else + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, internalResultNum, p_opts.m_truthPath, p_opts, sw.getElapsedSec()); + } + } + // exit(1); + + ShowMemoryStatus(vectorSet, sw.getElapsedSec()); + p_index->GetDBStat(); + + int insertThreads = p_opts.m_insertThreadNum; + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Updating: 
numThread: %d, total days: %d.\n", insertThreads, days); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start updating...\n"); + + int updateSize; + std::vector insertSet; + std::vector deleteSet; + std::vector mapping; + if (p_opts.m_endVectorNum == -1) p_opts.m_endVectorNum = curCount; + mapping.resize(p_opts.m_endVectorNum); + for (int i = 0; i < p_opts.m_endVectorNum; i++) { + mapping[i] = i; + } + + for (int i = 0; i < days; i++) + { + + std::string traceFileName = p_opts.m_updateFilePrefix + std::to_string(i); + if (!p_opts.m_stressTest) LoadUpdateTrace(traceFileName, updateSize, insertSet, deleteSet); + else LoadUpdateTraceStressTest(traceFileName, updateSize, insertSet); + if (!p_opts.m_loadAllVectors) { + vectorSet = LoadUpdateVectors(p_opts, insertSet, updateSize); + } + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Updating day: %d: numThread: %d, updateSize: %d,total days: %d.\n", i, insertThreads, updateSize, days); + + int sampleSize = updateSize / p_opts.m_sampling; + + int nextSamplePoint = sampleSize + updateSize * i; + + bool showStatus = false; + + std::future delete_future; + if (!p_opts.m_stressTest) { + delete_future = + std::async(std::launch::async, DeleteVectorsBySet, p_index, + 1, vectorSet, std::ref(deleteSet), std::ref(mapping), updateSize, std::ref(p_opts), i); + } + + std::future_status delete_status; + + std::future insert_future = + std::async(std::launch::async, InsertVectorsBySet, p_index, + insertThreads, vectorSet, std::ref(insertSet), std::ref(mapping), updateSize, std::ref(p_opts)); + + std::future_status insert_status; + + std::string tempFileName; + p_opts.m_calTruth = false; + do { + insert_status = insert_future.wait_for(std::chrono::seconds(2)); + if (!p_opts.m_stressTest) delete_status = delete_future.wait_for(std::chrono::seconds(2)); + else delete_status = std::future_status::ready; + if (insert_status == std::future_status::timeout || delete_status == std::future_status::timeout) { + if (p_index->GetNumDeleted() >= 
nextSamplePoint) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Samppling Size: %d\n", nextSamplePoint); + showStatus = true; + nextSamplePoint += sampleSize; + ShowMemoryStatus(vectorSet, sw.getElapsedSec()); + p_index->GetIndexStat(-1, false, false); + } else { + showStatus = false; + } + p_index->GetDBStat(); + if(p_opts.m_searchDuringUpdate) StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, internalResultNum, tempFileName, p_opts, sw.getElapsedSec(), showStatus); + p_index->GetDBStat(); + } + } while (insert_status != std::future_status::ready || delete_status != std::future_status::ready); + + curCount += updateSize; + + p_index->GetIndexStat(updateSize, true, true); + p_index->GetDBStat(); + + ShowMemoryStatus(vectorSet, sw.getElapsedSec()); + + std::string truthFileName; + + if (!p_opts.m_stressTest) truthFileName = p_opts.m_truthFilePrefix + std::to_string(i); + else truthFileName = p_opts.m_truthPath; + + p_index->Checkpoint(); + + p_opts.m_calTruth = calTruthOrigin; + if (p_opts.m_onlySearchFinalBatch && days - 1 != i) continue; + p_index->StopMerge(); + if (p_opts.m_maxInternalResultNum != -1) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Latency & Recall Tradeoff\n"); + for (int iterInternalResultNum = p_opts.m_minInternalResultNum; iterInternalResultNum <= p_opts.m_maxInternalResultNum; iterInternalResultNum += p_opts.m_stepInternalResultNum) + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, iterInternalResultNum, truthFileName, p_opts, sw.getElapsedSec()); + } + } + else + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, internalResultNum, truthFileName, p_opts, sw.getElapsedSec()); + } + p_index->OpenMerge(); + } + } + + template + void InsertVectors(SPANN::Index* p_index, + int insertThreads, + std::shared_ptr vectorSet, + int curCount, + int step, + SPANN::Options& p_opts) + { + StopWSPFresh sw; + std::vector 
threads; + + std::atomic_size_t vectorsSent(0); + std::vector latency_vector(step); + + auto func = [&]() + { + p_index->Initialize(); + size_t index = 0; + while (true) + { + index = vectorsSent.fetch_add(1); + if (index < step) + { + if ((index & ((1 << 14) - 1)) == 0) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Sent %.2lf%%...\n", index * 100.0 / step); + } + auto insertBegin = std::chrono::high_resolution_clock::now(); + p_index->AddIndex(vectorSet->GetVector((SizeType)(index + curCount)), 1, p_opts.m_dim, nullptr); + auto insertEnd = std::chrono::high_resolution_clock::now(); + latency_vector[index] = std::chrono::duration_cast(insertEnd - insertBegin).count(); + } + else + { + p_index->ExitBlockController(); + return; + } + } + }; + for (int j = 0; j < insertThreads; j++) { threads.emplace_back(func); } + for (auto& thread : threads) { thread.join(); } + + double sendingCost = sw.getElapsedSec(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + "Finish sending in %.3lf seconds, sending throughput is %.2lf , insertion count %u.\n", + sendingCost, + step/ sendingCost, + static_cast(step)); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"During Update\n"); + + while(!p_index->AllFinished()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + double syncingCost = sw.getElapsedSec(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + "Finish syncing in %.3lf seconds, actuall throughput is %.2lf, insertion count %u.\n", + syncingCost, + step / syncingCost, + static_cast(step)); + PrintPercentiles(latency_vector, + [](const double& ss) -> double + { + return ss; + }, + "%.3lf"); + } + + template + void UpdateSPFresh(SPANN::Index* p_index) + { + SPANN::Options& p_opts = *(p_index->GetOptions()); + int step = p_opts.m_step; + if (step == 0) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Incremental Test Error, Need to set step.\n"); + exit(1); + } + StopWSPFresh sw; + + int numThreads = p_opts.m_searchThreadNum; + int internalResultNum = 
p_opts.m_searchInternalResultNum; + int searchTimes = p_opts.m_searchTimes; + + auto vectorSet = LoadVectorSet(p_opts, numThreads); + + auto querySet = LoadQuerySet(p_opts); + + int curCount = p_index->GetNumSamples(); + int insertCount = vectorSet->Count() - curCount; + + bool calTruthOrigin = p_opts.m_calTruth; + + if (p_opts.m_endVectorNum != -1) + { + insertCount = p_opts.m_endVectorNum - curCount; + } + + p_index->ForceCompaction(); + + p_index->GetDBStat(); + + if (!p_opts.m_onlySearchFinalBatch) { + if (p_opts.m_maxInternalResultNum != -1) + { + for (int iterInternalResultNum = p_opts.m_minInternalResultNum; iterInternalResultNum <= p_opts.m_maxInternalResultNum; iterInternalResultNum += p_opts.m_stepInternalResultNum) + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, iterInternalResultNum, p_opts.m_truthPath, p_opts, sw.getElapsedSec()); + } + } + else + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, internalResultNum, p_opts.m_truthPath, p_opts, sw.getElapsedSec()); + } + } + + ShowMemoryStatus(vectorSet, sw.getElapsedSec()); + p_index->GetDBStat(); + + int batch; + if (step == 0) { + batch = 0; + } else { + batch = insertCount / step; + } + + int finishedInsert = 0; + int insertThreads = p_opts.m_insertThreadNum; + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Updating: numThread: %d, step: %d, insertCount: %d, totalBatch: %d.\n", insertThreads, step, insertCount, batch); + + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start updating...\n"); + for (int i = 0; i < 1; i++) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Updating Batch %d: numThread: %d, step: %d.\n", i, insertThreads, step); + + std::future insert_future = + std::async(std::launch::async, InsertVectors, p_index, + insertThreads, vectorSet, curCount, step, std::ref(p_opts)); + + std::future_status insert_status; + + std::string tempFileName; + p_opts.m_calTruth = false; + do { + insert_status = 
insert_future.wait_for(std::chrono::seconds(3)); + if (insert_status == std::future_status::timeout) { + ShowMemoryStatus(vectorSet, sw.getElapsedSec()); + p_index->GetIndexStat(-1, false, false); + p_index->GetDBStat(); + if(p_opts.m_searchDuringUpdate) StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, internalResultNum, tempFileName, p_opts, sw.getElapsedSec()); + } + }while (insert_status != std::future_status::ready); + + curCount += step; + finishedInsert += step; + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Total Vector num %d \n", curCount); + + p_index->GetIndexStat(finishedInsert, true, true); + + ShowMemoryStatus(vectorSet, sw.getElapsedSec()); + + std::string truthFileName = p_opts.m_truthFilePrefix + std::to_string(i); + + p_opts.m_calTruth = calTruthOrigin; + if (p_opts.m_onlySearchFinalBatch && batch - 1 != i) continue; + // p_index->ForceGC(); + // p_index->ForceCompaction(); + p_index->StopMerge(); + if (p_opts.m_maxInternalResultNum != -1) + { + for (int iterInternalResultNum = p_opts.m_minInternalResultNum; iterInternalResultNum <= p_opts.m_maxInternalResultNum; iterInternalResultNum += p_opts.m_stepInternalResultNum) + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, iterInternalResultNum, truthFileName, p_opts, sw.getElapsedSec()); + } + } + else + { + StableSearch(p_index, numThreads, querySet, vectorSet, searchTimes, p_opts.m_queryCountLimit, internalResultNum, truthFileName, p_opts, sw.getElapsedSec()); + } + p_index->OpenMerge(); + } + } + + int UpdateTest(const char* storePath) { + + std::shared_ptr index; + + if (index->LoadIndex(storePath, index) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to load index.\n"); + return 1; + } + + SPANN::Options* opts = nullptr; + + #define DefineVectorValueType(Name, Type) \ + if (index->GetVectorValueType() == VectorValueType::Name) { \ + opts = 
((SPANN::Index*)index.get())->GetOptions(); \ + } \ + + #include "inc/Core/DefinitionList.h" + #undef DefineVectorValueType + + #define DefineVectorValueType(Name, Type) \ + if (opts->m_valueType == VectorValueType::Name) { \ + if (opts->m_steadyState) SteadyStateSPFresh((SPANN::Index*)(index.get())); \ + else UpdateSPFresh((SPANN::Index*)(index.get())); \ + } \ + + #include "inc/Core/DefinitionList.h" + #undef DefineVectorValueType + + return 0; + } + } + } +} \ No newline at end of file diff --git a/AnnService/inc/SSDServing/SSDIndex.h b/AnnService/inc/SSDServing/SSDIndex.h index 7cd0a41c..a4541c39 100644 --- a/AnnService/inc/SSDServing/SSDIndex.h +++ b/AnnService/inc/SSDServing/SSDIndex.h @@ -8,7 +8,6 @@ #include "inc/Core/Common/DistanceUtils.h" #include "inc/Core/Common/QueryResultSet.h" #include "inc/Core/SPANN/Index.h" -#include "inc/Core/SPANN/ExtraFullGraphSearcher.h" #include "inc/Helper/VectorSetReader.h" #include "inc/Helper/StringConvert.h" #include "inc/SSDServing/Utils.h" @@ -178,7 +177,7 @@ namespace SPTAG { { SetLogger(std::make_shared(Helper::LogLevel::LL_Info, p_opts.m_logFile.c_str())); } - int numThreads = p_opts.m_iSSDNumberOfThreads; + int numThreads = p_opts.m_searchThreadNum; int internalResultNum = p_opts.m_searchInternalResultNum; int K = p_opts.m_resultNum; int truthK = (p_opts.m_truthResultNum <= 0) ? 
K : p_opts.m_truthResultNum; diff --git a/AnnService/packages.config b/AnnService/packages.config index 4ad04937..e56c07d6 100644 --- a/AnnService/packages.config +++ b/AnnService/packages.config @@ -8,5 +8,7 @@ + + \ No newline at end of file diff --git a/AnnService/src/Core/BKT/BKTIndex.cpp b/AnnService/src/Core/BKT/BKTIndex.cpp index a2c71f21..9e87c34b 100644 --- a/AnnService/src/Core/BKT/BKTIndex.cpp +++ b/AnnService/src/Core/BKT/BKTIndex.cpp @@ -154,6 +154,7 @@ namespace SPTAG const SizeType *node = m_pGraph[tmpNode]; \ _mm_prefetch((const char *)node, _MM_HINT_T0); \ for (DimensionType i = 0; i <= checkPos; i++) { \ + if (node[i] < 0 || node[i] >= m_pSamples.R()) break; \ _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \ } \ if (gnode.distance <= p_query.worstDist()) { \ @@ -192,6 +193,7 @@ namespace SPTAG for (DimensionType i = 0; i <= checkPos; i++) { \ SizeType nn_index = node[i]; \ if (nn_index < 0) break; \ + if (nn_index >= m_pSamples.R()) continue; \ if (p_space.CheckAndSet(nn_index)) continue; \ float distance2leaf = m_fComputeDistance(p_query.GetQuantizedTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \ p_space.m_iNumberOfCheckedLeaves++; \ @@ -657,7 +659,7 @@ namespace SPTAG if (p_dimension != GetFeatureDim()) return ErrorCode::DimensionSizeMismatch; - if (m_pSamples.AddBatch((const T*)p_data, p_vectorNum) != ErrorCode::Success || + if (m_pSamples.AddBatch(p_vectorNum, (const T*)p_data) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success || m_deletedID.AddBatch(p_vectorNum) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Memory Error: Cannot alloc space for vectors!\n"); @@ -703,6 +705,55 @@ namespace SPTAG return ErrorCode::Success; } + template + ErrorCode Index::AddIndexId(const void* p_data, SizeType p_vectorNum, DimensionType p_dimension, int& beginHead, int& endHead) + { + if (p_data == nullptr || p_vectorNum == 0 || p_dimension == 0) return ErrorCode::EmptyData; + + SizeType 
begin, end; + { + std::lock_guard lock(m_dataAddLock); + + begin = GetNumSamples(); + end = begin + p_vectorNum; + + if (begin == 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Index Error: No vector in Index!\n"); + return ErrorCode::EmptyIndex; + } + + if (p_dimension != GetFeatureDim()) return ErrorCode::DimensionSizeMismatch; + + + if (m_pSamples.AddBatch(p_vectorNum, (const T*)p_data) != ErrorCode::Success || + m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success || + m_deletedID.AddBatch(p_vectorNum) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Memory Error: Cannot alloc space for vectors!\n"); + m_pSamples.SetR(begin); + m_pGraph.SetR(begin); + m_deletedID.SetR(begin); + return ErrorCode::MemoryOverFlow; + } + } + beginHead = begin; + endHead = end; + return ErrorCode::Success; + } + + template + ErrorCode Index::AddIndexIdx(SizeType begin, SizeType end) + { + // if (end - m_pTrees.sizePerTree() >= m_addCountForRebuild && m_threadPool.jobsize() == 0) { + // m_threadPool.add(new RebuildJob(&m_pSamples, &m_pTrees, &m_pGraph, m_iDistCalcMethod)); + // } + + for (SizeType node = begin; node < end; node++) + { + m_pGraph.RefineNode(this, node, true, true, m_pGraph.m_iAddCEF); + } + return ErrorCode::Success; + } + template ErrorCode Index::UpdateIndex() diff --git a/AnnService/src/Core/KDT/KDTIndex.cpp b/AnnService/src/Core/KDT/KDTIndex.cpp index d2206aa2..f6eb9c41 100644 --- a/AnnService/src/Core/KDT/KDTIndex.cpp +++ b/AnnService/src/Core/KDT/KDTIndex.cpp @@ -608,7 +608,7 @@ case VectorValueType::Name: \ if (p_dimension != GetFeatureDim()) return ErrorCode::DimensionSizeMismatch; - if (m_pSamples.AddBatch((const T*)p_data, p_vectorNum) != ErrorCode::Success || + if (m_pSamples.AddBatch(p_vectorNum, (const T*)p_data) != ErrorCode::Success || m_pGraph.AddBatch(p_vectorNum) != ErrorCode::Success || m_deletedID.AddBatch(p_vectorNum) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Memory Error: Cannot alloc space for 
vectors!\n"); diff --git a/AnnService/src/Core/SPANN/ExtraSPDKController.cpp b/AnnService/src/Core/SPANN/ExtraSPDKController.cpp new file mode 100644 index 00000000..68225810 --- /dev/null +++ b/AnnService/src/Core/SPANN/ExtraSPDKController.cpp @@ -0,0 +1,478 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "inc/Core/SPANN/ExtraSPDKController.h" + +namespace SPTAG::SPANN +{ + +thread_local struct SPDKIO::BlockController::IoContext SPDKIO::BlockController::m_currIoContext; +int SPDKIO::BlockController::m_ssdInflight = 0; +int SPDKIO::BlockController::m_ioCompleteCount = 0; +std::unique_ptr SPDKIO::BlockController::m_memBuffer; + +void SPDKIO::BlockController::SpdkBdevEventCallback(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) { + fprintf(stderr, "SpdkBdevEventCallback: supported bdev event type %d\n", type); +} + +void SPDKIO::BlockController::SpdkBdevIoCallback(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + SubIoRequest* currSubIo = (SubIoRequest *)cb_arg; + if (success) { + m_ioCompleteCount++; + spdk_bdev_free_io(bdev_io); + currSubIo->completed_sub_io_requests->push(currSubIo); + m_ssdInflight--; + SpdkIoLoop(currSubIo->ctrl); + } else { + fprintf(stderr, "SpdkBdevIoCallback: I/O failed %p\n", currSubIo); + spdk_app_stop(-1); + } +} + +void SPDKIO::BlockController::SpdkStop(void *arg) { + SPDKIO::BlockController* ctrl = (SPDKIO::BlockController *)arg; + // Close I/O channel and bdev + spdk_put_io_channel(ctrl->m_ssdSpdkBdevIoChannel); + spdk_bdev_close(ctrl->m_ssdSpdkBdevDesc); + fprintf(stdout, "SPDKIO::BlockController::SpdkStop: finalized\n"); +} + +void SPDKIO::BlockController::SpdkIoLoop(void *arg) { + SPDKIO::BlockController* ctrl = (SPDKIO::BlockController *)arg; + int rc = 0; + SubIoRequest* currSubIo = nullptr; + while (!ctrl->m_ssdSpdkThreadExiting) { + if (ctrl->m_submittedSubIoRequests.try_pop(currSubIo)) { + if (currSubIo->is_read) { + rc = 
spdk_bdev_read( + ctrl->m_ssdSpdkBdevDesc, ctrl->m_ssdSpdkBdevIoChannel, + currSubIo->dma_buff, currSubIo->offset, PageSize, SpdkBdevIoCallback, currSubIo); + } else { + rc = spdk_bdev_write( + ctrl->m_ssdSpdkBdevDesc, ctrl->m_ssdSpdkBdevIoChannel, + currSubIo->dma_buff, currSubIo->offset, PageSize, SpdkBdevIoCallback, currSubIo); + } + if (rc && rc != -ENOMEM) { + fprintf(stderr, "SPDKIO::BlockController::SpdkStart %s failed: %d, shutting down, offset: %ld\n", + currSubIo->is_read ? "spdk_bdev_read" : "spdk_bdev_write", rc, currSubIo->offset); + spdk_app_stop(-1); + break; + } else { + m_ssdInflight++; + } + } else if (m_ssdInflight) { + break; + } + } + if (ctrl->m_ssdSpdkThreadExiting) { + SpdkStop(ctrl); + } +} + +void SPDKIO::BlockController::SpdkStart(void *arg) { + SPDKIO::BlockController* ctrl = (SPDKIO::BlockController *)arg; + + fprintf(stdout, "SPDKIO::BlockController::SpdkStart: using bdev %s\n", ctrl->m_ssdSpdkBdevName); + + int rc = 0; + ctrl->m_ssdSpdkBdev = NULL; + ctrl->m_ssdSpdkBdevDesc = NULL; + + // Open bdev + rc = spdk_bdev_open_ext(ctrl->m_ssdSpdkBdevName, true, SpdkBdevEventCallback, NULL, &ctrl->m_ssdSpdkBdevDesc); + if (rc) { + fprintf(stderr, "SPDKIO::BlockController::SpdkStart: spdk_bdev_open_ext failed, %d\n", rc); + ctrl->m_ssdSpdkThreadStartFailed = true; + spdk_app_stop(-1); + return; + } + ctrl->m_ssdSpdkBdev = spdk_bdev_desc_get_bdev(ctrl->m_ssdSpdkBdevDesc); + + // Open I/O channel + ctrl->m_ssdSpdkBdevIoChannel = spdk_bdev_get_io_channel(ctrl->m_ssdSpdkBdevDesc); + if (ctrl->m_ssdSpdkBdevIoChannel == NULL) { + fprintf(stderr, "SPDKIO::BlockController::SpdkStart: spdk_bdev_get_io_channel failed\n"); + spdk_bdev_close(ctrl->m_ssdSpdkBdevDesc); + ctrl->m_ssdSpdkThreadStartFailed = true; + spdk_app_stop(-1); + return; + } + + ctrl->m_ssdSpdkThreadReady = true; + m_ssdInflight = 0; + + SpdkIoLoop(ctrl); +} + +void* SPDKIO::BlockController::InitializeSpdk(void *arg) { + SPDKIO::BlockController* ctrl = (SPDKIO::BlockController *)arg; + 
+ struct spdk_app_opts opts; + spdk_app_opts_init(&opts, sizeof(opts)); + opts.name = "spfresh"; + const char* spdkConf = getenv(kSpdkConfEnv); + opts.json_config_file = spdkConf ? spdkConf : ""; + const char* spdkBdevName = getenv(kSpdkBdevNameEnv); + ctrl->m_ssdSpdkBdevName = spdkBdevName ? spdkBdevName : ""; + const char* spdkIoDepth = getenv(kSpdkIoDepth); + if (spdkIoDepth) ctrl->m_ssdSpdkIoDepth = atoi(spdkIoDepth); + + int rc; + rc = spdk_app_start(&opts, &SPTAG::SPANN::SPDKIO::BlockController::SpdkStart, arg); + if (rc) { + ctrl->m_ssdSpdkThreadStartFailed = true; + } else { + spdk_app_fini(); + } + pthread_exit(NULL); +} + +bool SPDKIO::BlockController::Initialize(int batchSize) { + std::lock_guard lock(m_initMutex); + m_numInitCalled++; + + const char* useMemImplEnvStr = getenv(kUseMemImplEnv); + m_useMemImpl = useMemImplEnvStr && !strcmp(useMemImplEnvStr, "1"); + const char* useSsdImplEnvStr = getenv(kUseSsdImplEnv); + m_useSsdImpl = useSsdImplEnvStr && !strcmp(useSsdImplEnvStr, "1"); + if (m_useMemImpl) { + if (m_numInitCalled == 1) { + if (m_memBuffer == nullptr) { + m_memBuffer.reset(new char[kMemImplMaxNumBlocks * PageSize]); + } + for (AddressType i = 0; i < kMemImplMaxNumBlocks; i++) { + m_blockAddresses.push(i); + } + } + return true; + } else if (m_useSsdImpl) { + if (m_numInitCalled == 1) { + m_batchSize = batchSize; + for (AddressType i = 0; i < kSsdImplMaxNumBlocks; i++) { + m_blockAddresses.push(i); + } + pthread_create(&m_ssdSpdkTid, NULL, &InitializeSpdk, this); + while (!m_ssdSpdkThreadReady && !m_ssdSpdkThreadStartFailed); + if (m_ssdSpdkThreadStartFailed) { + fprintf(stderr, "SPDKIO::BlockController::Initialize failed\n"); + return false; + } + } + // Create sub I/O request pool + m_currIoContext.sub_io_requests.resize(m_ssdSpdkIoDepth); + m_currIoContext.in_flight = 0; + uint32_t buf_align; + buf_align = spdk_bdev_get_buf_align(m_ssdSpdkBdev); + for (auto &sr : m_currIoContext.sub_io_requests) { + sr.completed_sub_io_requests = 
&(m_currIoContext.completed_sub_io_requests); + sr.app_buff = nullptr; + sr.dma_buff = spdk_dma_zmalloc(PageSize, buf_align, NULL); + sr.ctrl = this; + m_currIoContext.free_sub_io_requests.push_back(&sr); + } + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::Initialize failed\n"); + return false; + } +} + +// get p_size blocks from front, and fill in p_data array +bool SPDKIO::BlockController::GetBlocks(AddressType* p_data, int p_size) { + AddressType currBlockAddress = 0; + if (m_useMemImpl || m_useSsdImpl) { + for (int i = 0; i < p_size; i++) { + while (!m_blockAddresses.try_pop(currBlockAddress)); + p_data[i] = currBlockAddress; + } + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::GetBlocks failed\n"); + return false; + } +} + +// release p_size blocks, put them at the end of the queue +bool SPDKIO::BlockController::ReleaseBlocks(AddressType* p_data, int p_size) { + if (m_useMemImpl || m_useSsdImpl) { + for (int i = 0; i < p_size; i++) { + // m_blockAddresses.push(p_data[i]); + m_blockAddresses_reserve.push(p_data[i]); + } + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::ReleaseBlocks failed\n"); + return false; + } +} + +// read a posting list. p_data[0] is the total data size, +// p_data[1], p_data[2], ..., p_data[((p_data[0] + PageSize - 1) >> PageSizeEx)] are the addresses of the blocks +// concat all the block contents together into p_value string. +bool SPDKIO::BlockController::ReadBlocks(AddressType* p_data, std::string* p_value, const std::chrono::microseconds &timeout) { + if (m_useMemImpl) { + p_value->resize(p_data[0]); + AddressType currOffset = 0; + AddressType dataIdx = 1; + while (currOffset < p_data[0]) { + AddressType readSize = (p_data[0] - currOffset) < PageSize ? 
(p_data[0] - currOffset) : PageSize; + memcpy((void *)p_value->data() + currOffset, m_memBuffer.get() + p_data[dataIdx] * PageSize, readSize); + currOffset += PageSize; + dataIdx++; + } + return true; + } else if (m_useSsdImpl) { + p_value->resize(p_data[0]); + AddressType currOffset = 0; + AddressType dataIdx = 1; + SubIoRequest* currSubIo; + + // Clear timeout I/Os + while (m_currIoContext.in_flight) { + if (m_currIoContext.completed_sub_io_requests.try_pop(currSubIo)) { + currSubIo->app_buff = nullptr; + m_currIoContext.free_sub_io_requests.push_back(currSubIo); + m_currIoContext.in_flight--; + } + } + + auto t1 = std::chrono::high_resolution_clock::now(); + // Submit all I/Os + while (currOffset < p_data[0] || m_currIoContext.in_flight) { + auto t2 = std::chrono::high_resolution_clock::now(); + if (std::chrono::duration_cast(t2 - t1) > timeout) { + return false; + } + // Try submit + if (currOffset < p_data[0] && m_currIoContext.free_sub_io_requests.size()) { + currSubIo = m_currIoContext.free_sub_io_requests.back(); + m_currIoContext.free_sub_io_requests.pop_back(); + currSubIo->app_buff = (void *)p_value->data() + currOffset; + currSubIo->real_size = (p_data[0] - currOffset) < PageSize ? (p_data[0] - currOffset) : PageSize; + currSubIo->is_read = true; + currSubIo->offset = p_data[dataIdx] * PageSize; + m_submittedSubIoRequests.push(currSubIo); + currOffset += PageSize; + dataIdx++; + m_currIoContext.in_flight++; + } + // Try complete + if (m_currIoContext.in_flight && m_currIoContext.completed_sub_io_requests.try_pop(currSubIo)) { + memcpy(currSubIo->app_buff, currSubIo->dma_buff, currSubIo->real_size); + currSubIo->app_buff = nullptr; + m_currIoContext.free_sub_io_requests.push_back(currSubIo); + m_currIoContext.in_flight--; + } + } + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::ReadBlocks single failed\n"); + return false; + } +} + +// parallel read a list of posting lists. 
+bool SPDKIO::BlockController::ReadBlocks(std::vector& p_data, std::vector* p_values, const std::chrono::microseconds &timeout) { + if (m_useMemImpl) { + p_values->resize(p_data.size()); + for (size_t i = 0; i < p_data.size(); i++) { + ReadBlocks(p_data[i], &((*p_values)[i])); + } + return true; + } else if (m_useSsdImpl) { + // Temporarily disable timeout + + // Convert request format to SubIoRequests + auto t1 = std::chrono::high_resolution_clock::now(); + p_values->resize(p_data.size()); + std::vector subIoRequests; + std::vector subIoRequestCount(p_data.size(), 0); + subIoRequests.reserve(256); + for (size_t i = 0; i < p_data.size(); i++) { + AddressType* p_data_i = p_data[i]; + std::string* p_value = &((*p_values)[i]); + + p_value->resize(p_data_i[0]); + AddressType currOffset = 0; + AddressType dataIdx = 1; + + while (currOffset < p_data_i[0]) { + SubIoRequest currSubIo; + currSubIo.app_buff = (void *)p_value->data() + currOffset; + currSubIo.real_size = (p_data_i[0] - currOffset) < PageSize ? (p_data_i[0] - currOffset) : PageSize; + currSubIo.is_read = true; + currSubIo.offset = p_data_i[dataIdx] * PageSize; + currSubIo.posting_id = i; + subIoRequests.push_back(currSubIo); + subIoRequestCount[i]++; + currOffset += PageSize; + dataIdx++; + } + } + + // Clear timeout I/Os + while (m_currIoContext.in_flight) { + SubIoRequest* currSubIo; + if (m_currIoContext.completed_sub_io_requests.try_pop(currSubIo)) { + currSubIo->app_buff = nullptr; + m_currIoContext.free_sub_io_requests.push_back(currSubIo); + m_currIoContext.in_flight--; + } + } + + const int batch_size = m_batchSize; + for (int currSubIoStartId = 0; currSubIoStartId < subIoRequests.size(); currSubIoStartId += batch_size) { + int currSubIoEndId = (currSubIoStartId + batch_size) > subIoRequests.size() ? 
subIoRequests.size() : currSubIoStartId + batch_size; + int currSubIoIdx = currSubIoStartId; + SubIoRequest* currSubIo; + while (currSubIoIdx < currSubIoEndId || m_currIoContext.in_flight) { + auto t2 = std::chrono::high_resolution_clock::now(); + if (std::chrono::duration_cast(t2 - t1) > timeout) { + break; + } + // Try submit + if (currSubIoIdx < currSubIoEndId && m_currIoContext.free_sub_io_requests.size()) { + currSubIo = m_currIoContext.free_sub_io_requests.back(); + m_currIoContext.free_sub_io_requests.pop_back(); + currSubIo->app_buff = subIoRequests[currSubIoIdx].app_buff; + currSubIo->real_size = subIoRequests[currSubIoIdx].real_size; + currSubIo->is_read = true; + currSubIo->offset = subIoRequests[currSubIoIdx].offset; + currSubIo->posting_id = subIoRequests[currSubIoIdx].posting_id; + m_submittedSubIoRequests.push(currSubIo); + m_currIoContext.in_flight++; + currSubIoIdx++; + } + // Try complete + if (m_currIoContext.in_flight && m_currIoContext.completed_sub_io_requests.try_pop(currSubIo)) { + memcpy(currSubIo->app_buff, currSubIo->dma_buff, currSubIo->real_size); + currSubIo->app_buff = nullptr; + subIoRequestCount[currSubIo->posting_id]--; + m_currIoContext.free_sub_io_requests.push_back(currSubIo); + m_currIoContext.in_flight--; + } + } + + auto t2 = std::chrono::high_resolution_clock::now(); + if (std::chrono::duration_cast(t2 - t1) > timeout) { + break; + } + } + + for (int i = 0; i < subIoRequestCount.size(); i++) { + if (subIoRequestCount[i] != 0) { + (*p_values)[i].clear(); + } + } + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::ReadBlocks batch failed\n"); + return false; + } +} + +// write p_value into p_size blocks start from p_data +bool SPDKIO::BlockController::WriteBlocks(AddressType* p_data, int p_size, const std::string& p_value) { + if (m_useMemImpl) { + for (int i = 0; i < p_size; i++) { + memcpy(m_memBuffer.get() + p_data[i] * PageSize, p_value.data() + i * PageSize, PageSize); + } + return true; + } else if 
(m_useSsdImpl) { + AddressType currBlockIdx = 0; + int inflight = 0; + SubIoRequest* currSubIo; + int totalSize = p_value.size(); + // Submit all I/Os + while (currBlockIdx < p_size || inflight) { + // Try submit + if (currBlockIdx < p_size && m_currIoContext.free_sub_io_requests.size()) { + currSubIo = m_currIoContext.free_sub_io_requests.back(); + m_currIoContext.free_sub_io_requests.pop_back(); + currSubIo->app_buff = const_cast(p_value.data()) + currBlockIdx * PageSize; + currSubIo->real_size = (PageSize * (currBlockIdx + 1)) > totalSize ? (totalSize - currBlockIdx * PageSize): PageSize; + currSubIo->is_read = false; + currSubIo->offset = p_data[currBlockIdx] * PageSize; + memcpy(currSubIo->dma_buff, currSubIo->app_buff, currSubIo->real_size); + m_submittedSubIoRequests.push(currSubIo); + currBlockIdx++; + inflight++; + } + // Try complete + if (inflight && m_currIoContext.completed_sub_io_requests.try_pop(currSubIo)) { + currSubIo->app_buff = nullptr; + m_currIoContext.free_sub_io_requests.push_back(currSubIo); + inflight--; + } + } + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::ReadBlocks single failed\n"); + return false; + } +} + +bool SPDKIO::BlockController::IOStatistics() { + int currIOCount = m_ioCompleteCount; + int diffIOCount = currIOCount - m_preIOCompleteCount; + m_preIOCompleteCount = currIOCount; + + auto currTime = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(currTime - m_preTime); + m_preTime = currTime; + + double currIOPS = (double)diffIOCount * 1000 / duration.count(); + double currBandWidth = (double)diffIOCount * PageSize / 1024 * 1000 / 1024 * 1000 / duration.count(); + + std::cout << "IOPS: " << currIOPS << "k Bandwidth: " << currBandWidth << "MB/s" << std::endl; + + return true; +} + +bool SPDKIO::BlockController::ShutDown() { + std::lock_guard lock(m_initMutex); + m_numInitCalled--; + + if (m_useMemImpl) { + if (m_numInitCalled == 0) { + while (!m_blockAddresses.empty()) 
{ + AddressType currBlockAddress; + m_blockAddresses.try_pop(currBlockAddress); + } + } + return true; + } else if (m_useSsdImpl) { + if (m_numInitCalled == 0) { + m_ssdSpdkThreadExiting = true; + spdk_app_start_shutdown(); + pthread_join(m_ssdSpdkTid, NULL); + while (!m_blockAddresses.empty()) { + AddressType currBlockAddress; + m_blockAddresses.try_pop(currBlockAddress); + } + } + + SubIoRequest* currSubIo; + while (m_currIoContext.in_flight) { + if (m_currIoContext.completed_sub_io_requests.try_pop(currSubIo)) { + currSubIo->app_buff = nullptr; + m_currIoContext.free_sub_io_requests.push_back(currSubIo); + m_currIoContext.in_flight--; + } + } + // Free memory buffers + for (auto &sr : m_currIoContext.sub_io_requests) { + sr.completed_sub_io_requests = nullptr; + sr.app_buff = nullptr; + spdk_free(sr.dma_buff); + sr.dma_buff = nullptr; + } + m_currIoContext.free_sub_io_requests.clear(); + return true; + } else { + fprintf(stderr, "SPDKIO::BlockController::ShutDown failed\n"); + return false; + } +} + +} diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index f059c0a3..0be3c158 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -3,8 +3,11 @@ #include "inc/Core/SPANN/Index.h" #include "inc/Helper/VectorSetReaders/MemoryReader.h" -#include "inc/Core/SPANN/ExtraFullGraphSearcher.h" +#include "inc/Core/SPANN/ExtraStaticSearcher.h" +#include "inc/Core/SPANN/ExtraDynamicSearcher.h" +#include #include +#include #pragma warning(disable:4242) // '=' : conversion from 'int' to 'short', possible loss of data #pragma warning(disable:4244) // '=' : conversion from 'int' to 'short', possible loss of data @@ -80,6 +83,7 @@ namespace SPTAG template ErrorCode Index::LoadIndexDataFromMemory(const std::vector& p_indexBlobs) { + /** Need to modify **/ m_index->SetQuantizer(m_pQuantizer); if (m_index->LoadIndexDataFromMemory(p_indexBlobs) != ErrorCode::Success) return ErrorCode::Fail; 
@@ -91,17 +95,27 @@ namespace SPTAG if (m_pQuantizer) { - m_extraSearcher.reset(new ExtraFullGraphSearcher()); + m_extraSearcher.reset(new ExtraStaticSearcher()); } else { - m_extraSearcher.reset(new ExtraFullGraphSearcher()); + if (m_options.m_useKV) { + if (m_options.m_inPlace) { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_KVPath.c_str(), m_options.m_dim, INT_MAX, m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold)); + } + else { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_KVPath.c_str(), m_options.m_dim, m_options.m_postingPageLimit * PageSize / (sizeof(T) * m_options.m_dim + sizeof(int) + sizeof(uint8_t)), m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold)); + } + } + else { + m_extraSearcher.reset(new ExtraStaticSearcher()); + } } - - if (!m_extraSearcher->LoadIndex(m_options)) return ErrorCode::Fail; + + if (!m_extraSearcher->LoadIndex(m_options, m_versionMap, m_vectorTranslateMap, m_index)) return ErrorCode::Fail; m_vectorTranslateMap.reset((std::uint64_t*)(p_indexBlobs.back().Data()), [=](std::uint64_t* ptr) {}); - + omp_set_num_threads(m_options.m_iSSDNumberOfThreads); return ErrorCode::Success; } @@ -110,29 +124,88 @@ namespace SPTAG ErrorCode Index::LoadIndexData(const std::vector>& p_indexStreams) { m_index->SetQuantizer(m_pQuantizer); - if (m_index->LoadIndexData(p_indexStreams) != ErrorCode::Success) return ErrorCode::Fail; + + auto headfiles = m_index->GetIndexFiles(); + if (m_options.m_recovery) { + std::shared_ptr> files(new std::vector); + auto headfiles = m_index->GetIndexFiles(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: Loading another in-memory index\n"); + std::string filename = m_options.m_persistentBufferPath + "_headIndex"; + for (auto file : *headfiles) { + files->push_back(filename + FolderSep + file); + } + std::vector> handles; + for (std::string& f : *files) { + auto ptr = SPTAG::f_createIO(); + if (ptr == nullptr || 
!ptr->Initialize(f.c_str(), std::ios::binary | std::ios::in)) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open file %s!\n", f.c_str()); + ptr = nullptr; + } + handles.push_back(std::move(ptr)); + } + m_index->LoadIndexData(handles); + } else if (m_index->LoadIndexData(p_indexStreams) != ErrorCode::Success) return ErrorCode::Fail; m_index->SetParameter("NumberOfThreads", std::to_string(m_options.m_iSSDNumberOfThreads)); - //m_index->SetParameter("MaxCheck", std::to_string(m_options.m_maxCheck)); - //m_index->SetParameter("HashTableExponent", std::to_string(m_options.m_hashExp)); + m_index->SetParameter("MaxCheck", std::to_string(m_options.m_maxCheck)); + m_index->SetParameter("HashTableExponent", std::to_string(m_options.m_hashExp)); m_index->UpdateIndex(); m_index->SetReady(true); + if (m_options.m_recovery) { + m_options.m_KVPath = m_options.m_persistentBufferPath + "_rocksdb"; + m_options.m_spdkMappingPath = m_options.m_persistentBufferPath; + } + if (m_pQuantizer) { - m_extraSearcher.reset(new ExtraFullGraphSearcher()); + m_extraSearcher.reset(new ExtraStaticSearcher()); } else { - m_extraSearcher.reset(new ExtraFullGraphSearcher()); + if (m_options.m_useKV) { + if (m_options.m_inPlace) { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_KVPath.c_str(), m_options.m_dim, INT_MAX, m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold, false, m_options.m_spdkBatchSize, m_options.m_bufferLength, m_options.m_recovery)); + } + else { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_KVPath.c_str(), m_options.m_dim, m_options.m_postingPageLimit * PageSize / (sizeof(T) * m_options.m_dim + sizeof(int) + sizeof(uint8_t)), m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold, false, m_options.m_spdkBatchSize, m_options.m_bufferLength, m_options.m_recovery)); + } + } + else if (m_options.m_useSPDK) { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_spdkMappingPath.c_str(), 
m_options.m_dim, m_options.m_postingPageLimit, m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold, true, m_options.m_spdkBatchSize, m_options.m_bufferLength, m_options.m_recovery)); + } else { + m_extraSearcher.reset(new ExtraStaticSearcher()); + } } - if (!m_extraSearcher->LoadIndex(m_options)) return ErrorCode::Fail; + if (!m_options.m_recovery) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading headID map\n"); + m_vectorTranslateMap.reset(new std::uint64_t[m_index->GetNumSamples()], std::default_delete()); + IOBINARY(p_indexStreams[m_index->GetIndexFiles()->size()], ReadBinary, sizeof(std::uint64_t) * m_index->GetNumSamples(), reinterpret_cast(m_vectorTranslateMap.get())); + } + omp_set_num_threads(m_options.m_iSSDNumberOfThreads); - m_vectorTranslateMap.reset(new std::uint64_t[m_index->GetNumSamples()], std::default_delete()); - IOBINARY(p_indexStreams[m_index->GetIndexFiles()->size()], ReadBinary, sizeof(std::uint64_t) * m_index->GetNumSamples(), reinterpret_cast(m_vectorTranslateMap.get())); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading storage\n"); + if (!m_extraSearcher->LoadIndex(m_options, m_versionMap, m_vectorTranslateMap, m_index)) return ErrorCode::Fail; + + if ((m_options.m_useSPDK || m_options.m_useKV) && m_options.m_preReassign) { + std::shared_ptr vectorOptions(new Helper::ReaderOptions(m_options.m_valueType, m_options.m_dim, m_options.m_vectorType, m_options.m_vectorDelimiter, m_options.m_iSSDNumberOfThreads)); + auto vectorReader = Helper::VectorSetReader::CreateInstance(vectorOptions); + if (m_options.m_vectorPath.empty()) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Vector file is empty. 
Skipping loading.\n"); + } + else { + if (ErrorCode::Success != vectorReader->LoadFile(m_options.m_vectorPath)) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read vector file.\n"); + return ErrorCode::Fail; + } + // m_options.m_vectorSize = vectorReader->GetVectorSet()->Count(); + } + m_extraSearcher->RefineIndex(vectorReader, m_index); + } - omp_set_num_threads(m_options.m_iSSDNumberOfThreads); return ErrorCode::Success; } @@ -182,7 +255,8 @@ namespace SPTAG ErrorCode ret; if ((ret = m_index->SaveIndexData(p_indexStreams)) != ErrorCode::Success) return ret; - IOBINARY(p_indexStreams[m_index->GetIndexFiles()->size()], WriteBinary, sizeof(std::uint64_t) * m_index->GetNumSamples(), (char*)(m_vectorTranslateMap.get())); + if (m_options.m_excludehead) IOBINARY(p_indexStreams[m_index->GetIndexFiles()->size()], WriteBinary, sizeof(std::uint64_t) * m_index->GetNumSamples(), (char*)(m_vectorTranslateMap.get())); + m_versionMap.Save(m_options.m_deleteIDFile); return ErrorCode::Success; } @@ -218,10 +292,9 @@ namespace SPTAG { auto res = p_queryResults->GetResult(i); if (res->VID == -1) break; - auto postingID = res->VID; - res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); - if (res->VID == MaxSize) { + if (m_vectorTranslateMap.get() != nullptr) res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); + else { res->VID = -1; res->Dist = MaxDist; } @@ -291,7 +364,11 @@ namespace SPTAG { workSpace->m_postingIDs.emplace_back(res->VID); } - res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); + if (m_vectorTranslateMap.get() != nullptr) res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); + else { + res->VID = -1; + res->Dist = MaxDist; + } if (res->VID == MaxSize) { res->VID = -1; @@ -303,7 +380,11 @@ namespace SPTAG { auto res = p_queryResults->GetResult(i); if (res->VID == -1) break; - res->VID = static_cast((m_vectorTranslateMap.get())[res->VID]); + if (m_vectorTranslateMap.get() != nullptr) res->VID = 
static_cast((m_vectorTranslateMap.get())[res->VID]); + else { + res->VID = -1; + res->Dist = MaxDist; + } if (res->VID == MaxSize) { res->VID = -1; @@ -333,10 +414,6 @@ namespace SPTAG auto global_VID = static_cast((m_vectorTranslateMap.get())[res->VID]); if (truth && truth->count(global_VID)) (*found)[res->VID].insert(global_VID); res->VID = global_VID; - if (res->VID == MaxSize) { - res->VID = -1; - res->Dist = MaxDist; - } } newResults.Reverse(); @@ -767,10 +844,33 @@ namespace SPTAG if (m_pQuantizer) { - m_extraSearcher.reset(new ExtraFullGraphSearcher()); + m_extraSearcher.reset(new ExtraStaticSearcher()); + } + else if (m_options.m_useKV) + { + if (m_options.m_inPlace) { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_KVPath.c_str(), m_options.m_dim, INT_MAX, m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold)); + } + else { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_KVPath.c_str(), m_options.m_dim, m_options.m_postingPageLimit * PageSize / (sizeof(T)*m_options.m_dim + sizeof(int) + sizeof(uint8_t)), m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold)); + } + } else if (m_options.m_useSPDK) + { + if (m_options.m_inPlace) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Currently unsupport SPDK with inplace!\n"); + exit(1); + } + else { + m_extraSearcher.reset(new ExtraDynamicSearcher(m_options.m_spdkMappingPath.c_str(), m_options.m_dim, m_options.m_postingPageLimit, m_options.m_useDirectIO, m_options.m_latencyLimit, m_options.m_mergeThreshold, true, m_options.m_spdkBatchSize)); + } } else { - m_extraSearcher.reset(new ExtraFullGraphSearcher()); + if (m_pQuantizer) { + m_extraSearcher.reset(new ExtraStaticSearcher()); + } + else { + m_extraSearcher.reset(new ExtraStaticSearcher()); + } } if (m_options.m_buildSsdIndex) { @@ -787,12 +887,17 @@ namespace SPTAG } } - if (!m_extraSearcher->BuildIndex(p_reader, m_index, m_options)) { + if (!m_extraSearcher->BuildIndex(p_reader, m_index, 
m_options, m_versionMap)) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "BuildSSDIndex Failed!\n"); - return ErrorCode::Fail; + if (m_options.m_buildSsdIndex) { + return ErrorCode::Fail; + } + else { + m_extraSearcher.reset(); + } } } - if (!m_extraSearcher->LoadIndex(m_options)) { + if (!m_extraSearcher->LoadIndex(m_options, m_versionMap, m_vectorTranslateMap, m_index)) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot Load SSDIndex!\n"); if (m_options.m_buildSsdIndex) { return ErrorCode::Fail; @@ -810,8 +915,12 @@ namespace SPTAG return ErrorCode::Fail; } IOBINARY(ptr, ReadBinary, sizeof(std::uint64_t) * m_index->GetNumSamples(), (char*)(m_vectorTranslateMap.get())); + if ((m_options.m_useKV || m_options.m_useSPDK) && m_options.m_preReassign) { + m_extraSearcher->RefineIndex(p_reader, m_index); + } } } + auto t4 = std::chrono::high_resolution_clock::now(); double buildSSDTime = std::chrono::duration_cast(t4 - t3).count(); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "select head time: %.2lfs build head time: %.2lfs build ssd time: %.2lfs\n", selectHeadTime, buildHeadTime, buildSSDTime); @@ -845,7 +954,6 @@ namespace SPTAG } m_options.m_vectorSize = vectorReader->GetVectorSet()->Count(); } - return BuildIndexInternal(vectorReader); } @@ -934,6 +1042,101 @@ namespace SPTAG return m_options.GetParameter(p_section, p_param); } } + + // Add insert entry to persistent buffer + template + ErrorCode Index::AddIndex(const void *p_data, SizeType p_vectorNum, DimensionType p_dimension, + std::shared_ptr p_metadataSet, bool p_withMetaIndex, + bool p_normalized) + { + if ((!m_options.m_useKV &&!m_options.m_useSPDK) || m_extraSearcher == nullptr) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Only Support KV Extra Update\n"); + return ErrorCode::Fail; + } + + if (p_data == nullptr || p_vectorNum == 0 || p_dimension == 0) return ErrorCode::EmptyData; + if (p_dimension != GetFeatureDim()) return ErrorCode::DimensionSizeMismatch; + + SizeType begin, end; + { + std::lock_guard 
lock(m_dataAddLock); + + begin = m_versionMap.GetVectorNum(); + end = begin + p_vectorNum; + + if (begin == 0) { return ErrorCode::EmptyIndex; } + + if (m_versionMap.AddBatch(p_vectorNum) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "MemoryOverFlow: VID: %d, Map Size:%d\n", begin, m_versionMap.BufferSize()); + exit(1); + } + + if (m_pMetadata != nullptr) { + if (p_metadataSet != nullptr) { + m_pMetadata->AddBatch(*p_metadataSet); + if (HasMetaMapping()) { + for (SizeType i = begin; i < end; i++) { + ByteArray meta = m_pMetadata->GetMetadata(i); + std::string metastr((char*)meta.Data(), meta.Length()); + UpdateMetaMapping(metastr, i); + } + } + } + else { + for (SizeType i = begin; i < end; i++) m_pMetadata->Add(ByteArray::c_empty); + } + } + } + + std::shared_ptr vectorSet; + if (m_options.m_distCalcMethod == DistCalcMethod::Cosine && !p_normalized) { + ByteArray arr = ByteArray::Alloc(sizeof(T) * p_vectorNum * p_dimension); + memcpy(arr.Data(), p_data, sizeof(T) * p_vectorNum * p_dimension); + vectorSet.reset(new BasicVectorSet(arr, GetEnumValueType(), p_dimension, p_vectorNum)); + int base = COMMON::Utils::GetBase(); + for (SizeType i = 0; i < p_vectorNum; i++) { + COMMON::Utils::Normalize((T*)(vectorSet->GetVector(i)), p_dimension, base); + } + } + else { + vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)p_data, sizeof(T) * p_vectorNum * p_dimension, false), + GetEnumValueType(), p_dimension, p_vectorNum)); + } + + return m_extraSearcher->AddIndex(vectorSet, m_index, begin); + } + + template + ErrorCode Index::DeleteIndex(const SizeType &p_id) + { + // if (m_versionMap.Delete(p_id)) return ErrorCode::Success; + // return ErrorCode::VectorNotFound; + return m_extraSearcher->DeleteIndex(p_id); + } + + template + ErrorCode Index::DeleteIndex(const void* p_vectors, SizeType p_vectorNum) + { + // TODO: Support batch delete + DimensionType p_dimension = GetFeatureDim(); + std::shared_ptr vectorSet; + if (m_options.m_distCalcMethod == 
DistCalcMethod::Cosine) { + ByteArray arr = ByteArray::Alloc(sizeof(T) * p_vectorNum * p_dimension); + memcpy(arr.Data(), p_vectors, sizeof(T) * p_vectorNum * p_dimension); + vectorSet.reset(new BasicVectorSet(arr, GetEnumValueType(), p_dimension, p_vectorNum)); + int base = COMMON::Utils::GetBase(); + for (SizeType i = 0; i < p_vectorNum; i++) { + COMMON::Utils::Normalize((T*)(vectorSet->GetVector(i)), p_dimension, base); + } + } + else { + vectorSet.reset(new BasicVectorSet(ByteArray((std::uint8_t*)p_vectors, sizeof(T) * 1 * p_dimension, false), + GetEnumValueType(), p_dimension, 1)); + } + SizeType p_id = m_extraSearcher->SearchVector(vectorSet, m_index); + if (p_id == -1) return ErrorCode::VectorNotFound; + return DeleteIndex(p_id); + } } } diff --git a/AnnService/src/SPFresh/main.cpp b/AnnService/src/SPFresh/main.cpp new file mode 100644 index 00000000..45fd8b89 --- /dev/null +++ b/AnnService/src/SPFresh/main.cpp @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include + +#include "inc/Core/Common.h" +#include "inc/Core/VectorIndex.h" +#include "inc/Core/SPANN/Index.h" +#include "inc/Helper/SimpleIniReader.h" +#include "inc/Helper/VectorSetReader.h" +#include "inc/Helper/StringConvert.h" +#include "inc/Core/Common/TruthSet.h" + +#include "inc/SPFresh/SPFresh.h" + +using namespace SPTAG; + +// switch between exe and static library by _$(OutputType) +#ifdef _exe + +int main(int argc, char* argv[]) { + if (argc < 2) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, + "spfresh storePath\n"); + exit(-1); + } + + auto ret = SSDServing::SPFresh::UpdateTest(argv[1]); + return ret; +} + +#endif diff --git a/CMakeLists.txt b/CMakeLists.txt index d6da2a83..9031a454 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") if (CXX_COMPILER_VERSION VERSION_LESS 5.0) message(FATAL_ERROR "GCC version must be at least 5.0!") endif() - set (CMAKE_CXX_FLAGS "-Wall -Wunreachable-code -Wno-reorder -Wno-delete-non-virtual-dtor -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -std=c++14 -fopenmp") + set (CMAKE_CXX_FLAGS "-Wall -Wunreachable-code -Wno-reorder -Wno-delete-non-virtual-dtor -Wno-sign-compare -Wno-unknown-pragmas -Wcast-align -lm -lrt -std=c++17 -fopenmp") set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -march=native") set (CMAKE_CXX_FLAGS_DEBUG "-g -DDEBUG") @@ -117,6 +117,11 @@ endif() option(GPU "GPU" ON) option(LIBRARYONLY "LIBRARYONLY" OFF) +option(ROCKSDB "ROCKSDB" ON) + +if (ROCKSDB) + add_definitions(-DROCKSDB) +endif() add_subdirectory (ThirdParty/zstd/build/cmake) diff --git a/LICENSE b/LICENSE index d1ca00f2..21071075 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE \ No newline at end of file + SOFTWARE diff --git 
a/README.md b/README.md index cc380fa8..66923309 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,45 @@ Run the SPTAGTest (or Test.exe) in the Release folder to verify all the tests ha The detailed usage can be found in [Get started](docs/GettingStart.md). There is also an end-to-end tutorial for building vector search online service using Python Wrapper in [Python Tutorial](docs/Tutorial.ipynb). The detailed parameters tunning can be found in [Parameters](docs/Parameters.md). +## **Build** +> Clone the repository and submodules +```bash +git clone git@github.com:MaggieQi/SPFresh.git +git submodule update --init --recursive +``` + +> Compile SPDK +```bash +cd ThirdParty/spdk +./scripts/pkgdep.sh +CC=gcc-9 ./configure +CC=gcc-9 make -j +``` +Remember to use a newer version of gcc for **both configure and compile**. + +> Compile isal-l_crypto +```bash +cd ThirdParty/isal-l_crypto +./autogen.sh +./configure +make -j +``` + +> Build RocksDB +```bash +mkdir build && cd build +cmake -DUSE_RTTI=1 -DWITH_JEMALLOC=1 -DWITH_SNAPPY=1 -DCMAKE_C_COMPILER=gcc-7 -DCMAKE_CXX_COMPILER=g++-7 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-fPIC" .. +make -j +sudo make install +``` + +> Build SPFresh +```bash +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Release .. 
+make -j +``` + ## **References** Please cite SPTAG in your publications if it helps your research: ``` diff --git a/Test/CMakeLists.txt b/Test/CMakeLists.txt index ed100276..52f4168a 100644 --- a/Test/CMakeLists.txt +++ b/Test/CMakeLists.txt @@ -19,7 +19,7 @@ if (NOT LIBRARYONLY) message (FATAL_ERROR "Could not find Boost 1.67!") endif() - include_directories(${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Test) + include_directories(${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Test ${PROJECT_SOURCE_DIR}/ThirdParty/spdk/build/include) file(GLOB TEST_HDR_FILES ${PROJECT_SOURCE_DIR}/Test/inc/Test.h) file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/Test/src/*.cpp) diff --git a/Test/Test.vcxproj b/Test/Test.vcxproj index 37007b0a..08611fc3 100644 --- a/Test/Test.vcxproj +++ b/Test/Test.vcxproj @@ -154,6 +154,7 @@ + diff --git a/Test/Test.vcxproj.filters b/Test/Test.vcxproj.filters index 39a60ba8..026da33f 100644 --- a/Test/Test.vcxproj.filters +++ b/Test/Test.vcxproj.filters @@ -54,6 +54,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/Test/src/KVTest.cpp b/Test/src/KVTest.cpp new file mode 100644 index 00000000..4a8c883b --- /dev/null +++ b/Test/src/KVTest.cpp @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "inc/Test.h" +// #include "inc/Core/SPANN/ExtraRocksDBController.h" +#include "inc/Core/SPANN/ExtraSPDKController.h" + +#include +#include + +// enable rocksdb io_uring +// extern "C" bool RocksDbIOUringEnable() { return true; } + +using namespace SPTAG; +using namespace SPTAG::SPANN; + +void Search(std::shared_ptr db, int internalResultNum, int totalSize, int times, bool debug = false) { + std::vector headIDs(internalResultNum, 0); + + std::vector values; + double latency = 0; + for (int i = 0; i < times; i++) { + values.clear(); + for (int j = 0; j < internalResultNum; j++) headIDs[j] = (j + i * internalResultNum) % totalSize; + auto t1 = std::chrono::high_resolution_clock::now(); + db->MultiGet(headIDs, &values); + auto t2 = std::chrono::high_resolution_clock::now(); + latency += std::chrono::duration_cast(t2 - t1).count(); + + if (debug) { + for (int j = 0; j < internalResultNum; j++) { + std::cout << values[j].substr(PageSize) << std::endl; + } + } + } + std::cout << "avg get time: " << (latency / (float)(times)) << "us" << std::endl; + +} + +void Test(std::string path, std::string type, bool debug = false) +{ + int internalResultNum = 64; + int totalNum = 1024; + int mergeIters = 3; + std::shared_ptr db; + // if (type == "RocksDB") { + // db.reset(new RocksDBIO(path.c_str(), true)); + // } else if (type == "SPDK") { + db.reset(new SPDKIO(path.c_str(), 1024 * 1024, MaxSize, 64)); + // } + + auto t1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < totalNum; i++) { + int len = std::to_string(i).length(); + std::string val(PageSize - len, '0'); + db->Put(i, val); + } + auto t2 = std::chrono::high_resolution_clock::now(); + std::cout << "avg put time: " << (std::chrono::duration_cast(t2 - t1).count() / (float)(totalNum)) << "us" << std::endl; + + db->ForceCompaction(); + + t1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < totalNum; i++) { + for (int j = 0; j < mergeIters; j++) { + db->Merge(i, std::to_string(i)); + 
} + } + t2 = std::chrono::high_resolution_clock::now(); + std::cout << "avg merge time: " << (std::chrono::duration_cast(t2 - t1).count() / (float)(totalNum * mergeIters)) << "us" << std::endl; + + Search(db, internalResultNum, totalNum, 10, debug); + + db->ForceCompaction(); + db->ShutDown(); + + // if (type == "RocksDB") { + // db.reset(new RocksDBIO(path.c_str(), true)); + // Search(db, internalResultNum, totalNum, 10, debug); + // db->ForceCompaction(); + // db->ShutDown(); + // } +} + +BOOST_AUTO_TEST_SUITE(KVTest) + +BOOST_AUTO_TEST_CASE(RocksDBTest) +{ + Test("tmp_rocksdb", "RocksDB", true); +} + +BOOST_AUTO_TEST_CASE(SPDKTest) +{ + Test("tmp_spdk", "SPDK", true); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/ThirdParty/RocksDB b/ThirdParty/RocksDB new file mode 160000 index 00000000..275cd80c --- /dev/null +++ b/ThirdParty/RocksDB @@ -0,0 +1 @@ +Subproject commit 275cd80cdb2de8e53c7ab805d74394309372005e diff --git a/ThirdParty/isal-l_crypto b/ThirdParty/isal-l_crypto new file mode 160000 index 00000000..08297dc3 --- /dev/null +++ b/ThirdParty/isal-l_crypto @@ -0,0 +1 @@ +Subproject commit 08297dc3e76d65e1bad83a9c9f9e49059cf806b5 diff --git a/ThirdParty/spdk b/ThirdParty/spdk new file mode 160000 index 00000000..10edc60a --- /dev/null +++ b/ThirdParty/spdk @@ -0,0 +1 @@ +Subproject commit 10edc60aa8b5f1b04d6496fea976dec75e276a95 diff --git a/bdev.json b/bdev.json new file mode 100644 index 00000000..445539f0 --- /dev/null +++ b/bdev.json @@ -0,0 +1,17 @@ +{ + "subsystems": [ + { + "subsystem": "bdev", + "config": [ + { + "method": "bdev_nvme_attach_controller", + "params": { + "trtype": "pcie", + "name": "Nvme0", + "traddr": "0000:9b:00.0" + } + } + ] + } + ] +}