From 706ddc3e5fc40a383dd03ecdd3ab67c43dea3677 Mon Sep 17 00:00:00 2001 From: Wei-Cheng Chang Date: Thu, 23 Feb 2023 03:42:38 +0000 Subject: [PATCH] Enabled C++ memory-map usage in PECOS-HNSW --- pecos/core/ann/hnsw.hpp | 297 ++++++++---------- pecos/core/ann/quantizer_impl/common.hpp | 78 ++--- .../ann/hnsw-model-dense/c_model/config.json | 6 +- .../hnsw-model-dense/c_model/index.mmap_store | Bin 0 -> 29104 bytes test/tst-data/ann/hnsw-model-dense/param.json | 2 +- .../ann/hnsw-model-sparse/c_model/config.json | 6 +- .../c_model/index.mmap_store | Bin 0 -> 29824 bytes .../tst-data/ann/hnsw-model-sparse/param.json | 2 +- 8 files changed, 160 insertions(+), 231 deletions(-) create mode 100644 test/tst-data/ann/hnsw-model-dense/c_model/index.mmap_store create mode 100644 test/tst-data/ann/hnsw-model-sparse/c_model/index.mmap_store diff --git a/pecos/core/ann/hnsw.hpp b/pecos/core/ann/hnsw.hpp index f9954b01..051d02b1 100644 --- a/pecos/core/ann/hnsw.hpp +++ b/pecos/core/ann/hnsw.hpp @@ -25,12 +25,11 @@ #include #include - - #include "ann/feat_vectors.hpp" #include "ann/quantizer.hpp" #include "third_party/nlohmann_json/json.hpp" #include "utils/file_util.hpp" +#include "utils/mmap_util.hpp" #include "utils/matrix.hpp" #include "utils/random.hpp" #include "utils/type_util.hpp" @@ -94,44 +93,27 @@ namespace ann { index_type feat_dim; index_type max_degree; index_type node_mem_size; - std::vector mem_start_of_node; - std::vector buffer; + mmap_util::MmapableVector mem_start_of_node; + mmap_util::MmapableVector buffer; size_t neighborhood_memory_size() const { return (1 + max_degree) * sizeof(index_type); } - void save(FILE *fp) const { - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&feat_dim, 1, fp); - pecos::file_util::fput_multiple(&max_degree, 1, fp); - pecos::file_util::fput_multiple(&node_mem_size, 1, fp); - size_t sz = mem_start_of_node.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&mem_start_of_node[0], sz, fp); - } - sz = buffer.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&buffer[0], sz, fp); - } + void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->feat_dim); + mmap_s.fput_one(this->max_degree); + mmap_s.fput_one(this->node_mem_size); + this->mem_start_of_node.save_to_mmap_store(mmap_s); + this->buffer.save_to_mmap_store(mmap_s); } - void load(FILE *fp) { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&feat_dim, 1, fp); - pecos::file_util::fget_multiple(&max_degree, 1, fp); - pecos::file_util::fget_multiple(&node_mem_size, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - mem_start_of_node.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&mem_start_of_node[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - buffer.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&buffer[0], sz, fp); - } + void load(mmap_util::MmapStore& mmap_s) { + this->num_node = mmap_s.fget_one(); + this->feat_dim = mmap_s.fget_one(); + this->max_degree = mmap_s.fget_one(); + this->node_mem_size = mmap_s.fget_one(); + this->mem_start_of_node.load_from_mmap_store(mmap_s); + this->buffer.load_from_mmap_store(mmap_s); } template @@ -198,33 +180,24 @@ namespace ann { index_type max_degree; index_type node_mem_size; index_type level_mem_size; - std::vector buffer; - - void save(FILE *fp) const { - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&max_level, 1, fp); - pecos::file_util::fput_multiple(&max_degree, 1, fp); - pecos::file_util::fput_multiple(&node_mem_size, 1, fp); - pecos::file_util::fput_multiple(&level_mem_size, 1, fp); - size_t sz = buffer.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&buffer[0], sz, fp); - } + mmap_util::MmapableVector buffer; + + void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->max_level); + mmap_s.fput_one(this->max_degree); + mmap_s.fput_one(this->node_mem_size); + mmap_s.fput_one(this->level_mem_size); + this->buffer.save_to_mmap_store(mmap_s); } - void load(FILE *fp) { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&max_level, 1, fp); - pecos::file_util::fget_multiple(&max_degree, 1, fp); - pecos::file_util::fget_multiple(&node_mem_size, 1, fp); - pecos::file_util::fget_multiple(&level_mem_size, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - buffer.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&buffer[0], sz, fp); - } + void load(mmap_util::MmapStore& mmap_s) { + this->num_node = mmap_s.fget_one(); + this->max_level = mmap_s.fget_one(); + this->max_degree = mmap_s.fget_one(); + this->node_mem_size = mmap_s.fget_one(); + this->level_mem_size = mmap_s.fget_one(); + this->buffer.load_from_mmap_store(mmap_s); } template @@ -246,7 +219,6 @@ namespace ann { template struct GraphProductQuantizer4Bits : GraphBase { typedef FeatVec_T feat_vec_t; - ProductQuantizer4Bits quantizer; index_type num_node; // code_dimension is number of 4 bits code used to encode a data point in GraphPQ4Bits // code_dimension can be different from parameter num_local_codebooks in quantizer @@ -254,55 +226,34 @@ namespace ann { // found in pad_parameters function of ann/quantizer_impl/x86.hpp size_t code_dimension; // code_offset helps to locate memory position containing neighboring codes - size_t code_offset; + size_t code_offset; size_t node_mem_size; index_type max_degree; - std::vector mem_start_of_node; - std::vector buffer; - - void save(FILE *fp) const { - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&code_dimension, 1, fp); - pecos::file_util::fput_multiple(&code_offset, 1, fp); - pecos::file_util::fput_multiple(&node_mem_size, 1, fp); - pecos::file_util::fput_multiple(&max_degree, 1, fp); - size_t sz = mem_start_of_node.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&mem_start_of_node[0], sz, fp); - } - sz = buffer.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&buffer[0], sz, fp); - } - quantizer.save(fp); - fclose(fp); - } - - void load(FILE *fp) { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&code_dimension, 1, fp); - pecos::file_util::fget_multiple(&code_offset, 1, fp); - pecos::file_util::fget_multiple(&node_mem_size, 1, fp); - pecos::file_util::fget_multiple(&max_degree, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - mem_start_of_node.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&mem_start_of_node[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - buffer.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&buffer[0], sz, fp); - } - - quantizer.load(fp); + mmap_util::MmapableVector mem_start_of_node; + mmap_util::MmapableVector buffer; + ProductQuantizer4Bits quantizer; - fclose(fp); + void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->code_dimension); + mmap_s.fput_one(this->code_offset); + mmap_s.fput_one(this->node_mem_size); + mmap_s.fput_one(this->max_degree); + this->mem_start_of_node.save_to_mmap_store(mmap_s); + this->buffer.save_to_mmap_store(mmap_s); + quantizer.save(mmap_s); } + void load(mmap_util::MmapStore& mmap_s) { + this->num_node = mmap_s.fget_one(); + this->code_dimension = mmap_s.fget_one(); + this->code_offset = mmap_s.fget_one(); + this->node_mem_size = mmap_s.fget_one(); + this->max_degree = mmap_s.fget_one(); + this->mem_start_of_node.load_from_mmap_store(mmap_s); + this->buffer.load_from_mmap_store(mmap_s); + quantizer.load(mmap_s); + } void build_quantizer(const pecos::drm_t& X_trn, index_type subspace_dimension, index_type sub_sample_points) { size_t code_dimension = X_trn.cols; @@ -384,7 +335,6 @@ namespace ann { } }; - template struct SetOfVistedNodes { T init_token, curr_token; @@ -508,6 +458,9 @@ namespace ann { GraphL0 graph_l0; // neighborhood graph along with feature vectors at level 0 GraphL1 graph_l1; // neighborhood graphs from level 1 and above + // for loading memory-mapped file + pecos::mmap_util::MmapStore mmap_store; + // destructor ~HNSW() {} @@ -534,7 +487,7 @@ namespace ann { void save_config(const std::string& filepath) const { nlohmann::json j_params = { {"hnsw_t", pecos::type_util::full_name()}, - {"version", "v1.0"}, + {"version", "v2.0"}, {"train_params", { {"num_node", this->num_node}, {"maxM", this->maxM}, @@ -561,37 +514,37 @@ namespace ann { } } save_config(model_dir + "/config.json"); - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "wb"); - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&maxM, 1, fp); - pecos::file_util::fput_multiple(&maxM0, 1, fp); - pecos::file_util::fput_multiple(&efC, 1, fp); - pecos::file_util::fput_multiple(&max_level, 1, fp); - pecos::file_util::fput_multiple(&init_node, 1, fp); - graph_l0.save(fp); - graph_l1.save(fp); - fclose(fp); + std::string index_path = model_dir + "/index.mmap_store"; + mmap_util::MmapStore mmap_s = mmap_util::MmapStore(); + mmap_s.open(index_path.c_str(), "w"); + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->maxM); + mmap_s.fput_one(this->maxM0); + mmap_s.fput_one(this->efC); + mmap_s.fput_one(this->max_level); + mmap_s.fput_one(this->init_node); + graph_l0.save(mmap_s); + graph_l1.save(mmap_s); + mmap_s.close(); } - void load(const std::string& model_dir) { + void load(const std::string& model_dir, bool lazy_load = false) { auto config = load_config(model_dir + "/config.json"); std::string version = config.find("version") != config.end() ? config["version"] : "not found"; - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "rb"); - if (version == "v1.0") { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&maxM, 1, fp); - pecos::file_util::fget_multiple(&maxM0, 1, fp); - pecos::file_util::fget_multiple(&efC, 1, fp); - pecos::file_util::fget_multiple(&max_level, 1, fp); - pecos::file_util::fget_multiple(&init_node, 1, fp); - graph_l0.load(fp); - graph_l1.load(fp); + if (version == "v2.0") { + std::string index_path = model_dir + "/index.mmap_store"; + mmap_store.open(index_path.c_str(), lazy_load ? "r_lazy" : "r"); + this->num_node = mmap_store.fget_one(); + this->maxM = mmap_store.fget_one(); + this->maxM0 = mmap_store.fget_one(); + this->efC = mmap_store.fget_one(); + this->max_level = mmap_store.fget_one(); + this->init_node = mmap_store.fget_one(); + graph_l0.load(mmap_store); + graph_l1.load(mmap_store); } else { - throw std::runtime_error("Unable to load this binary with version = " + version); + throw std::runtime_error("Unable to load memory-mapped file with version = " + version); } - fclose(fp); } // Algorithm 4 of HNSW paper @@ -1014,7 +967,7 @@ namespace ann { } }; - + // PECOS-HNSW-PQ4 Interface template struct HNSWProductQuantizer4Bits { typedef FeatVec_T feat_vec_t; @@ -1022,7 +975,6 @@ namespace ann { typedef heap_t> max_heap_t; typedef heap_t> min_heap_t; - // scalar variables index_type num_node; index_type maxM; // max number of out-degree for level l=1,...,L @@ -1031,18 +983,24 @@ namespace ann { index_type max_level; index_type init_node; index_type subspace_dimension; // dimension of each subspace in Product Quantization - index_type sub_sample_points; // number of sub-sampled points used to build quantizer subspace centors. + index_type sub_sample_points; // number of sub-sampled points used to build quantizer subspace centors. GraphL0 feature_vec; // feature vectors only GraphL1 graph_l1; // neighborhood graphs from level 1 and above GraphProductQuantizer4Bits graph_l0_pq4; // Productquantized4Bits neighborhood graph built from graph_l0 + + // for loading memory-mapped file + mmap_util::MmapStore mmap_store; + HNSWProductQuantizer4Bits() { std::string space_type = pecos::type_util::full_name(); if (space_type != "pecos::ann::FeatVecDenseL2Simd") { throw std::runtime_error("Currently, we only support L2 distance with float type."); - } + } } + ~HNSWProductQuantizer4Bits() {} + struct Searcher : SetOfVistedNodes { typedef SetOfVistedNodes set_of_visited_nodes_t; typedef HNSWProductQuantizer4Bits hnswpq4_t; @@ -1108,7 +1066,6 @@ namespace ann { return Searcher(this); } - static nlohmann::json load_config(const std::string& filepath) { std::ifstream loadfile(filepath); std::string json_str; @@ -1131,7 +1088,7 @@ namespace ann { void save_config(const std::string& filepath) const { nlohmann::json j_params = { {"hnsw_t", pecos::type_util::full_name()}, - {"version", "v1.0"}, + {"version", "v2.0"}, {"train_params", { {"num_node", this->num_node}, {"subspace_dimension", this->subspace_dimension}, @@ -1160,43 +1117,43 @@ namespace ann { } } save_config(model_dir + "/config.json"); - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "wb"); - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&maxM, 1, fp); - pecos::file_util::fput_multiple(&maxM0, 1, fp); - pecos::file_util::fput_multiple(&efC, 1, fp); - pecos::file_util::fput_multiple(&max_level, 1, fp); - pecos::file_util::fput_multiple(&init_node, 1, fp); - pecos::file_util::fput_multiple(&subspace_dimension, 1, fp); - pecos::file_util::fput_multiple(&sub_sample_points, 1, fp); - feature_vec.save(fp); - graph_l1.save(fp); - graph_l0_pq4.save(fp); - fclose(fp); + std::string index_path = model_dir + "/index.mmap_store"; + mmap_util::MmapStore mmap_s = mmap_util::MmapStore(); + mmap_s.open(index_path.c_str(), "w"); + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->maxM); + mmap_s.fput_one(this->maxM0); + mmap_s.fput_one(this->efC); + mmap_s.fput_one(this->max_level); + mmap_s.fput_one(this->init_node); + mmap_s.fput_one(this->subspace_dimension); + mmap_s.fput_one(this->sub_sample_points); + feature_vec.save(mmap_s); + graph_l1.save(mmap_s); + graph_l0_pq4.save(mmap_s); + mmap_s.close(); } - void load(const std::string& model_dir) { + void load(const std::string& model_dir, bool lazy_load = false) { auto config = load_config(model_dir + "/config.json"); std::string version = config.find("version") != config.end() ? config["version"] : "not found"; - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "rb"); - if (version == "v1.0") { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&maxM, 1, fp); - pecos::file_util::fget_multiple(&maxM0, 1, fp); - pecos::file_util::fget_multiple(&efC, 1, fp); - pecos::file_util::fget_multiple(&max_level, 1, fp); - pecos::file_util::fget_multiple(&init_node, 1, fp); - pecos::file_util::fget_multiple(&subspace_dimension, 1, fp); - pecos::file_util::fget_multiple(&sub_sample_points, 1, fp); - feature_vec.load(fp); - graph_l1.load(fp); - graph_l0_pq4.load(fp); + if (version == "v2.0") { + std::string index_path = model_dir + "/index.mmap_store"; + mmap_store.open(index_path.c_str(), lazy_load ? "r_lazy" : "r"); + this->num_node = mmap_store.fget_one(); + this->maxM = mmap_store.fget_one(); + this->maxM0 = mmap_store.fget_one(); + this->efC = mmap_store.fget_one(); + this->max_level = mmap_store.fget_one(); + this->init_node = mmap_store.fget_one(); + this->subspace_dimension = mmap_store.fget_one(); + this->sub_sample_points = mmap_store.fget_one(); + feature_vec.load(mmap_store); + graph_l1.load(mmap_store); + graph_l0_pq4.load(mmap_store); } else { - throw std::runtime_error("Unable to load this binary with version = " + version); + throw std::runtime_error("Unable to load memory-mapped file with version = " + version); } - fclose(fp); } template @@ -1234,7 +1191,6 @@ namespace ann { feature_vec.init(X_trn, -1); } - max_heap_t& predict_single(const feat_vec_t& query, index_type efS, index_type topk, Searcher& searcher, index_type num_rerank) const { index_type curr_node = this->init_node; auto &G1 = graph_l1; @@ -1370,5 +1326,6 @@ namespace ann { return topk_queue; } }; + } // end of namespace ann } // end of namespace pecos diff --git a/pecos/core/ann/quantizer_impl/common.hpp b/pecos/core/ann/quantizer_impl/common.hpp index a20fe8fd..28e3e7ed 100644 --- a/pecos/core/ann/quantizer_impl/common.hpp +++ b/pecos/core/ann/quantizer_impl/common.hpp @@ -15,79 +15,54 @@ #include #include #include "utils/clustering.hpp" +#include "utils/mmap_util.hpp" namespace pecos { namespace ann { - struct ProductQuantizer4BitsBase { // num_of_local_centroids denotes number of cluster centers used in quantization // In 4 Bit case, it's a fixed to be 16 - const size_t num_of_local_centroids = 16; + const index_type num_of_local_centroids = 16; // num_local_codebooks denotes number of local codebooks we have or in other words, // number of subspace we have in Product Quantization. // Supposedly, num_local_codebooks * local_dimension equals dimension of original data vector index_type num_local_codebooks; // local dimension denotes the dimensionality of subspace in Product Quantization int local_dimension; - std::vector global_centroid; - std::vector local_codebooks; - std::vector original_local_codebooks; - - inline void save(FILE* fp) const { - pecos::file_util::fput_multiple(&num_local_codebooks, 1, fp); - pecos::file_util::fput_multiple(&local_dimension, 1, fp); - size_t sz = global_centroid.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&global_centroid[0], sz, fp); - } - sz = original_local_codebooks.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&original_local_codebooks[0], sz, fp); - } - sz = local_codebooks.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&local_codebooks[0], sz, fp); - } + mmap_util::MmapableVector global_centroid; + mmap_util::MmapableVector local_codebooks; + mmap_util::MmapableVector original_local_codebooks; + + inline void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_local_codebooks); + mmap_s.fput_one(this->local_dimension); + this->global_centroid.save_to_mmap_store(mmap_s); + this->local_codebooks.save_to_mmap_store(mmap_s); + this->original_local_codebooks.save_to_mmap_store(mmap_s); } - inline void load(FILE* fp) { - pecos::file_util::fget_multiple(&num_local_codebooks, 1, fp); - pecos::file_util::fget_multiple(&local_dimension, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - global_centroid.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&global_centroid[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - original_local_codebooks.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&original_local_codebooks[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - local_codebooks.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&local_codebooks[0], sz, fp); - } + inline void load(mmap_util::MmapStore& mmap_s) { + this->num_local_codebooks = mmap_s.fget_one(); + this->local_dimension = mmap_s.fget_one(); + this->global_centroid.load_from_mmap_store(mmap_s); + this->local_codebooks.load_from_mmap_store(mmap_s); + this->original_local_codebooks.load_from_mmap_store(mmap_s); } inline void pack_codebook_for_inference_default() { local_codebooks = original_local_codebooks; } - + inline void pad_parameters_default(index_type& max_degree, size_t& code_dimension) {} - + inline void approximate_neighbor_group_distance_default(size_t neighbor_size, float* ds, const char* neighbor_codes, uint8_t* lut_ptr, float scale, float bias) const { index_type num_groups = neighbor_size % 16 == 0 ? neighbor_size / 16 : neighbor_size / 16 + 1; - + std::vector d(num_of_local_centroids); int ptr = 0; - + const uint8_t *localID = reinterpret_cast(neighbor_codes); for (index_type iters = 0; iters < num_groups; iters++) { memset(d.data(), 0, sizeof(uint32_t) * num_of_local_centroids); @@ -103,7 +78,7 @@ namespace ann { } d[k] += *(local_lut_ptr + obj); } - + local_lut_ptr += num_of_local_centroids; } for (size_t k = 0; k < num_of_local_centroids; k++) { @@ -112,7 +87,7 @@ namespace ann { ptr += num_of_local_centroids; } } - + inline void setup_lut_default(float* query, uint8_t* lut_ptr, float& scale, float& bias) const { float min = std::numeric_limits::max(); float max = std::numeric_limits::min(); @@ -134,7 +109,7 @@ namespace ann { min = std::min(min, tmp_v); } } - + bias = min; scale = (max - min) / 255.0; // second iteration to calculate quantized distnace and put it into lut @@ -236,10 +211,7 @@ namespace ann { &original_local_codebooks[m * num_of_local_centroids * local_dimension], threads); } } - }; - } // end of namespace ann } // end of namespace pecos - diff --git a/test/tst-data/ann/hnsw-model-dense/c_model/config.json b/test/tst-data/ann/hnsw-model-dense/c_model/config.json index 9b554f4c..e90a19ed 100644 --- a/test/tst-data/ann/hnsw-model-dense/c_model/config.json +++ b/test/tst-data/ann/hnsw-model-dense/c_model/config.json @@ -1,6 +1,5 @@ { "hnsw_t": "pecos::ann::HNSW>", - "version": "v1.0", "train_params": { "efC": 100, "init_node": 50, @@ -8,5 +7,6 @@ "maxM0": 48, "max_level": 1, "num_node": 90 - } -} + }, + "version": "v2.0" +} \ No newline at end of file diff --git a/test/tst-data/ann/hnsw-model-dense/c_model/index.mmap_store b/test/tst-data/ann/hnsw-model-dense/c_model/index.mmap_store new file mode 100644 index 0000000000000000000000000000000000000000..b1cb2b4ea48cbefbbe44276be12a3176cc2742e8 GIT binary patch literal 29104 zcmeI53v5?+6~|j1E8|%Z9H5LsM}b01DdlD1-aaU_lSe5ON*Sz?39>4**%&K~wqs(L z*-WvyicAD7`DcN4Ng{(T`oG2~~5{K}BuBjks_zccjt>X2_- z^K-3!05$#>=k?S-k=#q$F5kBG)_&t{+Lk71o6<+yZr^s?uKlLI+E(?`Hmkq32?Mn4 z^lj@v?bi&_HZNJ*LQ-*2V?c0t|Yrp9ZZL9o*WR1{%!boj9 zecSpO?bm!(+q_ZQCf})Tmv7rfYrio?+tM-Gri|6L+qWI5+Hbl`+p2NeW~FJHFkahE z-?mQBe$7N}^U}3Vo}_J;Z`&qozcEAG(oAhrrfA#k+YY}Vnx^XWRenKbP1Altj<%h? zZJn{+v(fZ z0`1onYMVDt+vFl`yL{U=U;B*xHl;*c^WU&f`Y9dCEk%^_r9>znN|myq#3_49 zmNHEgrR*qa%AQiEEGa3on9JcMd0Q@`!=hh?AJXK%C$rRs_7n z3uc4W;56}rmDu@iuz=AYGIA7O;hL$Vwz+SYRmK>8t3)YVQco=7M2U@fhzq`n8%$sX zFY&_zxg%fTBd72JX0XEp0#EP*SMd<=JidO6cj3qLUD^z-g5kvVOg(XZ;zdk7hI0cZ zQ*&T7xq=ImACoUR6*u7;eUC44zg02ZWBeFy%jKAui8IKr;TE0^Ua*>&4JL96F1|mA zP29K&*E>;uG55ZIHF)GFPxLNPVj(sY3o%nm@NLEyze^s89h}Iu@@MK5e&Q*_lzBGa zU79oACoz*ZzQ@!YT$tR3`vzRW5xI+ps2tRmx_|!OP!~K4L}!Q=ic)99YvK&X2p($B z^k?!1R$?_h0WJ&=ruN{#tKCN7ay#d zqT~oZs8={J@dsb~yM0cZE2T2*hwa&EpTbu>nl_{ke_Un^$AH?hNm>3_s;xQ83V ziOHRuy5Yh#emJStec{-1F~$p1=yXxW3IgZhF%2&85pS@b3@+jY7x^=IOs#<{o&uil ze)&hv>327KESv5Y9VePAI$3nAsOimyUt%?NNX*0^tV5Ga@P=#9j7h_h{KyRzVjNPJ z8T)DeBdXr!iC-kjQYKZD&xsjciQ8ZcVj^x6CpAZ`;0yK+>KAO})8tnUZ@3We*4#p` zsAZhOlPCUiQTU%EN)55hHFZOrU?VKa>rM5qml(MW_{GHvGcB zi4lCnd8_pT2}$9}-gVR&d}@=LUo$>(M5!_I0JmoBlgnHE9z=|f^*H40t*ce*xe4N@ ziJDxHpWwJAXXM6=>0pi+7mSl&U79{^>JWb7Dd1VZtB>>Ei#yc(fIbB#)2E0%jOTi= zfP?(<{WlAXTuNQ<=|kU`ll*tRp39JP&C*uf1BGv=6v`Cb-Hup-~1U$z+Smr+U)j=M|2z6(xfFWtuNvC*pwd&X8X1*wUAhzab z&&Ye-<-;z=s5Ry?lSSh_&yq{TMaa*->SpJe30qwHHTwojM47uWmt$^elYpn;*?YVR zFDJSjo3)Eg7=p1XJP#~#R~;Mf(%)u@W{Gm%#+oDUjJngY-dnz;?mrpgn{|L3S|{N7 z{;NgK%8c!*zcJ5Yy}%m5wz-aktm>+yvB9lwX_yN(a>F{*RsqAWmi2P#&+bt7Qyb@? zLGHox-r{=qw*Hsqa{pl;o;`Wi&^8Kq&b|GjlRk2@$Ni^7bh#*VYUbDM!`UR@S-WDC z`^CSNsQ$)tg|VVG_G)+s&pUexodYG?)%`S0eD)A+74YocQtVvmr~CDE@mXWrynoRC z*jn%5^eZv2*|aw$;r^F^VY{ripR7OCPJd&*!?|&yHVUy{`Aa|V$QM?q@xxw@t!r)g z2hXoBtoH__yr%Yd3dH}sD1DmV&3=eY0-mzux12N2uT%5ueDPT`uZIVWdMt;&xpK4XaYTWr}Al~6wv@TBfr?Y+|6Q_V-&f2Ysc zD&TqJe3tv*z}X59dsVzAVyl3sJnyeg=7n_%4{L2(?>*!IJSQ*r^o~}pR{J}QALcwO zMCXXwHip1+;w{Ikcp_cphjmZ7sI3B?)@?_ex|cS4^a1t~?-sRry>MakJ9D$A)vJEJ zRIpHE)Ehn9HX%R1@8`wZcYa0XCsq6+QT9h|6!7#uzS5ieyW3TM%EV`nmpxyw*(TsQ zS6$~Ve|Ei^@6g+9tTV$qcn(fmY-yFTcJwNe}>Ul8Z zh4E9!hZYL?nLjYq{dr}Py8rO3-p1$jhI;V4a(bh?v#L|&hy5w`t8DGr;7NOQsOx;K zRP_Va2|Q!4RlqZ2TUq=42OAU~_7H8&4LMNxS()U1XW=T<-xkX`-sQ7(&lfy7S@mA# ztasZvrdRXsj;#WQDYNcz-{0Ls^|yuM+nO740G|3cAMsv#?M~I-SkLj^j*V+M30vVw zdUCUOanB2CJ;!~Nd8n;Ieh$1m%uQ=qqUw#k5cb_{74Yo8bRgDvs8->j*4R6+RlxJq zx3_wmChb)9#xoz*sJ4p2vuTT)*s@3AVgJCkXL3RA!Q&mA>h3I^rRE2`1IHM$Rlu{R zt9Pv7H~M`f_Nr{}2MN1JdTApTs(wJfX79jOAwNUUC%H4` zQB$qn=bI-!=WG>XKbV%|O+Gi>tqg%k4Ef>b_u;w>`zLwuA4$YO#6ZMA#6ZMA#6ZMA z#6ZMA#6ZMA#6ZMA#6ZMA#6ZMA#K28ufIYtgQQnJ=Bw`?9AYve5AY$Np8Q}iGJ7v-R z<9Zp3e(saRfSF&}{NCj!DPIxRh=GWKh=E&$f$-l4;~hPOcMlQcrzr0uKP%+tg?#=U z4Pt)3N_ijoH6gz->", - "version": "v1.0", "train_params": { "efC": 100, "init_node": 50, @@ -8,5 +7,6 @@ "maxM0": 48, "max_level": 1, "num_node": 90 - } -} + }, + "version": "v2.0" +} \ No newline at end of file diff --git a/test/tst-data/ann/hnsw-model-sparse/c_model/index.mmap_store b/test/tst-data/ann/hnsw-model-sparse/c_model/index.mmap_store new file mode 100644 index 0000000000000000000000000000000000000000..0d7d73cce4c93f8be4aecf9fcabdf3d4ae2317e8 GIT binary patch literal 29824 zcmeI54Qy8B701h$MN#<}%*qBWR7xpO3KS>;_ZCW_ARpzsfFK!;583z#q797|nv zDPvh7?gKL;1s5U1xsK6ZB+J5>b7UGu*wl^>$0RY#kBvm#c7E@3&+U8fU@;jl;fePo zC+D8$zP$JR&-p*+KKFSquplig8DTfAAmTq5@modwArXH;#BUw(qu(DnowhJK;QOb2 zd(aP{#{c3aZOnTvws^9!8TT1`*|)|^ON;u@01837EK%qSRiF`+ zg+5Rn+Cy1r%9Mb9P#fAqb!Z8-pkW75Xv>uGX)H&n6RuOe?uc_BeYM9`h>uu^k$8z6 zEW`^wVn)DC++bJCidS)f84O$lOknk=kQ}8}xMp^bP40U$7c_Ata4{|Ci$ZHCLQG60 zVk9o&Bmcw?Hn4&l9OQ#q@;%_B#^eX=nB4g6RvUFk*V%?D>4EeGQTv}bL5!Vi8ndl znrnEfxWTNkD>iBkKCY9DQQVXY*E=@n!l(!TRUP1>Ud5vKh!PVqYD~mVP07E`KfZ&y zf&sk9wd$!nCQqppV#+@=)}2w;`-g9lw_^Xe zfQOkfHbazU68$ildn3gs_sP5)CO(++MVWi#1s;Kzvm3XCfeL}d3Wqh_M_2@4Ow|LxEJ`pb%iIW(q z4c|qqa4lLNtqZx)c}IT8MJj|?+)J7EO)? z9D0w^dzjWVIS1eekIu=f)mBcYOyEbGrY&CEL+LeA^`8>e`Uz=1p1@@DQH$6s2xFOTj(c1YB>;t@c*l zwzHAgSzmx>n&=c!@Ui|FESfKB3r27)u6fkkQMav04imWG;UH1wQiUjUjNWgPfa{kH zRqnSQdNrsQ^NF+fseKd7}B?&aLO`Q1-PgewVEKBDQauI zc65Kho!#x@pdZ3P)<^?HQ@xIpOQb}o*X~t2o#%2l1bu@(KSp%AC~HGUlr^kP0!jc?o0@wPf?)<|U0WRiOk!XP^=WXO7aSyCK)zogoiU1e$YoMq; zf5@SA0VaxY;9k)`!%K^%Ku2Je%0IhLVsIJe#z&+FeywO{+-Y0xPxRx!6_# z!*6D_b*j#^>~An;<61Yl24MJbT9tdt?U#ajF~9h|f!`i@wzpBhb@siNoxJWXzdLaM znj|_~lr=o-`5vM+3Ah%|?csj)?~_9Pkl#^yi`v-Z$v?Q>-%;Y!Pud*dVk}3Ly--^P z47)azIiGe2_xWKm&a=PG`x}iSc7XWzyq0oTk< z?>eVnTpHAi_2F1io>kb-wsn0tvvINe;|D(B@iAB*vd3oIy_on_sF%0s1*dRy%kO&B zi{dfIMvB_9$19-*0{6z;)8dcAEzz5dw2 zYiiu~^?--eD_gWwl>J^C1zhcp%<~5S;nq;SX398w*X(73(KZ3s*;SR^?B|z-)=~6A z8~LaC2iLx#^SwdU=>ZQJW50%eZ>xY|&g~=I7ymgV;2~rBE{e^#Hda=+&3oI2aIuzW zf6Z2*HcR)OaQ<@FssI=K<9(ZQQPzkPMOg>iB;Xp?c!xVGe@-yJ==1YM>G7PmQHZR;>T=G09O@sK?m_6=Z&Gx>LR@2?fxqC{eZ_wx2+ptxrSJ!jt?ud^nLb!NO zk9SjS6>vSbv)X-o?JBl3g84OGjyY$m5c|HILT|v?VL{(m5CKCkNtWn)4^h5G{S&+} z5lftbI0JD8;ta$Yh%*ppAkILXfj9$k2I36F8Hh6wXCTf%oPjt4H#7t6{f!gleehV~ z48$3TGZ1GW&cJ8Q0QV2xWsC10pEYCg=RQvv(Djwg?|Xip>J?*+GZ1GW&cMx*f#|>2 z#=Cq7?