From e3fc3424b3bc51c868c64e3b02826877c92e9f92 Mon Sep 17 00:00:00 2001 From: Wei-Cheng Chang Date: Thu, 23 Feb 2023 03:42:38 +0000 Subject: [PATCH] Enabled C++ memory-map usage in PECOS-HNSW --- pecos/core/ann/hnsw.hpp | 307 ++++++++---------- pecos/core/ann/quantizer_impl/common.hpp | 212 ++++++------ pecos/core/ann/quantizer_impl/x86.hpp | 28 +- .../ann/hnsw-model-dense/c_model/config.json | 6 +- .../hnsw-model-dense/c_model/index.mmap_store | Bin 0 -> 29104 bytes test/tst-data/ann/hnsw-model-dense/param.json | 2 +- .../ann/hnsw-model-sparse/c_model/config.json | 6 +- .../c_model/index.mmap_store | Bin 0 -> 29824 bytes .../tst-data/ann/hnsw-model-sparse/param.json | 2 +- 9 files changed, 273 insertions(+), 290 deletions(-) create mode 100644 test/tst-data/ann/hnsw-model-dense/c_model/index.mmap_store create mode 100644 test/tst-data/ann/hnsw-model-sparse/c_model/index.mmap_store diff --git a/pecos/core/ann/hnsw.hpp b/pecos/core/ann/hnsw.hpp index f9954b0..af1f9bd 100644 --- a/pecos/core/ann/hnsw.hpp +++ b/pecos/core/ann/hnsw.hpp @@ -25,12 +25,11 @@ #include #include - - #include "ann/feat_vectors.hpp" #include "ann/quantizer.hpp" #include "third_party/nlohmann_json/json.hpp" #include "utils/file_util.hpp" +#include "utils/mmap_util.hpp" #include "utils/matrix.hpp" #include "utils/random.hpp" #include "utils/type_util.hpp" @@ -94,44 +93,27 @@ namespace ann { index_type feat_dim; index_type max_degree; index_type node_mem_size; - std::vector mem_start_of_node; - std::vector buffer; + mmap_util::MmapableVector mem_start_of_node; + mmap_util::MmapableVector buffer; size_t neighborhood_memory_size() const { return (1 + max_degree) * sizeof(index_type); } - void save(FILE *fp) const { - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&feat_dim, 1, fp); - pecos::file_util::fput_multiple(&max_degree, 1, fp); - pecos::file_util::fput_multiple(&node_mem_size, 1, fp); - size_t sz = mem_start_of_node.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&mem_start_of_node[0], sz, fp); - } - sz = buffer.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&buffer[0], sz, fp); - } + void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->feat_dim); + mmap_s.fput_one(this->max_degree); + mmap_s.fput_one(this->node_mem_size); + this->mem_start_of_node.save_to_mmap_store(mmap_s); + this->buffer.save_to_mmap_store(mmap_s); } - void load(FILE *fp) { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&feat_dim, 1, fp); - pecos::file_util::fget_multiple(&max_degree, 1, fp); - pecos::file_util::fget_multiple(&node_mem_size, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - mem_start_of_node.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&mem_start_of_node[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - buffer.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&buffer[0], sz, fp); - } + void load(mmap_util::MmapStore& mmap_s) { + this->num_node = mmap_s.fget_one(); + this->feat_dim = mmap_s.fget_one(); + this->max_degree = mmap_s.fget_one(); + this->node_mem_size = mmap_s.fget_one(); + this->mem_start_of_node.load_from_mmap_store(mmap_s); + this->buffer.load_from_mmap_store(mmap_s); } template @@ -198,33 +180,24 @@ namespace ann { index_type max_degree; index_type node_mem_size; index_type level_mem_size; - std::vector buffer; - - void save(FILE *fp) const { - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&max_level, 1, fp); - pecos::file_util::fput_multiple(&max_degree, 1, fp); - pecos::file_util::fput_multiple(&node_mem_size, 1, fp); - pecos::file_util::fput_multiple(&level_mem_size, 1, fp); - size_t sz = buffer.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&buffer[0], sz, fp); - } + mmap_util::MmapableVector buffer; + + void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->max_level); + mmap_s.fput_one(this->max_degree); + mmap_s.fput_one(this->node_mem_size); + mmap_s.fput_one(this->level_mem_size); + this->buffer.save_to_mmap_store(mmap_s); } - void load(FILE *fp) { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&max_level, 1, fp); - pecos::file_util::fget_multiple(&max_degree, 1, fp); - pecos::file_util::fget_multiple(&node_mem_size, 1, fp); - pecos::file_util::fget_multiple(&level_mem_size, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - buffer.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&buffer[0], sz, fp); - } + void load(mmap_util::MmapStore& mmap_s) { + this->num_node = mmap_s.fget_one(); + this->max_level = mmap_s.fget_one(); + this->max_degree = mmap_s.fget_one(); + this->node_mem_size = mmap_s.fget_one(); + this->level_mem_size = mmap_s.fget_one(); + this->buffer.load_from_mmap_store(mmap_s); } template @@ -246,7 +219,6 @@ namespace ann { template struct GraphProductQuantizer4Bits : GraphBase { typedef FeatVec_T feat_vec_t; - ProductQuantizer4Bits quantizer; index_type num_node; // code_dimension is number of 4 bits code used to encode a data point in GraphPQ4Bits // code_dimension can be different from parameter num_local_codebooks in quantizer @@ -254,55 +226,34 @@ namespace ann { // found in pad_parameters function of ann/quantizer_impl/x86.hpp size_t code_dimension; // code_offset helps to locate memory position containing neighboring codes - size_t code_offset; + size_t code_offset; size_t node_mem_size; index_type max_degree; - std::vector mem_start_of_node; - std::vector buffer; - - void save(FILE *fp) const { - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&code_dimension, 1, fp); - pecos::file_util::fput_multiple(&code_offset, 1, fp); - pecos::file_util::fput_multiple(&node_mem_size, 1, fp); - pecos::file_util::fput_multiple(&max_degree, 1, fp); - size_t sz = mem_start_of_node.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&mem_start_of_node[0], sz, fp); - } - sz = buffer.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&buffer[0], sz, fp); - } - quantizer.save(fp); - fclose(fp); - } - - void load(FILE *fp) { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&code_dimension, 1, fp); - pecos::file_util::fget_multiple(&code_offset, 1, fp); - pecos::file_util::fget_multiple(&node_mem_size, 1, fp); - pecos::file_util::fget_multiple(&max_degree, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - mem_start_of_node.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&mem_start_of_node[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - buffer.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&buffer[0], sz, fp); - } - - quantizer.load(fp); + mmap_util::MmapableVector mem_start_of_node; + mmap_util::MmapableVector buffer; + ProductQuantizer4Bits quantizer; - fclose(fp); + void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->code_dimension); + mmap_s.fput_one(this->code_offset); + mmap_s.fput_one(this->node_mem_size); + mmap_s.fput_one(this->max_degree); + this->mem_start_of_node.save_to_mmap_store(mmap_s); + this->buffer.save_to_mmap_store(mmap_s); + quantizer.save(mmap_s); } + void load(mmap_util::MmapStore& mmap_s) { + this->num_node = mmap_s.fget_one(); + this->code_dimension = mmap_s.fget_one(); + this->code_offset = mmap_s.fget_one(); + this->node_mem_size = mmap_s.fget_one(); + this->max_degree = mmap_s.fget_one(); + this->mem_start_of_node.load_from_mmap_store(mmap_s); + this->buffer.load_from_mmap_store(mmap_s); + quantizer.load(mmap_s); + } void build_quantizer(const pecos::drm_t& X_trn, index_type subspace_dimension, index_type sub_sample_points) { size_t code_dimension = X_trn.cols; @@ -384,7 +335,6 @@ namespace ann { } }; - template struct SetOfVistedNodes { T init_token, curr_token; @@ -508,6 +458,9 @@ namespace ann { GraphL0 graph_l0; // neighborhood graph along with feature vectors at level 0 GraphL1 graph_l1; // neighborhood graphs from level 1 and above + // for loading memory-mapped file + pecos::mmap_util::MmapStore mmap_store; + // destructor ~HNSW() {} @@ -534,7 +487,7 @@ namespace ann { void save_config(const std::string& filepath) const { nlohmann::json j_params = { {"hnsw_t", pecos::type_util::full_name()}, - {"version", "v1.0"}, + {"version", "v2.0"}, {"train_params", { {"num_node", this->num_node}, {"maxM", this->maxM}, @@ -561,37 +514,38 @@ namespace ann { } } save_config(model_dir + "/config.json"); - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "wb"); - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&maxM, 1, fp); - pecos::file_util::fput_multiple(&maxM0, 1, fp); - pecos::file_util::fput_multiple(&efC, 1, fp); - pecos::file_util::fput_multiple(&max_level, 1, fp); - pecos::file_util::fput_multiple(&init_node, 1, fp); - graph_l0.save(fp); - graph_l1.save(fp); - fclose(fp); + std::string index_path = model_dir + "/index.mmap_store"; + mmap_util::MmapStore mmap_s = mmap_util::MmapStore(); + mmap_s.open(index_path.c_str(), "w"); + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->maxM); + mmap_s.fput_one(this->maxM0); + mmap_s.fput_one(this->efC); + mmap_s.fput_one(this->max_level); + mmap_s.fput_one(this->init_node); + graph_l0.save(mmap_s); + graph_l1.save(mmap_s); + mmap_s.close(); } - void load(const std::string& model_dir) { + void load(const std::string& model_dir, bool lazy_load = false) { auto config = load_config(model_dir + "/config.json"); std::string version = config.find("version") != config.end() ? config["version"] : "not found"; - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "rb"); - if (version == "v1.0") { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&maxM, 1, fp); - pecos::file_util::fget_multiple(&maxM0, 1, fp); - pecos::file_util::fget_multiple(&efC, 1, fp); - pecos::file_util::fget_multiple(&max_level, 1, fp); - pecos::file_util::fget_multiple(&init_node, 1, fp); - graph_l0.load(fp); - graph_l1.load(fp); + if (version == "v2.0") { + std::string index_path = model_dir + "/index.mmap_store"; + mmap_store.open(index_path.c_str(), lazy_load ? "r_lazy" : "r"); + this->num_node = mmap_store.fget_one(); + this->maxM = mmap_store.fget_one(); + this->maxM0 = mmap_store.fget_one(); + this->efC = mmap_store.fget_one(); + this->max_level = mmap_store.fget_one(); + this->init_node = mmap_store.fget_one(); + graph_l0.load(mmap_store); + graph_l1.load(mmap_store); + // DO NOT call mmap_store.close() as the actual memory is held by this->mmap_store object. } else { - throw std::runtime_error("Unable to load this binary with version = " + version); + throw std::runtime_error("Unable to load memory-mapped file with version = " + version); } - fclose(fp); } // Algorithm 4 of HNSW paper @@ -1014,7 +968,7 @@ namespace ann { } }; - + // PECOS-HNSW-PQ4 Interface template struct HNSWProductQuantizer4Bits { typedef FeatVec_T feat_vec_t; @@ -1022,7 +976,6 @@ namespace ann { typedef heap_t> max_heap_t; typedef heap_t> min_heap_t; - // scalar variables index_type num_node; index_type maxM; // max number of out-degree for level l=1,...,L @@ -1031,18 +984,24 @@ namespace ann { index_type max_level; index_type init_node; index_type subspace_dimension; // dimension of each subspace in Product Quantization - index_type sub_sample_points; // number of sub-sampled points used to build quantizer subspace centors. + index_type sub_sample_points; // number of sub-sampled points used to build quantizer subspace centors. GraphL0 feature_vec; // feature vectors only GraphL1 graph_l1; // neighborhood graphs from level 1 and above GraphProductQuantizer4Bits graph_l0_pq4; // Productquantized4Bits neighborhood graph built from graph_l0 + + // for loading memory-mapped file + mmap_util::MmapStore mmap_store; + HNSWProductQuantizer4Bits() { std::string space_type = pecos::type_util::full_name(); if (space_type != "pecos::ann::FeatVecDenseL2Simd") { throw std::runtime_error("Currently, we only support L2 distance with float type."); - } + } } + ~HNSWProductQuantizer4Bits() {} + struct Searcher : SetOfVistedNodes { typedef SetOfVistedNodes set_of_visited_nodes_t; typedef HNSWProductQuantizer4Bits hnswpq4_t; @@ -1074,22 +1033,24 @@ namespace ann { auto num_local_codebooks = hnsw->graph_l0_pq4.quantizer.num_local_codebooks; // When using AVX512f, we have 16 centroids per local codebook, and each of it uses 8 bits to represent quantized - // distance value. Thus,m we will have 128 bits to load 1 set of local codebooks. Thus, a loadu_si512 will load + // distance value. Thus, we will have 128 bits to load 1 set of local codebooks. Thus, a loadu_si512 will load // 512 / 128 == 4 local codebooks at a time. Thus, the lookup table size needs to be adjusted (padding 0) if // if num_local_codebooks is not divisible by 4. - size_t adjusted_num_local_codebooks = num_local_codebooks % 4 == 0 ? num_local_codebooks : (num_local_codebooks / 4 + 1) * 4; + index_type adjusted_num_local_codebooks = num_local_codebooks % 4 == 0 ? num_local_codebooks : (num_local_codebooks / 4 + 1) * 4; // Similarly, we have to parse every 16 neighbors at a time to maximally leverage avx512f. // Thus, we have to prepare result array which is multiple of 16 to make sure the SIMD // will not touch unavailable memory - size_t adjusted_max_degree = max_degree % 16 == 0 ? max_degree : ((max_degree / 16) + 1) * 16; + index_type adjusted_max_degree = max_degree % 16 == 0 ? max_degree : ((max_degree / 16) + 1) * 16; lut.resize(num_of_local_centroids * adjusted_num_local_codebooks, 0); appx_dist.resize(adjusted_max_degree, 0); } + void setup_lut(float* query) { hnsw->graph_l0_pq4.quantizer.setup_lut(query, lut.data(), scale, bias); } + void approximate_distance(size_t neighbor_size, const char* neighbor_codes) { // pass searcher to group_distance hnsw->graph_l0_pq4.quantizer.approximate_neighbor_group_distance(neighbor_size, appx_dist.data(), neighbor_codes, lut.data(), scale, bias); @@ -1108,7 +1069,6 @@ namespace ann { return Searcher(this); } - static nlohmann::json load_config(const std::string& filepath) { std::ifstream loadfile(filepath); std::string json_str; @@ -1131,7 +1091,7 @@ namespace ann { void save_config(const std::string& filepath) const { nlohmann::json j_params = { {"hnsw_t", pecos::type_util::full_name()}, - {"version", "v1.0"}, + {"version", "v2.0"}, {"train_params", { {"num_node", this->num_node}, {"subspace_dimension", this->subspace_dimension}, @@ -1160,43 +1120,44 @@ namespace ann { } } save_config(model_dir + "/config.json"); - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "wb"); - pecos::file_util::fput_multiple(&num_node, 1, fp); - pecos::file_util::fput_multiple(&maxM, 1, fp); - pecos::file_util::fput_multiple(&maxM0, 1, fp); - pecos::file_util::fput_multiple(&efC, 1, fp); - pecos::file_util::fput_multiple(&max_level, 1, fp); - pecos::file_util::fput_multiple(&init_node, 1, fp); - pecos::file_util::fput_multiple(&subspace_dimension, 1, fp); - pecos::file_util::fput_multiple(&sub_sample_points, 1, fp); - feature_vec.save(fp); - graph_l1.save(fp); - graph_l0_pq4.save(fp); - fclose(fp); + std::string index_path = model_dir + "/index.mmap_store"; + mmap_util::MmapStore mmap_s = mmap_util::MmapStore(); + mmap_s.open(index_path.c_str(), "w"); + mmap_s.fput_one(this->num_node); + mmap_s.fput_one(this->maxM); + mmap_s.fput_one(this->maxM0); + mmap_s.fput_one(this->efC); + mmap_s.fput_one(this->max_level); + mmap_s.fput_one(this->init_node); + mmap_s.fput_one(this->subspace_dimension); + mmap_s.fput_one(this->sub_sample_points); + feature_vec.save(mmap_s); + graph_l1.save(mmap_s); + graph_l0_pq4.save(mmap_s); + mmap_s.close(); } - void load(const std::string& model_dir) { + void load(const std::string& model_dir, bool lazy_load = false) { auto config = load_config(model_dir + "/config.json"); std::string version = config.find("version") != config.end() ? config["version"] : "not found"; - std::string index_path = model_dir + "/index.bin"; - FILE *fp = fopen(index_path.c_str(), "rb"); - if (version == "v1.0") { - pecos::file_util::fget_multiple(&num_node, 1, fp); - pecos::file_util::fget_multiple(&maxM, 1, fp); - pecos::file_util::fget_multiple(&maxM0, 1, fp); - pecos::file_util::fget_multiple(&efC, 1, fp); - pecos::file_util::fget_multiple(&max_level, 1, fp); - pecos::file_util::fget_multiple(&init_node, 1, fp); - pecos::file_util::fget_multiple(&subspace_dimension, 1, fp); - pecos::file_util::fget_multiple(&sub_sample_points, 1, fp); - feature_vec.load(fp); - graph_l1.load(fp); - graph_l0_pq4.load(fp); + if (version == "v2.0") { + std::string index_path = model_dir + "/index.mmap_store"; + mmap_store.open(index_path.c_str(), lazy_load ? "r_lazy" : "r"); + this->num_node = mmap_store.fget_one(); + this->maxM = mmap_store.fget_one(); + this->maxM0 = mmap_store.fget_one(); + this->efC = mmap_store.fget_one(); + this->max_level = mmap_store.fget_one(); + this->init_node = mmap_store.fget_one(); + this->subspace_dimension = mmap_store.fget_one(); + this->sub_sample_points = mmap_store.fget_one(); + feature_vec.load(mmap_store); + graph_l1.load(mmap_store); + graph_l0_pq4.load(mmap_store); + // DO NOT call mmap_store.close() as the actual memory is held by this->mmap_store object. } else { - throw std::runtime_error("Unable to load this binary with version = " + version); + throw std::runtime_error("Unable to load memory-mapped file with version = " + version); } - fclose(fp); } template @@ -1234,7 +1195,6 @@ namespace ann { feature_vec.init(X_trn, -1); } - max_heap_t& predict_single(const feat_vec_t& query, index_type efS, index_type topk, Searcher& searcher, index_type num_rerank) const { index_type curr_node = this->init_node; auto &G1 = graph_l1; @@ -1370,5 +1330,6 @@ namespace ann { return topk_queue; } }; + } // end of namespace ann } // end of namespace pecos diff --git a/pecos/core/ann/quantizer_impl/common.hpp b/pecos/core/ann/quantizer_impl/common.hpp index a20fe8f..04aaf8f 100644 --- a/pecos/core/ann/quantizer_impl/common.hpp +++ b/pecos/core/ann/quantizer_impl/common.hpp @@ -15,85 +15,63 @@ #include #include #include "utils/clustering.hpp" +#include "utils/mmap_util.hpp" namespace pecos { namespace ann { + typedef uint32_t index_type; + typedef uint64_t mem_index_type; struct ProductQuantizer4BitsBase { // num_of_local_centroids denotes number of cluster centers used in quantization // In 4 Bit case, it's a fixed to be 16 - const size_t num_of_local_centroids = 16; + const index_type num_of_local_centroids = 16; // num_local_codebooks denotes number of local codebooks we have or in other words, // number of subspace we have in Product Quantization. // Supposedly, num_local_codebooks * local_dimension equals dimension of original data vector index_type num_local_codebooks; // local dimension denotes the dimensionality of subspace in Product Quantization - int local_dimension; - std::vector global_centroid; - std::vector local_codebooks; - std::vector original_local_codebooks; - - inline void save(FILE* fp) const { - pecos::file_util::fput_multiple(&num_local_codebooks, 1, fp); - pecos::file_util::fput_multiple(&local_dimension, 1, fp); - size_t sz = global_centroid.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&global_centroid[0], sz, fp); - } - sz = original_local_codebooks.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&original_local_codebooks[0], sz, fp); - } - sz = local_codebooks.size(); - pecos::file_util::fput_multiple(&sz, 1, fp); - if (sz) { - pecos::file_util::fput_multiple(&local_codebooks[0], sz, fp); - } + index_type local_dimension; + mmap_util::MmapableVector global_centroid; + mmap_util::MmapableVector local_codebooks; + mmap_util::MmapableVector original_local_codebooks; + + inline void save(mmap_util::MmapStore& mmap_s) const { + mmap_s.fput_one(this->num_local_codebooks); + mmap_s.fput_one(this->local_dimension); + this->global_centroid.save_to_mmap_store(mmap_s); + this->local_codebooks.save_to_mmap_store(mmap_s); + this->original_local_codebooks.save_to_mmap_store(mmap_s); } - inline void load(FILE* fp) { - pecos::file_util::fget_multiple(&num_local_codebooks, 1, fp); - pecos::file_util::fget_multiple(&local_dimension, 1, fp); - size_t sz = 0; - pecos::file_util::fget_multiple(&sz, 1, fp); - global_centroid.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&global_centroid[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - original_local_codebooks.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&original_local_codebooks[0], sz, fp); - } - pecos::file_util::fget_multiple(&sz, 1, fp); - local_codebooks.resize(sz); - if (sz) { - pecos::file_util::fget_multiple(&local_codebooks[0], sz, fp); - } + inline void load(mmap_util::MmapStore& mmap_s) { + this->num_local_codebooks = mmap_s.fget_one(); + this->local_dimension = mmap_s.fget_one(); + this->global_centroid.load_from_mmap_store(mmap_s); + this->local_codebooks.load_from_mmap_store(mmap_s); + this->original_local_codebooks.load_from_mmap_store(mmap_s); } inline void pack_codebook_for_inference_default() { local_codebooks = original_local_codebooks; } - + inline void pad_parameters_default(index_type& max_degree, size_t& code_dimension) {} - + inline void approximate_neighbor_group_distance_default(size_t neighbor_size, float* ds, const char* neighbor_codes, uint8_t* lut_ptr, float scale, float bias) const { index_type num_groups = neighbor_size % 16 == 0 ? neighbor_size / 16 : neighbor_size / 16 + 1; - + std::vector d(num_of_local_centroids); int ptr = 0; - + const uint8_t *localID = reinterpret_cast(neighbor_codes); for (index_type iters = 0; iters < num_groups; iters++) { memset(d.data(), 0, sizeof(uint32_t) * num_of_local_centroids); uint8_t* local_lut_ptr = lut_ptr; - for (index_type i = 0; i < num_local_codebooks; i++) { - for (size_t k = 0; k < num_of_local_centroids; k++) { + for (index_type m = 0; m < num_local_codebooks; m++) { + for (index_type k = 0; k < num_of_local_centroids; k++) { uint8_t obj = *localID; if (k % 2 == 0) { obj &= 0x0f; @@ -103,79 +81,98 @@ namespace ann { } d[k] += *(local_lut_ptr + obj); } - + local_lut_ptr += num_of_local_centroids; } - for (size_t k = 0; k < num_of_local_centroids; k++) { + for (index_type k = 0; k < num_of_local_centroids; k++) { ds[k + ptr] = d[k] * scale + bias; } ptr += num_of_local_centroids; } } - + inline void setup_lut_default(float* query, uint8_t* lut_ptr, float& scale, float& bias) const { float min = std::numeric_limits::max(); float max = std::numeric_limits::min(); + mem_index_type buf_size, offset1, offset2, offset3; // first iteration to calculate raw distance and max,min values for quantized lut - std::vector raw_dist(num_local_codebooks * num_of_local_centroids, 0); + buf_size = (mem_index_type) num_local_codebooks * num_of_local_centroids; + std::vector raw_dist(buf_size, 0); std::vector qs(local_dimension); - for (index_type d = 0; d < num_local_codebooks; d++) { - for (int j = 0; j < local_dimension; j++) { - qs[j] = query[d * local_dimension + j] - global_centroid[d * local_dimension + j]; + for (index_type m = 0; m < num_local_codebooks; m++) { + offset1 = (mem_index_type) m * num_of_local_centroids * local_dimension; + offset2 = (mem_index_type) m * num_of_local_centroids; + offset3 = (mem_index_type) m * local_dimension; + for (index_type d = 0; d < local_dimension; d++) { + qs[d] = query[offset3 + d] - global_centroid[offset3 + d]; } - for (size_t k = 0; k < num_of_local_centroids; k++) { + for (index_type k = 0; k < num_of_local_centroids; k++) { float tmp_v = 0; - for (int j = 0; j < local_dimension; j++) { - float v = (qs[j] - local_codebooks[d * num_of_local_centroids * local_dimension + k * local_dimension + j]); + offset3 = (mem_index_type) k * local_dimension; + for (index_type d = 0; d < local_dimension; d++) { + float v = (qs[d] - local_codebooks[offset1 + offset3 + d]); tmp_v += (v * v); } - raw_dist[d * num_of_local_centroids + k] = tmp_v; + raw_dist[offset2 + k] = tmp_v; max = std::max(max, tmp_v); min = std::min(min, tmp_v); } } - + bias = min; scale = (max - min) / 255.0; // second iteration to calculate quantized distnace and put it into lut - for (index_type d = 0; d < num_local_codebooks; d++) { - for (size_t k = 0; k < num_of_local_centroids; k++) { - lut_ptr[d * num_of_local_centroids + k] = std::round((raw_dist[d * num_of_local_centroids + k] - bias) / scale); + for (index_type m = 0; m < num_local_codebooks; m++) { + offset2 = (mem_index_type) m * num_of_local_centroids; + for (index_type k = 0; k < num_of_local_centroids; k++) { + lut_ptr[offset2 + k] = std::round((raw_dist[offset2 + k] - bias) / scale); } } } inline void encode(float* query, uint8_t* codes) { - for (index_type d = 0; d < num_local_codebooks; d++) { - std::vector results; - for (size_t k = 0; k < num_of_local_centroids; k++) { + mem_index_type offset1, offset2, offset3; + for (index_type m = 0; m < num_local_codebooks; m++) { + std::vector results; + offset1 = (mem_index_type) m * num_of_local_centroids * local_dimension; + offset2 = (mem_index_type) m * local_dimension; + for (index_type k = 0; k < num_of_local_centroids; k++) { float v = 0; - for (int j = 0; j < local_dimension; j++) { - float tmp_v = original_local_codebooks[d * num_of_local_centroids * local_dimension + k * local_dimension + j] - - (query[d * local_dimension + j] - global_centroid[d * local_dimension + j]); + offset3 = (mem_index_type) k * local_dimension; + for (index_type d = 0; d < local_dimension; d++) { + float tmp_v = original_local_codebooks[offset1 + offset3 + d] + - (query[offset2 + d] - global_centroid[offset2 + d]); v += (tmp_v * tmp_v); } results.push_back(v); } std::vector::iterator argmin_result = std::min_element(results.begin(), results.end()); - codes[d] = std::distance(results.begin(), argmin_result); + codes[m] = std::distance(results.begin(), argmin_result); } } - inline void compute_centroids(pecos::drm_t& X, int dsub, size_t ksub, index_type *assign, float *centroids, int threads=1) { + inline void compute_centroids( + pecos::drm_t& X, + index_type dsub, + index_type ksub, + index_type *assign, + float *centroids, + int threads=1 + ) { // zero initialization for later do_axpy - memset(centroids, 0, ksub * dsub * sizeof(*centroids)); + mem_index_type buf_size = (mem_index_type) ksub * dsub; + memset(centroids, 0, buf_size * sizeof(*centroids)); std::vector centroids_size(ksub); #pragma omp parallel num_threads(threads) { // each thread takes care of [c_l, c_r) int rank = omp_get_thread_num(); - size_t c_l = (ksub * rank) / threads; - size_t c_r = (ksub * (rank + 1)) / threads; - for (size_t i = 0; i < X.rows; i++) { + size_t c_l = ((mem_index_type) ksub * rank) / threads; + size_t c_r = ((mem_index_type) ksub * (rank + 1)) / threads; + for (index_type i = 0; i < X.rows; i++) { auto ci = assign[i]; if (ci >= c_l && ci < c_r) { - float* y = centroids + ci * dsub; + float* y = centroids + (mem_index_type) ci * dsub; const auto& xi = X.get_row(i); pecos::do_axpy(1.0, xi.val, y, dsub); centroids_size[ci] += 1; @@ -183,36 +180,50 @@ namespace ann { } // normalize center vector for (size_t ci = c_l; ci < c_r; ci++) { - float* y = centroids + ci * dsub; + float* y = centroids + (mem_index_type) ci * dsub; pecos::do_scale(1.0 / centroids_size[ci], y, dsub); } } } - inline void train(const pecos::drm_t& X_trn, index_type num_local_codebooks, size_t sub_sample_points=0, int seed=0, size_t max_iter=10, int threads=32) { - size_t dimension = X_trn.cols; - if (dimension % num_local_codebooks != 0) { + inline void train( + const pecos::drm_t& X_trn, + index_type num_local_codebooks, + index_type sub_sample_points=0, + int seed=0, + size_t max_iter=10, + int threads=32 + ) { + mem_index_type buf_size; // for allocating memory of vectors + mem_index_type offset; // for offsetting pointer of vectors + index_type n_data = X_trn.rows; + index_type global_dimension = X_trn.cols; + if (global_dimension % num_local_codebooks != 0) { throw std::runtime_error("Original dimension must be divided by subspace dimension"); } this->num_local_codebooks = num_local_codebooks; - local_dimension = dimension / num_local_codebooks; - index_type n_data = X_trn.rows; + this->local_dimension = global_dimension / num_local_codebooks; if (sub_sample_points == 0) { sub_sample_points = n_data; } - std::vector centroids; - original_local_codebooks.resize(num_local_codebooks * num_of_local_centroids * dimension); - global_centroid.resize(dimension, 0); + buf_size = (mem_index_type) num_local_codebooks * num_of_local_centroids * local_dimension; + original_local_codebooks.resize(buf_size, 0); + global_centroid.resize(global_dimension, 0); - std::vector xslice(sub_sample_points * local_dimension); + buf_size = (mem_index_type) sub_sample_points * local_dimension; + std::vector xslice(buf_size); for (index_type m = 0; m < num_local_codebooks; m++) { - std::vector indices(n_data, 0); + std::vector indices(n_data, 0); std::iota(indices.data(), indices.data() + n_data, 0); std::random_shuffle(indices.data(), indices.data() + n_data); - for (size_t i = 0; i < sub_sample_points; i++) { - size_t index = indices[i]; - std::memcpy(xslice.data() + i * local_dimension, X_trn.val + index * dimension + m * local_dimension, local_dimension * sizeof(float)); + for (index_type i = 0; i < sub_sample_points; i++) { + offset = (mem_index_type) indices[i] * global_dimension; + std::memcpy( + xslice.data() + (mem_index_type) i * local_dimension, + X_trn.val + offset + (mem_index_type) m * local_dimension, + local_dimension * sizeof(float) + ); } pecos::drm_t Xsub; Xsub.rows = sub_sample_points; @@ -221,7 +232,7 @@ namespace ann { // fit HLT or flat-Kmeans for each sub-space std::vector assignments(sub_sample_points); - int hlt_depth = std::log2(num_of_local_centroids); + index_type hlt_depth = (index_type) std::log2(num_of_local_centroids); float max_sample_rate = 1.0; float min_sample_rate = 1.0; float warmup_ratio = 1.0; @@ -230,16 +241,21 @@ namespace ann { hlt.run_clustering( Xsub, &clustering_param, - assignments.data()); + assignments.data() + ); - compute_centroids(Xsub, local_dimension, num_of_local_centroids, assignments.data(), - &original_local_codebooks[m * num_of_local_centroids * local_dimension], threads); + offset = (mem_index_type) m * num_of_local_centroids * local_dimension; + compute_centroids( + Xsub, + local_dimension, + num_of_local_centroids, + assignments.data(), + &original_local_codebooks[offset], + threads + ); } } - }; - } // end of namespace ann } // end of namespace pecos - diff --git a/pecos/core/ann/quantizer_impl/x86.hpp b/pecos/core/ann/quantizer_impl/x86.hpp index 3f32ddf..67c61d4 100644 --- a/pecos/core/ann/quantizer_impl/x86.hpp +++ b/pecos/core/ann/quantizer_impl/x86.hpp @@ -48,6 +48,9 @@ namespace pecos { namespace ann { + typedef uint32_t index_type; + typedef uint64_t mem_index_type; + struct ProductQuantizer4Bits : ProductQuantizer4BitsBase { __attribute__((__target__("default"))) @@ -57,12 +60,15 @@ namespace ann { __attribute__((__target__("avx512f"))) void pack_codebook_for_inference() { + mem_index_type offset1, offset2; local_codebooks.resize(original_local_codebooks.size(), 0); - for (index_type i = 0; i < num_local_codebooks; i++) { - for (size_t j = 0; j < num_of_local_centroids; j++) { - for (int k = 0; k < local_dimension; k++) { - local_codebooks[i * num_of_local_centroids * local_dimension + k * num_of_local_centroids + j] - = original_local_codebooks[i * num_of_local_centroids * local_dimension + j * local_dimension + k]; + for (index_type m = 0; m < num_local_codebooks; m++) { + offset1 = (mem_index_type) m * num_of_local_centroids * local_dimension; + for (index_type k = 0; k < num_of_local_centroids; k++) { + offset2 = (mem_index_type) k * local_dimension; + for (index_type d = 0; d < local_dimension; d++) { + local_codebooks[offset1 + (mem_index_type) d * num_of_local_centroids + k] + = original_local_codebooks[offset1 + offset2 + d]; } } } @@ -73,7 +79,7 @@ namespace ann { pad_parameters_default(max_degree, code_dimension); } - __attribute__((__target__("avx512f"))) + __attribute__((__target__("avx512f"))) void pad_parameters(index_type& max_degree, size_t& code_dimension) { // When using AVX512f, we have 16 centroids per local codebook, and each of it uses 8 bits to represent quantized // distance value. Thus, we will have 128 bits to load 1 set of local codebooks. Thus, a loadu_si512 will load @@ -184,14 +190,14 @@ namespace ann { globalID += 16; } - for (index_type d = 0; d < num_local_codebooks; d++) { + for (index_type m = 0; m < num_local_codebooks; m++) { __m512 tmp_v = _mm512_setzero_ps(); // prefect data used in the next round. It's currently decided by experience and observance of good empirical // results. The best prefetch position could be determined by a more complete empirical study. _mm_prefetch(localID + local_dimension * 16, _MM_HINT_T0); - _mm_prefetch(raw_dist.data() + d * num_of_local_centroids, _MM_HINT_T0); + _mm_prefetch(raw_dist.data() + m * num_of_local_centroids, _MM_HINT_T0); - for (int j = 0; j < local_dimension; j++) { + for (index_type d = 0; d < local_dimension; d++) { __m512 q = _mm512_set1_ps(*query_ptr++); __m512 l = _mm512_loadu_ps(localID); __m512 v = _mm512_sub_ps(q, l); @@ -200,7 +206,7 @@ namespace ann { // AVX512 read 16 floats at a time so locaiID will move 16 positions after a round localID += 16; } - _mm512_storeu_ps(&raw_dist[d * num_of_local_centroids], tmp_v); + _mm512_storeu_ps(&raw_dist[m * num_of_local_centroids], tmp_v); max = std::max(max, _mm512_reduce_max_ps(tmp_v)); min = std::min(min, _mm512_reduce_min_ps(tmp_v)); } @@ -210,7 +216,7 @@ namespace ann { __m512 _scale = _mm512_set1_ps(scale); __m512 _bias = _mm512_set1_ps(bias); auto *raw_ptr = raw_dist.data(); - for (index_type d = 0; d < num_local_codebooks; d++) { + for (index_type m = 0; m < num_local_codebooks; m++) { __m512 raw_table = _mm512_loadu_ps(raw_ptr); raw_table = _mm512_sub_ps(raw_table, _bias); raw_table = _mm512_div_ps(raw_table, _scale); diff --git a/test/tst-data/ann/hnsw-model-dense/c_model/config.json b/test/tst-data/ann/hnsw-model-dense/c_model/config.json index 9b554f4..e90a19e 100644 --- a/test/tst-data/ann/hnsw-model-dense/c_model/config.json +++ b/test/tst-data/ann/hnsw-model-dense/c_model/config.json @@ -1,6 +1,5 @@ { "hnsw_t": "pecos::ann::HNSW>", - "version": "v1.0", "train_params": { "efC": 100, "init_node": 50, @@ -8,5 +7,6 @@ "maxM0": 48, "max_level": 1, "num_node": 90 - } -} + }, + "version": "v2.0" +} \ No newline at end of file diff --git a/test/tst-data/ann/hnsw-model-dense/c_model/index.mmap_store b/test/tst-data/ann/hnsw-model-dense/c_model/index.mmap_store new file mode 100644 index 0000000000000000000000000000000000000000..b1cb2b4ea48cbefbbe44276be12a3176cc2742e8 GIT binary patch literal 29104 zcmeI53v5?+6~|j1E8|%Z9H5LsM}b01DdlD1-aaU_lSe5ON*Sz?39>4**%&K~wqs(L z*-WvyicAD7`DcN4Ng{(T`oG2~~5{K}BuBjks_zccjt>X2_- z^K-3!05$#>=k?S-k=#q$F5kBG)_&t{+Lk71o6<+yZr^s?uKlLI+E(?`Hmkq32?Mn4 z^lj@v?bi&_HZNJ*LQ-*2V?c0t|Yrp9ZZL9o*WR1{%!boj9 zecSpO?bm!(+q_ZQCf})Tmv7rfYrio?+tM-Gri|6L+qWI5+Hbl`+p2NeW~FJHFkahE z-?mQBe$7N}^U}3Vo}_J;Z`&qozcEAG(oAhrrfA#k+YY}Vnx^XWRenKbP1Altj<%h? zZJn{+v(fZ z0`1onYMVDt+vFl`yL{U=U;B*xHl;*c^WU&f`Y9dCEk%^_r9>znN|myq#3_49 zmNHEgrR*qa%AQiEEGa3on9JcMd0Q@`!=hh?AJXK%C$rRs_7n z3uc4W;56}rmDu@iuz=AYGIA7O;hL$Vwz+SYRmK>8t3)YVQco=7M2U@fhzq`n8%$sX zFY&_zxg%fTBd72JX0XEp0#EP*SMd<=JidO6cj3qLUD^z-g5kvVOg(XZ;zdk7hI0cZ zQ*&T7xq=ImACoUR6*u7;eUC44zg02ZWBeFy%jKAui8IKr;TE0^Ua*>&4JL96F1|mA zP29K&*E>;uG55ZIHF)GFPxLNPVj(sY3o%nm@NLEyze^s89h}Iu@@MK5e&Q*_lzBGa zU79oACoz*ZzQ@!YT$tR3`vzRW5xI+ps2tRmx_|!OP!~K4L}!Q=ic)99YvK&X2p($B z^k?!1R$?_h0WJ&=ruN{#tKCN7ay#d zqT~oZs8={J@dsb~yM0cZE2T2*hwa&EpTbu>nl_{ke_Un^$AH?hNm>3_s;xQ83V ziOHRuy5Yh#emJStec{-1F~$p1=yXxW3IgZhF%2&85pS@b3@+jY7x^=IOs#<{o&uil ze)&hv>327KESv5Y9VePAI$3nAsOimyUt%?NNX*0^tV5Ga@P=#9j7h_h{KyRzVjNPJ z8T)DeBdXr!iC-kjQYKZD&xsjciQ8ZcVj^x6CpAZ`;0yK+>KAO})8tnUZ@3We*4#p` zsAZhOlPCUiQTU%EN)55hHFZOrU?VKa>rM5qml(MW_{GHvGcB zi4lCnd8_pT2}$9}-gVR&d}@=LUo$>(M5!_I0JmoBlgnHE9z=|f^*H40t*ce*xe4N@ ziJDxHpWwJAXXM6=>0pi+7mSl&U79{^>JWb7Dd1VZtB>>Ei#yc(fIbB#)2E0%jOTi= zfP?(<{WlAXTuNQ<=|kU`ll*tRp39JP&C*uf1BGv=6v`Cb-Hup-~1U$z+Smr+U)j=M|2z6(xfFWtuNvC*pwd&X8X1*wUAhzab z&&Ye-<-;z=s5Ry?lSSh_&yq{TMaa*->SpJe30qwHHTwojM47uWmt$^elYpn;*?YVR zFDJSjo3)Eg7=p1XJP#~#R~;Mf(%)u@W{Gm%#+oDUjJngY-dnz;?mrpgn{|L3S|{N7 z{;NgK%8c!*zcJ5Yy}%m5wz-aktm>+yvB9lwX_yN(a>F{*RsqAWmi2P#&+bt7Qyb@? zLGHox-r{=qw*Hsqa{pl;o;`Wi&^8Kq&b|GjlRk2@$Ni^7bh#*VYUbDM!`UR@S-WDC z`^CSNsQ$)tg|VVG_G)+s&pUexodYG?)%`S0eD)A+74YocQtVvmr~CDE@mXWrynoRC z*jn%5^eZv2*|aw$;r^F^VY{ripR7OCPJd&*!?|&yHVUy{`Aa|V$QM?q@xxw@t!r)g z2hXoBtoH__yr%Yd3dH}sD1DmV&3=eY0-mzux12N2uT%5ueDPT`uZIVWdMt;&xpK4XaYTWr}Al~6wv@TBfr?Y+|6Q_V-&f2Ysc zD&TqJe3tv*z}X59dsVzAVyl3sJnyeg=7n_%4{L2(?>*!IJSQ*r^o~}pR{J}QALcwO zMCXXwHip1+;w{Ikcp_cphjmZ7sI3B?)@?_ex|cS4^a1t~?-sRry>MakJ9D$A)vJEJ zRIpHE)Ehn9HX%R1@8`wZcYa0XCsq6+QT9h|6!7#uzS5ieyW3TM%EV`nmpxyw*(TsQ zS6$~Ve|Ei^@6g+9tTV$qcn(fmY-yFTcJwNe}>Ul8Z zh4E9!hZYL?nLjYq{dr}Py8rO3-p1$jhI;V4a(bh?v#L|&hy5w`t8DGr;7NOQsOx;K zRP_Va2|Q!4RlqZ2TUq=42OAU~_7H8&4LMNxS()U1XW=T<-xkX`-sQ7(&lfy7S@mA# ztasZvrdRXsj;#WQDYNcz-{0Ls^|yuM+nO740G|3cAMsv#?M~I-SkLj^j*V+M30vVw zdUCUOanB2CJ;!~Nd8n;Ieh$1m%uQ=qqUw#k5cb_{74Yo8bRgDvs8->j*4R6+RlxJq zx3_wmChb)9#xoz*sJ4p2vuTT)*s@3AVgJCkXL3RA!Q&mA>h3I^rRE2`1IHM$Rlu{R zt9Pv7H~M`f_Nr{}2MN1JdTApTs(wJfX79jOAwNUUC%H4` zQB$qn=bI-!=WG>XKbV%|O+Gi>tqg%k4Ef>b_u;w>`zLwuA4$YO#6ZMA#6ZMA#6ZMA z#6ZMA#6ZMA#6ZMA#6ZMA#6ZMA#K28ufIYtgQQnJ=Bw`?9AYve5AY$Np8Q}iGJ7v-R z<9Zp3e(saRfSF&}{NCj!DPIxRh=GWKh=E&$f$-l4;~hPOcMlQcrzr0uKP%+tg?#=U z4Pt)3N_ijoH6gz->", - "version": "v1.0", "train_params": { "efC": 100, "init_node": 50, @@ -8,5 +7,6 @@ "maxM0": 48, "max_level": 1, "num_node": 90 - } -} + }, + "version": "v2.0" +} \ No newline at end of file diff --git a/test/tst-data/ann/hnsw-model-sparse/c_model/index.mmap_store b/test/tst-data/ann/hnsw-model-sparse/c_model/index.mmap_store new file mode 100644 index 0000000000000000000000000000000000000000..0d7d73cce4c93f8be4aecf9fcabdf3d4ae2317e8 GIT binary patch literal 29824 zcmeI54Qy8B701h$MN#<}%*qBWR7xpO3KS>;_ZCW_ARpzsfFK!;583z#q797|nv zDPvh7?gKL;1s5U1xsK6ZB+J5>b7UGu*wl^>$0RY#kBvm#c7E@3&+U8fU@;jl;fePo zC+D8$zP$JR&-p*+KKFSquplig8DTfAAmTq5@modwArXH;#BUw(qu(DnowhJK;QOb2 zd(aP{#{c3aZOnTvws^9!8TT1`*|)|^ON;u@01837EK%qSRiF`+ zg+5Rn+Cy1r%9Mb9P#fAqb!Z8-pkW75Xv>uGX)H&n6RuOe?uc_BeYM9`h>uu^k$8z6 zEW`^wVn)DC++bJCidS)f84O$lOknk=kQ}8}xMp^bP40U$7c_Ata4{|Ci$ZHCLQG60 zVk9o&Bmcw?Hn4&l9OQ#q@;%_B#^eX=nB4g6RvUFk*V%?D>4EeGQTv}bL5!Vi8ndl znrnEfxWTNkD>iBkKCY9DQQVXY*E=@n!l(!TRUP1>Ud5vKh!PVqYD~mVP07E`KfZ&y zf&sk9wd$!nCQqppV#+@=)}2w;`-g9lw_^Xe zfQOkfHbazU68$ildn3gs_sP5)CO(++MVWi#1s;Kzvm3XCfeL}d3Wqh_M_2@4Ow|LxEJ`pb%iIW(q z4c|qqa4lLNtqZx)c}IT8MJj|?+)J7EO)? z9D0w^dzjWVIS1eekIu=f)mBcYOyEbGrY&CEL+LeA^`8>e`Uz=1p1@@DQH$6s2xFOTj(c1YB>;t@c*l zwzHAgSzmx>n&=c!@Ui|FESfKB3r27)u6fkkQMav04imWG;UH1wQiUjUjNWgPfa{kH zRqnSQdNrsQ^NF+fseKd7}B?&aLO`Q1-PgewVEKBDQauI zc65Kho!#x@pdZ3P)<^?HQ@xIpOQb}o*X~t2o#%2l1bu@(KSp%AC~HGUlr^kP0!jc?o0@wPf?)<|U0WRiOk!XP^=WXO7aSyCK)zogoiU1e$YoMq; zf5@SA0VaxY;9k)`!%K^%Ku2Je%0IhLVsIJe#z&+FeywO{+-Y0xPxRx!6_# z!*6D_b*j#^>~An;<61Yl24MJbT9tdt?U#ajF~9h|f!`i@wzpBhb@siNoxJWXzdLaM znj|_~lr=o-`5vM+3Ah%|?csj)?~_9Pkl#^yi`v-Z$v?Q>-%;Y!Pud*dVk}3Ly--^P z47)azIiGe2_xWKm&a=PG`x}iSc7XWzyq0oTk< z?>eVnTpHAi_2F1io>kb-wsn0tvvINe;|D(B@iAB*vd3oIy_on_sF%0s1*dRy%kO&B zi{dfIMvB_9$19-*0{6z;)8dcAEzz5dw2 zYiiu~^?--eD_gWwl>J^C1zhcp%<~5S;nq;SX398w*X(73(KZ3s*;SR^?B|z-)=~6A z8~LaC2iLx#^SwdU=>ZQJW50%eZ>xY|&g~=I7ymgV;2~rBE{e^#Hda=+&3oI2aIuzW zf6Z2*HcR)OaQ<@FssI=K<9(ZQQPzkPMOg>iB;Xp?c!xVGe@-yJ==1YM>G7PmQHZR;>T=G09O@sK?m_6=Z&Gx>LR@2?fxqC{eZ_wx2+ptxrSJ!jt?ud^nLb!NO zk9SjS6>vSbv)X-o?JBl3g84OGjyY$m5c|HILT|v?VL{(m5CKCkNtWn)4^h5G{S&+} z5lftbI0JD8;ta$Yh%*ppAkILXfj9$k2I36F8Hh6wXCTf%oPjt4H#7t6{f!gleehV~ z48$3TGZ1GW&cJ8Q0QV2xWsC10pEYCg=RQvv(Djwg?|Xip>J?*+GZ1GW&cMx*f#|>2 z#=Cq7?