From 221cec0914f7a97305f7c86bb9f5c4e2cce6c1cd Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 15 Jun 2023 23:02:28 +0000 Subject: [PATCH] Add Python binding --- pecos/core/base.py | 84 ++++++++ pecos/core/libpecos.cpp | 52 +++++ pecos/core/utils/ankerl_int2int_mmap_vec.hpp | 130 ------------ .../utils/ankerl_mmap_hashmap_wrapper.hpp | 45 ---- ...t_mmap_vec.hpp => mmap_ankerl_hashmap.hpp} | 193 ++++++++++++++++++ pecos/utils/mmap_ankerl_hashmap_util.py | 137 +++++++++++++ setup.py | 2 +- 7 files changed, 467 insertions(+), 176 deletions(-) delete mode 100644 pecos/core/utils/ankerl_int2int_mmap_vec.hpp delete mode 100644 pecos/core/utils/ankerl_mmap_hashmap_wrapper.hpp rename pecos/core/utils/{ankerl_str2int_mmap_vec.hpp => mmap_ankerl_hashmap.hpp} (50%) create mode 100644 pecos/utils/mmap_ankerl_hashmap_util.py diff --git a/pecos/core/base.py b/pecos/core/base.py index 39ca131..dc9c629 100644 --- a/pecos/core/base.py +++ b/pecos/core/base.py @@ -25,6 +25,7 @@ c_int32, c_uint32, c_uint64, + c_size_t, c_void_p, cast, ) @@ -531,6 +532,7 @@ def __init__(self, dirname, soname, forced_rebuild=False): self.link_clustering() self.link_tfidf_vectorizer() self.link_ann_hnsw_methods() + self.link_mmap_ankerl_hashmap_methods() def link_xlinear_methods(self): """ @@ -1699,5 +1701,87 @@ def ann_hnsw_init(self, data_type, metric_type): ) return self.ann_hnsw_fn_dict[data_type, metric_type] + def link_mmap_ankerl_hashmap_methods(self): + """ + Specify C-lib's Memory-mappable Ankerl Hashmap methods arguments and return types. 
+ """ + fn_prefix = "ankerl_map" + map_type_list = ["str2int", "int2int"] + self.mmap_ankerl_map_fn_dict = {} + + for map_type in map_type_list: + local_fn_dict = {} + + fn_name = "new" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + corelib.fillprototype(local_fn_dict[fn_name], c_void_p, None) + + fn_name = "destruct" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + corelib.fillprototype(local_fn_dict[fn_name], None, [c_void_p]) + + fn_name = "save" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + corelib.fillprototype(local_fn_dict[fn_name], None, [c_void_p, c_char_p]) + + fn_name = "load" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + corelib.fillprototype(local_fn_dict[fn_name], c_void_p, [c_char_p, c_bool]) + + fn_name = "size" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + corelib.fillprototype(local_fn_dict[fn_name], c_size_t, [c_void_p]) + + # Fill insert & get + fn_name = "insert" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + if map_type == "str2int": + arg_list = [ + c_void_p, # pointer of C/C++ map + c_char_p, # pointer of key string + c_uint32, # length of key string + c_uint64, # value uint64 + ] + elif map_type == "int2int": + arg_list = [ + c_void_p, # pointer of C/C++ map + c_uint64, # key uint64 + c_uint64, # value uint64 + ] + else: + raise ValueError(f"{map_type} not implemented.") + corelib.fillprototype(local_fn_dict[fn_name], None, arg_list) + + fn_name = "get" + local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}") + if map_type == "str2int": + arg_list = [ + c_void_p, # pointer of C/C++ map + c_char_p, # pointer of key string + c_uint32, # length of key string + ] + elif map_type == "int2int": + arg_list = [ + c_void_p, # pointer of C/C++ map 
c_uint64, # key uint64 + ] + else: + raise ValueError(f"{map_type} not implemented.") + corelib.fillprototype(local_fn_dict[fn_name], c_uint64, arg_list) + + self.mmap_ankerl_map_fn_dict[map_type] = local_fn_dict + + def mmap_ankerl_hashmap_init(self, map_type): + """Python to C/C++ interface for Memory-mappable Ankerl Hashmap initialization + Args: + map_type (string): Type of Hashmap. + Returns: + mmap_ankerl_map_fn_dict (dict): a dictionary that holds clib's C/C++ functions for Python to call + """ + + if map_type not in self.mmap_ankerl_map_fn_dict: + raise NotImplementedError(f"map_type={map_type} is not implemented.") + return self.mmap_ankerl_map_fn_dict[map_type] + clib = corelib(os.path.join(os.path.dirname(os.path.abspath(pecos.__file__)), "core"), "libpecos") diff --git a/pecos/core/libpecos.cpp b/pecos/core/libpecos.cpp index 7378c89..1878c18 100644 --- a/pecos/core/libpecos.cpp +++ b/pecos/core/libpecos.cpp @@ -13,6 +13,7 @@ #include "utils/clustering.hpp" #include "utils/matrix.hpp" +#include "utils/mmap_ankerl_hashmap.hpp" #include "utils/tfidf.hpp" #include "utils/parallel.hpp" #include "xmc/inference.hpp" @@ -474,4 +475,55 @@ extern "C" { C_ANN_HNSW_PREDICT(_csr_ip_f32, ScipyCsrF32, pecos::csr_t, hnsw_csr_ip_t) C_ANN_HNSW_PREDICT(_csr_l2_f32, ScipyCsrF32, pecos::csr_t, hnsw_csr_l2_t) + // ==== C Interface of Memory-mappable Ankerl Hashmap ==== + + typedef pecos::ankerl_mmap_hashmap::Str2IntMap ankerl_map_str2int; + typedef pecos::ankerl_mmap_hashmap::Int2IntMap ankerl_map_int2int; + + #define ANKERL_MAP_NEW(SUFFIX) \ + void* ankerl_map_new_ ## SUFFIX () { \ + return static_cast<void*>(new ankerl_map_ ## SUFFIX()); } + ANKERL_MAP_NEW(str2int) + ANKERL_MAP_NEW(int2int) + + #define ANKERL_MAP_DESTRUCT(SUFFIX) \ + void ankerl_map_destruct_ ## SUFFIX (void* map_ptr) { \ + delete static_cast<ankerl_map_ ## SUFFIX *>(map_ptr); } + ANKERL_MAP_DESTRUCT(str2int) + ANKERL_MAP_DESTRUCT(int2int) + + #define ANKERL_MAP_SAVE(SUFFIX) \ + void ankerl_map_save_ ## SUFFIX 
(void* map_ptr, const char* 
map_dir) { \ + static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->save(map_dir); } + ANKERL_MAP_SAVE(str2int) + ANKERL_MAP_SAVE(int2int) + + #define ANKERL_MAP_LOAD(SUFFIX) \ + void* ankerl_map_load_ ## SUFFIX (const char* map_dir, const bool lazy_load) { \ + ankerl_map_ ## SUFFIX * map_ptr = new ankerl_map_ ## SUFFIX(); \ + map_ptr->load(map_dir, lazy_load); \ + return static_cast<void*>(map_ptr); } + ANKERL_MAP_LOAD(str2int) + ANKERL_MAP_LOAD(int2int) + + #define ANKERL_MAP_SIZE(SUFFIX) \ + size_t ankerl_map_size_ ## SUFFIX (void* map_ptr) { \ + return static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->size(); } + ANKERL_MAP_SIZE(str2int) + ANKERL_MAP_SIZE(int2int) + + // Insert + #define KEY_SINGLE_ARG(A,B) A,B + #define ANKERL_MAP_INSERT(SUFFIX, KEY, FUNC_CALL_KEY) \ + void ankerl_map_insert_ ## SUFFIX (void* map_ptr, KEY, uint64_t val) { \ + static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->insert( FUNC_CALL_KEY, val); } + ANKERL_MAP_INSERT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len)) + ANKERL_MAP_INSERT(int2int, uint64_t key, key) + + // Get + #define ANKERL_MAP_GET(SUFFIX, KEY, FUNC_CALL_KEY) \ + uint64_t ankerl_map_get_ ## SUFFIX (void* map_ptr, KEY) { \ + return static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->get( FUNC_CALL_KEY); } + ANKERL_MAP_GET(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len)) + ANKERL_MAP_GET(int2int, uint64_t key, key) } diff --git a/pecos/core/utils/ankerl_int2int_mmap_vec.hpp b/pecos/core/utils/ankerl_int2int_mmap_vec.hpp deleted file mode 100644 index 11cf360..0000000 --- a/pecos/core/utils/ankerl_int2int_mmap_vec.hpp +++ /dev/null @@ -1,130 +0,0 @@ -#include "mmap_util.hpp" - - -// Memory-mappable vector of std::pair for Ankerl -// When calling write methods, the assumption is that the underlying storage is in memory, i.e. 
std::vector -class AnkerlInt2IntMmapableVector : public pecos::mmap_util::MmapableVector> { - template - class iter_t; - - public: - using key_type = uint64_t; - using value_type = std::pair; - using mem_vec_type = std::vector; - using allocator_type = typename mem_vec_type::allocator_type; - using size_type = typename mem_vec_type::size_type; - using difference_type = typename mem_vec_type::difference_type; - using reference = typename mem_vec_type::reference; - using const_reference = typename mem_vec_type::const_reference; - using pointer = typename mem_vec_type::pointer; - using const_pointer = typename mem_vec_type::const_pointer; - // Custom iterator - using iterator = iter_t; - using const_iterator = iter_t; - - AnkerlInt2IntMmapableVector() = default; - AnkerlInt2IntMmapableVector(allocator_type alloc) - : pecos::mmap_util::MmapableVector(alloc) {} - - auto get_allocator() { return this->store_.get_allocator(); } - - constexpr auto back() -> reference { return this->data_[this->size_ - 1]; } - constexpr auto begin() -> iterator { return {this->data_}; } - constexpr auto cbegin() -> const_iterator { return {this->data_}; } - constexpr auto end() -> iterator { return {this->data_ + this->size_}; } - constexpr auto cend() -> const_iterator{ return {this->data_ + this->size_}; } - - void shrink_to_fit() { this->store_.shrink_to_fit(); } - void reserve(size_t new_capacity) { this->store_.reserve(new_capacity); } - - template - auto emplace_back(Args&&... 
args) { - auto eb_val = this->store_.emplace_back(std::forward(args)...); - this->size_ = this->store_.size(); - this->data_ = this->store_.data(); - return eb_val; - } - - void pop_back() { - this->store_.pop_back(); - this->size_ = this->store_.size(); - this->data_ = this->store_.data(); - } - - /* Get key for member */ - key_type get_key(value_type const& vt) const { - return vt.first; - } - - - private: - /** - * Iterator class doubles as const_iterator and iterator - */ - template - class iter_t { - using ptr_t = typename std::conditional_t; - ptr_t iter_data_{}; - - template - friend class iter_t; - - public: - using iterator_category = std::forward_iterator_tag; - using difference_type = AnkerlInt2IntMmapableVector::difference_type; - using value_type = AnkerlInt2IntMmapableVector::value_type; - using reference = typename std::conditional_t; - using pointer = typename std::conditional_t; - - iter_t() noexcept = default; - - template ::type> - constexpr iter_t(iter_t const& other) noexcept - : iter_data_(other.iter_data_) {} - - constexpr iter_t(ptr_t data) noexcept - : iter_data_(data) {} - - template ::type> - constexpr auto operator=(iter_t const& other) noexcept -> iter_t& { - iter_data_ = other.iter_data_; - return *this; - } - - constexpr auto operator++() noexcept -> iter_t& { - ++iter_data_; - return *this; - } - - constexpr auto operator+(difference_type diff) noexcept -> iter_t { - return {iter_data_ + diff}; - } - - template - constexpr auto operator-(iter_t const& other) noexcept -> difference_type { - return static_cast(iter_data_ - other.iter_data_); - } - - constexpr auto operator*() const noexcept -> reference { - return *iter_data_; - } - - constexpr auto operator->() const noexcept -> pointer { - return iter_data_; - } - - template - constexpr auto operator==(iter_t const& o) const noexcept -> bool { - return iter_data_ == o.iter_data_; - } - - template - constexpr auto operator!=(iter_t const& o) const noexcept -> bool { - return !(*this 
== o); - } - }; - -}; diff --git a/pecos/core/utils/ankerl_mmap_hashmap_wrapper.hpp b/pecos/core/utils/ankerl_mmap_hashmap_wrapper.hpp deleted file mode 100644 index 119593b..0000000 --- a/pecos/core/utils/ankerl_mmap_hashmap_wrapper.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "../third_party/ankerl/unordered_dense.h" -#include "ankerl_int2int_mmap_vec.hpp" -#include "ankerl_str2int_mmap_vec.hpp" - -namespace pecos { -namespace ankerl_mmap_hashmap { - -class Str2IntMap { -public: - void insert(std::string_view key, uint64_t val) { map[key] = val; } - uint64_t get(std::string_view key) { return map[key]; } - auto size() { return map.size(); } - - void save(const std::string& folderpath) { map.save_mmap(folderpath); } - void load(const std::string& folderpath, const bool lazy_load) { map.load_mmap(folderpath, lazy_load); } - -private: - ankerl::unordered_dense::map< - std::string_view, uint64_t, - ankerl::unordered_dense::v4_0_0::hash, - std::equal_to, - AnkerlStr2IntMmapableVector - > map; -}; - -class Int2IntMap { -public: - void insert(uint64_t key, uint64_t val) { map[key] = val; } - uint64_t get(uint64_t key) { return map[key]; } - auto size() { return map.size(); } - - void save(const std::string& folderpath) { map.save_mmap(folderpath); } - void load(const std::string& folderpath, const bool lazy_load) { map.load_mmap(folderpath, lazy_load); } - -private: - ankerl::unordered_dense::map< - uint64_t, uint64_t, - ankerl::unordered_dense::v4_0_0::hash, - std::equal_to, - AnkerlInt2IntMmapableVector - > map; -}; - -} // end namespace mmap_util -} // end namespace pecos diff --git a/pecos/core/utils/ankerl_str2int_mmap_vec.hpp b/pecos/core/utils/mmap_ankerl_hashmap.hpp similarity index 50% rename from pecos/core/utils/ankerl_str2int_mmap_vec.hpp rename to pecos/core/utils/mmap_ankerl_hashmap.hpp index c5fe8fc..64a57eb 100644 --- a/pecos/core/utils/ankerl_str2int_mmap_vec.hpp +++ b/pecos/core/utils/mmap_ankerl_hashmap.hpp @@ -1,5 +1,26 @@ +/* + * Copyright 2021 
Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://aws.amazon.com/apache2.0/ + * + * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +#ifndef __MMAP_ANKERL_HASHMAP_H__ +#define __MMAP_ANKERL_HASHMAP_H__ + +#include "../third_party/ankerl/unordered_dense.h" #include "mmap_util.hpp" +namespace pecos { +namespace ankerl_mmap_hashmap { + +namespace details_ { // namespace for Module Private classes // Memory-mappable vector of std::pair for Ankerl // This vector takes/gets std::string_view as the key, but emplace back as the special mmap format StrView @@ -211,3 +232,175 @@ class AnkerlStr2IntMmapableVector { } }; }; + + +// Memory-mappable vector of std::pair for Ankerl +// When calling write methods, the assumption is that the underlying storage is in memory, i.e. 
std::vector +class AnkerlInt2IntMmapableVector : public pecos::mmap_util::MmapableVector<std::pair<uint64_t, uint64_t>> { + template <bool IsConst> + class iter_t; + + public: + using key_type = uint64_t; + using value_type = std::pair<uint64_t, uint64_t>; + using mem_vec_type = std::vector<value_type>; + using allocator_type = typename mem_vec_type::allocator_type; + using size_type = typename mem_vec_type::size_type; + using difference_type = typename mem_vec_type::difference_type; + using reference = typename mem_vec_type::reference; + using const_reference = typename mem_vec_type::const_reference; + using pointer = typename mem_vec_type::pointer; + using const_pointer = typename mem_vec_type::const_pointer; + // Custom iterator + using iterator = iter_t<false>; + using const_iterator = iter_t<true>; + + AnkerlInt2IntMmapableVector() = default; + AnkerlInt2IntMmapableVector(allocator_type alloc) + : pecos::mmap_util::MmapableVector<value_type>(alloc) {} + + auto get_allocator() { return this->store_.get_allocator(); } + + constexpr auto back() -> reference { return this->data_[this->size_ - 1]; } + constexpr auto begin() -> iterator { return {this->data_}; } + constexpr auto cbegin() -> const_iterator { return {this->data_}; } + constexpr auto end() -> iterator { return {this->data_ + this->size_}; } + constexpr auto cend() -> const_iterator{ return {this->data_ + this->size_}; } + + void shrink_to_fit() { this->store_.shrink_to_fit(); } + void reserve(size_t new_capacity) { this->store_.reserve(new_capacity); } + + template <class... Args> + auto emplace_back(Args&&... 
args) { + auto eb_val = this->store_.emplace_back(std::forward<Args>(args)...); + this->size_ = this->store_.size(); + this->data_ = this->store_.data(); + return eb_val; + } + + void pop_back() { + this->store_.pop_back(); + this->size_ = this->store_.size(); + this->data_ = this->store_.data(); + } + + /* Get key for member */ + key_type get_key(value_type const& vt) const { + return vt.first; + } + + + private: + /** + * Iterator class doubles as const_iterator and iterator + */ + template <bool IsConst> + class iter_t { + using ptr_t = typename std::conditional_t<IsConst, AnkerlInt2IntMmapableVector::const_pointer, AnkerlInt2IntMmapableVector::pointer>; + ptr_t iter_data_{}; + + template <bool B> + friend class iter_t; + + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = AnkerlInt2IntMmapableVector::difference_type; + using value_type = AnkerlInt2IntMmapableVector::value_type; + using reference = typename std::conditional_t<IsConst, value_type const&, value_type&>; + using pointer = typename std::conditional_t<IsConst, AnkerlInt2IntMmapableVector::const_pointer, AnkerlInt2IntMmapableVector::pointer>; + + iter_t() noexcept = default; + + template <bool OtherIsConst, typename = typename std::enable_if<IsConst && !OtherIsConst>::type> + constexpr iter_t(iter_t<OtherIsConst> const& other) noexcept + : iter_data_(other.iter_data_) {} + + constexpr iter_t(ptr_t data) noexcept + : iter_data_(data) {} + + template <bool OtherIsConst, typename = typename std::enable_if<IsConst && !OtherIsConst>::type> + constexpr auto operator=(iter_t<OtherIsConst> const& other) noexcept -> iter_t& { + iter_data_ = other.iter_data_; + return *this; + } + + constexpr auto operator++() noexcept -> iter_t& { + ++iter_data_; + return *this; + } + + constexpr auto operator+(difference_type diff) noexcept -> iter_t { + return {iter_data_ + diff}; + } + + template <bool OtherIsConst> + constexpr auto operator-(iter_t<OtherIsConst> const& other) noexcept -> 
difference_type { + return static_cast<difference_type>(iter_data_ - other.iter_data_); + } + + constexpr auto operator*() const noexcept -> reference { + return *iter_data_; + } + + constexpr auto operator->() const noexcept -> pointer { + return iter_data_; + } + + template <bool OtherIsConst> + constexpr auto operator==(iter_t<OtherIsConst> const& o) const noexcept -> bool { + return iter_data_ == o.iter_data_; + } + + template <bool OtherIsConst> + constexpr auto operator!=(iter_t<OtherIsConst> const& o) const noexcept -> bool { + return !(*this 
== o); + } + }; + +}; +} // end namespace details_ + + +class Str2IntMap { +public: + void insert(const char* key, uint32_t key_len, uint64_t val) { map[std::string_view(key, key_len)] = val; } + uint64_t get(const char* key, uint32_t key_len) { return map[std::string_view(key, key_len)]; } + auto size() { return map.size(); } + + void save(const std::string& map_dir) { map.save_mmap(map_dir); } + void load(const std::string& map_dir, const bool lazy_load) { map.load_mmap(map_dir, lazy_load); } + +private: + ankerl::unordered_dense::map< + std::string_view, uint64_t, + ankerl::unordered_dense::v4_0_0::hash<std::string_view>, + std::equal_to<std::string_view>, + details_::AnkerlStr2IntMmapableVector + > map; +}; + +class Int2IntMap { +public: + void insert(uint64_t key, uint64_t val) { map[key] = val; } + uint64_t get(uint64_t key) { return map[key]; } + auto size() { return map.size(); } + + void save(const std::string& map_dir) { map.save_mmap(map_dir); } + void load(const std::string& map_dir, const bool lazy_load) { map.load_mmap(map_dir, lazy_load); } + +private: + ankerl::unordered_dense::map< + uint64_t, uint64_t, + ankerl::unordered_dense::v4_0_0::hash<uint64_t>, + std::equal_to<uint64_t>, + details_::AnkerlInt2IntMmapableVector + > map; +}; + +} // end namespace ankerl_mmap_hashmap +} // end namespace pecos + +#endif // end of __MMAP_ANKERL_HASHMAP_H__ diff --git a/pecos/utils/mmap_ankerl_hashmap_util.py b/pecos/utils/mmap_ankerl_hashmap_util.py new file mode 100644 index 0000000..b9f6c4e --- /dev/null +++ b/pecos/utils/mmap_ankerl_hashmap_util.py @@ -0,0 +1,137 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance +# with the License. A copy of the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +from abc import abstractmethod +from ctypes import ( + c_bool, + c_uint32, + c_uint64, + c_char_p, + c_void_p, +) +from pecos.core import clib + + +class MmapAnkerlHashmap(object): + """ + Python wrapper of Memory-mappable Ankerl Hashmap + """ + + def __init__(self, map_type): + if map_type not in clib.mmap_ankerl_map_fn_dict: + raise NotImplementedError(f"map_type={map_type} is not implemented.") + + self.map_type = map_type + self.map = None + self.mode = None + self.map_dir = None + + def open(self, mode, map_dir=None): + if mode == "w": + hashmap = _MmapAnkerlHashmapReadWrite.init(self.map_type) + elif mode == "r" or mode == "r_lazy": + lazy_load = mode == "r_lazy" + hashmap = _MmapAnkerlHashmapReadOnly.init(self.map_type, map_dir, lazy_load) + else: + raise NotImplementedError(f"{mode} not implemented.") + + self.map = hashmap + self.mode = mode + self.map_dir = map_dir + + def close(self): + if self.mode == "w" and self.map_dir is not None: + self.map.save(self.map_dir) + self.map.destruct() + + +class _MmapAnkerlHashmapReadOnly(object): + def __init__(self, map_ptr, fn_dict): + self.map_ptr = map_ptr + self.fn_dict = fn_dict + + @abstractmethod + def get(self, key): + pass + + def size(self): + return self.fn_dict["size"](c_void_p(self.map_ptr)) + + def destruct(self): + self.fn_dict["destruct"](c_void_p(self.map_ptr)) + + @classmethod + def init(cls, map_type, map_dir, lazy_load): + fn_dict = clib.mmap_ankerl_hashmap_init(map_type) + map_ptr = fn_dict["load"](c_char_p(map_dir.encode("utf-8")), c_bool(lazy_load)) + + if map_type == "str2int": + return _MmapAnkerlHashmapStr2IntReadOnly(map_ptr, fn_dict) + elif map_type == "int2int": + return _MmapAnkerlHashmapInt2IntReadOnly(map_ptr, fn_dict) + else: + raise 
NotImplementedError(f"{map_type}") + + 
+class _MmapAnkerlHashmapStr2IntReadOnly(_MmapAnkerlHashmapReadOnly): + def get(self, key_utf8): + """ + Args: + key_utf8 (bytes): UTF8 encoded bytes string key + """ + return self.fn_dict["get"]( + c_void_p(self.map_ptr), c_char_p(key_utf8), c_uint32(len(key_utf8)) + ) + + +class _MmapAnkerlHashmapInt2IntReadOnly(_MmapAnkerlHashmapReadOnly): + def get(self, key): + return self.fn_dict["get"](c_void_p(self.map_ptr), c_uint64(key)) + + +class _MmapAnkerlHashmapReadWrite(_MmapAnkerlHashmapReadOnly): + @abstractmethod + def insert(self, key, val): + pass + + def save(self, map_dir): + import pathlib + + pathlib.Path(map_dir).mkdir(parents=True, exist_ok=True) + self.fn_dict["save"](self.map_ptr, c_char_p(map_dir.encode("utf-8"))) + + @classmethod + def init(cls, map_type): + fn_dict = clib.mmap_ankerl_hashmap_init(map_type) + map_ptr = fn_dict["new"]() + + if map_type == "str2int": + return _MmapAnkerlHashmapStr2IntReadWrite(map_ptr, fn_dict) + elif map_type == "int2int": + return _MmapAnkerlHashmapInt2IntReadWrite(map_ptr, fn_dict) + else: + raise NotImplementedError(f"{map_type}") + + +class _MmapAnkerlHashmapStr2IntReadWrite(_MmapAnkerlHashmapReadWrite): + def insert(self, key_utf8, val): + """ + Args: + key_utf8 (bytes): UTF8 encoded bytes string key + val (int): Integer value + """ + self.fn_dict["insert"]( + c_void_p(self.map_ptr), c_char_p(key_utf8), c_uint32(len(key_utf8)), c_uint64(val) + ) + + +class _MmapAnkerlHashmapInt2IntReadWrite(_MmapAnkerlHashmapReadWrite): + def insert(self, key, val): + self.fn_dict["insert"](c_void_p(self.map_ptr), c_uint64(key), c_uint64(val)) diff --git a/setup.py b/setup.py index 2e9ae26..7d83871 100644 --- a/setup.py +++ b/setup.py @@ -141,7 +141,7 @@ def get_blas_lib_dir(cls): include_dirs=["pecos/core", "/usr/include/", "/usr/local/include"], libraries=["gomp", "gcc"] + blas_lib, library_dirs=blas_dir, - extra_compile_args=["-fopenmp", "-O3", "-std=c++14"] + manual_compile_args, + extra_compile_args=["-fopenmp", "-O3", 
"-std=c++17"] 
+ manual_compile_args, extra_link_args=['-Wl,--no-as-needed', f"-Wl,-rpath,{':'.join(blas_dir)}"] )