Skip to content

Commit

Permalink
Add methods for contains and get with default
Browse files Browse the repository at this point in the history
  • Loading branch information
weiliw-amz committed Jul 20, 2023
1 parent ff61667 commit 125894e
Show file tree
Hide file tree
Showing 6 changed files with 293 additions and 142 deletions.
81 changes: 39 additions & 42 deletions pecos/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ def __init__(self, dirname, soname, forced_rebuild=False):
self.link_clustering()
self.link_tfidf_vectorizer()
self.link_ann_hnsw_methods()
self.link_mmap_ankerl_hashmap_methods()
self.link_mmap_hashmap_methods()

def link_xlinear_methods(self):
"""
Expand Down Expand Up @@ -1701,13 +1701,22 @@ def ann_hnsw_init(self, data_type, metric_type):
)
return self.ann_hnsw_fn_dict[data_type, metric_type]

def link_mmap_ankerl_hashmap_methods(self):
def link_mmap_hashmap_methods(self):
"""
Specify C-lib's Memory-mappable Ankerl Hashmap methods arguments and return types.
Specify C-lib's Memory-mappable Hashmap methods arguments and return types.
"""
fn_prefix = "ankerl_map"
fn_prefix = "mmap_hashmap"
map_type_list = ["str2int", "int2int"]
self.mmap_ankerl_map_fn_dict = {}
# ctypes argument types that describe a key, per map type. The prototypes
# filled in below place these after the map pointer and before any trailing
# value argument.
key_args_dict = {
"str2int": [
c_char_p, # pointer to the key string's bytes
c_uint32, # length of key string
],
"int2int": [
c_uint64, # key (unsigned 64-bit int)
],
}
self.mmap_map_fn_dict = {}

for map_type in map_type_list:
local_fn_dict = {}
Expand All @@ -1730,58 +1739,46 @@ def link_mmap_ankerl_hashmap_methods(self):

fn_name = "size"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], c_size_t, None)
corelib.fillprototype(local_fn_dict[fn_name], c_size_t, [c_void_p])

# Fill insert & get
fn_name = "insert"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
if map_type == "str2int":
arg_list = [
c_void_p, # pointer of C/C++ map
c_char_p, # pointer of key string
c_uint32, # length of key string
c_uint64, # value int64
]
elif map_type == "int2int":
arg_list = [
c_void_p, # pointer of C/C++ map
c_uint64, # key int64
c_uint64, # value int64
]
else:
raise ValueError(f"{map_type} not implemented.")
corelib.fillprototype(local_fn_dict[fn_name], None, arg_list)
corelib.fillprototype(
local_fn_dict[fn_name], None, [c_void_p] + key_args_dict[map_type] + [c_uint64]
)

fn_name = "get"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
if map_type == "str2int":
arg_list = [
c_void_p, # pointer of C/C++ map
c_char_p, # pointer of key string
c_uint32, # length of key string
]
elif map_type == "int2int":
arg_list = [
c_void_p, # pointer of C/C++ map
c_uint64, # key int64
]
else:
raise ValueError(f"{map_type} not implemented.")
corelib.fillprototype(local_fn_dict[fn_name], c_uint64, arg_list)
corelib.fillprototype(
local_fn_dict[fn_name], c_uint64, [c_void_p] + key_args_dict[map_type]
)

fn_name = "get_w_default"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(
local_fn_dict[fn_name], c_uint64, [c_void_p] + key_args_dict[map_type] + [c_uint64]
)

fn_name = "contains"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(
local_fn_dict[fn_name], c_bool, [c_void_p] + key_args_dict[map_type]
)

self.mmap_ankerl_map_fn_dict[map_type] = local_fn_dict
self.mmap_map_fn_dict[map_type] = local_fn_dict

def mmap_ankerl_hashmap_init(self, map_type):
"""Python to C/C++ interface for Memory-mappable Ankerl Hashmap initialization
def mmap_hashmap_init(self, map_type):
"""Python to C/C++ interface for Memory-mappable Hashmap initialization
Args:
map_type (string): Type of Hashmap.
Returns:
mmap_ankerl_map_fn_dict (dict): a dictionary that holds clib's C/C++ functions for Python to call
mmap_map_fn_dict (dict): a dictionary that holds clib's C/C++ functions for Python to call
"""

if map_type not in self.mmap_ankerl_map_fn_dict:
if map_type not in self.mmap_map_fn_dict:
raise NotImplementedError(f"map_type={map_type} is not implemented.")
return self.mmap_ankerl_map_fn_dict[map_type]
return self.mmap_map_fn_dict[map_type]


clib = corelib(os.path.join(os.path.dirname(os.path.abspath(pecos.__file__)), "core"), "libpecos")
106 changes: 62 additions & 44 deletions pecos/core/libpecos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include "utils/clustering.hpp"
#include "utils/matrix.hpp"
#include "utils/mmap_ankerl_hashmap.hpp"
#include "utils/mmap_hashmap.hpp"
#include "utils/tfidf.hpp"
#include "utils/parallel.hpp"
#include "xmc/inference.hpp"
Expand Down Expand Up @@ -475,55 +475,73 @@ extern "C" {
C_ANN_HNSW_PREDICT(_csr_ip_f32, ScipyCsrF32, pecos::csr_t, hnsw_csr_ip_t)
C_ANN_HNSW_PREDICT(_csr_l2_f32, ScipyCsrF32, pecos::csr_t, hnsw_csr_l2_t)

// ==== C Interface of Memory-mappable Ankerl Hashmap ====

typedef pecos::ankerl_mmap_hashmap::Str2IntMap ankerl_map_str2int;
typedef pecos::ankerl_mmap_hashmap::Int2IntMap ankerl_map_int2int;

#define ANKERL_MAP_NEW(SUFFIX) \
void* ankerl_map_new_ ## SUFFIX () { \
return static_cast<void*>(new ankerl_map_ ## SUFFIX()); }
ANKERL_MAP_NEW(str2int)
ANKERL_MAP_NEW(int2int)

#define ANKERL_MAP_DESTRUCT(SUFFIX) \
void ankerl_map_destruct_ ## SUFFIX (void* map_ptr) { \
delete static_cast<ankerl_map_ ## SUFFIX *>(map_ptr); }
ANKERL_MAP_DESTRUCT(str2int)
ANKERL_MAP_DESTRUCT(int2int)

#define ANKERL_MAP_SAVE(SUFFIX) \
void ankerl_map_save_ ## SUFFIX (void* map_ptr, const char* map_dir) { \
static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->save(map_dir); }
ANKERL_MAP_SAVE(str2int)
ANKERL_MAP_SAVE(int2int)

#define ANKERL_MAP_LOAD(SUFFIX) \
void* ankerl_map_load_ ## SUFFIX (const char* map_dir, const bool lazy_load) { \
ankerl_map_ ## SUFFIX * map_ptr = new ankerl_map_ ## SUFFIX(); \
// ==== C Interface of Memory-mappable Hashmap ====

typedef pecos::mmap_hashmap::Str2IntMap mmap_hashmap_str2int;
typedef pecos::mmap_hashmap::Int2IntMap mmap_hashmap_int2int;

// New
// Generates `void* mmap_hashmap_new_<SUFFIX>()`: allocates a default-constructed
// mmap_hashmap_<SUFFIX> with `new` and returns it as an opaque void* handle.
// The handle is freed by mmap_hashmap_destruct_<SUFFIX>.
#define MMAP_MAP_NEW(SUFFIX) \
void* mmap_hashmap_new_ ## SUFFIX () { \
return static_cast<void*>(new mmap_hashmap_ ## SUFFIX()); }
// Instantiate for both supported map types.
MMAP_MAP_NEW(str2int)
MMAP_MAP_NEW(int2int)

// Destruct
// Generates `void mmap_hashmap_destruct_<SUFFIX>(void* map_ptr)`: casts the
// opaque handle back to mmap_hashmap_<SUFFIX>* and deletes it.
// NOTE(review): map_ptr must point to a mmap_hashmap_<SUFFIX> of the same
// SUFFIX (the cast is unchecked); presumably callers never reuse the handle
// after this call -- confirm on the Python side.
#define MMAP_MAP_DESTRUCT(SUFFIX) \
void mmap_hashmap_destruct_ ## SUFFIX (void* map_ptr) { \
delete static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr); }
MMAP_MAP_DESTRUCT(str2int)
MMAP_MAP_DESTRUCT(int2int)

// Save
// Generates `void mmap_hashmap_save_<SUFFIX>(void* map_ptr, const char* map_dir)`:
// casts the handle to mmap_hashmap_<SUFFIX>* and forwards map_dir to its save().
// What save() writes under map_dir is defined by the map class, not here.
#define MMAP_MAP_SAVE(SUFFIX) \
void mmap_hashmap_save_ ## SUFFIX (void* map_ptr, const char* map_dir) { \
static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->save(map_dir); }
MMAP_MAP_SAVE(str2int)
MMAP_MAP_SAVE(int2int)

// Load
// Generates `void* mmap_hashmap_load_<SUFFIX>(const char* map_dir, const bool lazy_load)`:
// allocates a new mmap_hashmap_<SUFFIX>, calls load(map_dir, lazy_load) on it,
// and returns it as an opaque void* handle.
// NOTE(review): if load() can throw, the freshly allocated map is never
// deleted (no cleanup between `new` and `return`), and the exception would
// leave a function generated inside the surrounding extern "C" block --
// confirm whether load() throws and how callers expect failures to surface.
#define MMAP_MAP_LOAD(SUFFIX) \
void* mmap_hashmap_load_ ## SUFFIX (const char* map_dir, const bool lazy_load) { \
mmap_hashmap_ ## SUFFIX * map_ptr = new mmap_hashmap_ ## SUFFIX(); \
map_ptr->load(map_dir, lazy_load); \
return static_cast<void *>(map_ptr); }
ANKERL_MAP_LOAD(str2int)
ANKERL_MAP_LOAD(int2int)
MMAP_MAP_LOAD(str2int)
MMAP_MAP_LOAD(int2int)

#define ANKERL_MAP_SIZE(SUFFIX) \
size_t ankerl_map_size_ ## SUFFIX (void* map_ptr) { \
return static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->size(); }
ANKERL_MAP_SIZE(str2int)
ANKERL_MAP_SIZE(int2int)
// Size
// Generates `size_t mmap_hashmap_size_<SUFFIX>(void* map_ptr)`: casts the handle
// to mmap_hashmap_<SUFFIX>* and returns the result of its size().
#define MMAP_MAP_SIZE(SUFFIX) \
size_t mmap_hashmap_size_ ## SUFFIX (void* map_ptr) { \
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->size(); }
MMAP_MAP_SIZE(str2int)
MMAP_MAP_SIZE(int2int)

// Insert
// Lets a comma-separated pair be passed as ONE argument to another macro
// (e.g. the two-parameter string key `const char* key, uint32_t key_len`);
// it expands back to `A,B` inside the receiving macro.
#define KEY_SINGLE_ARG(A,B) A,B
#define ANKERL_MAP_INSERT(SUFFIX, KEY, FUNC_CALL_KEY) \
void ankerl_map_insert_ ## SUFFIX (void* map_ptr, KEY, uint64_t val) { \
static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->insert( FUNC_CALL_KEY, val); }
ANKERL_MAP_INSERT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
ANKERL_MAP_INSERT(int2int, uint64_t key, key)
// Generates `void mmap_hashmap_insert_<SUFFIX>(void* map_ptr, <KEY>, uint64_t val)`:
// casts the handle to mmap_hashmap_<SUFFIX>* and calls insert(<key args>, val).
//   KEY           - parameter declaration(s) for the key in the C signature.
//   FUNC_CALL_KEY - the same key parameter name(s), as forwarded to insert().
// NOTE(review): what happens when the key already exists is decided by
// insert() in the map class, which is not visible here.
#define MMAP_MAP_INSERT(SUFFIX, KEY, FUNC_CALL_KEY) \
void mmap_hashmap_insert_ ## SUFFIX (void* map_ptr, KEY, uint64_t val) { \
static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->insert(FUNC_CALL_KEY, val); }
// str2int keys are passed as pointer + length; int2int keys as a single uint64_t.
MMAP_MAP_INSERT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_INSERT(int2int, uint64_t key, key)

// Get
#define ANKERL_MAP_GET(SUFFIX, KEY, FUNC_CALL_KEY) \
uint64_t ankerl_map_get_ ## SUFFIX (void* map_ptr, KEY) { \
return static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->get( FUNC_CALL_KEY); }
ANKERL_MAP_GET(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
ANKERL_MAP_GET(int2int, uint64_t key, key)
// Generates `uint64_t mmap_hashmap_get_<SUFFIX>(void* map_ptr, <KEY>)`:
// casts the handle to mmap_hashmap_<SUFFIX>* and returns get(<key args>).
//   KEY           - parameter declaration(s) for the key in the C signature.
//   FUNC_CALL_KEY - the same key parameter name(s), as forwarded to get().
// NOTE(review): behavior for a missing key is decided by get() in the map
// class, which is not visible here.
#define MMAP_MAP_GET(SUFFIX, KEY, FUNC_CALL_KEY) \
uint64_t mmap_hashmap_get_ ## SUFFIX (void* map_ptr, KEY) { \
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->get(FUNC_CALL_KEY); }
MMAP_MAP_GET(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET(int2int, uint64_t key, key)

// Get with default
// Generates `uint64_t mmap_hashmap_get_w_default_<SUFFIX>(void* map_ptr, <KEY>, uint64_t def_val)`:
// casts the handle to mmap_hashmap_<SUFFIX>* and returns
// get_w_default(<key args>, def_val).
// NOTE(review): presumably def_val is returned when the key is absent --
// confirm against get_w_default() in the map class.
#define MMAP_MAP_GET_W_DEFAULT(SUFFIX, KEY, FUNC_CALL_KEY) \
uint64_t mmap_hashmap_get_w_default_ ## SUFFIX (void* map_ptr, KEY, uint64_t def_val) { \
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->get_w_default(FUNC_CALL_KEY, def_val); }
MMAP_MAP_GET_W_DEFAULT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET_W_DEFAULT(int2int, uint64_t key, key)

// Contains
// Generates `bool mmap_hashmap_contains_<SUFFIX>(void* map_ptr, <KEY>)`:
// casts the handle to mmap_hashmap_<SUFFIX>* and returns contains(<key args>).
//   KEY           - parameter declaration(s) for the key in the C signature.
//   FUNC_CALL_KEY - the same key parameter name(s), as forwarded to contains().
#define MMAP_MAP_CONTAINS(SUFFIX, KEY, FUNC_CALL_KEY) \
bool mmap_hashmap_contains_ ## SUFFIX (void* map_ptr, KEY) { \
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->contains(FUNC_CALL_KEY); }
MMAP_MAP_CONTAINS(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_CONTAINS(int2int, uint64_t key, key)
}
9 changes: 8 additions & 1 deletion pecos/core/third_party/ankerl/unordered_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
# include <type_traits> // for enable_if_t, declval, conditional_t, ena...
# include <utility> // for forward, exchange, pair, as_const, piece...
# include <vector> // for vector
# include "../../utils/mmap_util.hpp" // for mmap
# include "../../utils/mmap_util.hpp" // MODIFIED for mmap
# if ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() == 0
# include <cstdlib> // for abort
# endif
Expand Down Expand Up @@ -799,6 +799,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
KeyEqual m_equal{};
uint8_t m_shifts = initial_shifts;

// MODIFIED
// mmap. Not opened for read indicates everything is in memory
pecos::mmap_util::MmapStore mmap_store;

Expand Down Expand Up @@ -850,6 +851,8 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas

[[nodiscard]] constexpr auto get_key(value_type const& vt) -> key_type const {
if constexpr (is_map_v<T>) {
// MODIFIED
// Requires container to have get_key method implemented
return m_values.get_key(vt);
} else {
return vt;
Expand Down Expand Up @@ -908,6 +911,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas

void deallocate_buckets() {
auto ba = bucket_alloc(m_values.get_allocator());
// MODIFIED
if (!mmap_store.is_open_for_read()) { // In memory
if (nullptr != m_buckets) {
bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
Expand Down Expand Up @@ -1201,6 +1205,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
: table(init, bucket_count, hash, KeyEqual(), alloc) {}

~table() {
// MODIFIED
if (!mmap_store.is_open_for_read()) { // in memory
if (nullptr != m_buckets) {
auto ba = bucket_alloc(m_values.get_allocator());
Expand All @@ -1209,6 +1214,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas
}
}

// ---- MODIFIED ----
/* Memory-mapped */
table(const std::string& folderpath, const bool lazy_load) : table(0) {
load_mmap(folderpath, lazy_load);
Expand Down Expand Up @@ -1248,6 +1254,7 @@ class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, bas

mmap_s.close();
}
// --------

auto operator=(table const& other) -> table& {
if (&other != this) {
Expand Down
Loading

0 comments on commit 125894e

Please sign in to comment.