Skip to content

Commit

Permalink
Add Python binding
Browse files Browse the repository at this point in the history
  • Loading branch information
weiliw-amz committed Jun 16, 2023
1 parent 5adf0f4 commit 221cec0
Show file tree
Hide file tree
Showing 7 changed files with 467 additions and 176 deletions.
84 changes: 84 additions & 0 deletions pecos/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
c_int32,
c_uint32,
c_uint64,
c_size_t,
c_void_p,
cast,
)
Expand Down Expand Up @@ -531,6 +532,7 @@ def __init__(self, dirname, soname, forced_rebuild=False):
self.link_clustering()
self.link_tfidf_vectorizer()
self.link_ann_hnsw_methods()
self.link_mmap_ankerl_hashmap_methods()

def link_xlinear_methods(self):
"""
Expand Down Expand Up @@ -1699,5 +1701,87 @@ def ann_hnsw_init(self, data_type, metric_type):
)
return self.ann_hnsw_fn_dict[data_type, metric_type]

def link_mmap_ankerl_hashmap_methods(self):
    """
    Specify C-lib's Memory-mappable Ankerl Hashmap methods arguments and return types.

    For every supported map type ("str2int" and "int2int"), the symbols
    ``ankerl_map_<fn>_<map_type>`` are looked up on ``self.clib_float32``,
    their prototypes are registered through ``corelib.fillprototype``, and
    the resulting callables are stored in
    ``self.mmap_ankerl_map_fn_dict[map_type][<fn>]`` where ``<fn>`` is one of
    "new", "destruct", "save", "load", "size", "insert" and "get".
    """
    fn_prefix = "ankerl_map"
    # ctypes argument types that describe one key, per map type.
    key_args_by_map_type = {
        "str2int": [
            c_char_p,  # pointer of key string
            c_uint32,  # length of key string
        ],
        "int2int": [
            c_uint64,  # key int64
        ],
    }
    self.mmap_ankerl_map_fn_dict = {}

    for map_type, key_args in key_args_by_map_type.items():
        # fn_name -> (return type, argument types)
        prototypes = {
            "new": (c_void_p, None),
            "destruct": (None, [c_void_p]),
            "save": (None, [c_void_p, c_char_p]),
            "load": (c_void_p, [c_char_p, c_bool]),
            # The C function takes the map pointer. It must be declared as
            # c_void_p: without argtypes, ctypes passes a Python int as a C
            # int, which would truncate a 64-bit pointer.
            "size": (c_size_t, [c_void_p]),
            # (pointer of C/C++ map, key..., value int64)
            "insert": (None, [c_void_p, *key_args, c_uint64]),
            # (pointer of C/C++ map, key...)
            "get": (c_uint64, [c_void_p, *key_args]),
        }

        local_fn_dict = {}
        for fn_name, (res_type, arg_types) in prototypes.items():
            c_fn = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
            corelib.fillprototype(c_fn, res_type, arg_types)
            local_fn_dict[fn_name] = c_fn

        self.mmap_ankerl_map_fn_dict[map_type] = local_fn_dict

def mmap_ankerl_hashmap_init(self, map_type):
    """Look up the C/C++ function table of a Memory-mappable Ankerl Hashmap.

    Args:
        map_type (string): Type of Hashmap; must be a key of
            `self.mmap_ankerl_map_fn_dict`.

    Returns:
        dict: the entry of `self.mmap_ankerl_map_fn_dict` for `map_type`,
            holding clib's C/C++ functions for Python to call.

    Raises:
        NotImplementedError: if `map_type` has no entry in the table.
    """
    fn_table = self.mmap_ankerl_map_fn_dict
    if map_type in fn_table:
        return fn_table[map_type]
    raise NotImplementedError(f"map_type={map_type} is not implemented.")


clib = corelib(os.path.join(os.path.dirname(os.path.abspath(pecos.__file__)), "core"), "libpecos")
52 changes: 52 additions & 0 deletions pecos/core/libpecos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "utils/clustering.hpp"
#include "utils/matrix.hpp"
#include "utils/mmap_ankerl_hashmap.hpp"
#include "utils/tfidf.hpp"
#include "utils/parallel.hpp"
#include "xmc/inference.hpp"
Expand Down Expand Up @@ -474,4 +475,55 @@ extern "C" {
C_ANN_HNSW_PREDICT(_csr_ip_f32, ScipyCsrF32, pecos::csr_t, hnsw_csr_ip_t)
C_ANN_HNSW_PREDICT(_csr_l2_f32, ScipyCsrF32, pecos::csr_t, hnsw_csr_l2_t)

// ==== C Interface of Memory-mappable Ankerl Hashmap ====

// Concrete hashmap types exposed through this C interface. The
// ankerl_map_<suffix> spelling is what the ANKERL_MAP_* macros token-paste against.
using ankerl_map_str2int = pecos::ankerl_mmap_hashmap::Str2IntMap;
using ankerl_map_int2int = pecos::ankerl_mmap_hashmap::Int2IntMap;

#define ANKERL_MAP_NEW(SUFFIX) \
void* ankerl_map_new_ ## SUFFIX () { \
return static_cast<void*>(new ankerl_map_ ## SUFFIX()); }
ANKERL_MAP_NEW(str2int)
ANKERL_MAP_NEW(int2int)

#define ANKERL_MAP_DESTRUCT(SUFFIX) \
void ankerl_map_destruct_ ## SUFFIX (void* map_ptr) { \
delete static_cast<ankerl_map_ ## SUFFIX *>(map_ptr); }
ANKERL_MAP_DESTRUCT(str2int)
ANKERL_MAP_DESTRUCT(int2int)

#define ANKERL_MAP_SAVE(SUFFIX) \
void ankerl_map_save_ ## SUFFIX (void* map_ptr, const char* map_dir) { \
static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->save(map_dir); }
ANKERL_MAP_SAVE(str2int)
ANKERL_MAP_SAVE(int2int)

#define ANKERL_MAP_LOAD(SUFFIX) \
void* ankerl_map_load_ ## SUFFIX (const char* map_dir, const bool lazy_load) { \
ankerl_map_ ## SUFFIX * map_ptr = new ankerl_map_ ## SUFFIX(); \
map_ptr->load(map_dir, lazy_load); \
return static_cast<void *>(map_ptr); }
ANKERL_MAP_LOAD(str2int)
ANKERL_MAP_LOAD(int2int)

#define ANKERL_MAP_SIZE(SUFFIX) \
size_t ankerl_map_size_ ## SUFFIX (void* map_ptr) { \
return static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->size(); }
ANKERL_MAP_SIZE(str2int)
ANKERL_MAP_SIZE(int2int)

// Insert
#define KEY_SINGLE_ARG(A,B) A,B
#define ANKERL_MAP_INSERT(SUFFIX, KEY, FUNC_CALL_KEY) \
void ankerl_map_insert_ ## SUFFIX (void* map_ptr, KEY, uint64_t val) { \
static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->insert( FUNC_CALL_KEY, val); }
ANKERL_MAP_INSERT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
ANKERL_MAP_INSERT(int2int, uint64_t key, key)

// Get
#define ANKERL_MAP_GET(SUFFIX, KEY, FUNC_CALL_KEY) \
uint64_t ankerl_map_get_ ## SUFFIX (void* map_ptr, KEY) { \
return static_cast<ankerl_map_ ## SUFFIX *>(map_ptr)->get( FUNC_CALL_KEY); }
ANKERL_MAP_GET(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
ANKERL_MAP_GET(int2int, uint64_t key, key)
}
130 changes: 0 additions & 130 deletions pecos/core/utils/ankerl_int2int_mmap_vec.hpp

This file was deleted.

45 changes: 0 additions & 45 deletions pecos/core/utils/ankerl_mmap_hashmap_wrapper.hpp

This file was deleted.

Loading

0 comments on commit 221cec0

Please sign in to comment.