Skip to content

Commit

Permalink
Add mmap compile/load for Xlinear model
Browse files Browse the repository at this point in the history
  • Loading branch information
weiliw-amz committed Dec 23, 2022
1 parent 1175b33 commit f31fbc2
Show file tree
Hide file tree
Showing 7 changed files with 508 additions and 46 deletions.
41 changes: 41 additions & 0 deletions pecos/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,15 @@ def link_xlinear_methods(self):
self.clib_float32.c_xlinear_load_model_from_disk_ext, res_list, arg_list
)

res_list = c_void_p
arg_list = [c_char_p, c_bool]
corelib.fillprototype(
self.clib_float32.c_xlinear_load_mmap_model_from_disk, res_list, arg_list
)

arg_list = [c_char_p, c_char_p]
corelib.fillprototype(self.clib_float32.c_xlinear_compile_mmap_model, None, arg_list)

# c interface for per-layer prediction
arg_list = [
POINTER(ScipyCsrF32),
Expand Down Expand Up @@ -704,6 +713,38 @@ def link_xlinear_methods(self):
arg_list = [c_void_p, c_int]
corelib.fillprototype(self.clib_float32.c_xlinear_get_layer_type, res_list, arg_list)

def xlinear_compile_mmap_model(self, npz_folder, mmap_folder):
    """
    Convert an xlinear model stored in npz format into the memory-mapped
    format, which is faster to load.

    Args:
        npz_folder (str): Folder holding the source xlinear npz model.
        mmap_folder (str): Folder that receives the xlinear mmap model.
    """
    # The C interface takes both paths as UTF-8 encoded C strings.
    src_path = c_char_p(npz_folder.encode("utf-8"))
    dst_path = c_char_p(mmap_folder.encode("utf-8"))
    self.clib_float32.c_xlinear_compile_mmap_model(src_path, dst_path)

def xlinear_load_mmap(
    self,
    folder,
    pre_load=False,
):
    """
    Open an xlinear model in read-only mmap mode so it can be used for prediction.

    Args:
        folder (str): Folder that contains the xlinear mmap model.
        pre_load (bool): True to fully load the model, False to lazy-load it.

    Return:
        cmodel (ptr): Pointer to the loaded xlinear model.
    """
    # Marshal the Python arguments into the ctypes types declared for the C entry point.
    encoded_folder = c_char_p(folder.encode("utf-8"))
    pre_load_flag = c_bool(pre_load)
    return self.clib_float32.c_xlinear_load_mmap_model_from_disk(encoded_folder, pre_load_flag)

def xlinear_load_predict_only(
self,
folder,
Expand Down
11 changes: 11 additions & 0 deletions pecos/core/libpecos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ extern "C" {
return static_cast<void*>(model);
}

void* c_xlinear_load_mmap_model_from_disk(const char* model_path, const bool pre_load) {
auto model = new pecos::HierarchicalMLModel(model_path, pre_load);
return static_cast<void*>(model);
}

void c_xlinear_compile_mmap_model(const char* model_path, const char* mmap_model_path) {
// Only implemented for bin_search_chunked
auto model = new pecos::HierarchicalMLModel(model_path, pecos::layer_type_t::LAYER_TYPE_BINARY_SEARCH_CHUNKED);
model->save_mmap(mmap_model_path);
}

void c_xlinear_destruct_model(void* ptr) {
pecos::HierarchicalMLModel* mc = static_cast<pecos::HierarchicalMLModel*>(ptr);
delete mc;
Expand Down
86 changes: 72 additions & 14 deletions pecos/core/utils/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <stdexcept>
#include <vector>

#include "mmap_util.hpp"
#include "parallel.hpp"
#include "scipy_loader.hpp"

Expand Down Expand Up @@ -74,7 +75,7 @@ extern "C" {
namespace pecos {
// ===== Container for sparse-dense vectors =====
// For sparse vectors computational acceleration
template<class IDX_T=uint32_t, class VAL_T=float32_t>
template<class IDX_T=uint32_t, class VAL_T=float32_t>
struct sdvec_t {
typedef IDX_T index_type;
typedef VAL_T value_type;
Expand Down Expand Up @@ -326,6 +327,9 @@ namespace pecos {
value_type *data;
};

// mmap
std::shared_ptr<mmap_util::MmapStore> mmap_store_ptr = nullptr;

csc_t() :
rows(0),
cols(0),
Expand All @@ -340,6 +344,44 @@ namespace pecos {
row_idx(py->row_idx),
val(py->val) { }

// Save/load mmap
// Write this matrix into the given MmapStore.
// Order written: rows, cols, nnz, then col_ptr (cols + 1 entries),
// row_idx (nnz entries) and val (nnz entries).
void save_to_mmap_store(mmap_util::MmapStore& mmap_s) const {
    const auto n_nonzeros = get_nnz();

    // Header: matrix shape and number of stored entries
    mmap_s.fput_one<index_type>(rows);
    mmap_s.fput_one<index_type>(cols);
    mmap_s.fput_one<mem_index_type>(n_nonzeros);

    // Payload: the three CSC arrays
    mmap_s.fput_multiple<mem_index_type>(col_ptr, cols + 1);
    mmap_s.fput_multiple<index_type>(row_idx, n_nonzeros);
    mmap_s.fput_multiple<value_type>(val, n_nonzeros);
}

// Read this matrix from the given MmapStore, consuming fields in the same order
// save_to_mmap_store() writes them. The array members are set to the pointers
// returned by the store; nothing is copied here.
void load_from_mmap_store(mmap_util::MmapStore& mmap_s) {
    // Header: matrix shape and number of stored entries
    rows = mmap_s.fget_one<index_type>();
    cols = mmap_s.fget_one<index_type>();
    const mem_index_type n_nonzeros = mmap_s.fget_one<mem_index_type>();

    // Payload: the three CSC arrays
    col_ptr = mmap_s.fget_multiple<mem_index_type>(cols + 1);
    row_idx = mmap_s.fget_multiple<index_type>(n_nonzeros);
    val = mmap_s.fget_multiple<value_type>(n_nonzeros);
}

void save(const std::string & file_name) const {
mmap_util::MmapStore mmap_s = mmap_util::MmapStore();
mmap_s.open(file_name, "w");
save_to_mmap_store(mmap_s);
mmap_s.close();
}

// Replace the current contents with the matrix stored in file_name.
//   pre_load: true opens the store in "r" mode, false opens it in "r_lazy" mode.
// The newly created MmapStore is kept in mmap_store_ptr after loading.
void load(const std::string & file_name, const bool pre_load) {
    // Release whatever this instance currently holds before taking over new data
    free_underlying_memory();

    const char* open_mode = pre_load ? "r" : "r_lazy";
    mmap_store_ptr = std::make_shared<mmap_util::MmapStore>();
    mmap_store_ptr->open(file_name, open_mode);
    load_from_mmap_store(*mmap_store_ptr);
}

// True when the value array pointer is null.
bool is_empty() const {
    return nullptr == val;
}
Expand All @@ -361,18 +403,25 @@ namespace pecos {
// Every function in the inference code that returns a matrix has allocated memory, and
// therefore one should call this function to free that memory.
void free_underlying_memory() {
if (col_ptr) {
delete[] col_ptr;
col_ptr = nullptr;
}
if (row_idx) {
delete[] row_idx;
row_idx = nullptr;
}
if (val) {
delete[] val;
val = nullptr;
if (mmap_store_ptr) { // mmap case, no need to check and free other pointers
mmap_store_ptr.reset(); // decrease reference count
} else { // memory case
if (col_ptr) {
delete[] col_ptr;
}
if (row_idx) {
delete[] row_idx;
}
if (val) {
delete[] val;
}
}
mmap_store_ptr = nullptr;
col_ptr = nullptr;
row_idx = nullptr;
val = nullptr;
rows = 0;
cols = 0;
}

csr_t transpose() const ;
Expand All @@ -382,6 +431,9 @@ namespace pecos {
// This allocates memory, so one should call free_underlying_memory on the copy when
// one is finished using it.
csc_t deep_copy() const {
if (mmap_store_ptr) {
throw std::runtime_error("Cannot deep copy for mmap instance.");
}
mem_index_type nnz = col_ptr[cols];
csc_t res;
res.allocate(rows, cols, nnz);
Expand All @@ -392,6 +444,9 @@ namespace pecos {
}

void allocate(index_type rows, index_type cols, mem_index_type nnz) {
if (mmap_store_ptr) {
throw std::runtime_error("Cannot allocate for mmap instance.");
}
this->rows = rows;
this->cols = cols;
col_ptr = new mem_index_type[cols + 1];
Expand All @@ -401,6 +456,9 @@ namespace pecos {

// Construct a csc_t object with shape _rows x _cols filled by 1.
void fill_ones(index_type _rows, index_type _cols) {
if (mmap_store_ptr) {
throw std::runtime_error("Cannot fill ones for mmap instance.");
}
mem_index_type nnz = (mem_index_type) _rows * _cols;
this->free_underlying_memory();
this->allocate(_rows, _cols, nnz);
Expand Down Expand Up @@ -670,7 +728,7 @@ namespace pecos {
float32_t ret = 0;
for(size_t s = 0; s < x.nnz; s++) {
auto &idx = x.idx[s];
if(y.entries[idx].touched)
if(y.entries[idx].touched)
ret += y.entries[idx].val * x.val[s];
}
return ret;
Expand Down Expand Up @@ -749,7 +807,7 @@ namespace pecos {
float32_t ret = 0;
for(size_t s = 0; s < x.nr_touch; s++) {
auto &idx = x.touched_indices[s];
if(y.entries[idx].touched)
if(y.entries[idx].touched)
ret += y.entries[idx].val * x.entries[idx].val;
}
return static_cast<float32_t>(ret);
Expand Down
20 changes: 16 additions & 4 deletions pecos/core/utils/mmap_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ class MmapStore {
* For std::vector case, it owns the memory for data storage.
* For mmap view case, it does not own any memory, but serves as a view for a piece of memory owned by MmapStore.
* By default, it is initialized as empty std::vector that can be resized or loaded as mmap view.
* Once loaded as mmap view, it cannot go back to std::vector case unless clear() is called.
* Once loaded as mmap view, it cannot go back to std::vector case unless clear() or conversion is called.
*/
template<class T, class TT = T, details_::if_simple_serializable<TT> = true>
class MmapableVector {
Expand Down Expand Up @@ -521,17 +521,29 @@ class MmapableVector {
}

void load_from_mmap_store(MmapStore& mmap_s) {
if (is_self_allocated_()) { // raises error for non-empty vector
if (is_self_allocated_()) { // raises error for non-empty self-allocated vector
throw std::runtime_error("Cannot load for non-empty vector case.");
}
size_ = mmap_s.fget_one<uint64_t>();
data_ = mmap_s.fget_multiple<T>(size_);
}

/* Turn a non-self-allocated instance into a self-allocated vector by copying
 * the size_ elements behind data_ into store_ and re-pointing data_ at store_.
 * Does nothing when the data is already self-allocated.
 * Note: elements are copied shallowly, so this is only suitable for POD types
 * without pointer members. */
void mmap_to_vec() {
    if (is_self_allocated_()) {
        return;
    }
    store_.assign(data_, data_ + size_);
    data_ = store_.data();
}

private:
uint64_t size_ = 0; // Number of elements of the data
T* data_ = nullptr; // Pointer to actual data
std::vector<T> store_; // Actual data storage for self-allocated case
T* data_ = nullptr; // Pointer to data. The same as store_.data() for self-allocated vector case
std::vector<T> store_; // Actual data storage for self-allocated vector case

/* Whether data storage is non-empty self-allocated vector.
* True indicates non-empty vector case; False indicates either empty or mmap view. */
Expand Down
Loading

0 comments on commit f31fbc2

Please sign in to comment.