Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory-mapped XLinear Model #195

Merged
merged 4 commits into from
Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions pecos/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,15 @@ def link_xlinear_methods(self):
self.clib_float32.c_xlinear_load_model_from_disk_ext, res_list, arg_list
)

res_list = c_void_p
arg_list = [c_char_p, c_bool]
corelib.fillprototype(
self.clib_float32.c_xlinear_load_mmap_model_from_disk, res_list, arg_list
)

arg_list = [c_char_p, c_char_p]
corelib.fillprototype(self.clib_float32.c_xlinear_compile_mmap_model, None, arg_list)

# c interface for per-layer prediction
arg_list = [
POINTER(ScipyCsrF32),
Expand Down Expand Up @@ -704,6 +713,39 @@ def link_xlinear_methods(self):
arg_list = [c_void_p, c_int]
corelib.fillprototype(self.clib_float32.c_xlinear_get_layer_type, res_list, arg_list)

def xlinear_compile_mmap_model(self, npz_folder, mmap_folder):
    """
    Convert an xlinear model stored in npz format into the memory-mapped
    format, which is faster to load.

    Args:
        npz_folder (str): The source folder path of the xlinear npz model.
        mmap_folder (str): The destination folder path for the xlinear mmap model.
    """
    # Both paths are handed to the C library as UTF-8 encoded byte strings.
    source_path = c_char_p(npz_folder.encode("utf-8"))
    target_path = c_char_p(mmap_folder.encode("utf-8"))
    self.clib_float32.c_xlinear_compile_mmap_model(source_path, target_path)

def xlinear_load_mmap(self, folder, lazy_load=False):
    """
    Load an xlinear model in read-only mmap mode for prediction.

    Args:
        folder (str): The folder path of the xlinear model.
        lazy_load (bool): If True, load lazily (i.e. when needed); if False,
            fully load the model before returning.

    Return:
        cmodel (ptr): The pointer to the xlinear model, as returned by the C library.
    """
    # The folder path crosses into C as a UTF-8 encoded byte string.
    folder_arg = c_char_p(folder.encode("utf-8"))
    lazy_arg = c_bool(lazy_load)
    return self.clib_float32.c_xlinear_load_mmap_model_from_disk(folder_arg, lazy_arg)

def xlinear_load_predict_only(
self,
folder,
Expand Down
11 changes: 11 additions & 0 deletions pecos/core/libpecos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ extern "C" {
return static_cast<void*>(model);
}

void* c_xlinear_load_mmap_model_from_disk(const char* model_path, const bool lazy_load) {
auto model = new pecos::HierarchicalMLModel(model_path, lazy_load);
return static_cast<void*>(model);
}

void c_xlinear_compile_mmap_model(const char* model_path, const char* mmap_model_path) {
// Only implemented for bin_search_chunked
auto model = new pecos::HierarchicalMLModel(model_path, pecos::layer_type_t::LAYER_TYPE_BINARY_SEARCH_CHUNKED);
model->save_mmap(mmap_model_path);
}

void c_xlinear_destruct_model(void* ptr) {
pecos::HierarchicalMLModel* mc = static_cast<pecos::HierarchicalMLModel*>(ptr);
delete mc;
Expand Down
104 changes: 90 additions & 14 deletions pecos/core/utils/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <stdexcept>
#include <vector>

#include "mmap_util.hpp"
#include "parallel.hpp"
#include "scipy_loader.hpp"

Expand Down Expand Up @@ -74,7 +75,7 @@ extern "C" {
namespace pecos {
// ===== Container for sparse-dense vectors =====
// For sparse vectors computational acceleration
template<class IDX_T=uint32_t, class VAL_T=float32_t>
template<class IDX_T=uint32_t, class VAL_T=float32_t>
struct sdvec_t {
typedef IDX_T index_type;
typedef VAL_T value_type;
Expand Down Expand Up @@ -207,6 +208,24 @@ namespace pecos {
col_idx(py->col_idx),
val(py->val) { }

// Save/load mmap
// Signature for symmetry, not implemented
// Placeholder: the signature exists for symmetry only. Always throws
// std::runtime_error; mmap_s is never touched.
void save_to_mmap_store(mmap_util::MmapStore& mmap_s) const {
    static_cast<void>(mmap_s); // intentionally unused
    throw std::runtime_error("Not implemented yet.");
}

// Placeholder: the signature exists for symmetry only. Always throws
// std::runtime_error; mmap_s is never touched.
void load_from_mmap_store(mmap_util::MmapStore& mmap_s) {
    static_cast<void>(mmap_s); // intentionally unused
    throw std::runtime_error("Not implemented yet.");
}

// Placeholder: the signature exists for symmetry only. Always throws
// std::runtime_error; file_name is never used.
void save_mmap(const std::string& file_name) const {
    static_cast<void>(file_name); // intentionally unused
    throw std::runtime_error("Not implemented yet.");
}

// Placeholder: the signature exists for symmetry only. Always throws
// std::runtime_error; neither argument is used.
void load_mmap(const std::string& file_name, const bool lazy_load) {
    static_cast<void>(file_name); // intentionally unused
    static_cast<void>(lazy_load); // intentionally unused
    throw std::runtime_error("Not implemented yet.");
}

bool is_empty() const {
return val == nullptr;
}
Expand Down Expand Up @@ -326,6 +345,9 @@ namespace pecos {
value_type *data;
};

// mmap
std::shared_ptr<mmap_util::MmapStore> mmap_store_ptr = nullptr;

csc_t() :
rows(0),
cols(0),
Expand All @@ -340,6 +362,44 @@ namespace pecos {
row_idx(py->row_idx),
val(py->val) { }

// Save/load mmap
// Write this matrix into the given mmap store.
// Layout, in write order:
//   rows, cols (index_type), nnz (mem_index_type),
//   col_ptr (cols + 1 entries), row_idx (nnz entries), val (nnz entries).
void save_to_mmap_store(mmap_util::MmapStore& mmap_s) const {
    const auto num_nonzeros = get_nnz();

    // Header: matrix shape and number of stored entries
    mmap_s.fput_one<index_type>(rows);
    mmap_s.fput_one<index_type>(cols);
    mmap_s.fput_one<mem_index_type>(num_nonzeros);

    // Payload: the three CSC arrays
    mmap_s.fput_multiple<mem_index_type>(col_ptr, cols + 1);
    mmap_s.fput_multiple<index_type>(row_idx, num_nonzeros);
    mmap_s.fput_multiple<value_type>(val, num_nonzeros);
}

// Read this matrix back from the given mmap store, in the same order that
// save_to_mmap_store writes it: rows, cols, nnz, then col_ptr, row_idx, val.
// The array members are set to whatever pointers fget_multiple returns.
void load_from_mmap_store(mmap_util::MmapStore& mmap_s) {
    // Header: matrix shape and number of stored entries
    rows = mmap_s.fget_one<index_type>();
    cols = mmap_s.fget_one<index_type>();
    const auto num_nonzeros = mmap_s.fget_one<mem_index_type>();

    // Payload: the three CSC arrays
    col_ptr = mmap_s.fget_multiple<mem_index_type>(cols + 1);
    row_idx = mmap_s.fget_multiple<index_type>(num_nonzeros);
    val = mmap_s.fget_multiple<value_type>(num_nonzeros);
}

void save_mmap(const std::string& file_name) const {
mmap_util::MmapStore mmap_s = mmap_util::MmapStore();
mmap_s.open(file_name, "w");
save_to_mmap_store(mmap_s);
mmap_s.close();
}

// Load this matrix from file_name through a freshly created MmapStore held in
// mmap_store_ptr. The store is opened in "r_lazy" mode when lazy_load is true
// and in "r" mode otherwise. Whatever the matrix held before is released first.
void load_mmap(const std::string& file_name, const bool lazy_load) {
    // Drop any existing memory before taking on the new content
    free_underlying_memory();

    const char* open_mode = lazy_load ? "r_lazy" : "r";
    mmap_store_ptr = std::make_shared<mmap_util::MmapStore>();
    mmap_store_ptr->open(file_name, open_mode);
    load_from_mmap_store(*mmap_store_ptr);
}

bool is_empty() const {
return val == nullptr;
}
Expand All @@ -361,18 +421,25 @@ namespace pecos {
// Every function in the inference code that returns a matrix has allocated memory, and
// therefore one should call this function to free that memory.
// Release whatever backs this matrix and reset it to an empty 0 x 0 state.
// - mmap case: only the reference to the MmapStore is dropped; the arrays are
//   not passed to delete[].
// - memory case: the three arrays are released with delete[].
void free_underlying_memory() {
    if (mmap_store_ptr) { // mmap case, no need to check and free other pointers
        mmap_store_ptr.reset(); // decrease reference count
    } else { // memory case
        // delete[] on a null pointer is a no-op, so no per-pointer checks are needed
        delete[] col_ptr;
        delete[] row_idx;
        delete[] val;
    }
    col_ptr = nullptr;
    row_idx = nullptr;
    val = nullptr;
    rows = 0;
    cols = 0;
}

csr_t transpose() const ;
Expand All @@ -382,6 +449,9 @@ namespace pecos {
// This allocates memory, so one should call free_underlying_memory on the copy when
// one is finished using it.
csc_t deep_copy() const {
if (mmap_store_ptr) {
throw std::runtime_error("Cannot deep copy for mmap instance.");
}
mem_index_type nnz = col_ptr[cols];
csc_t res;
res.allocate(rows, cols, nnz);
Expand All @@ -392,6 +462,9 @@ namespace pecos {
}

void allocate(index_type rows, index_type cols, mem_index_type nnz) {
if (mmap_store_ptr) {
throw std::runtime_error("Cannot allocate for mmap instance.");
}
this->rows = rows;
this->cols = cols;
col_ptr = new mem_index_type[cols + 1];
Expand All @@ -401,6 +474,9 @@ namespace pecos {

// Construct a csc_t object with shape _rows x _cols filled by 1.
void fill_ones(index_type _rows, index_type _cols) {
if (mmap_store_ptr) {
throw std::runtime_error("Cannot fill ones for mmap instance.");
}
mem_index_type nnz = (mem_index_type) _rows * _cols;
this->free_underlying_memory();
this->allocate(_rows, _cols, nnz);
Expand Down Expand Up @@ -670,7 +746,7 @@ namespace pecos {
float32_t ret = 0;
for(size_t s = 0; s < x.nnz; s++) {
auto &idx = x.idx[s];
if(y.entries[idx].touched)
if(y.entries[idx].touched)
ret += y.entries[idx].val * x.val[s];
}
return ret;
Expand Down Expand Up @@ -749,7 +825,7 @@ namespace pecos {
float32_t ret = 0;
for(size_t s = 0; s < x.nr_touch; s++) {
auto &idx = x.touched_indices[s];
if(y.entries[idx].touched)
if(y.entries[idx].touched)
ret += y.entries[idx].val * x.entries[idx].val;
}
return static_cast<float32_t>(ret);
Expand Down
38 changes: 25 additions & 13 deletions pecos/core/utils/mmap_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ class MmapStoreLoad {
/* Constructor loads from memory-mapped file name
* Parameters:
* file_name (const std::string&): Name of file to load from.
* pre_load (const bool): Whether to pre-fault all pages into memory before accessing.
* lazy_load (const bool): If false, pre-fault all pages into memory before accessing.
*/
MmapStoreLoad(const std::string& file_name, const bool pre_load) {
MmapStoreLoad(const std::string& file_name, const bool lazy_load) {
// Load metadata first
FILE* fp = fopen(file_name.c_str(), "rb");
if (!fp) {
Expand All @@ -210,7 +210,7 @@ class MmapStoreLoad {
if (fd == -1) {
throw std::runtime_error("Load Mmap file: Open file failed.");
}
load_mmap_(fd, pre_load);
load_mmap_(fd, lazy_load);
if (close(fd) < 0) {
throw std::runtime_error("Load Mmap file: Close file failed.");
}
Expand Down Expand Up @@ -249,7 +249,7 @@ class MmapStoreLoad {
uint64_t mmap_size_ = 0; // Memory-mapped file size

/* Create memory-mapped region */
void load_mmap_(const int fd, const bool pre_load) {
void load_mmap_(const int fd, const bool lazy_load) {
// Get file size
struct stat file_stat;
fstat(fd, &file_stat);
Expand All @@ -260,7 +260,7 @@ class MmapStoreLoad {

// Creat mmap
int mmap_flags = MAP_SHARED;
if (pre_load) { // pre-fault all pages to load them into memory
if (!lazy_load) { // pre-fault all pages to load them into memory
mmap_flags |= MAP_POPULATE;
}
mmap_ptr_ = mmap(NULL, mmap_size_, PROT_READ, mmap_flags, fd, 0);
Expand Down Expand Up @@ -367,12 +367,12 @@ class MmapStore {
if (mode_ != Mode::UNINIT) {
throw std::runtime_error("Should close existing file before open new one.");
}
if (mode_str == "r") { // pre-load all pages
mmap_r_ = new details_::MmapStoreLoad(file_name, true);
mode_ = Mode::READONLY;
} else if (mode_str == "r_lazy") {
if (mode_str == "r") { // lazy_load=false, pre-load all pages
mmap_r_ = new details_::MmapStoreLoad(file_name, false);
mode_ = Mode::READONLY;
} else if (mode_str == "r_lazy") { // lazy_load=true
mmap_r_ = new details_::MmapStoreLoad(file_name, true);
mode_ = Mode::READONLY;
} else if (mode_str == "w") {
mmap_w_ = new details_::MmapStoreSave(file_name);
mode_ = Mode::WRITEONLY;
Expand Down Expand Up @@ -453,7 +453,7 @@ class MmapStore {
* For std::vector case, it owns the memory for data storage.
* For mmap view case, it does not own any memory, but serves as a view of a piece of memory owned by MmapStore.
* By default, it is initialized as empty std::vector that can be resized or loaded as mmap view.
* Once loaded as mmap view, it cannot go back to std::vector case unless clear() is called.
* Once loaded as mmap view, it cannot go back to std::vector case unless clear() or a conversion (to_self_alloc_vec()) is called.
*/
template<class T, class TT = T, details_::if_simple_serializable<TT> = true>
class MmapableVector {
Expand Down Expand Up @@ -521,17 +521,29 @@ class MmapableVector {
}

// Load this vector from the given mmap store: read the element count, then
// take the data pointer returned by the store.
// Throws std::runtime_error if this is a non-empty self-allocated vector.
void load_from_mmap_store(MmapStore& mmap_s) {
    if (is_self_allocated_()) { // raises error for non-empty self-allocated vector
        throw std::runtime_error("Cannot load for non-empty vector case.");
    }
    size_ = mmap_s.fget_one<uint64_t>();
    data_ = mmap_s.fget_multiple<T>(size_);
}

/* Convert (from mmap view) into self-allocated vector by copying data.
* To be noted, this is only a shallow copy and only good for POD without pointer members. */
void to_self_alloc_vec() {
if (!is_self_allocated_()) {
store_.resize(size_);
for (uint64_t i = 0; i < size_; ++i) {
store_[i] = data_[i];
}
data_ = store_.data();
}
}

private:
uint64_t size_ = 0; // Number of elements of the data
T* data_ = nullptr; // Pointer to actual data
std::vector<T> store_; // Actual data storage for self-allocated case
T* data_ = nullptr; // Pointer to data. The same as store_.data() for self-allocated vector case
std::vector<T> store_; // Actual data storage for self-allocated vector case

/* Whether data storage is non-empty self-allocated vector.
* True indicates non-empty vector case; False indicates either empty or mmap view. */
Expand Down
Loading