Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing of non-finite values #3942

Merged
merged 20 commits into from
Mar 16, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add API method LGBM_BoosterPredictForMats which runs prediction on a …
…data set given as an array of pointers to rows (as opposed to the existing method LGBM_BoosterPredictForMat, which requires the data to be given as a contiguous array)
matthew-peacock committed Feb 12, 2019
commit c3ab42f39aab41be7fc238228c83dcefd3b49102
31 changes: 31 additions & 0 deletions include/LightGBM/c_api.h
Original file line number Diff line number Diff line change
@@ -711,6 +711,37 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMat(BoosterHandle handle,
int64_t* out_len,
double* out_result);

/*!
* \brief make prediction for a new data set given as an array of pointers to rows
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param data array of pointers to the individual rows; each row is read as ncol values of data_type
* \param data_type type of the values each row pointer refers to, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows
* \param ncol number of columns
* \param predict_type
* C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* C_API_PREDICT_RAW_SCORE: raw score
* C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iterations for prediction, <= 0 means no limit
* \param parameter additional prediction parameters, e.g. early stopping for prediction
* \param out_len length of the output result
* \param out_result used to set a pointer to array, should allocate memory before calling this function
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMats(BoosterHandle handle,
const void** data,
int data_type,
int32_t nrow,
int32_t ncol,
int predict_type,
int num_iteration,
const char* parameter,
int64_t* out_len,
double* out_result);

/*!
* \brief save model into file
* \param handle handle
43 changes: 43 additions & 0 deletions src/c_api.cpp
Original file line number Diff line number Diff line change
@@ -350,6 +350,9 @@ RowFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_
// Forward declaration; the definition is not part of this declaration block.
// Returns a callable that maps a row index to a vector of (int, double) pairs
// NOTE(review): presumably (column index, value) pairs for that row of the dense
// matrix -- confirm against the definition.
std::function<std::vector<std::pair<int, double>>(int row_idx)>
RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int data_type, int is_row_major);

// Forward declaration. `data` is an array of pointers to individual rows; the
// returned callable maps a row index to the (column index, value) pairs of that
// row, each row being read as num_col values of data_type.
std::function<std::vector<std::pair<int, double>>(int row_idx)>
RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type);

// Forward declaration; returns a callable that maps an index to a vector of
// (int, double) pairs.
// NOTE(review): presumably indptr/indices/data describe a CSR sparse matrix and
// the pairs are (column index, value) for one row -- confirm against the definition.
std::function<std::vector<std::pair<int, double>>(int idx)>
RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices,
const void* data, int data_type, int64_t nindptr, int64_t nelem);
@@ -1232,6 +1235,30 @@ int LGBM_BoosterPredictForMat(BoosterHandle handle,
API_END();
}

// C API entry point: runs prediction on a data set supplied as an array of
// pointers to rows. Results are written through out_result / out_len by
// Booster::Predict.
int LGBM_BoosterPredictForMats(BoosterHandle handle,
                               const void** data,
                               int data_type,
                               int32_t nrow,
                               int32_t ncol,
                               int predict_type,
                               int num_iteration,
                               const char* parameter,
                               int64_t* out_len,
                               double* out_result) {
  API_BEGIN();
  // Parse the caller-supplied parameter string into a prediction config.
  auto parsed_params = Config::Str2Map(parameter);
  Config predict_config;
  predict_config.Set(parsed_params);

  // Only override the OpenMP thread count when the config requests a positive value.
  if (predict_config.num_threads > 0) {
    omp_set_num_threads(predict_config.num_threads);
  }

  auto* booster = reinterpret_cast<Booster*>(handle);
  // Accessor that turns row index -> (column index, value) pairs for that row.
  auto row_accessor = RowPairFunctionFromDenseRows(data, ncol, data_type);
  booster->Predict(num_iteration, predict_type, nrow, row_accessor,
                   predict_config, out_result, out_len);
  API_END();
}

int LGBM_BoosterSaveModel(BoosterHandle handle,
int start_iteration,
int num_iteration,
@@ -1405,6 +1432,22 @@ RowPairFunctionFromDenseMatric(const void* data, int num_row, int num_col, int d
return nullptr;
}

// `data` is an array of pointers to individual rows.
// Returns a callable that, for a given row index, reads that row as a
// 1 x num_col row-major dense matrix and yields its (column index, value)
// pairs. Values whose magnitude does not exceed kZeroThreshold are omitted,
// except NaN values, which are always kept.
std::function<std::vector<std::pair<int, double>>(int row_idx)>
RowPairFunctionFromDenseRows(const void** data, int num_col, int data_type) {
  return [=](int row_idx) {
    auto read_single_row = RowFunctionFromDenseMatric(data[row_idx], 1, num_col, data_type, /* is_row_major */ true);
    auto values = read_single_row(0);
    std::vector<std::pair<int, double>> pairs;
    const int value_count = static_cast<int>(values.size());
    for (int col = 0; col < value_count; ++col) {
      const auto value = values[col];
      // Skip entries that are (near-)zero; NaN never counts as zero here.
      const bool is_negligible = !(std::fabs(value) > kZeroThreshold) && !std::isnan(value);
      if (is_negligible) {
        continue;
      }
      pairs.emplace_back(col, value);
    }
    return pairs;
  };
}

std::function<std::vector<std::pair<int, double>>(int idx)>
RowFunctionFromCSR(const void* indptr, int indptr_type, const int32_t* indices, const void* data, int data_type, int64_t , int64_t ) {
if (data_type == C_API_DTYPE_FLOAT32) {