Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix OpenMP thread allocation in Linux #5551

Merged
merged 15 commits into from
Nov 29, 2022
4 changes: 4 additions & 0 deletions include/LightGBM/utils/openmp_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#ifndef LIGHTGBM_OPENMP_WRAPPER_H_
#define LIGHTGBM_OPENMP_WRAPPER_H_

/** In an external multi-threaded environment, the below methods can return different values on
svotaw marked this conversation as resolved.
Show resolved Hide resolved
different threads (in Linux at least). To share allocations between threads, use a constant. **/
#define MAX_THREAD_ALLOCATION 128

#ifdef _OPENMP

#include <LightGBM/utils/log.h>
Expand Down
6 changes: 2 additions & 4 deletions src/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,6 @@ int LGBM_DatasetPushRowsWithMetadata(DatasetHandle dataset,
if (!data) {
Log::Fatal("data cannot be null.");
}
const int num_omp_threads = OMP_NUM_THREADS();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, 1);
if (p_dataset->has_raw()) {
Expand All @@ -1085,7 +1084,7 @@ int LGBM_DatasetPushRowsWithMetadata(DatasetHandle dataset,
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
// convert internal thread id to be unique based on external thread id
const int internal_tid = omp_get_thread_num() + (num_omp_threads * tid);
const int internal_tid = omp_get_thread_num() + (MAX_THREAD_ALLOCATION * tid);
auto one_row = get_row_fun(i);
p_dataset->PushOneRow(internal_tid, start_row + i, one_row);
OMP_LOOP_EX_END();
Expand Down Expand Up @@ -1154,7 +1153,6 @@ int LGBM_DatasetPushRowsByCSRWithMetadata(DatasetHandle dataset,
if (!data) {
Log::Fatal("data cannot be null.");
}
const int num_omp_threads = OMP_NUM_THREADS();
auto p_dataset = reinterpret_cast<Dataset*>(dataset);
auto get_row_fun = RowFunctionFromCSR<int>(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int32_t nrow = static_cast<int32_t>(nindptr - 1);
Expand All @@ -1166,7 +1164,7 @@ int LGBM_DatasetPushRowsByCSRWithMetadata(DatasetHandle dataset,
for (int i = 0; i < nrow; ++i) {
OMP_LOOP_EX_BEGIN();
// convert internal thread id to be unique based on external thread id
const int internal_tid = omp_get_thread_num() + (num_omp_threads * tid);
const int internal_tid = omp_get_thread_num() + (MAX_THREAD_ALLOCATION * tid);
auto one_row = get_row_fun(i);
p_dataset->PushOneRow(internal_tid, static_cast<data_size_t>(start_row + i), one_row);
OMP_LOOP_EX_END();
Expand Down
6 changes: 3 additions & 3 deletions src/io/sparse_bin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ class SparseBin : public Bin {
~SparseBin() {}

void InitStreaming(uint32_t num_thread) override {
// Each thread needs its own push buffer, so allocate external num_thread times the number of OMP threads
int num_omp_threads = OMP_NUM_THREADS();
push_buffers_.resize(num_omp_threads * num_thread);
// Each external thread needs its own set of OpenMP push buffers,
// so allocate num_thread times the maximum number of OMP threads per external thread
push_buffers_.resize(MAX_THREAD_ALLOCATION * num_thread);
};

void ReSize(data_size_t num_data) override { num_data_ = num_data; }
Expand Down