Support both row-wise and col-wise multi-threading #2699

Merged Feb 2, 2020 · 33 commits (changes shown from 26 commits)

Commits:
c8883fc  commit (guolinke, Jan 20, 2020)
281dd32  fix a bug (guolinke, Jan 20, 2020)
ea718c2  fix bug (guolinke, Jan 21, 2020)
2ad4af5  reset to track changes (guolinke, Jan 30, 2020)
748c95a  refine the auto choose logic (guolinke, Jan 30, 2020)
0340ffd  sort the time stats output (guolinke, Jan 30, 2020)
d3434c7  fix include (guolinke, Jan 30, 2020)
8c4ea1a  change multi_val_bin_sparse_threshold (guolinke, Jan 30, 2020)
6cac288  add cmake (guolinke, Jan 30, 2020)
afdbf3c  add _mm_malloc and _mm_free for cross platform (guolinke, Jan 30, 2020)
210ac4b  fix cmake bug (guolinke, Jan 30, 2020)
ad2865d  timer for split (guolinke, Jan 30, 2020)
4c4a33b  try to fix cmake (guolinke, Jan 30, 2020)
2a33dcb  fix tests (guolinke, Jan 30, 2020)
256e6d9  refactor DataPartition::Split (guolinke, Jan 30, 2020)
a722b38  Merge remote-tracking branch 'origin/master' into sparse_bin_clean (guolinke, Jan 30, 2020)
7a59f19  fix test (guolinke, Jan 30, 2020)
1ac8283  typo (guolinke, Jan 30, 2020)
5b8de4f  formating (guolinke, Jan 30, 2020)
106c081  Revert "formating" (guolinke, Jan 31, 2020)
382e13e  add document (guolinke, Jan 31, 2020)
dec3d79  [R-package] Added tests on use of force_col_wise and force_row_wise i… (jameslamb, Jan 31, 2020)
d2fb9b3  naming (guolinke, Jan 31, 2020)
5db5d74  fix gpu code (guolinke, Jan 31, 2020)
7fda05a  Update include/LightGBM/bin.h (guolinke, Jan 31, 2020)
27a7209  Update src/treelearner/ocl/histogram16.cl (guolinke, Jan 31, 2020)
4623cd4  test: swap compilers for CI (StrikerRUS, Jan 31, 2020)
38d1e57  fix omp (guolinke, Feb 1, 2020)
8e27631  not avx2 (guolinke, Feb 1, 2020)
c86a479  no aligned for feature histogram (guolinke, Feb 1, 2020)
737e9c9  Revert "refactor DataPartition::Split" (guolinke, Feb 1, 2020)
ce5f66b  slightly refactor data partition (guolinke, Feb 1, 2020)
a123c47  reduce the memory cost (guolinke, Feb 2, 2020)
24 changes: 23 additions & 1 deletion CMakeLists.txt
@@ -68,6 +68,10 @@ if(USE_R35)
ADD_DEFINITIONS(-DR_VER_ABOVE_35)
endif(USE_R35)

if(USE_TIMETAG)
ADD_DEFINITIONS(-DTIMETAG)
endif(USE_TIMETAG)

if(USE_MPI)
find_package(MPI REQUIRED)
ADD_DEFINITIONS(-DUSE_MPI)
@@ -130,6 +134,21 @@ if(${MM_PREFETCH})
ADD_DEFINITIONS(-DMM_PREFETCH)
endif()

include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <mm_malloc.h>
int main() {
char *a = (char*)_mm_malloc(8, 16);
_mm_free(a);
return 0;
}
" MM_MALLOC)

if(${MM_MALLOC})
message(STATUS "Use _mm_malloc")
ADD_DEFINITIONS(-DMM_MALLOC)
endif()

if(UNIX OR MINGW OR CYGWIN)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread -O3 -Wextra -Wall -Wno-ignored-attributes -Wno-unknown-pragmas -Wno-return-type")
if(USE_SWIG)
@@ -152,10 +171,13 @@ if(MSVC)
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_RELWITHDEBINFO
)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /O2 /Ob2 /Oi /Ot /Oy /GL /MP")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /O2 /Ob2 /Oi /Ot /Oy /GL /MP /arch:AVX2")
else()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
if (NOT APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
endif()
endif(MSVC)

SET(LightGBM_HEADER_DIR ${PROJECT_SOURCE_DIR}/include)
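The MM_MALLOC check above only defines a preprocessor symbol; the C++ side is expected to branch on it. Below is a rough sketch of the kind of guarded aligned-allocation helper this enables — the helper names here are hypothetical, not LightGBM's actual API — matching the commit message "add _mm_malloc and _mm_free for cross platform".

#include <cstdlib>
#ifdef MM_MALLOC
#include <mm_malloc.h>
#endif

// Hypothetical wrappers: use _mm_malloc/_mm_free when the CMake check
// defined MM_MALLOC, otherwise fall back to plain malloc/free.
inline void* AlignedAlloc(std::size_t size, std::size_t alignment) {
#ifdef MM_MALLOC
  return _mm_malloc(size, alignment);
#else
  (void)alignment;  // fallback: alignment is not guaranteed here
  return std::malloc(size);
#endif
}

inline void AlignedFree(void* ptr) {
#ifdef MM_MALLOC
  _mm_free(ptr);
#else
  std::free(ptr);
#endif
}

Platforms whose <mm_malloc.h> compiles get aligned allocations; others keep working through the fallback.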
43 changes: 43 additions & 0 deletions R-package/tests/testthat/test_basic.R
@@ -252,3 +252,46 @@ test_that("lgb.train() throws an informative error if 'valids' contains lgb.Data
)
}, regexp = "each element of valids must have a name")
})

test_that("lgb.train() works with force_col_wise and force_row_wise", {
set.seed(1234L)
nrounds <- 10L
dtrain <- lgb.Dataset(
train$data
, label = train$label
)
params <- list(
objective = "binary"
, metric = "binary_error"
, force_col_wise = TRUE
)
bst_colwise <- lgb.train(
params = params
, data = dtrain
, nrounds = nrounds
)

params <- list(
objective = "binary"
, metric = "binary_error"
, force_row_wise = TRUE
)
bst_row_wise <- lgb.train(
params = params
, data = dtrain
, nrounds = nrounds
)

expected_error <- 0.003070782
expect_equal(bst_colwise$eval_train()[[1L]][["value"]], expected_error)
expect_equal(bst_row_wise$eval_train()[[1L]][["value"]], expected_error)

# check some basic details of the boosters just to be sure force_col_wise
# and force_row_wise are not causing any weird side effects
for (bst in list(bst_row_wise, bst_colwise)) {
expect_equal(bst$current_iter(), nrounds)
parsed_model <- jsonlite::fromJSON(bst$dump_model())
expect_equal(parsed_model$objective, "binary sigmoid:1")
expect_false(parsed_model$average_output)
}
})
4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_learning_to_rank.R
@@ -47,8 +47,8 @@ test_that("learning-to-rank with lgb.train() works as expected", {
}
expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
expect_equal(eval_results[[1L]][["value"]], 0.825)
expect_true(abs(eval_results[[2L]][["value"]] - 0.795986) < TOLERANCE)
expect_true(abs(eval_results[[3L]][["value"]] - 0.7734639) < TOLERANCE)
expect_true(abs(eval_results[[2L]][["value"]] - 0.7766434) < TOLERANCE)
expect_true(abs(eval_results[[3L]][["value"]] - 0.7527939) < TOLERANCE)
})

test_that("learning-to-rank with lgb.cv() works as expected", {
48 changes: 32 additions & 16 deletions docs/Parameters.rst
@@ -190,6 +190,38 @@ Core Parameters
Learning Control Parameters
---------------------------

- ``force_col_wise`` :raw-html:`<a id="force_col_wise" title="Permalink to this parameter" href="#force_col_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool

- setting ``force_col_wise=true`` will force LightGBM to use the col-wise histogram build

- ``force_col_wise=true`` is recommended when:

  - the number of columns is large, or the total number of bins is large

  - ``num_threads`` is large, e.g. ``> 20``

  - you want to use a small ``feature_fraction``, e.g. ``0.5``, to speed up training

  - you want to reduce memory cost

- when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both and use the faster one

- ``force_row_wise`` :raw-html:`<a id="force_row_wise" title="Permalink to this parameter" href="#force_row_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool

- setting ``force_row_wise=true`` will force LightGBM to use the row-wise histogram build

- ``force_row_wise=true`` is recommended when:

  - the number of data points is large, and the total number of bins is relatively small

  - you want to use small ``bagging``, or ``goss``, to speed up training

  - ``num_threads`` is relatively small, e.g. ``<= 16``

- setting ``force_row_wise=true`` will double the memory cost for the Dataset object; if you run out of memory, try ``force_col_wise=true`` instead

- when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both and use the faster one

- ``max_depth`` :raw-html:`<a id="max_depth" title="Permalink to this parameter" href="#max_depth">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int

- limit the max depth for tree model. This is used to deal with over-fitting when ``#data`` is small. Tree still grows leaf-wise
@@ -559,22 +591,6 @@ IO Parameters

- **Note**: disabling this may cause slow training speed for sparse datasets

- ``max_conflict_rate`` :raw-html:`<a id="max_conflict_rate" title="Permalink to this parameter" href="#max_conflict_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, constraints: ``0.0 <= max_conflict_rate < 1.0``

- max conflict rate for bundles in EFB

- set this to ``0.0`` to disallow the conflict and provide more accurate results

- set this to a larger value to achieve faster speed

- ``is_enable_sparse`` :raw-html:`<a id="is_enable_sparse" title="Permalink to this parameter" href="#is_enable_sparse">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool, aliases: ``is_sparse``, ``enable_sparse``, ``sparse``

- used to enable/disable sparse optimization

- ``sparse_threshold`` :raw-html:`<a id="sparse_threshold" title="Permalink to this parameter" href="#sparse_threshold">&#x1F517;&#xFE0E;</a>`, default = ``0.8``, type = double, constraints: ``0.0 < sparse_threshold <= 1.0``

- the threshold of zero elements percentage for treating a feature as a sparse one

- ``use_missing`` :raw-html:`<a id="use_missing" title="Permalink to this parameter" href="#use_missing">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool

- set this to ``false`` to disable the special handle of missing value
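Both parameter descriptions end with the same rule: when neither flag is forced, LightGBM times the two histogram-build strategies and keeps the faster one. Here is a minimal sketch of that selection idea, assuming one timed pass per strategy — illustrative only, not LightGBM's internal code.

#include <chrono>
#include <functional>

// Time a single histogram-build pass.
inline double TimeOnce(const std::function<void()>& build) {
  const auto start = std::chrono::steady_clock::now();
  build();
  const auto end = std::chrono::steady_clock::now();
  return std::chrono::duration<double>(end - start).count();
}

// Returns true if the col-wise pass was at least as fast as the row-wise
// pass; the caller would then stick with col-wise builds afterwards.
inline bool PreferColWise(const std::function<void()>& col_wise_build,
                          const std::function<void()>& row_wise_build) {
  return TimeOnce(col_wise_build) <= TimeOnce(row_wise_build);
}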
122 changes: 68 additions & 54 deletions include/LightGBM/bin.h
@@ -29,36 +29,29 @@ enum MissingType {
NaN
};

/*! \brief Store data for one histogram bin */
struct HistogramBinEntry {
public:
/*! \brief Sum of gradients on this bin */
double sum_gradients = 0.0f;
/*! \brief Sum of hessians on this bin */
double sum_hessians = 0.0f;
/*! \brief Number of data on this bin */
data_size_t cnt = 0;
/*!
* \brief Sum up (reducers) functions for histogram bin
*/
inline static void SumReducer(const char *src, char *dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const HistogramBinEntry* p1;
HistogramBinEntry* p2;
while (used_size < len) {
// convert
p1 = reinterpret_cast<const HistogramBinEntry*>(src);
p2 = reinterpret_cast<HistogramBinEntry*>(dst);
// add
p2->cnt += p1->cnt;
p2->sum_gradients += p1->sum_gradients;
p2->sum_hessians += p1->sum_hessians;
src += type_size;
dst += type_size;
used_size += type_size;
}
typedef double hist_t;

const size_t KHistEntrySize = 2 * sizeof(hist_t);
const int KHistOffset = 2;
const double kSparseThreshold = 0.7;

#define GET_GRAD(hist, i) hist[(i) << 1]
#define GET_HESS(hist, i) hist[((i) << 1) + 1]

inline static void HistogramSumReducer(const char* src, char* dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const hist_t* p1;
hist_t* p2;
while (used_size < len) {
// convert
p1 = reinterpret_cast<const hist_t*>(src);
p2 = reinterpret_cast<hist_t*>(dst);
*p2 += *p1;
src += type_size;
dst += type_size;
used_size += type_size;
}
};
}

/*! \brief This class used to convert feature values into bin,
* and store some meta information for bin*/
@@ -252,7 +245,7 @@ class OrderedBin {
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const score_t* gradients,
const score_t* hessians, HistogramBinEntry* out) const = 0;
const score_t* hessians, hist_t* out) const = 0;

/*!
* \brief Construct histogram by using this bin
Expand All @@ -262,7 +255,7 @@ class OrderedBin {
* \param gradients Gradients, Note:non-ordered by leaf
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const score_t* gradients, HistogramBinEntry* out) const = 0;
virtual void ConstructHistogram(int leaf, const score_t* gradients, hist_t* out) const = 0;

/*!
* \brief Split current bin, and perform re-order by leaf
@@ -360,11 +353,11 @@ class Bin {
virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0;
hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians,
HistogramBinEntry* out) const = 0;
hist_t* out) const = 0;

/*!
* \brief Construct histogram of this feature,
Expand All @@ -380,10 +373,10 @@ class Bin {
* \param out Output Result
*/
virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
const score_t* ordered_gradients, hist_t* out) const = 0;

/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
@@ -423,30 +416,11 @@ class Bin {
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;

/*!
* \brief Create the ordered bin for this bin
* \return Pointer to ordered bin
*/
virtual OrderedBin* CreateOrderedBin() const = 0;

/*!
* \brief After pushed all feature data, call this could have better refactor for bin data
*/
virtual void FinishLoad() = 0;

/*!
* \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
* \param num_data Total number of data
* \param num_bin Number of bin
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \param is_enable_sparse True if enable sparse feature
* \param sparse_threshold Threshold for treating a feature as a sparse feature
* \param is_sparse Will set to true if this bin is sparse
* \return The bin data object
*/
static Bin* CreateBin(data_size_t num_data, int num_bin,
double sparse_rate, bool is_enable_sparse, double sparse_threshold, bool* is_sparse);

/*!
* \brief Create object for bin data of one feature, used for dense feature
* \param num_data Total number of data
@@ -469,6 +443,46 @@ class Bin {
virtual Bin* Clone() = 0;
};


class MultiValBin {
public:

virtual ~MultiValBin() {}

virtual data_size_t num_data() const = 0;

virtual int32_t num_bin() const = 0;

virtual void ReSize(data_size_t num_data) = 0;

virtual void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t>& values) = 0;

virtual void CopySubset(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) = 0;

virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const = 0;

virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void FinishLoad() = 0;

virtual bool IsSparse() = 0;

static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, double sparse_rate);

virtual MultiValBin* Clone() = 0;
};

inline uint32_t BinMapper::ValueToBin(double value) const {
if (std::isnan(value)) {
if (missing_type_ == MissingType::NaN) {
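The replacement of HistogramBinEntry with a flat hist_t buffer stores each bin's gradient and hessian sums interleaved, which is exactly what the GET_GRAD/GET_HESS macros index into and why KHistEntrySize is 2 * sizeof(hist_t). A self-contained sketch of histogram construction over that layout (illustrative; the real kernels above are virtual methods with several specializations):

#include <cstddef>
#include <vector>

typedef double hist_t;

#define GET_GRAD(hist, i) hist[(i) << 1]
#define GET_HESS(hist, i) hist[((i) << 1) + 1]

// Bin i occupies hist[2*i] (gradient sum) and hist[2*i + 1] (hessian sum),
// so a histogram with num_bin bins needs a buffer of 2 * num_bin doubles.
inline void ConstructHistogramSketch(const std::vector<int>& bin_of_row,
                                     const std::vector<double>& gradients,
                                     const std::vector<double>& hessians,
                                     std::vector<hist_t>* acc) {
  for (std::size_t row = 0; row < bin_of_row.size(); ++row) {
    const int bin = bin_of_row[row];
    GET_GRAD((*acc), bin) += gradients[row];
    GET_HESS((*acc), bin) += hessians[row];
  }
}

Using one flat array of doubles is also what lets HistogramSumReducer above treat the whole histogram as a contiguous buffer and add entries element-wise during network reduction.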
34 changes: 18 additions & 16 deletions include/LightGBM/config.h
@@ -214,6 +214,24 @@ struct Config {

#pragma region Learning Control Parameters

// desc = setting ``force_col_wise=true`` will force LightGBM to use the col-wise histogram build
// desc = ``force_col_wise=true`` is recommended when:
// descl2 = the number of columns is large, or the total number of bins is large
// descl2 = ``num_threads`` is large, e.g. ``> 20``
// descl2 = you want to use a small ``feature_fraction``, e.g. ``0.5``, to speed up training
// descl2 = you want to reduce memory cost
// desc = when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both and use the faster one
bool force_col_wise = false;

// desc = setting ``force_row_wise=true`` will force LightGBM to use the row-wise histogram build
// desc = ``force_row_wise=true`` is recommended when:
// descl2 = the number of data points is large, and the total number of bins is relatively small
// descl2 = you want to use small ``bagging``, or ``goss``, to speed up training
// descl2 = ``num_threads`` is relatively small, e.g. ``<= 16``
// desc = setting ``force_row_wise=true`` will double the memory cost for the Dataset object; if you run out of memory, try ``force_col_wise=true`` instead
// desc = when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both and use the faster one
bool force_row_wise = false;

// desc = limit the max depth for tree model. This is used to deal with over-fitting when ``#data`` is small. Tree still grows leaf-wise
// desc = ``<= 0`` means no limit
int max_depth = -1;
@@ -534,22 +552,6 @@ struct Config {
// desc = **Note**: disabling this may cause slow training speed for sparse datasets
bool enable_bundle = true;

// check = >=0.0
// check = <1.0
// desc = max conflict rate for bundles in EFB
// desc = set this to ``0.0`` to disallow the conflict and provide more accurate results
// desc = set this to a larger value to achieve faster speed
double max_conflict_rate = 0.0;

// alias = is_sparse, enable_sparse, sparse
// desc = used to enable/disable sparse optimization
bool is_enable_sparse = true;

// check = >0.0
// check = <=1.0
// desc = the threshold of zero elements percentage for treating a feature as a sparse one
double sparse_threshold = 0.8;

// desc = set this to ``false`` to disable the special handle of missing value
bool use_missing = true;
