various minor style, docs and cpplint improvements #2747

Merged: 5 commits, Feb 8, 2020
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -130,7 +130,7 @@ int main() {
 " MM_PREFETCH)

 if(${MM_PREFETCH})
-    message(STATUS "Use _mm_prefetch")
+    message(STATUS "Using _mm_prefetch")
     ADD_DEFINITIONS(-DMM_PREFETCH)
 endif()

@@ -145,7 +145,7 @@ int main() {
 " MM_MALLOC)

 if(${MM_MALLOC})
-    message(STATUS "Use _mm_malloc")
+    message(STATUS "Using _mm_malloc")
     ADD_DEFINITIONS(-DMM_MALLOC)
 endif()
6 changes: 3 additions & 3 deletions R-package/tests/testthat/test_basic.R
@@ -265,7 +265,7 @@ test_that("lgb.train() works with force_col_wise and force_row_wise", {
     , metric = "binary_error"
     , force_col_wise = TRUE
  )
-  bst_colwise <- lgb.train(
+  bst_col_wise <- lgb.train(
    params = params
    , data = dtrain
    , nrounds = nrounds
@@ -283,12 +283,12 @@ test_that("lgb.train() works with force_col_wise and force_row_wise", {
  )

  expected_error <- 0.003070782
-  expect_equal(bst_colwise$eval_train()[[1L]][["value"]], expected_error)
+  expect_equal(bst_col_wise$eval_train()[[1L]][["value"]], expected_error)
  expect_equal(bst_row_wise$eval_train()[[1L]][["value"]], expected_error)

  # check some basic details of the boosters just to be sure force_col_wise
  # and force_row_wise are not causing any weird side effects
-  for (bst in list(bst_row_wise, bst_colwise)) {
+  for (bst in list(bst_row_wise, bst_col_wise)) {
    expect_equal(bst$current_iter(), nrounds)
    parsed_model <- jsonlite::fromJSON(bst$dump_model())
    expect_equal(parsed_model$objective, "binary sigmoid:1")
46 changes: 29 additions & 17 deletions docs/Parameters.rst
@@ -73,7 +73,11 @@ Core Parameters

 - ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications>`__

-- ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
+- binary classification application
+
+  - ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression)
+
+  - requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]

 - multi-class classification application

@@ -93,7 +97,7 @@ Core Parameters

 - ranking application

-  - ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#objective-parameters>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
+  - ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``

   - ``rank_xendcg``, `XE_NDCG_MART <https://arxiv.org/abs/1911.09798>`__ ranking objective function. To obtain reproducible results, you should disable parallelism by setting ``num_threads`` to 1, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart``

@@ -188,35 +192,43 @@ Learning Control Parameters

 - ``force_col_wise`` :raw-html:`<a id="force_col_wise" title="Permalink to this parameter" href="#force_col_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool

-  - set ``force_col_wise=true`` will force LightGBM to use col-wise histogram build
+  - used only with ``cpu`` device type
+
+  - set this to ``true`` to force col-wise histogram building

-  - Recommend ``force_col_wise=true`` when:
+  - enabling this is recommended when:

-    - the number of columns is large, or the total number of bin is large
+    - the number of columns is large, or the total number of bins is large

-    - when ``num_threads`` is large, e.g. ``>20``
+    - ``num_threads`` is large, e.g. ``>20``

-    - want to use small ``feature_fraction``, e.g. ``0.5``, to speed-up
+    - you want to use small ``feature_fraction`` (e.g. ``0.5``) to speed up

-    - want to reduce memory cost
+    - you want to reduce memory cost

-  - when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one
+  - **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+
+  - **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them

 - ``force_row_wise`` :raw-html:`<a id="force_row_wise" title="Permalink to this parameter" href="#force_row_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool

-  - set ``force_row_wise=true`` will force LightGBM to use row-wise histogram build
+  - used only with ``cpu`` device type
+
+  - set this to ``true`` to force row-wise histogram building
+
+  - enabling this is recommended when:

-  - Recommend ``force_row_wise=true`` when:
+    - the number of data points is large, and the total number of bins is relatively small

-    - the number of data is large, and the number of total bin is relatively small
+    - ``num_threads`` is relatively small, e.g. ``<=16``

-    - want to use small ``bagging``, or ``goss``, to speed-up
+    - you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up

-    - when ``num_threads`` is relatively small, e.g. ``<=16``
+  - **Note**: setting this to ``true`` will double the memory cost for Dataset object. If you have not enough memory, you can try setting ``force_col_wise=true``

-  - set ``force_row_wise=true`` will double the memory cost for Dataset object, if your memory is not enough, you can try ``force_col_wise=true``
+  - **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually

-  - when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one.
+  - **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them

 - ``max_depth`` :raw-html:`<a id="max_depth" title="Permalink to this parameter" href="#max_depth">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int

@@ -428,7 +440,7 @@ Learning Control Parameters

 - ``top_k`` :raw-html:`<a id="top_k" title="Permalink to this parameter" href="#top_k">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``topk``, constraints: ``top_k > 0``

-  - used in `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
+  - used only in ``voting`` tree learner, refer to `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__

   - set this to larger value for more accurate result, but it will slow down the training speed
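The rewritten ``force_col_wise``/``force_row_wise`` entries above are ordinary training parameters, so they can be exercised from any LightGBM API. Below is a minimal sketch using the C API from ``c_api.h``; the file name, parameter mix, and iteration count are illustrative only and are not part of this PR.

// Sketch: passing force_col_wise (and friends) through the C API parameter
// string. Error handling is reduced to a single check for brevity.
#include <LightGBM/c_api.h>
#include <cstdio>

int main() {
  DatasetHandle train_data = nullptr;
  // "train.svm" is a placeholder path to a LibSVM-format training file.
  if (LGBM_DatasetCreateFromFile("train.svm", "max_bin=255", nullptr,
                                 &train_data) != 0) {
    std::fprintf(stderr, "failed to load dataset\n");
    return 1;
  }

  // Choose exactly one of force_col_wise / force_row_wise; per the docs above
  // they are mutually exclusive, and both apply only to the cpu device type.
  const char* params =
      "objective=binary num_threads=24 force_col_wise=true feature_fraction=0.5";

  BoosterHandle booster = nullptr;
  LGBM_BoosterCreate(train_data, params, &booster);

  int is_finished = 0;
  for (int iter = 0; iter < 10 && !is_finished; ++iter) {
    LGBM_BoosterUpdateOneIter(booster, &is_finished);
  }

  LGBM_BoosterFree(booster);
  LGBM_DatasetFree(train_data);
  return 0;
}

Leaving both flags at ``false`` lets LightGBM time both layouts once and keep the faster one; setting one of them explicitly skips that trial, which is the overhead the new **Note** refers to.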
7 changes: 3 additions & 4 deletions include/LightGBM/bin.h
@@ -31,8 +31,8 @@ enum MissingType {

 typedef double hist_t;

-const size_t KHistEntrySize = 2 * sizeof(hist_t);
-const int KHistOffset = 2;
+const size_t kHistEntrySize = 2 * sizeof(hist_t);
+const int kHistOffset = 2;
 const double kSparseThreshold = 0.7;

 #define GET_GRAD(hist, i) hist[(i) << 1]
@@ -445,8 +445,7 @@ class Bin {


 class MultiValBin {
-public:
-
+ public:
   virtual ~MultiValBin() {}

   virtual data_size_t num_data() const = 0;
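Beyond the Google-style ``k`` prefix, the renamed constants encode a layout contract: each histogram bin holds a (gradient, hessian) pair, so one entry is ``2 * sizeof(hist_t)`` bytes and bin ``i`` starts ``kHistOffset * i`` slots into the buffer. The following self-contained sketch illustrates that layout; ``GET_HESS`` is defined here by analogy with the ``GET_GRAD`` macro visible in the diff and is an assumption, not a quote from bin.h.

// Sketch of the interleaved histogram layout implied by kHistEntrySize and
// kHistOffset: entry i occupies hist[2*i] (gradient) and hist[2*i + 1] (hessian).
#include <cstddef>
#include <cstdio>
#include <vector>

typedef double hist_t;

const size_t kHistEntrySize = 2 * sizeof(hist_t);  // bytes per (grad, hess) pair
const int kHistOffset = 2;                         // hist_t slots per bin

// GET_GRAD as in bin.h; GET_HESS written here by analogy (assumption).
#define GET_GRAD(hist, i) hist[(i) << 1]
#define GET_HESS(hist, i) hist[((i) << 1) + 1]

int main() {
  const int num_bins = 4;
  std::vector<hist_t> hist(num_bins * kHistOffset, 0.0);
  // Accumulate a sample with gradient -0.5 and hessian 0.25 into bin 2.
  GET_GRAD(hist, 2) += -0.5;
  GET_HESS(hist, 2) += 0.25;
  std::printf("bin 2: grad=%f hess=%f (entry size = %zu bytes)\n",
              GET_GRAD(hist, 2), GET_HESS(hist, 2), kHistEntrySize);
  return 0;
}

Because a histogram is one flat array of interleaved pairs, sizing and copying logic can be written against ``kHistEntrySize`` rather than a hand-coded byte count.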
40 changes: 23 additions & 17 deletions include/LightGBM/config.h
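(The overlap with docs/Parameters.rst above is expected: these structured ``desc``/``descl2`` comments are, as far as we can tell, the source LightGBM's docs tooling uses to generate Parameters.rst, so every wording fix has to land in both files to keep them in sync.)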
@@ -114,7 +114,9 @@ struct Config {
   // descl2 = ``mape``, `MAPE loss <https://en.wikipedia.org/wiki/Mean_absolute_percentage_error>`__, aliases: ``mean_absolute_percentage_error``
   // descl2 = ``gamma``, Gamma regression with log-link. It might be useful, e.g., for modeling insurance claims severity, or for any target that might be `gamma-distributed <https://en.wikipedia.org/wiki/Gamma_distribution#Occurrence_and_applications>`__
   // descl2 = ``tweedie``, Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any target that might be `tweedie-distributed <https://en.wikipedia.org/wiki/Tweedie_distribution#Occurrence_and_applications>`__
-  // desc = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression). Requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
+  // desc = binary classification application
+  // descl2 = ``binary``, binary `log loss <https://en.wikipedia.org/wiki/Cross_entropy>`__ classification (or logistic regression)
+  // descl2 = requires labels in {0, 1}; see ``cross-entropy`` application for general probability labels in [0, 1]
   // desc = multi-class classification application
   // descl2 = ``multiclass``, `softmax <https://en.wikipedia.org/wiki/Softmax_function>`__ objective function, aliases: ``softmax``
   // descl2 = ``multiclassova``, `One-vs-All <https://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest>`__ binary objective function, aliases: ``multiclass_ova``, ``ova``, ``ovr``
@@ -124,7 +126,7 @@ struct Config {
   // descl2 = ``cross_entropy_lambda``, alternative parameterization of cross-entropy, aliases: ``xentlambda``
   // descl2 = label is anything in interval [0, 1]
   // desc = ranking application
-  // descl2 = ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#objective-parameters>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
+  // descl2 = ``lambdarank``, `lambdarank <https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf>`__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain``
   // descl2 = ``rank_xendcg``, `XE_NDCG_MART <https://arxiv.org/abs/1911.09798>`__ ranking objective function. To obtain reproducible results, you should disable parallelism by setting ``num_threads`` to 1, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart``
   // descl2 = label should be ``int`` type, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect)
   std::string objective = "regression";
@@ -212,22 +214,26 @@ struct Config {

 #pragma region Learning Control Parameters

-  // desc = set ``force_col_wise=true`` will force LightGBM to use col-wise histogram build
-  // desc = Recommend ``force_col_wise=true`` when:
-  // descl2 = the number of columns is large, or the total number of bin is large
-  // descl2 = when ``num_threads`` is large, e.g. ``>20``
-  // descl2 = want to use small ``feature_fraction``, e.g. ``0.5``, to speed-up
-  // descl2 = want to reduce memory cost
-  // desc = when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one
+  // desc = used only with ``cpu`` device type
+  // desc = set this to ``true`` to force col-wise histogram building
+  // desc = enabling this is recommended when:
+  // descl2 = the number of columns is large, or the total number of bins is large
+  // descl2 = ``num_threads`` is large, e.g. ``>20``
+  // descl2 = you want to use small ``feature_fraction`` (e.g. ``0.5``) to speed up
+  // descl2 = you want to reduce memory cost
+  // desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+  // desc = **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them
   bool force_col_wise = false;

-  // desc = set ``force_row_wise=true`` will force LightGBM to use row-wise histogram build
-  // desc = Recommend ``force_row_wise=true`` when:
-  // descl2 = the number of data is large, and the number of total bin is relatively small
-  // descl2 = want to use small ``bagging``, or ``goss``, to speed-up
-  // descl2 = when ``num_threads`` is relatively small, e.g. ``<=16``
-  // desc = set ``force_row_wise=true`` will double the memory cost for Dataset object, if your memory is not enough, you can try ``force_col_wise=true``
-  // desc = when both ``force_col_wise`` and ``force_col_wise`` are ``false``, LightGBM will firstly try them both, and uses the faster one.
+  // desc = used only with ``cpu`` device type
+  // desc = set this to ``true`` to force row-wise histogram building
+  // desc = enabling this is recommended when:
+  // descl2 = the number of data points is large, and the total number of bins is relatively small
+  // descl2 = ``num_threads`` is relatively small, e.g. ``<=16``
+  // descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
+  // desc = **Note**: setting this to ``true`` will double the memory cost for Dataset object. If you have not enough memory, you can try setting ``force_col_wise=true``
+  // desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
+  // desc = **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them
   bool force_row_wise = false;

   // desc = limit the max depth for tree model. This is used to deal with over-fitting when ``#data`` is small. Tree still grows leaf-wise
@@ -411,7 +417,7 @@ struct Config {

   // alias = topk
   // check = >0
-  // desc = used in `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
+  // desc = used only in ``voting`` tree learner, refer to `Voting parallel <./Parallel-Learning-Guide.rst#choose-appropriate-parallel-algorithm>`__
   // desc = set this to larger value for more accurate result, but it will slow down the training speed
   int top_k = 20;
8 changes: 3 additions & 5 deletions include/LightGBM/dataset.h
@@ -8,7 +8,6 @@
 #include <LightGBM/config.h>
 #include <LightGBM/feature_group.h>
 #include <LightGBM/meta.h>
-#include <LightGBM/utils/array_args.h>
 #include <LightGBM/utils/common.h>
 #include <LightGBM/utils/openmp_wrapper.h>
 #include <LightGBM/utils/random.h>
@@ -439,9 +438,9 @@ class Dataset {
                           hist_t* histogram_data) const;

   void ConstructHistogramsMultiVal(const MultiValBin* multi_val_bin, const data_size_t* data_indices, data_size_t num_data,
-    const score_t* gradients, const score_t* hessians,
-    bool is_constant_hessian,
-    hist_t* histogram_data) const;
+                                   const score_t* gradients, const score_t* hessians,
+                                   bool is_constant_hessian,
+                                   hist_t* histogram_data) const;

   void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const;

@@ -656,7 +655,6 @@ class Dataset {
   bool zero_as_missing_;
   std::vector<int> feature_need_push_zeros_;
   mutable std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>> hist_buf_;
-
 };

 }  // namespace LightGBM
7 changes: 6 additions & 1 deletion include/LightGBM/feature_group.h
@@ -76,7 +76,7 @@ class FeatureGroup {
       num_total_bin_ += num_bin;
       bin_offsets_.emplace_back(num_total_bin_);
     }
-     if (bin_mappers_[0]->sparse_rate() >= kSparseThreshold) {
+    if (bin_mappers_[0]->sparse_rate() >= kSparseThreshold) {
       is_sparse_ = true;
       bin_data_.reset(Bin::CreateSparseBin(num_data, num_total_bin_));
     } else {
@@ -142,6 +142,7 @@ class FeatureGroup {
       bin_data_->LoadFromMemory(memory_ptr, local_used_indices);
     }
   }
+
   /*! \brief Destructor */
   ~FeatureGroup() {
   }
@@ -252,6 +253,7 @@ class FeatureGroup {
       }
     }
   }
+
   /*!
    * \brief From bin to feature value
    * \param bin
@@ -280,6 +282,7 @@ class FeatureGroup {
       bin_data_->SaveBinaryToFile(writer);
     }
   }
+
   /*!
    * \brief Get sizes in byte of this object
    */
@@ -297,8 +300,10 @@ class FeatureGroup {
     }
     return ret;
   }
+
   /*! \brief Disable copy */
   FeatureGroup& operator=(const FeatureGroup&) = delete;
+
   /*! \brief Deep copy */
   FeatureGroup(const FeatureGroup& other) {
     num_feature_ = other.num_feature_;
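For context on the re-indented condition above: ``kSparseThreshold`` is the 0.7 constant from the bin.h hunk earlier, and ``sparse_rate()`` reports how sparse a feature's values are, so the check flips a feature group to a sparse bin container once enough of its values are zero. A toy sketch of that decision rule follows; the rate computation is a stand-in for illustration, not LightGBM's actual ``BinMapper`` logic.

// Toy sketch of the sparse/dense choice made in the FeatureGroup constructor:
// if enough of a feature's values are zero, prefer a sparse bin container.
#include <cstdio>
#include <vector>

const double kSparseThreshold = 0.7;  // same value as in bin.h above

// Hypothetical helper: fraction of zero entries in a raw feature column.
double SparseRate(const std::vector<double>& column) {
  if (column.empty()) return 0.0;
  size_t zeros = 0;
  for (double v : column) {
    if (v == 0.0) ++zeros;
  }
  return static_cast<double>(zeros) / column.size();
}

int main() {
  std::vector<double> column = {0, 0, 0, 1.5, 0, 0, 2.0, 0, 0, 0};
  bool is_sparse = SparseRate(column) >= kSparseThreshold;
  std::printf("sparse_rate=%.2f -> %s bin\n", SparseRate(column),
              is_sparse ? "sparse" : "dense");
  return 0;
}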
7 changes: 4 additions & 3 deletions include/LightGBM/tree.h
@@ -89,7 +89,7 @@ class Tree {

   /*! \brief Set the output of one leaf */
   inline void SetLeafOutput(int leaf, double output) {
-     // Prevent denormal values because they can cause std::out_of_range exception when converting strings to doubles
+    // Prevent denormal values because they can cause std::out_of_range exception when converting strings to doubles
     if (IsZero(output)) {
       leaf_value_[leaf] = 0;
     } else {
@@ -155,7 +155,7 @@ class Tree {
 #pragma omp parallel for schedule(static, 1024) if (num_leaves_ >= 2048)
     for (int i = 0; i < num_leaves_; ++i) {
       double new_leaf_value = leaf_value_[i] * rate;
-       // Prevent denormal values because they can cause std::out_of_range exception when converting strings to doubles
+      // Prevent denormal values because they can cause std::out_of_range exception when converting strings to doubles
       if (IsZero(new_leaf_value)) {
         leaf_value_[i] = 0;
       } else {
@@ -173,7 +173,7 @@ class Tree {
 #pragma omp parallel for schedule(static, 1024) if (num_leaves_ >= 2048)
     for (int i = 0; i < num_leaves_; ++i) {
       double new_leaf_value = val + leaf_value_[i];
-       // Prevent denormal values because they can cause std::out_of_range exception when converting strings to doubles
+      // Prevent denormal values because they can cause std::out_of_range exception when converting strings to doubles
       if (IsZero(new_leaf_value)) {
         leaf_value_[i] = 0;
       } else {
@@ -231,6 +231,7 @@ class Tree {
   void RecomputeMaxDepth();

   int NextLeafId() const { return num_leaves_; }
+
  private:

   std::string NumericalDecisionIfElse(int node) const;
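All three re-indented comments in tree.h guard the same idiom: a leaf value close enough to zero is snapped to exactly zero before the model is written out, because printing a tiny double and parsing it back (e.g. with ``std::stod``) can set ``errno`` to ``ERANGE`` and raise ``std::out_of_range``. Here is a sketch of that pattern; the ``kZeroThreshold`` cutoff is an assumption, chosen only to sit far above the denormal range, and LightGBM's ``IsZero`` has its own cutoff.

// Sketch: clamp near-zero leaf values before serialization so the text
// round-trip never has to parse a denormal double.
#include <cmath>
#include <cstdio>

// Assumed cutoff; well above the double denormal range (~1e-308).
const double kZeroThreshold = 1e-35;

inline bool IsZero(double fval) {
  return std::fabs(fval) <= kZeroThreshold;
}

double SanitizeLeafValue(double output) {
  // Mirrors the pattern in Tree::SetLeafOutput: zero out tiny magnitudes,
  // keep everything else untouched.
  return IsZero(output) ? 0.0 : output;
}

int main() {
  std::printf("%g\n", SanitizeLeafValue(3.2e-41));  // clamped -> 0
  std::printf("%g\n", SanitizeLeafValue(-0.0137));  // unchanged -> -0.0137
  return 0;
}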