Change type of hist buffer to float #5624

Merged · 6 commits · Jun 3, 2020
7 changes: 5 additions & 2 deletions doc/parameter.rst
@@ -225,12 +225,15 @@ Parameters for Tree Booster
list is a group of indices of features that are allowed to interact with each other.
See tutorial for more information

Additional parameters for `gpu_hist` tree method
Additional parameters for `hist` and `gpu_hist` tree method
============================================================

* ``single_precision_histogram``, [default=``false``]

- Use single precision to build histograms. See document for GPU support for more details.
- Use single precision to build histograms instead of double precision.
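As a usage sketch (not part of this diff): the new parameter is set like any other booster parameter. The snippet below goes through the XGBoost C API and assumes `dtrain` is a `DMatrixHandle` created elsewhere (e.g. with `XGDMatrixCreateFromFile`).

```cpp
#include <xgboost/c_api.h>

// Sketch: opt the CPU `hist` method into single-precision histograms.
void ConfigureSinglePrecisionHist(DMatrixHandle dtrain, BoosterHandle* out) {
  XGBoosterCreate(&dtrain, 1, out);
  XGBoosterSetParam(*out, "tree_method", "hist");
  XGBoosterSetParam(*out, "single_precision_histogram", "true");
}
```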

Additional parameters for `gpu_hist` tree method
================================================

* ``deterministic_histogram``, [default=``true``]

14 changes: 11 additions & 3 deletions include/xgboost/base.h
@@ -141,16 +141,24 @@ class GradientPairInternal {
public:
using ValueT = T;

inline void Add(const ValueT& grad, const ValueT& hess) {
grad_ += grad;
hess_ += hess;
}

inline static void Reduce(GradientPairInternal<T>& a, const GradientPairInternal<T>& b) { // NOLINT(*)
a += b;
}

XGBOOST_DEVICE GradientPairInternal() : grad_(0), hess_(0) {}

XGBOOST_DEVICE GradientPairInternal(T grad, T hess) {
SetGrad(grad);
SetHess(hess);
}

// Copy constructor if of same value type
XGBOOST_DEVICE GradientPairInternal(const GradientPairInternal<T> &g)
: grad_(g.grad_), hess_(g.hess_) {} // NOLINT
// Copy constructor for the same value type; defaulted to keep the type trivially copyable
GradientPairInternal(const GradientPairInternal<T> &g) = default;

// Copy constructor if different value type - use getters and setters to
// perform conversion
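A side note on the defaulted copy constructor above: it is what makes the pair type trivially copyable, which the raw memset/memcpy paths in hist_util.cc rely on. A minimal compile-time check (a sketch, not part of the diff):

```cpp
#include <type_traits>
#include <xgboost/base.h>

// With a user-provided copy constructor this assertion would fail;
// with `= default` it holds, so raw-memory bulk fills are legal.
static_assert(
    std::is_trivially_copyable<
        xgboost::detail::GradientPairInternal<float>>::value,
    "histogram entries must be trivially copyable");
```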
123 changes: 91 additions & 32 deletions src/common/hist_util.cc
@@ -830,54 +830,78 @@ void GHistIndexBlockMatrix::Init(const GHistIndexMatrix& gmat,
/*!
* \brief fill a histogram by zeros in range [begin, end)
*/
void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
template<typename GradientSumT>
void InitilizeHistByZeroes(GHistRow<GradientSumT> hist, size_t begin, size_t end) {
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
std::fill(hist.begin() + begin, hist.begin() + end, tree::GradStats());
std::fill(hist.begin() + begin, hist.begin() + end,
xgboost::detail::GradientPairInternal<GradientSumT>());
#else // defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
memset(hist.data() + begin, '\0', (end-begin)*sizeof(tree::GradStats));
Member: Can we use std::fill consistently?

Contributor Author: We have to consider it separately, as performance can be affected. #5579
memset(hist.data() + begin, '\0', (end-begin)*
sizeof(xgboost::detail::GradientPairInternal<GradientSumT>));
#endif // defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
}
template void InitilizeHistByZeroes(GHistRow<float> hist, size_t begin,
size_t end);
template void InitilizeHistByZeroes(GHistRow<double> hist, size_t begin,
size_t end);
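For readers following the std::fill/memset exchange above, here is a standalone sketch of the two zero-fill strategies (local names, not the library's). Both produce identical results for a trivially copyable pair; the question deferred to #5579 is purely about performance.

```cpp
#include <algorithm>
#include <cstring>
#include <vector>

struct Pair { double grad = 0.0, hess = 0.0; };

void ZeroFillPortable(std::vector<Pair>* hist) {
  // R-safe path: value-initialize every bin.
  std::fill(hist->begin(), hist->end(), Pair());
}

void ZeroFillRaw(std::vector<Pair>* hist) {
  // memset path: valid because Pair is trivially copyable and
  // all-zero bytes represent 0.0 for IEEE-754 doubles.
  std::memset(hist->data(), 0, hist->size() * sizeof(Pair));
}
```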

/*!
* \brief Increment hist as dst += add in range [begin, end)
*/
void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end) {
using FPType = decltype(tree::GradStats::sum_grad);
FPType* pdst = reinterpret_cast<FPType*>(dst.data());
const FPType* padd = reinterpret_cast<const FPType*>(add.data());
template<typename GradientSumT>
void IncrementHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> add,
size_t begin, size_t end) {
GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst.data());
const GradientSumT* padd = reinterpret_cast<const GradientSumT*>(add.data());

for (size_t i = 2 * begin; i < 2 * end; ++i) {
pdst[i] += padd[i];
}
}
template void IncrementHist(GHistRow<float> dst, const GHistRow<float> add,
size_t begin, size_t end);
template void IncrementHist(GHistRow<double> dst, const GHistRow<double> add,
size_t begin, size_t end);

/*!
* \brief Copy hist from src to dst in range [begin, end)
*/
void CopyHist(GHistRow dst, const GHistRow src, size_t begin, size_t end) {
using FPType = decltype(tree::GradStats::sum_grad);
FPType* pdst = reinterpret_cast<FPType*>(dst.data());
const FPType* psrc = reinterpret_cast<const FPType*>(src.data());
template<typename GradientSumT>
void CopyHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src,
size_t begin, size_t end) {
GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst.data());
const GradientSumT* psrc = reinterpret_cast<const GradientSumT*>(src.data());

for (size_t i = 2 * begin; i < 2 * end; ++i) {
pdst[i] = psrc[i];
}
}
template void CopyHist(GHistRow<float> dst, const GHistRow<float> src,
size_t begin, size_t end);
template void CopyHist(GHistRow<double> dst, const GHistRow<double> src,
size_t begin, size_t end);

/*!
* \brief Compute Subtraction: dst = src1 - src2 in range [begin, end)
*/
void SubtractionHist(GHistRow dst, const GHistRow src1, const GHistRow src2,
template<typename GradientSumT>
void SubtractionHist(GHistRow<GradientSumT> dst, const GHistRow<GradientSumT> src1,
const GHistRow<GradientSumT> src2,
size_t begin, size_t end) {
using FPType = decltype(tree::GradStats::sum_grad);
FPType* pdst = reinterpret_cast<FPType*>(dst.data());
const FPType* psrc1 = reinterpret_cast<const FPType*>(src1.data());
const FPType* psrc2 = reinterpret_cast<const FPType*>(src2.data());
GradientSumT* pdst = reinterpret_cast<GradientSumT*>(dst.data());
const GradientSumT* psrc1 = reinterpret_cast<const GradientSumT*>(src1.data());
const GradientSumT* psrc2 = reinterpret_cast<const GradientSumT*>(src2.data());

for (size_t i = 2 * begin; i < 2 * end; ++i) {
pdst[i] = psrc1[i] - psrc2[i];
}
}
template void SubtractionHist(GHistRow<float> dst, const GHistRow<float> src1,
const GHistRow<float> src2,
size_t begin, size_t end);
template void SubtractionHist(GHistRow<double> dst, const GHistRow<double> src1,
const GHistRow<double> src2,
size_t begin, size_t end);
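Why the loops above run over [2*begin, 2*end): each bin stores a (grad, hess) pair, so viewing the bin array as a flat scalar array doubles the index range and keeps the body a plain scalar loop. A reduced sketch with hypothetical names:

```cpp
#include <cstddef>

struct Pair { float grad, hess; };

// dst[i] += add[i] over bins [begin, end), on the flat scalar view.
void AddBins(Pair* dst, const Pair* add, std::size_t begin, std::size_t end) {
  float* pdst = reinterpret_cast<float*>(dst);
  const float* padd = reinterpret_cast<const float*>(add);
  for (std::size_t i = 2 * begin; i < 2 * end; ++i) {
    pdst[i] += padd[i];  // even i: grad of bin i/2; odd i: hess
  }
}
```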

struct Prefetch {
public:
@@ -908,7 +932,7 @@ void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
const size_t n_features,
GHistRow hist) {
GHistRow<FPType> hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
Expand Down Expand Up @@ -948,7 +972,7 @@ template<typename FPType, bool do_prefetch>
void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow hist) {
GHistRow<FPType> hist) {
const size_t size = row_indices.Size();
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
Expand Down Expand Up @@ -987,7 +1011,7 @@ void BuildHistSparseKernel(const std::vector<GradientPair>& gpair,
template<typename FPType, bool do_prefetch, typename BinIdxType>
void BuildHistDispatchKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, GHistRow hist, bool isDense) {
const GHistIndexMatrix& gmat, GHistRow<FPType> hist, bool isDense) {
if (isDense) {
const size_t* row_ptr = gmat.row_ptr.data();
const size_t n_features = row_ptr[row_indices.begin[0]+1] - row_ptr[row_indices.begin[0]];
@@ -1002,7 +1026,7 @@ void BuildHistDispatchKernel(const std::vector<GradientPair>& gpair,
template<typename FPType, bool do_prefetch>
void BuildHistKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const bool isDense, GHistRow hist) {
const GHistIndexMatrix& gmat, const bool isDense, GHistRow<FPType> hist) {
const bool is_dense = row_indices.Size() && isDense;
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
@@ -1022,12 +1046,12 @@ void BuildHistKernel(const std::vector<GradientPair>& gpair,
}
}

void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,
template<typename GradientSumT>
void GHistBuilder<GradientSumT>::BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow hist,
GHistRowT hist,
bool isDense) {
using FPType = decltype(tree::GradStats::sum_grad);
const size_t nrows = row_indices.Size();
const size_t no_prefetch_size = Prefetch::NoPrefetchSize(nrows);

@@ -1036,29 +1060,42 @@ void GHistBuilder::BuildHist(const std::vector<GradientPair>& gpair,

if (contiguousBlock) {
// contiguous memory access, built-in HW prefetching is enough
BuildHistKernel<FPType, false>(gpair, row_indices, gmat, isDense, hist);
BuildHistKernel<GradientSumT, false>(gpair, row_indices, gmat, isDense, hist);
} else {
const RowSetCollection::Elem span1(row_indices.begin, row_indices.end - no_prefetch_size);
const RowSetCollection::Elem span2(row_indices.end - no_prefetch_size, row_indices.end);

BuildHistKernel<FPType, true>(gpair, span1, gmat, isDense, hist);
BuildHistKernel<GradientSumT, true>(gpair, span1, gmat, isDense, hist);
// no prefetching to avoid loading extra memory
BuildHistKernel<FPType, false>(gpair, span2, gmat, isDense, hist);
BuildHistKernel<GradientSumT, false>(gpair, span2, gmat, isDense, hist);
}
}
template
void GHistBuilder<float>::BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<float> hist,
bool isDense);
template
void GHistBuilder<double>::BuildHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat,
GHistRow<double> hist,
bool isDense);
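The span split above is a standard software-prefetch pattern: for all but the last rows, hint the cache about data a fixed distance ahead; finish the tail without prefetching so no index past the end of the row set is ever read. A compiler-specific sketch (GCC/Clang builtin, hypothetical names and distance):

```cpp
#include <cstddef>

constexpr std::size_t kPrefetchDistance = 10;  // illustrative only

float SumRows(const std::size_t* rid, std::size_t n, const float* data) {
  float sum = 0.0f;
  const std::size_t split = n > kPrefetchDistance ? n - kPrefetchDistance : 0;
  for (std::size_t i = 0; i < split; ++i) {
    __builtin_prefetch(data + rid[i + kPrefetchDistance]);  // hint only
    sum += data[rid[i]];
  }
  for (std::size_t i = split; i < n; ++i) {  // tail: no prefetch
    sum += data[rid[i]];
  }
  return sum;
}
```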

void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
template<typename GradientSumT>
void GHistBuilder<GradientSumT>::BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb,
GHistRow hist) {
GHistRowT hist) {
constexpr int kUnroll = 8; // loop unrolling factor
const size_t nblock = gmatb.GetNumBlock();
const size_t nrows = row_indices.end - row_indices.begin;
const size_t rest = nrows % kUnroll;
#if defined(_OPENMP)
const auto nthread = static_cast<bst_omp_uint>(this->nthread_); // NOLINT
#endif // defined(_OPENMP)
tree::GradStats* p_hist = hist.data();
xgboost::detail::GradientPairInternal<GradientSumT>* p_hist = hist.data();

#pragma omp parallel for num_threads(nthread) schedule(guided)
for (bst_omp_uint bid = 0; bid < nblock; ++bid) {
@@ -1079,7 +1116,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
for (int k = 0; k < kUnroll; ++k) {
for (size_t j = ibegin[k]; j < iend[k]; ++j) {
const uint32_t bin = gmat.index[j];
p_hist[bin].Add(stat[k]);
p_hist[bin].Add(stat[k].GetGrad(), stat[k].GetHess());
}
}
}
@@ -1090,13 +1127,27 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
const GradientPair stat = gpair[rid];
for (size_t j = ibegin; j < iend; ++j) {
const uint32_t bin = gmat.index[j];
p_hist[bin].Add(stat);
p_hist[bin].Add(stat.GetGrad(), stat.GetHess());
Member: Why not get this into GradientPair with operator+=?

Contributor Author: There is no possibility to call GradientPairInternal<float>::operator+=(const GradientPairInternal<double>&).
}
}
}
}
template
void GHistBuilder<float>::BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb,
GHistRow<float> hist);
template
void GHistBuilder<double>::BuildBlockHist(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexBlockMatrix& gmatb,
GHistRow<double> hist);
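A reduced illustration of the constraint raised in the operator+= thread above (simplified type, not the library's): operator+= only accepts a pair of the same value type, so accumulating float gradients into a possibly-double histogram bin has to go through a component-wise Add that converts explicitly.

```cpp
template <typename T>
struct PairT {
  T grad{0}, hess{0};
  PairT& operator+=(const PairT& rhs) {  // same-T operands only
    grad += rhs.grad;
    hess += rhs.hess;
    return *this;
  }
  void Add(float g, float h) {  // converts per component
    grad += static_cast<T>(g);
    hess += static_cast<T>(h);
  }
};

int main() {
  PairT<double> bin;
  bin.Add(0.5f, 1.0f);                 // fine: components converted
  // bin += PairT<float>{0.5f, 1.0f};  // would not compile: mismatched T
}
```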


void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
template<typename GradientSumT>
void GHistBuilder<GradientSumT>::SubtractionTrick(GHistRowT self,
GHistRowT sibling,
GHistRowT parent) {
const size_t size = self.size();
CHECK_EQ(sibling.size(), size);
CHECK_EQ(parent.size(), size);
@@ -1111,6 +1162,14 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
SubtractionHist(self, parent, sibling, ibegin, iend);
}
}
template
void GHistBuilder<float>::SubtractionTrick(GHistRow<float> self,
GHistRow<float> sibling,
GHistRow<float> parent);
template
void GHistBuilder<double>::SubtractionTrick(GHistRow<double> self,
GHistRow<double> sibling,
GHistRow<double> parent);

} // namespace common
} // namespace xgboost
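For context on SubtractionTrick (a sketch with hypothetical names, not the library's API): once the parent's histogram and one child's histogram are built, the other child's histogram is their elementwise difference, so only the smaller child ever needs a full pass over the rows.

```cpp
#include <cstddef>
#include <vector>

// other_child = parent - built_child, bin by bin.
std::vector<double> OtherChildHist(const std::vector<double>& parent,
                                   const std::vector<double>& built_child) {
  std::vector<double> other(parent.size());
  for (std::size_t i = 0; i < parent.size(); ++i) {
    other[i] = parent[i] - built_child[i];
  }
  return other;
}
```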