
Commit 72cd151

canonizer authored and RAMitchell committed
Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (dmlc#3446)
* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.
  - added distributions to HostDeviceVector
  - using HostDeviceVector for labels, weights and base margins in MetaInfo
  - using HostDeviceVector for offset and data in SparsePage
  - other necessary refactoring
* Added const versions of HostDeviceVector API calls.
  - const versions added to calls that can trigger data transfers, e.g. DevicePointer()
  - updated the code that uses HostDeviceVector
  - objective functions now accept const HostDeviceVector<bst_float>& for predictions
* Updated src/linear/updater_gpu_coordinate.cu.
* Added a read-only state for HostDeviceVector sync.
  - no copies are performed if both the host and devices access the HostDeviceVector read-only
* Fixed linter and test errors.
  - updated the lz4 plugin
  - added ConstDeviceSpan to HostDeviceVector
  - using device % dh::NVisibleDevices() for the physical device number, e.g. in calls to cudaSetDevice()
* Fixed explicit template instantiation errors for HostDeviceVector.
  - replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>
* Fixed HostDeviceVector tests that require multiple GPUs.
  - added a mock set-device handler; when set, it is called instead of cudaSetDevice()

1 parent 58d783d commit 72cd151
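
Before the per-file diffs, a note on the class they revolve around: HostDeviceVector (declared in src/common/host_device_vector.h) is XGBoost's host/device-mirrored buffer, and the changes below use only a small part of its surface: Size(), Resize(), and HostVector(). What follows is a minimal, host-only sketch of that surface, written for this page as an illustration; the real class also manages GPU copies and the read-only sync state this commit introduces.

// Host-only sketch of the HostDeviceVector surface used in this commit.
// Illustration only: the real class (src/common/host_device_vector.h)
// also mirrors the buffer on GPUs and tracks host/device sync state, so
// that HostVector() can lazily copy data back before exposing it.
#include <cstddef>
#include <vector>

template <typename T>
class HostDeviceVectorSketch {
 public:
  explicit HostDeviceVectorSketch(std::size_t n = 0) : host_(n) {}
  std::size_t Size() const { return host_.size(); }
  void Resize(std::size_t n) { host_.resize(n); }
  // The const overload mirrors what this commit adds to the real class:
  // read-only access that does not mark the data dirty and therefore
  // cannot trigger a transfer on the next device access.
  std::vector<T>& HostVector() { return host_; }
  const std::vector<T>& HostVector() const { return host_; }

 private:
  std::vector<T> host_;
};

int main() {
  HostDeviceVectorSketch<float> labels(4);
  labels.HostVector() = {0.0f, 1.0f, 1.0f, 0.0f};
  return labels.Size() == 4 ? 0 : 1;
}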


45 files changed (+1141, -560 lines)

include/xgboost/data.h (+48, -36)
@@ -17,6 +17,8 @@
 #include "./base.h"
 #include "../../src/common/span.h"
 
+#include "../../src/common/host_device_vector.h"
+
 namespace xgboost {
 // forward declare learner.
 class LearnerImpl;
@@ -41,7 +43,7 @@ class MetaInfo {
   /*! \brief number of nonzero entries in the data */
   uint64_t num_nonzero_{0};
   /*! \brief label of each instance */
-  std::vector<bst_float> labels_;
+  HostDeviceVector<bst_float> labels_;
   /*!
    * \brief specified root index of each instance,
    * can be used for multi task setting
@@ -53,15 +55,15 @@
    */
   std::vector<bst_uint> group_ptr_;
   /*! \brief weights of each instance, optional */
-  std::vector<bst_float> weights_;
+  HostDeviceVector<bst_float> weights_;
   /*! \brief session-id of each instance, optional */
   std::vector<uint64_t> qids_;
   /*!
    * \brief initialized margins,
    * if specified, xgboost will start from this init margin
    * can be used to specify initial prediction to boost from.
    */
-  std::vector<bst_float> base_margin_;
+  HostDeviceVector<bst_float> base_margin_;
   /*! \brief version flag, used to check version of this info */
   static const int kVersion = 2;
   /*! \brief version that introduced qid field */
@@ -74,7 +76,7 @@ class MetaInfo {
    * \return The weight.
    */
   inline bst_float GetWeight(size_t i) const {
-    return weights_.size() != 0 ? weights_[i] : 1.0f;
+    return weights_.Size() != 0 ? weights_.HostVector()[i] : 1.0f;
   }
   /*!
    * \brief Get the root index of i-th instance.
@@ -86,12 +88,12 @@
   }
   /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
   inline const std::vector<size_t>& LabelAbsSort() const {
-    if (label_order_cache_.size() == labels_.size()) {
+    if (label_order_cache_.size() == labels_.Size()) {
       return label_order_cache_;
     }
-    label_order_cache_.resize(labels_.size());
+    label_order_cache_.resize(labels_.Size());
     std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
-    const auto l = labels_;
+    const auto& l = labels_.HostVector();
     XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
               [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
 
@@ -151,9 +153,9 @@ struct Entry {
  */
 class SparsePage {
  public:
-  std::vector<size_t> offset;
+  HostDeviceVector<size_t> offset;
   /*! \brief the data of the segments */
-  std::vector<Entry> data;
+  HostDeviceVector<Entry> data;
 
   size_t base_rowid;
 
@@ -162,8 +164,10 @@ class SparsePage {
 
   /*! \brief get i-th row from the batch */
   inline Inst operator[](size_t i) const {
-    return {data.data() + offset[i],
-            static_cast<Inst::index_type>(offset[i + 1] - offset[i])};
+    const auto& data_vec = data.HostVector();
+    const auto& offset_vec = offset.HostVector();
+    return {data_vec.data() + offset_vec[i],
+            static_cast<Inst::index_type>(offset_vec[i + 1] - offset_vec[i])};
   }
 
   /*! \brief constructor */
@@ -172,73 +176,81 @@ class SparsePage {
   }
   /*! \return number of instance in the page */
   inline size_t Size() const {
-    return offset.size() - 1;
+    return offset.Size() - 1;
   }
   /*! \return estimation of memory cost of this page */
   inline size_t MemCostBytes() const {
-    return offset.size() * sizeof(size_t) + data.size() * sizeof(Entry);
+    return offset.Size() * sizeof(size_t) + data.Size() * sizeof(Entry);
   }
   /*! \brief clear the page */
   inline void Clear() {
     base_rowid = 0;
-    offset.clear();
-    offset.push_back(0);
-    data.clear();
+    auto& offset_vec = offset.HostVector();
+    offset_vec.clear();
+    offset_vec.push_back(0);
+    data.HostVector().clear();
   }
 
   /*!
    * \brief Push row block into the page.
   * \param batch the row batch.
   */
   inline void Push(const dmlc::RowBlock<uint32_t>& batch) {
-    data.reserve(data.size() + batch.offset[batch.size] - batch.offset[0]);
-    offset.reserve(offset.size() + batch.size);
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    data_vec.reserve(data.Size() + batch.offset[batch.size] - batch.offset[0]);
+    offset_vec.reserve(offset.Size() + batch.size);
     CHECK(batch.index != nullptr);
     for (size_t i = 0; i < batch.size; ++i) {
-      offset.push_back(offset.back() + batch.offset[i + 1] - batch.offset[i]);
+      offset_vec.push_back(offset_vec.back() + batch.offset[i + 1] - batch.offset[i]);
     }
     for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
       uint32_t index = batch.index[i];
       bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
-      data.emplace_back(index, fvalue);
+      data_vec.emplace_back(index, fvalue);
     }
-    CHECK_EQ(offset.back(), data.size());
+    CHECK_EQ(offset_vec.back(), data.Size());
   }
   /*!
    * \brief Push a sparse page
   * \param batch the row page
   */
   inline void Push(const SparsePage &batch) {
-    size_t top = offset.back();
-    data.resize(top + batch.data.size());
-    std::memcpy(dmlc::BeginPtr(data) + top,
-                dmlc::BeginPtr(batch.data),
-                sizeof(Entry) * batch.data.size());
-    size_t begin = offset.size();
-    offset.resize(begin + batch.Size());
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    const auto& batch_offset_vec = batch.offset.HostVector();
+    const auto& batch_data_vec = batch.data.HostVector();
+    size_t top = offset_vec.back();
+    data_vec.resize(top + batch.data.Size());
+    std::memcpy(dmlc::BeginPtr(data_vec) + top,
+                dmlc::BeginPtr(batch_data_vec),
+                sizeof(Entry) * batch.data.Size());
+    size_t begin = offset.Size();
+    offset_vec.resize(begin + batch.Size());
     for (size_t i = 0; i < batch.Size(); ++i) {
-      offset[i + begin] = top + batch.offset[i + 1];
+      offset_vec[i + begin] = top + batch_offset_vec[i + 1];
     }
   }
   /*!
    * \brief Push one instance into page
   * \param inst an instance row
   */
   inline void Push(const Inst &inst) {
-    offset.push_back(offset.back() + inst.size());
-    size_t begin = data.size();
-    data.resize(begin + inst.size());
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    offset_vec.push_back(offset_vec.back() + inst.size());
+
+    size_t begin = data_vec.size();
+    data_vec.resize(begin + inst.size());
     if (inst.size() != 0) {
-      std::memcpy(dmlc::BeginPtr(data) + begin, inst.data(),
+      std::memcpy(dmlc::BeginPtr(data_vec) + begin, inst.data(),
                   sizeof(Entry) * inst.size());
     }
   }
 
-  size_t Size() { return offset.size() - 1; }
+  size_t Size() { return offset.Size() - 1; }
 };
 
-
-
 /*!
  * \brief This is data structure that user can pass to DMatrix::Create
  * to create a DMatrix for training, user can create this data structure
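
With these changes, callers no longer reach the row storage as plain std::vector members: host access goes through HostVector() and sizes through Size(). A sketch of caller code against the post-commit header (RowSum is a hypothetical helper written for illustration, not part of the diff):

// Hypothetical caller after this commit: sums the feature values of one
// row of a SparsePage, assuming the post-commit data.h shown above.
#include <cstddef>
#include <xgboost/data.h>

xgboost::bst_float RowSum(const xgboost::SparsePage& page, std::size_t row) {
  // Bind the host views once; in the real class each HostVector() call
  // is a potential host/device synchronization point.
  const auto& offset_vec = page.offset.HostVector();
  const auto& data_vec = page.data.HostVector();
  xgboost::bst_float sum = 0.0f;
  for (std::size_t j = offset_vec[row]; j < offset_vec[row + 1]; ++j) {
    sum += data_vec[j].fvalue;  // Entry carries an (index, fvalue) pair
  }
  return sum;
}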

include/xgboost/objective.h (+1, -1)
@@ -44,7 +44,7 @@ class ObjFunction {
   * \param iteration current iteration number.
   * \param out_gpair output of get gradient, saves gradient and second order gradient in
   */
-  virtual void GetGradient(HostDeviceVector<bst_float>* preds,
+  virtual void GetGradient(const HostDeviceVector<bst_float>& preds,
                            const MetaInfo& info,
                            int iteration,
                            HostDeviceVector<GradientPair>* out_gpair) = 0;
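
This one-line change is the API-visible half of the read-only sync state: predictions are an input the objective never mutates, so taking them as const HostDeviceVector& lets host and device code read them without forcing a copy, while gradients still leave through the mutable out_gpair pointer. Condensed, an override now has the following shape (MyObj is hypothetical, and a real subclass must also implement ObjFunction's other virtual methods, as plugin/example/custom_obj.cc below does):

// Hypothetical skeleton showing only the changed method.
class MyObj : public xgboost::ObjFunction {
 public:
  void GetGradient(const xgboost::HostDeviceVector<xgboost::bst_float>& preds,
                   const xgboost::MetaInfo& info, int iteration,
                   xgboost::HostDeviceVector<xgboost::GradientPair>* out_gpair) override {
    out_gpair->Resize(preds.Size());           // output stays mutable, via pointer
    const auto& preds_h = preds.HostVector();  // read-only host view of predictions
    // ... compute a GradientPair per prediction from preds_h and info ...
  }
};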

plugin/example/custom_obj.cc (+6, -5)
@@ -33,21 +33,22 @@ class MyLogistic : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(HostDeviceVector<bst_float> *preds,
+  void GetGradient(const HostDeviceVector<bst_float> &preds,
                    const MetaInfo &info,
                    int iter,
                    HostDeviceVector<GradientPair> *out_gpair) override {
-    out_gpair->Resize(preds->Size());
-    std::vector<bst_float>& preds_h = preds->HostVector();
+    out_gpair->Resize(preds.Size());
+    const std::vector<bst_float>& preds_h = preds.HostVector();
     std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
+    const std::vector<bst_float>& labels_h = info.labels_.HostVector();
     for (size_t i = 0; i < preds_h.size(); ++i) {
       bst_float w = info.GetWeight(i);
       // scale the negative examples!
-      if (info.labels_[i] == 0.0f) w *= param_.scale_neg_weight;
+      if (labels_h[i] == 0.0f) w *= param_.scale_neg_weight;
       // logistic transformation
       bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
       // this is the gradient
-      bst_float grad = (p - info.labels_[i]) * w;
+      bst_float grad = (p - labels_h[i]) * w;
       // this is the second order gradient
       bst_float hess = p * (1.0f - p) * w;
       out_gpair_h.at(i) = GradientPair(grad, hess);
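
Note the const-correctness ripple through the plugin: because preds is now a const reference, preds_h becomes a const reference too, and the labels, now held in a HostDeviceVector, are read through a host view bound once before the loop (labels_h) instead of being indexed directly as info.labels_[i].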

plugin/lz4/sparse_page_lz4_format.cc (+28, -23)
@@ -177,15 +177,17 @@ class SparsePageLZ4Format : public SparsePageFormat {
   }
 
   bool Read(SparsePage* page, dmlc::SeekStream* fi) override {
-    if (!fi->Read(&(page->offset))) return false;
-    CHECK_NE(page->offset.size(), 0) << "Invalid SparsePage file";
+    auto& offset_vec = page->offset.HostVector();
+    auto& data_vec = page->data.HostVector();
+    if (!fi->Read(&(offset_vec))) return false;
+    CHECK_NE(offset_vec.size(), 0) << "Invalid SparsePage file";
     this->LoadIndexValue(fi);
 
-    page->data.resize(page->offset.back());
+    data_vec.resize(offset_vec.back());
     CHECK_EQ(index_.data.size(), value_.data.size());
-    CHECK_EQ(index_.data.size(), page->data.size());
-    for (size_t i = 0; i < page->data.size(); ++i) {
-      page->data[i] = Entry(index_.data[i] + min_index_, value_.data[i]);
+    CHECK_EQ(index_.data.size(), data_vec.size());
+    for (size_t i = 0; i < data_vec.size(); ++i) {
+      data_vec[i] = Entry(index_.data[i] + min_index_, value_.data[i]);
     }
     return true;
   }
@@ -195,47 +197,50 @@ class SparsePageLZ4Format : public SparsePageFormat {
                 const std::vector<bst_uint>& sorted_index_set) override {
     if (!fi->Read(&disk_offset_)) return false;
     this->LoadIndexValue(fi);
-
-    page->offset.clear();
-    page->offset.push_back(0);
+    auto& offset_vec = page->offset.HostVector();
+    auto& data_vec = page->data.HostVector();
+    offset_vec.clear();
+    offset_vec.push_back(0);
     for (bst_uint cid : sorted_index_set) {
-      page->offset.push_back(
-          page->offset.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
+      offset_vec.push_back(
+          offset_vec.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
     }
-    page->data.resize(page->offset.back());
+    data_vec.resize(offset_vec.back());
     CHECK_EQ(index_.data.size(), value_.data.size());
     CHECK_EQ(index_.data.size(), disk_offset_.back());
 
     for (size_t i = 0; i < sorted_index_set.size(); ++i) {
       bst_uint cid = sorted_index_set[i];
-      size_t dst_begin = page->offset[i];
+      size_t dst_begin = offset_vec[i];
       size_t src_begin = disk_offset_[cid];
       size_t num = disk_offset_[cid + 1] - disk_offset_[cid];
       for (size_t j = 0; j < num; ++j) {
-        page->data[dst_begin + j] = Entry(
+        data_vec[dst_begin + j] = Entry(
             index_.data[src_begin + j] + min_index_, value_.data[src_begin + j]);
       }
     }
     return true;
   }
 
   void Write(const SparsePage& page, dmlc::Stream* fo) override {
-    CHECK(page.offset.size() != 0 && page.offset[0] == 0);
-    CHECK_EQ(page.offset.back(), page.data.size());
-    fo->Write(page.offset);
+    const auto& offset_vec = page.offset.HostVector();
+    const auto& data_vec = page.data.HostVector();
+    CHECK(offset_vec.size() != 0 && offset_vec[0] == 0);
+    CHECK_EQ(offset_vec.back(), data_vec.size());
+    fo->Write(offset_vec);
     min_index_ = page.base_rowid;
     fo->Write(&min_index_, sizeof(min_index_));
-    index_.data.resize(page.data.size());
-    value_.data.resize(page.data.size());
+    index_.data.resize(data_vec.size());
+    value_.data.resize(data_vec.size());
 
-    for (size_t i = 0; i < page.data.size(); ++i) {
-      bst_uint idx = page.data[i].index - min_index_;
+    for (size_t i = 0; i < data_vec.size(); ++i) {
+      bst_uint idx = data_vec[i].index - min_index_;
       CHECK_LE(idx, static_cast<bst_uint>(std::numeric_limits<StorageIndex>::max()))
           << "The storage index is chosen to limited to smaller equal than "
          << std::numeric_limits<StorageIndex>::max()
         << "min_index=" << min_index_;
       index_.data[i] = static_cast<StorageIndex>(idx);
-      value_.data[i] = page.data[i].fvalue;
+      value_.data[i] = data_vec[i].fvalue;
     }
 
     index_.InitCompressChunks(kChunkSize, kMaxChunk);
@@ -259,7 +264,7 @@ class SparsePageLZ4Format : public SparsePageFormat {
     raw_bytes_value_ += value_.RawBytes();
     encoded_bytes_index_ += index_.EncodedBytes();
     encoded_bytes_value_ += value_.EncodedBytes();
-    raw_bytes_ += page.offset.size() * sizeof(size_t);
+    raw_bytes_ += offset_vec.size() * sizeof(size_t);
   }
 
   inline void LoadIndexValue(dmlc::SeekStream* fi) {