Skip to content

Commit

Permalink
fix inference batch_size configuration (PaddlePaddle#57)
Browse files Browse the repository at this point in the history
* gpu_graph_infer

* simplify infer

* fix

* remove logs

* remove logs

* change logs

* speed up dumpfile

* reduce inference time

* replace to_string

* fix inference

* change log

* fix infer batch_size

* remove log

* check tensor initialization
  • Loading branch information
seemingwang authored Jul 4, 2022
1 parent 1ce2252 commit 967cb54
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
6 changes: 3 additions & 3 deletions paddle/fluid/framework/data_feed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ int GraphDataGenerator::GenerateBatch() {
}

cudaStreamSynchronize(stream_);
if (!gpu_graph_training_) return total_instance / 2;
if (!gpu_graph_training_) return 1;
ins_buf_pair_len_ -= total_instance / 2;
if (debug_mode_) {
uint64_t h_slot_tensor[slot_num_][total_instance];
Expand Down Expand Up @@ -966,12 +966,12 @@ void GraphDataGenerator::SetConfig(
window_ = graph_config.window();
once_sample_startid_len_ = graph_config.once_sample_startid_len();
debug_mode_ = graph_config.debug_mode();
if (debug_mode_) {
gpu_graph_training_ = graph_config.gpu_graph_training();
if (debug_mode_ || !gpu_graph_training_) {
batch_size_ = graph_config.batch_size();
} else {
batch_size_ = once_sample_startid_len_;
}
gpu_graph_training_ = graph_config.gpu_graph_training();
repeat_time_ = graph_config.sample_times_one_chunk();
buf_size_ =
once_sample_startid_len_ * walk_len_ * walk_degree_ * repeat_time_;
Expand Down
31 changes: 28 additions & 3 deletions paddle/fluid/framework/device_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,35 @@ void DeviceWorker::DumpField(const Scope& scope, int dump_mode,
size_t batch_size = device_reader_->GetCurBatchSize();
auto& ins_id_vec = device_reader_->GetInsIdVec();
auto& ins_content_vec = device_reader_->GetInsContentVec();
if (ins_id_vec.size() > 0) {
if (dump_mode_ == 3) {
batch_size = std::string::npos;
bool has_valid_batch = false;
for (auto& field : *dump_fields_) {
Variable* var = scope.FindVar(field);
if (var == nullptr) {
VLOG(0) << "Note: field[" << field
<< "] cannot be find in scope, so it was skipped.";
continue;
}
LoDTensor* tensor = var->GetMutable<LoDTensor>();
if (!tensor->IsInitialized()) {
VLOG(0) << "Note: field[" << field
<< "] is not initialized, so it was skipped.";
continue;
}
auto& dims = tensor->dims();
if (dims.size() == 2 && dims[0] > 0) {
batch_size = std::min(batch_size, static_cast<size_t>(dims[0]));
// VLOG(0)<<"in dump field ---> "<<field<<" dim_size = "<<dims[0]<<"
// "<<dims[1]<<" batch_size = "<<batch_size;
has_valid_batch = true;
}
}
if (!has_valid_batch) return;
} else if (ins_id_vec.size() > 0) {
batch_size = ins_id_vec.size();
}
std::vector<std::string> ars(batch_size);
std::vector<bool> hit(batch_size, false);
if (dump_mode_ == 3) {
if (dump_fields_ == NULL || (*dump_fields_).size() == 0) {
return;
Expand Down Expand Up @@ -308,7 +332,7 @@ void DeviceWorker::DumpField(const Scope& scope, int dump_mode,
tensor = &cpu_tensor;
}
auto& dims = tensor->dims();
if (dims.size() != 2 || dims[0] != static_cast<int>(batch_size)) {
if (dims.size() != 2 || dims[0] <= 0) {
VLOG(0) << "Note: field[" << field << "] cannot pass check, so it was "
"skipped. Maybe the dimension is "
"wrong ";
Expand Down Expand Up @@ -348,6 +372,7 @@ void DeviceWorker::DumpField(const Scope& scope, int dump_mode,
}
return;
}
std::vector<bool> hit(batch_size, false);
std::default_random_engine engine(0);
std::uniform_int_distribution<size_t> dist(0U, INT_MAX);
for (size_t i = 0; i < batch_size; i++) {
Expand Down

0 comments on commit 967cb54

Please sign in to comment.