Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
improve error message of cudnn operators
Browse files Browse the repository at this point in the history
  • Loading branch information
Hao Jin committed Jul 25, 2018
1 parent 0b8b939 commit c6d6b2e
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 11 deletions.
16 changes: 12 additions & 4 deletions src/operator/nn/cudnn/cudnn_convolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,9 @@ class CuDNNConvolutionOp {
&& fwd_algo[i].memory > workspace_byte)))
++i;
if (i == nalgo) {
LOG(FATAL) << "Failed to find a forward convolution algorithm.";
LOG(FATAL) << "Failed to find a forward convolution algorithm. "
"Usually this is due to lack of GPU memory, "
"please consider reducing batch size or model size.";
} else {
forward_algo_.Set(fwd_algo[i].algo, false);
}
Expand Down Expand Up @@ -752,7 +754,9 @@ class CuDNNConvolutionOp {
&& bwd_filter_algo[i].memory > workspace_byte)))
++i;
if (i == nalgo) {
LOG(FATAL) << "Failed to find a backward filter convolution algorithm.";
LOG(FATAL) << "Failed to find a backward filter convolution algorithm. "
"Usually this is due to lack of GPU memory, "
"please consider reducing batch size or model size.";
} else {
back_algo_w_.Set(bwd_filter_algo[i].algo, false);
}
Expand Down Expand Up @@ -786,7 +790,9 @@ class CuDNNConvolutionOp {
&& bwd_data_algo[i].memory > workspace_byte)))
++i;
if (i == nalgo) {
LOG(FATAL) << "Failed to find a backward data convolution algorithm.";
LOG(FATAL) << "Failed to find a backward data convolution algorithm. "
"Usually this is due to lack of GPU memory, "
"please consider reducing batch size or model size.";
} else {
back_algo_.Set(bwd_data_algo[i].algo, false);
}
Expand Down Expand Up @@ -833,7 +839,9 @@ class CuDNNConvolutionOp {
}
}
auto mode = param_.cudnn_tune.value() == conv::kOff ? " get " : " find ";
LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " convolution algorithm.";
LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " convolution algorithm. "
<< "Usually this is caused by lack of GPU memory, "
<< "please consider reducing batch size or model size.";
}

void GetTempSize(const OpContext& ctx) {
Expand Down
22 changes: 15 additions & 7 deletions src/operator/nn/cudnn/cudnn_deconvolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -651,8 +651,10 @@ class CuDNNDeconvolutionOp {
&& fwd_algo[i].memory > workspace_byte)))
++i;
if (i == nalgo) {
LOG(FATAL) << "Failed to find a 'forward' convolution algorithm " <<
"(for use in deconvolution operator backprop-to-data).";
LOG(FATAL) << "Failed to find a 'forward' convolution algorithm "
<< "(for use in deconvolution operator backprop-to-data). "
<< "Usually this is due to lack of GPU memory, "
<< "please consider reducing batch size or model size.";
} else {
forward_algo_.Set(fwd_algo[i].algo, false);
}
Expand Down Expand Up @@ -686,8 +688,10 @@ class CuDNNDeconvolutionOp {
&& bwd_filter_algo[i].memory > workspace_byte)))
++i;
if (i == nalgo) {
LOG(FATAL) << "Failed to find a backward filter convolution algorithm " <<
"(for use in deconvolution operator backprop-to-filter).";
LOG(FATAL) << "Failed to find a backward filter convolution algorithm "
<< "(for use in deconvolution operator backprop-to-filter). "
<< "Usually this is due to lack of GPU memory, "
<< "please consider reducing batch size or model size.";
} else {
back_algo_w_.Set(bwd_filter_algo[i].algo, false);
}
Expand Down Expand Up @@ -721,8 +725,10 @@ class CuDNNDeconvolutionOp {
&& bwd_data_algo[i].memory > workspace_byte)))
++i;
if (i == nalgo) {
LOG(FATAL) << "Failed to find a backward data convolution algorithm." <<
"(for use in deconvolution operator forward inference).";
LOG(FATAL) << "Failed to find a backward data convolution algorithm "
<< "(for use in deconvolution operator forward inference). "
<< "Usually this is due to lack of GPU memory, "
<< "please consider reducing batch size or model size.";
} else {
back_algo_.Set(bwd_data_algo[i].algo, false);
}
Expand Down Expand Up @@ -774,7 +780,9 @@ class CuDNNDeconvolutionOp {
}
}
auto mode = param_.cudnn_tune.value() == conv::kOff ? " get " : " find ";
LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " deconvolution algorithm.";
LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " deconvolution algorithm. "
<< "Usually this is due to lack of GPU memory, "
<< "please consider reducing batch size or model size.";
}

void GetTempSize(const OpContext& ctx) {
Expand Down

0 comments on commit c6d6b2e

Please sign in to comment.