diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc
index 1415c0b45140..9cf1b71880a8 100644
--- a/src/operator/nn/mkldnn/mkldnn_convolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -85,6 +85,11 @@ mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
     attr.set_int_output_round_mode(round_nearest);
   }
 
+  // MKL-DNN introduced padded formats since 0.15, which require more memory
+  // for computation than the actual tensor size. Currently, MKL-DNN operators
+  // still reuse the memory from memory planning, and that memory may be
+  // smaller than what the MKL-DNN kernels require. So here we need to select
+  // a suboptimal kernel for computation according to the tensor sizes.
   if (param.conv_param.dilate.ndim() == 0 && bias == nullptr) {
     mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct,
         data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero);
@@ -155,6 +160,12 @@ static mkldnn::convolution_backward_data::primitive_desc GetConvBwdData(
   mkldnn::memory::dims padding{0, 0};
   padding[0] = param.pad[0];
   padding[1] = param.pad[1];
+
+  // MKL-DNN introduced padded formats since 0.15, which require more memory
+  // for computation than the actual tensor size. Currently, MKL-DNN operators
+  // still reuse the memory from memory planning, and that memory may be
+  // smaller than what the MKL-DNN kernels require. So here we need to select
+  // a suboptimal kernel for computation according to the tensor sizes.
   if (param.dilate.ndim() == 0) {
     mkldnn::convolution_backward_data::desc desc(mkldnn::algorithm::convolution_direct,
         data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero);
@@ -199,6 +210,12 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights(
   mkldnn::memory::dims padding{0, 0};
   padding[0] = param.pad[0];
   padding[1] = param.pad[1];
+
+  // MKL-DNN introduced padded formats since 0.15, which require more memory
+  // for computation than the actual tensor size. Currently, MKL-DNN operators
+  // still reuse the memory from memory planning, and that memory may be
+  // smaller than what the MKL-DNN kernels require. So here we need to select
+  // a suboptimal kernel for computation according to the tensor sizes.
   if (param.dilate.ndim() == 0 && bias == nullptr) {
     mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
         data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero);
@@ -229,26 +246,26 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights(
       mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
           data_md, weight_md, out_md, strides, dilates, padding, padding, mkldnn::padding_kind::zero);
-    auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
-    while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
-           conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
-           conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
-      CHECK(conv_pd.next_impl()) << "No implementation";
-    }
-    return conv_pd;
+      auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
+      while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
+             conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
+             conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
+        CHECK(conv_pd.next_impl()) << "No implementation";
+      }
+      return conv_pd;
     } else {
       auto bias_md = GetMemDesc(*bias);
       mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
           data_md, weight_md, bias_md, out_md, strides, dilates, padding, padding, mkldnn::padding_kind::zero);
-    auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
-    while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
-           conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
-           conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
-      CHECK(conv_pd.next_impl()) << "No implementation";
-    }
-    return conv_pd;
+      auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
+      while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
+             conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
+             conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
+        CHECK(conv_pd.next_impl()) << "No implementation";
+      }
+      return conv_pd;
     }
   }
 }
diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
index 87e89646d7fe..93032f7c92d6 100644
--- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -52,6 +52,11 @@ static mkldnn::convolution_forward::primitive_desc GetDeconvBwd_(
     bool has_bias, const mkldnn::memory::desc &out_md,
     const mkldnn::engine &engine, const mkldnn::memory::dims &strides,
     const mkldnn::memory::dims &padding, const mkldnn::memory::dims &dilates) {
+  // MKL-DNN introduced padded formats since 0.15, which require more memory
+  // for computation than the actual tensor size. Currently, MKL-DNN operators
+  // still reuse the memory from memory planning, and that memory may be
+  // smaller than what the MKL-DNN kernels require. So here we need to select
+  // a suboptimal kernel for computation according to the tensor sizes.
   if (!has_bias) {
     mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
         mkldnn::algorithm::convolution_direct, out_md, weights_md, data_md, strides,
@@ -103,6 +108,11 @@ static mkldnn::convolution_backward_data::primitive_desc GetDeconvFwdImpl(
       out_md, weight_md, data_md, strides, dilate, padding, padding, mkldnn::padding_kind::zero);
   auto deconv_pd = mkldnn::convolution_backward_data::primitive_desc(desc, engine, bwd_pd);
+  // MKL-DNN introduced padded formats since 0.15, which require more memory
+  // for computation than the actual tensor size. Currently, MKL-DNN operators
+  // still reuse the memory from memory planning, and that memory may be
+  // smaller than what the MKL-DNN kernels require. So here we need to select
+  // a suboptimal kernel for computation according to the tensor sizes.
   while (deconv_pd.diff_dst_primitive_desc().get_size() != GetMemDescSize(data_md) ||
          deconv_pd.diff_src_primitive_desc().get_size() != GetMemDescSize(out_md) ||
          deconv_pd.weights_primitive_desc().get_size() != GetMemDescSize(weight_md)) {
@@ -155,6 +165,12 @@ GetDeconvBwdWeightsImpl(
   mkldnn::memory::dims dilate{0, 0};
   dilate[0] = param.dilate[0] - 1;
   dilate[1] = param.dilate[1] - 1;
+
+  // MKL-DNN introduced padded formats since 0.15, which require more memory
+  // for computation than the actual tensor size. Currently, MKL-DNN operators
+  // still reuse the memory from memory planning, and that memory may be
+  // smaller than what the MKL-DNN kernels require. So here we need to select
+  // a suboptimal kernel for computation according to the tensor sizes.
   if (!has_bias) {
     mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
         out_md, weight_md, data_md, strides, dilate, padding, padding, mkldnn::padding_kind::zero);
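
Every hunk above applies the same fallback pattern: create the primitive descriptor, then walk MKL-DNN's list of candidate implementations with next_impl() until one reports memory sizes equal to the buffers that MXNet's memory planner actually allocated. As a rough sketch of that pattern only (the helper name SelectConvFwdImpl is invented for illustration; GetMemDescSize is the size helper already used in the deconvolution hunks, CHECK is the usual MXNet logging macro, and the MKL-DNN 0.x C++ API is assumed), a forward-convolution variant could look like this:

  // Sketch only: pick the first convolution_forward implementation whose
  // src/weights/dst memory sizes equal the plain (unpadded) tensor sizes,
  // falling back to slower kernels whenever a padded-format kernel would
  // need more memory than was planned for the tensors.
  static mkldnn::convolution_forward::primitive_desc SelectConvFwdImpl(
      const mkldnn::convolution_forward::desc &desc, const mkldnn::engine &engine,
      const mkldnn::memory::desc &data_md, const mkldnn::memory::desc &weight_md,
      const mkldnn::memory::desc &out_md) {
    auto conv_pd = mkldnn::convolution_forward::primitive_desc(desc, engine);
    while (conv_pd.src_primitive_desc().get_size() != GetMemDescSize(data_md) ||
           conv_pd.weights_primitive_desc().get_size() != GetMemDescSize(weight_md) ||
           conv_pd.dst_primitive_desc().get_size() != GetMemDescSize(out_md)) {
      // next_impl() returns false once all candidate kernels are exhausted.
      CHECK(conv_pd.next_impl()) << "No implementation matches the tensor sizes";
    }
    return conv_pd;
  }

If no candidate matches, the CHECK fires, mirroring the CHECK(conv_pd.next_impl()) << "No implementation" calls in the patch; the trade-off is accepting a slower kernel rather than letting a padded-format kernel write past the smaller buffer produced by memory planning.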