From c8db1bd987e63d34bd514cfe9e168b4dbe0e8d33 Mon Sep 17 00:00:00 2001 From: chenfeiyu Date: Tue, 16 Nov 2021 08:58:37 +0000 Subject: [PATCH 1/4] disable copying of datatype when sharing buffer between two tensors. --- paddle/fluid/framework/tensor.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 539859c45c907..c53ff6b1bcc6b 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -254,7 +254,10 @@ class Tensor { void ShareBufferWith(const Tensor& tensor) { holder_ = tensor.holder_; offset_ = tensor.offset_; - type_ = tensor.type_; + // NOTE(chenfeiyu): when sharing buffer, by definition only holder + // to the memory allocation and offset should be shared. Shape, + // data type, layout, and other metadata associated with a Tensor + // should not be copied. } bool IsSharedBufferWith(const Tensor& src) const { From f7864fc01b161fe66cfad364434e0703a48ac5a6 Mon Sep 17 00:00:00 2001 From: chenfeiyu Date: Fri, 19 Nov 2021 08:34:31 +0000 Subject: [PATCH 2/4] fix for elementwise_mkldnn_op, mannually set the data type when reusing memory --- .../mkldnn/elementwise_mkldnn_op.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index ffcdc079985fa..6881c17646324 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -62,9 +62,22 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { // and they share a buffer (of // shape x) then this buffer is not big enough to hold result of elementwise // operation. - auto dst_memory = (x->numel() == z->numel() && x->IsSharedBufferWith(*z)) - ? src_x_memory - : handler.AcquireDstMemory(z); + const bool reuse_x_memopry = + x->numel() == z->numel() && x->IsSharedBufferWith(*z); + std::shared_ptr dst_memory = nullptr; + if (reuse_x_memopry) { + dst_memory = src_x_memory; + // NOTE(chenfeiyu): when the output reuses memory from other tensor rather + // than allocate its own, it's still need to take care of its data type. + // Unfortunately, paddle's operator only infers the output' shape, but not + // the data type. mutable_data takes care of allocation and data type + // normally, but if the memory is already allocated and there is no need + // to re-allocate, it just set the data type. So this it added there to + // get the right data type. + z->mutable_data(ctx.GetPlace()); + } else { + dst_memory = handler.AcquireDstMemory(z); + } const auto binary_prim = handler.AcquireForwardPrimitive(); From c76cd83d85b3ede030acf2d536534025f1c436e5 Mon Sep 17 00:00:00 2001 From: chenfeiyu Date: Sun, 21 Nov 2021 09:07:33 +0000 Subject: [PATCH 3/4] fix for sum_mkldnn_op, set output datatype manually when sharing buffer --- paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 8208a484b4a32..2760bcecd5b67 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -137,8 +137,13 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { ++input_index; } - auto dst_mem = in_place ? handler.AcquireDstMemory() - : handler.AcquireDstMemory(output); + std::shared_ptr dst_mem = nullptr; + if (in_place) { + dst_mem = handler.AcquireDstMemory(); + output->mutable_data(ctx.GetPlace()); + } else { + dst_mem = handler.AcquireDstMemory(output); + } auto sum_p = handler.AcquireForwardPrimitive(); From 78793c0de7e2991556470f14d7742164ea852b2f Mon Sep 17 00:00:00 2001 From: chenfeiyu Date: Mon, 22 Nov 2021 04:25:11 +0000 Subject: [PATCH 4/4] =?UTF-8?q?fix=20for=20other=20mkldnn=20kernels=20that?= =?UTF-8?q?=20support=20inplace=20computation=EF=BC=88softplus,=20softmax,?= =?UTF-8?q?=20scale,=20activation)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../operators/mkldnn/activation_mkldnn_op.cc | 8 +++++++- .../fluid/operators/mkldnn/scale_mkldnn_op.cc | 9 +++++++-- .../operators/mkldnn/softmax_mkldnn_op.cc | 10 +++++++--- .../operators/mkldnn/softplus_mkldnn_op.h | 20 ++++++++++++------- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 29e18da590ebc..4bde641d2c1af 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -91,7 +91,13 @@ void eltwise_forward(const framework::ExecutionContext &ctx, ctx.GetPlace(), x); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(y); + std::shared_ptr dst_memory_p = nullptr; + if (is_inplaced) { + dst_memory_p = src_memory_p; + y->mutable_data(ctx.GetPlace()); + } else { + dst_memory_p = handler.AcquireDstMemory(y); + } auto activation_p = handler.AcquireForwardPrimitive(); auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc index 84ac14d04b85b..b8b735e96d24d 100644 --- a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc @@ -41,8 +41,13 @@ class ScaleMKLDNNKernel : public framework::OpKernel { x); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = - is_inplaced ? src_memory_p : handler.AcquireDstMemory(out); + std::shared_ptr dst_memory_p = nullptr; + if (is_inplaced) { + dst_memory_p = src_memory_p; + out->mutable_data(ctx.GetPlace()); + } else { + dst_memory_p = handler.AcquireDstMemory(out); + } auto activation_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index b0f27719bf9ad..c26c017596d32 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -103,9 +103,13 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { auto softmax_src_memory_p = handler.AcquireSrcMemory(input); // For Inplace src and and dst are the same memory object - auto softmax_dst_memory_p = - is_inplaced ? softmax_src_memory_p : handler.AcquireDstMemory(output); - + std::shared_ptr softmax_dst_memory_p = nullptr; + if (is_inplaced) { + softmax_dst_memory_p = softmax_src_memory_p; + output->mutable_data(ctx.GetPlace()); + } else { + softmax_dst_memory_p = handler.AcquireDstMemory(output); + } auto softmax_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h index 60ea5136905de..c8c539a956505 100644 --- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h +++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once #include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { @@ -43,7 +44,7 @@ class SoftplusMKLDNNHandler 1.0f / beta, 0.0f); } - AppendFusedActivationIfExists(ctx, post_ops); + AppendFusedActivationIfExists(ctx, &post_ops); dnnl::primitive_attr attrs; attrs.set_post_ops(post_ops); @@ -59,16 +60,16 @@ class SoftplusMKLDNNHandler private: void AppendFusedActivationIfExists(const framework::ExecutionContext& ctx, - dnnl::post_ops& post_ops) { + dnnl::post_ops* post_ops) { const auto& fused_activation_type = algo_map.find(ctx.Attr("fuse_activation_type")); if (fused_activation_type != algo_map.end()) { auto scale_out = ctx.Attr("fuse_activation_scale"); // for future int8 support - post_ops.append_eltwise(scale_out, fused_activation_type->second, - ctx.Attr("fuse_activation_alpha"), - ctx.Attr("fuse_activation_beta")); + post_ops->append_eltwise(scale_out, fused_activation_type->second, + ctx.Attr("fuse_activation_alpha"), + ctx.Attr("fuse_activation_beta")); } } @@ -109,8 +110,13 @@ void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) { auto src_memory_p = handler.AcquireSrcMemory(x); auto beta_memory_p = handler.AcquireBetaMemory(&beta); - auto dst_memory_p = - is_inplaced ? src_memory_p : handler.AcquireDstMemory(out); + std::shared_ptr dst_memory_p = nullptr; + if (is_inplaced) { + dst_memory_p = src_memory_p; + out->mutable_data(ctx.GetPlace()); + } else { + dst_memory_p = handler.AcquireDstMemory(out); + } auto binary_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();