Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added oneDNN reduce_op FWD kernel #31816

Merged
merged 30 commits into from
Apr 14, 2021
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1ecc4cf
added external reorder to profiler
Dec 2, 2020
d4f9ad4
resolved conflicts
jakpiase Mar 8, 2021
f85e7a3
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jakpiase Mar 9, 2021
5c02f89
added mkldnn reduce op kernel
jakpiase Mar 22, 2021
7c3b736
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jakpiase Mar 22, 2021
4147b25
refactored reduce op
jakpiase Mar 23, 2021
726846f
reverted old file
jakpiase Mar 23, 2021
6763404
added clang formatting
jakpiase Mar 23, 2021
f2555e5
removed unnecessary imports and comments
jakpiase Mar 23, 2021
8f80eb5
minor change
jakpiase Mar 23, 2021
539fe3c
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jakpiase Mar 25, 2021
3dfabd9
merged with develop
jakpiase Mar 25, 2021
895f948
Revert "merged with develop"
jakpiase Mar 25, 2021
cd9d2f3
minor change
jakpiase Mar 25, 2021
87fc5a1
fixed misspelling
jakpiase Mar 25, 2021
a75ee12
Minor refactoring
jakpiase Mar 26, 2021
b442889
minor change
jakpiase Mar 26, 2021
27dec3a
imported necessary modules
jakpiase Mar 26, 2021
71089fe
minor change
jakpiase Mar 26, 2021
29097ce
minor formatting change
jakpiase Mar 26, 2021
164043a
excluded cuda from bf test
jakpiase Mar 29, 2021
be36f94
fixed static mode in test_resnet_v2
jakpiase Mar 29, 2021
424083f
added formatting
jakpiase Mar 29, 2021
87b5b38
added support for edge case
jakpiase Apr 7, 2021
e8aac01
removed unnecessary instruction
jakpiase Apr 7, 2021
2eb95dc
added restriction to iterator
jakpiase Apr 13, 2021
4f62cc8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jakpiase Apr 13, 2021
f7b3544
switched from int to size_t for iterator to avoid warning
jakpiase Apr 13, 2021
a00ea30
minor fix
jakpiase Apr 13, 2021
83e7d5c
formatting change
jakpiase Apr 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
platform::ReductionMKLDNNHandler<T> handler_sum(
dnnl::algorithm::reduction_sum, 0.0f, 0.0f, dev_ctx, onednn_engine,
ctx.GetPlace(), dout, dy,
ctx.InputName(framework::GradVarName("Out")));
ctx.InputName(framework::GradVarName("Out")),
CalculateBroadcastedDims(dout, dy));
auto dy_memory_p = handler_sum.AcquireDstMemory(dy);
auto reduction_p = handler_sum.AcquireForwardPrimitive();
reduction_p->execute(astream, {{DNNL_ARG_SRC, *reorder_src_memory_p},
Expand Down
17 changes: 17 additions & 0 deletions paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,22 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
z->set_format(platform::GetMKLDNNFormat(*dst_memory));
}
};

// Extends the (reduced) dims of y with 1s so that its rank matches x's rank,
// as required by oneDNN, which needs src and dst memory descriptors of equal
// dimensionality. E.g. x: [2, 3, 4], y: [2, 4]  ->  returns [2, 1, 4].
// NOTE(review): matching is positional/greedy; it assumes y's dims appear as a
// subsequence of x's dims (the broadcast pattern produced by elementwise
// grad) — TODO confirm for inputs with repeated equal dimensions.
inline std::vector<int64_t> CalculateBroadcastedDims(const Tensor* x,
                                                     const Tensor* y) {
  const auto src_tz = framework::vectorize(x->dims());
  const auto dst_tz = framework::vectorize(y->dims());

  // size_t (not int) to avoid a signed/unsigned comparison warning against
  // dst_tz.size() below.
  size_t j = 0;
  std::vector<int64_t> dst_tz_ex(src_tz.size(), 1);
  for (size_t i = 0; i < src_tz.size(); ++i) {
    dst_tz_ex[i] = (src_tz[i] != dst_tz[j]) ? 1 : dst_tz[j++];
    // Clamp j once every dst dim has been consumed so the indexing above
    // never reads past the end of dst_tz.
    if (j == dst_tz.size()) {
      j--;
    }
  }

  return dst_tz_ex;
}
} // namespace operators
} // namespace paddle
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
platform::ReductionMKLDNNHandler<T> handler_sum(
dnnl::algorithm::reduction_sum, 0.0f, 0.0f, dev_ctx, mkldnn_engine,
ctx.GetPlace(), dout, dy,
ctx.InputName(framework::GradVarName("Out")));
ctx.InputName(framework::GradVarName("Out")),
CalculateBroadcastedDims(dout, dy));
auto dy_memory_p = handler_sum.AcquireDstMemory(dy);
auto reduction_p = handler_sum.AcquireForwardPrimitive();
// As source we use mem object with results from binary operation
Expand Down
34 changes: 34 additions & 0 deletions paddle/fluid/operators/reduce_ops/mkldnn/reduce_max_mkldnn_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h"

namespace paddle {
namespace operators {

// reduce_max forward kernel: thin adapter that dispatches to the shared
// oneDNN reduction implementation with the reduction_max algorithm.
// All attribute handling and the no-op-reduction edge case live in the base
// ReduceMKLDNNKernel.
template <typename T>
class ReduceMaxMKLDNNKernel : public ReduceMKLDNNKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx, dnnl::algorithm::reduction_max);
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(reduce_max, MKLDNN, paddle::platform::CPUPlace,
ops::ReduceMaxMKLDNNKernel<float>,
ops::ReduceMaxMKLDNNKernel<paddle::platform::bfloat16>);
34 changes: 34 additions & 0 deletions paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h"

namespace paddle {
namespace operators {

// reduce_mean forward kernel: thin adapter that dispatches to the shared
// oneDNN reduction implementation with the reduction_mean algorithm.
// All attribute handling and the no-op-reduction edge case live in the base
// ReduceMKLDNNKernel.
template <typename T>
class ReduceMeanMKLDNNKernel : public ReduceMKLDNNKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx, dnnl::algorithm::reduction_mean);
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(reduce_mean, MKLDNN, paddle::platform::CPUPlace,
ops::ReduceMeanMKLDNNKernel<float>,
ops::ReduceMeanMKLDNNKernel<paddle::platform::bfloat16>);
34 changes: 34 additions & 0 deletions paddle/fluid/operators/reduce_ops/mkldnn/reduce_min_mkldnn_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h"

namespace paddle {
namespace operators {

// reduce_min forward kernel: thin adapter that dispatches to the shared
// oneDNN reduction implementation with the reduction_min algorithm.
// All attribute handling and the no-op-reduction edge case live in the base
// ReduceMKLDNNKernel.
template <typename T>
class ReduceMinMKLDNNKernel : public ReduceMKLDNNKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx, dnnl::algorithm::reduction_min);
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(reduce_min, MKLDNN, paddle::platform::CPUPlace,
ops::ReduceMinMKLDNNKernel<float>,
ops::ReduceMinMKLDNNKernel<paddle::platform::bfloat16>);
125 changes: 125 additions & 0 deletions paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

using paddle::framework::LoDTensor;
using paddle::framework::Tensor;
using platform::to_void_cast;

// Shared forward implementation for all oneDNN reduce_* kernels
// (sum/mean/max/min). Concrete kernels call RunKernel with the desired
// dnnl::algorithm.
template <typename T>
class ReduceMKLDNNKernel : public framework::OpKernel<T> {
 public:
  // Executes the reduction described by the op's "dim", "reduce_all" and
  // "keep_dim" attributes on input "X", writing the result to "Out".
  // reduction_type selects the oneDNN algorithm (e.g. reduction_sum).
  void RunKernel(const framework::ExecutionContext& ctx,
                 dnnl::algorithm reduction_type) const {
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& onednn_engine = dev_ctx.GetEngine();

    const auto* input = ctx.Input<LoDTensor>("X");
    auto* output = ctx.Output<Tensor>("Out");

    auto reduce_dims = ctx.Attr<std::vector<int>>("dim");
    bool reduce_all = ctx.Attr<bool>("reduce_all");
    bool keep_dim = ctx.Attr<bool>("keep_dim");

    // Output dims at full input rank (reduced axes become 1); oneDNN needs
    // src/dst descriptors of equal dimensionality.
    std::vector<int64_t> output_dims =
        CalculateOutputDims(input, output, reduce_dims, reduce_all, keep_dim);

    auto input_dims = framework::vectorize(input->dims());

    auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

    // oneDNN reduce op does not support edge case in which memory is being
    // copied without actual reduction.
    // In that case reorder must be executed to maintain compatibility with
    // PaddlePaddle reduce op
    if (input_dims == output_dims) {
      // Nothing is actually reduced: forward the data via a oneDNN reorder
      // (effectively a copy that keeps layout/format metadata consistent).
      mkldnn::memory::data_type input_type =
          framework::ToMKLDNNDataType(input->type());
      std::string key = platform::CreateKey(
          dev_ctx, input_dims, input->format(), input->format(), input_type);
      platform::ReorderMKLDNNHandler reorder_handler(
          input_dims, input->type(), input_type, dev_ctx, onednn_engine, key);

      auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
          input->format(), platform::to_void_cast(input->data<T>()));

      auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
          output, input->format(), ctx.GetPlace());

      auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p,
                                                      reorder_dst_memory_p);

      // Record the reorder in the profiler, since it is executed outside of
      // a regular primitive.
      platform::RecordEvent record_reorder("int_reorder",
                                           platform::EventRole::kUniqueOp);

      reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
      astream.wait();

      output->set_layout(framework::DataLayout::kMKLDNN);
      // Reshape the dst descriptor to the op's actual output dims (which may
      // have fewer axes than the full-rank dims used internally).
      output->set_format(
          platform::GetMKLDNNFormat(reorder_dst_memory_p->get_desc().reshape(
              paddle::framework::vectorize<int64_t>(output->dims()))));
    } else {
      // Regular case: run the oneDNN reduction primitive.
      platform::ReductionMKLDNNHandler<T> handler(
          reduction_type, 0.0f, 0.0f, dev_ctx, onednn_engine, ctx.GetPlace(),
          input, output, ctx.InputName("X"), output_dims);

      auto src_memory_p = handler.AcquireSrcMemory(input);
      auto dst_memory_p = handler.AcquireDstMemory(output);

      std::unordered_map<int, dnnl::memory> reduction_args = {
          {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}};

      auto reduction_p = handler.AcquireForwardPrimitive();

      reduction_p->execute(astream, reduction_args);
      astream.wait();
      output->set_layout(framework::DataLayout::kMKLDNN);
      // As above: reshape the descriptor to the externally visible dims.
      output->set_format(
          platform::GetMKLDNNFormat(dst_memory_p->get_desc().reshape(
              paddle::framework::vectorize<int64_t>(output->dims()))));
    }
  }

 private:
  // Returns the output dims expanded to the input's rank, with every reduced
  // axis set to 1. Also normalizes negative entries of reduce_dims in place
  // (callers pass a local copy of the attribute, so this does not leak).
  std::vector<int64_t> CalculateOutputDims(const Tensor* input,
                                           const Tensor* output,
                                           std::vector<int>& reduce_dims,
                                           bool reduce_all,
                                           bool keep_dim) const {
    // keep_dim: output already has full rank with 1s on reduced axes.
    if (keep_dim) return framework::vectorize(output->dims());

    // reduce_all: everything collapses, so all axes become 1.
    if (reduce_all)
      return std::vector<int64_t>(framework::vectorize(input->dims()).size(),
                                  1);

    std::vector<int64_t> output_dims(framework::vectorize(input->dims()));
    for (size_t i = 0; i < reduce_dims.size(); ++i) {
      // Map negative axis indices (Python-style) to their positive form.
      reduce_dims[i] = (reduce_dims[i] >= 0)
                           ? reduce_dims[i]
                           : input->dims().size() + reduce_dims[i];
      output_dims[reduce_dims[i]] = 1;
    }

    return output_dims;
  }
};

} // namespace operators
} // namespace paddle
34 changes: 34 additions & 0 deletions paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h"

namespace paddle {
namespace operators {

// reduce_sum forward kernel: thin adapter that dispatches to the shared
// oneDNN reduction implementation with the reduction_sum algorithm.
// All attribute handling and the no-op-reduction edge case live in the base
// ReduceMKLDNNKernel.
template <typename T>
class ReduceSumMKLDNNKernel : public ReduceMKLDNNKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    this->RunKernel(ctx, dnnl::algorithm::reduction_sum);
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(reduce_sum, MKLDNN, paddle::platform::CPUPlace,
ops::ReduceSumMKLDNNKernel<float>,
ops::ReduceSumMKLDNNKernel<paddle::platform::bfloat16>);
27 changes: 27 additions & 0 deletions paddle/fluid/operators/reduce_ops/reduce_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,30 @@ class ReduceOp : public framework::OperatorWithKernel {
}
}
}

  // Selects the kernel to run: prefers the oneDNN (MKLDNN) kernel when it is
  // usable for this input, otherwise falls back to the plain kernel on the
  // current place. FP16 inputs are only valid on GPU.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");

    // oneDNN reductions handle at most 5-D tensors; higher ranks go to the
    // default kernel.
    if (ctx.Input<paddle::framework::LoDTensor>("X")->dims().size() > 5)
      return framework::OpKernelType(input_data_type, ctx.GetPlace());

#ifdef PADDLE_WITH_MKLDNN
    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                     framework::DataLayout::kMKLDNN,
                                     framework::LibraryType::kMKLDNN);
    }
#endif

    if (input_data_type == framework::proto::VarType::FP16) {
      PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
                        platform::errors::InvalidArgument(
                            "float16 can only be used on GPU place"));
    }
    return framework::OpKernelType(input_data_type, ctx.GetPlace());
  }
};

class ReduceOpUseInputPlace : public ReduceOp {
Expand Down Expand Up @@ -579,6 +603,9 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker {
"(int, default -1)"
"The dtype of output, default value is -1, the dtype is same as intput")
.SetDefault(-1);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(string::Sprintf(R"DOC(
%s Operator.

Expand Down
14 changes: 3 additions & 11 deletions paddle/fluid/platform/mkldnn_reuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,8 @@ class ReductionMKLDNNHandler
const float eps, const MKLDNNDeviceContext& dev_ctx,
const mkldnn::engine engine, platform::Place cpu_place,
const Tensor* x, const Tensor* y,
const std::string& uniq_name)
const std::string& uniq_name,
std::vector<int64_t> output_dims)
: platform::MKLDNNHandlerT<T, dnnl::reduction>(
dev_ctx, engine, cpu_place,
platform::CreateKey(dev_ctx, framework::vectorize(x->dims()),
Expand All @@ -653,20 +654,11 @@ class ReductionMKLDNNHandler
platform::errors::InvalidArgument("Wrong format set for X tensor."));

const auto src_tz = framework::vectorize(x->dims());
const auto dst_tz = framework::vectorize(y->dims());

// For oneDNN dimensionality should match so we need to
// extend Y tensor dims with values of 1 (before and after pattern)
int j = 0;
std::vector<int64_t> dst_tz_ex(src_tz.size(), 1);
for (size_t i = 0; i < src_tz.size(); ++i) {
dst_tz_ex[i] = (src_tz[i] != dst_tz[j]) ? 1 : dst_tz[j++];
}

const auto src_md = dnnl::memory::desc(
src_tz, platform::MKLDNNGetDataType<T>(), x->format());
const auto dst_md = memory::desc(
dst_tz_ex, platform::MKLDNNGetDataType<T>(), x->format());
output_dims, platform::MKLDNNGetDataType<T>(), x->format());

this->AcquireForwardPrimitiveDescriptor(algo, src_md, dst_md, p, eps);
}
Expand Down
Loading