From 06a5f3a5f8f593fba2f4882e7108aad4d53c376c Mon Sep 17 00:00:00 2001 From: oyxuan-11 <963650125@qq.com> Date: Mon, 15 Mar 2021 12:46:27 +0800 Subject: [PATCH 1/2] Support npu kernel scatter op --- paddle/fluid/operators/scatter_op_npu.cc | 93 +++++++++++++++++++ .../unittests/npu/test_scatter_op_npu.py | 59 ++++++++++++ 2 files changed, 152 insertions(+) create mode 100755 paddle/fluid/operators/scatter_op_npu.cc create mode 100755 python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py diff --git a/paddle/fluid/operators/scatter_op_npu.cc b/paddle/fluid/operators/scatter_op_npu.cc new file mode 100755 index 0000000000000..249e77ecf08b2 --- /dev/null +++ b/paddle/fluid/operators/scatter_op_npu.cc @@ -0,0 +1,93 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include +#include + +#include "paddle/fluid/operators/scatter_op.h" +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { + +inline framework::Tensor UnsqueezeTo(const framework::Tensor& src, int ndims) { + const framework::DDim& shape = src.dims(); + int rank = shape.size(); + framework::Tensor res; + res.ShareDataWith(src); + PADDLE_ENFORCE_LE( + rank, ndims, + platform::errors::InvalidArgument( + "The input Tensor's rank should be less than or equal to ndims" + "Received input Tensor's rank = %d, ndims = %d", + rank, ndims)); + if (rank < ndims) { + std::vector new_dim(ndims, 1); + for (int i = ndims - rank; i < ndims; i++) { + new_dim[i] = shape[i - ndims + rank]; + } + res.Resize(framework::make_ddim(new_dim)); + } + return res; +} + +using Tensor = framework::Tensor; + +template +class ScatterNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + + auto* x = ctx.Input("X"); + auto* ids = ctx.Input("Ids"); + auto* updates = ctx.Input("Updates"); + bool overwrite = ctx.Attr("overwrite"); + + auto* out = ctx.Output("Out"); + + auto place = ctx.GetPlace(); + out->mutable_data(place); + + const auto index_dims = ids->dims(); + if (index_dims.size() == 1) { + framework::Tensor tmp_index = UnsqueezeTo(*ids, 2); + ids = &tmp_index; + } + + auto stream = + ctx.template device_context() + .stream(); + + if (overwrite){ + auto runner_update = NpuOpRunner("TensorScatterUpdate", {*x, *ids, *updates}, {*out}, {}); + runner_update.Run(stream); + } + else{ + auto runner_add = NpuOpRunner("TensorScatterAdd", {*x, *ids, *updates}, {*out}, {}); + runner_add.Run(stream); + } + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL( + scatter, + ops::ScatterNPUKernel, + ops::ScatterNPUKernel); +#endif diff --git a/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py new file mode 100755 index 0000000000000..3cdf61ca8e693 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py @@ -0,0 +1,59 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import unittest +import sys +sys.path.append("..") +from op_test import OpTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core + +paddle.enable_static() +SEED = 2021 + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestCast1(OpTest): + def setUp(self): + self.set_npu() + self.op_type = "scatter" + self.place = paddle.NPUPlace(0) + + #ref_np = np.ones((3, 50)).astype("float32") + #index_np = np.array([1, 2]).astype("int32") + #updates_np = np.random.random((2, 50)).astype("float32") + + ref_np = np.ones((3, 2)).astype("float32") + index_np = np.array([1]).astype("int32") + updates_np = np.random.random((1, 2)).astype("float32") + + output_np = np.copy(ref_np) + output_np[index_np] = updates_np + self.inputs = {'X': ref_np, 'Ids': index_np, 'Updates': updates_np} + self.outputs = {'Out': output_np} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + + +if __name__ == '__main__': + unittest.main() From dfc3d4d7863380939ce9e673928bcd22959b1103 Mon Sep 17 00:00:00 2001 From: oyxuan-11 <963650125@qq.com> Date: Mon, 15 Mar 2021 15:15:50 +0800 Subject: [PATCH 2/2] Add more test --- paddle/fluid/operators/scatter_op_npu.cc | 39 +++------- .../unittests/npu/test_scatter_op_npu.py | 73 ++++++++++++++++++- 2 files changed, 80 insertions(+), 32 deletions(-) diff --git a/paddle/fluid/operators/scatter_op_npu.cc b/paddle/fluid/operators/scatter_op_npu.cc index 249e77ecf08b2..fb6958e9046cd 100755 --- a/paddle/fluid/operators/scatter_op_npu.cc +++ b/paddle/fluid/operators/scatter_op_npu.cc @@ -17,32 +17,12 @@ limitations under the License. */ #include #include "paddle/fluid/operators/scatter_op.h" +#include "paddle/fluid/operators/kron_op.h" #include "paddle/fluid/operators/npu_op_runner.h" namespace paddle { namespace operators { -inline framework::Tensor UnsqueezeTo(const framework::Tensor& src, int ndims) { - const framework::DDim& shape = src.dims(); - int rank = shape.size(); - framework::Tensor res; - res.ShareDataWith(src); - PADDLE_ENFORCE_LE( - rank, ndims, - platform::errors::InvalidArgument( - "The input Tensor's rank should be less than or equal to ndims" - "Received input Tensor's rank = %d, ndims = %d", - rank, ndims)); - if (rank < ndims) { - std::vector new_dim(ndims, 1); - for (int i = ndims - rank; i < ndims; i++) { - new_dim[i] = shape[i - ndims + rank]; - } - res.Resize(framework::make_ddim(new_dim)); - } - return res; -} - using Tensor = framework::Tensor; template @@ -51,7 +31,7 @@ class ScatterNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); - auto* ids = ctx.Input("Ids"); + auto* index = ctx.Input("Ids"); auto* updates = ctx.Input("Updates"); bool overwrite = ctx.Attr("overwrite"); @@ -60,22 +40,25 @@ class ScatterNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); out->mutable_data(place); - const auto index_dims = ids->dims(); + framework::Tensor tmp_tensor(index->type()); + const auto index_dims = index->dims(); if (index_dims.size() == 1) { - framework::Tensor tmp_index = UnsqueezeTo(*ids, 2); - ids = &tmp_index; - } + tmp_tensor.ShareDataWith(*index); + std::vector new_dim = {index_dims[0], 1}; + tmp_tensor.Resize(framework::make_ddim(new_dim)); + index = &tmp_tensor; + } auto stream = ctx.template device_context() .stream(); if (overwrite){ - auto runner_update = NpuOpRunner("TensorScatterUpdate", {*x, *ids, *updates}, {*out}, {}); + auto runner_update = NpuOpRunner("TensorScatterUpdate", {*x, *index, *updates}, {*out}, {}); runner_update.Run(stream); } else{ - auto runner_add = NpuOpRunner("TensorScatterAdd", {*x, *ids, *updates}, {*out}, {}); + auto runner_add = NpuOpRunner("TensorScatterAdd", {*x, *index, *updates}, {*out}, {}); runner_add.Run(stream); } } diff --git a/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py index 3cdf61ca8e693..3110672b2dab6 100755 --- a/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_scatter_op_npu.py @@ -35,10 +35,6 @@ def setUp(self): self.op_type = "scatter" self.place = paddle.NPUPlace(0) - #ref_np = np.ones((3, 50)).astype("float32") - #index_np = np.array([1, 2]).astype("int32") - #updates_np = np.random.random((2, 50)).astype("float32") - ref_np = np.ones((3, 2)).astype("float32") index_np = np.array([1]).astype("int32") updates_np = np.random.random((1, 2)).astype("float32") @@ -47,6 +43,7 @@ def setUp(self): output_np[index_np] = updates_np self.inputs = {'X': ref_np, 'Ids': index_np, 'Updates': updates_np} self.outputs = {'Out': output_np} + self.attrs = {'overwrite': True} def set_npu(self): self.__class__.use_npu = True @@ -55,5 +52,73 @@ def test_check_output(self): self.check_output_with_place(self.place, check_dygraph=False) +class TestCast2(OpTest): + def setUp(self): + self.set_npu() + self.op_type = "scatter" + self.place = paddle.NPUPlace(0) + + ref_np = np.ones((3, 2)).astype("int32") + index_np = np.array([1]).astype("int32") + updates_np = np.zeros((1, 2)).astype("int32") + + output_np = np.copy(ref_np) + output_np[index_np] = updates_np + self.inputs = {'X': ref_np, 'Ids': index_np, 'Updates': updates_np} + self.outputs = {'Out': output_np} + self.attrs = {'overwrite': True} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + +class TestCast3(OpTest): + def setUp(self): + self.set_npu() + self.op_type = "scatter" + self.place = paddle.NPUPlace(0) + + ref_np = np.ones((3, 2)).astype("float32") + index_np = np.array([1]).astype("int32") + updates_np = np.random.random((1, 2)).astype("float32") + + output_np = np.copy(ref_np) + output_np[index_np] += updates_np + self.inputs = {'X': ref_np, 'Ids': index_np, 'Updates': updates_np} + self.outputs = {'Out': output_np} + self.attrs = {'overwrite': False} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + + +class TestCast4(OpTest): + def setUp(self): + self.set_npu() + self.op_type = "scatter" + self.place = paddle.NPUPlace(0) + + ref_np = np.ones((3, 2)).astype("float32") + index_np = np.array([1, 2]).astype("int32") + updates_np = np.random.random((2, 2)).astype("float32") + + output_np = np.copy(ref_np) + output_np[1] = updates_np[0] + output_np[2] = updates_np[1] + self.inputs = {'X': ref_np, 'Ids': index_np, 'Updates': updates_np} + self.outputs = {'Out': output_np} + self.attrs = {'overwrite': True} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + if __name__ == '__main__': unittest.main()