From 896b4504de31dec5410e42e6ff34fee0813a3f5e Mon Sep 17 00:00:00 2001 From: xiayanming Date: Sat, 6 Mar 2021 23:15:55 +0800 Subject: [PATCH 1/6] add gather npu op --- paddle/fluid/operators/CMakeLists.txt | 5 + paddle/fluid/operators/gather_op_npu.cc | 118 ++++++++++++ paddle/fluid/operators/gather_op_npu_test.cc | 172 ++++++++++++++++++ .../tests/unittests/test_gather_op_npu.py | 109 +++++++++++ 4 files changed, 404 insertions(+) create mode 100644 paddle/fluid/operators/gather_op_npu.cc create mode 100644 paddle/fluid/operators/gather_op_npu_test.cc create mode 100644 python/paddle/fluid/tests/unittests/test_gather_op_npu.py diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 17234edb116e3..a3964b28eab31 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -151,6 +151,11 @@ else() cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3) endif() +# ascend gather_op_npu unittest +if (WITH_ASCEND_CL) + cc_test(gather_op_npu_test SRCS gather_op_npu_test.cc DEPS gather_op tensor op_registry scope device_context enforce executor) +endif() + cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS}) if (WITH_PYTHON) cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind) diff --git a/paddle/fluid/operators/gather_op_npu.cc b/paddle/fluid/operators/gather_op_npu.cc new file mode 100644 index 0000000000000..cb80c61796d38 --- /dev/null +++ b/paddle/fluid/operators/gather_op_npu.cc @@ -0,0 +1,118 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include +#include +#include +#include "paddle/fluid/operators/npu_op_runner.h" +#include "paddle/fluid/operators/gather_op.h" +#include "paddle/fluid/framework/tensor_util.h" + +namespace paddle { +namespace operators { + +inline framework::Tensor UnsqueezeTo(const framework::Tensor& src, int ndims) { + const framework::DDim& shape = src.dims(); + int rank = shape.size(); + framework::Tensor res; + res.ShareDataWith(src); + PADDLE_ENFORCE_LE( + rank, ndims, + platform::errors::InvalidArgument( + "The input Tensor's rank should be less than or equal to ndims" + "Received input Tensor's rank = %d, ndims = %d", + rank, ndims)); + if (rank < ndims) { + std::vector new_dim(ndims, 1); + for (int i = ndims - rank; i < ndims; i++) { + new_dim[i] = shape[i - ndims + rank]; + } + res.Resize(framework::make_ddim(new_dim)); + } + return res; +} + +template +class GatherOpNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *x = ctx.Input("X"); + auto *index = ctx.Input("Index"); + auto *out = ctx.Output("Out"); + + out->mutable_data(ctx.GetPlace()); + auto runner = NpuOpRunner("Gather", {*x, *index}, {*out}, {{"validate_indices", true}}); + auto stream = + ctx.template device_context() + .stream(); + runner.Run(stream); + } +}; + +template +class GatherGradOpNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + auto *index = ctx.Input("Index"); + auto *x = ctx.Input("X"); + auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); + + // step1: Unsqueeze index + const auto index_dims = index->dims(); + if (index_dims.size() == 1) { + framework::Tensor tmp_index = UnsqueezeTo(*index, 2); + index = &tmp_index; + } + + auto stream = + ctx.template device_context() + .stream(); + + // step2: ZerosLike x in device + Tensor* tmp_zerox = const_cast(x); + Tensor zeroslike_xout(x->type()); + zeroslike_xout.Resize(x->dims()); + zeroslike_xout.mutable_data(ctx.GetPlace()); + + auto runner_zeroslike = NpuOpRunner("ZerosLike", {*x}, {zeroslike_xout}, {}); + runner_zeroslike.Run(stream); + tmp_zerox = &zeroslike_xout; + + // step3: scatter(x_grad) + dx->mutable_data(ctx.GetPlace()); + auto runner_scatter = NpuOpRunner("TensorScatterUpdate", + {*tmp_zerox, *index, *dout}, + {*dx}, {}); + runner_scatter.Run(stream); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_NPU_KERNEL( + gather, + ops::GatherOpNPUKernel, + ops::GatherOpNPUKernel); + +REGISTER_OP_NPU_KERNEL( + gather_grad, + ops::GatherGradOpNPUKernel, + ops::GatherGradOpNPUKernel); +#endif diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc new file mode 100644 index 0000000000000..025b04ac77b75 --- /dev/null +++ b/paddle/fluid/operators/gather_op_npu_test.cc @@ -0,0 +1,172 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef _WIN32 +#include +#endif + +#include +#include // NOLINT +#include + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/operators/gather_op.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/string/printf.h" + +namespace f = paddle::framework; +namespace p = paddle::platform; +namespace m = paddle::operators::math; + +USE_OP(gather); +USE_OP_DEVICE_KERNEL(gather, NPU); +USE_OP(gather_grad); +USE_OP_DEVICE_KERNEL(gather_grad, NPU); + +template +void Compare(f::Scope* scope, const p::DeviceContext& ctx, + std::string op_type) { + // init + auto x = scope->Var("X"); + auto tensor_x = x->GetMutable(); + + auto index = scope->Var("Index"); + auto tensor_index = index->GetMutable(); + + std::vector init_x; + for (int64_t i = 1; i < 7; ++i) { + // 1,2,3,4,5,6 + init_x.push_back(static_cast(i)); + } + + // [[1, 2],[3, 4],[5, 6]] + TensorFromVector(init_x, ctx, tensor_x); + tensor_x->Resize(paddle::framework::make_ddim({3, 2})); + + std::vector init_index = {1, 2}; + paddle::framework::TensorFromVector(init_index, ctx, tensor_index); + tensor_index->Resize(paddle::framework::make_ddim({2})); + + ctx.Wait(); + + auto out = scope->Var("Out"); + auto tensor_out = out->GetMutable(); + + // run + f::AttributeMap attrs = {{"validate_indices", true}}; + auto op = f::OpRegistry::CreateOp(op_type, {{"X", {"X"}}, {"Index", {"Index"}}}, + {{"Out", {"Out"}}}, attrs); + + auto place = ctx.GetPlace(); + op->Run(*scope, place); + + std::vector out_vec; + TensorToVector(*tensor_out, ctx, &out_vec); + + ctx.Wait(); + + // ref:https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/tensor/manipulation/gather_cn.html#gather + for(int i=0; i< static_cast(out_vec.size()); ++i){ + VLOG(3) << "out_vec[" << i<< "] : "<< out_vec[i]; + } + uint32_t expected_size = 4; + EXPECT_EQ((uint32_t)out_vec.size(), expected_size); + + // {3, 4, 5, 6} + std::vector expected_out_vec; + for (int64_t i = 3; i < 7; ++i) { + expected_out_vec.push_back(static_cast(i)); + } + for (uint32_t i = 0; i < out_vec.size(); i++) { + EXPECT_EQ(out_vec[i], expected_out_vec[i]); + } +} + + +template +void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, + std::string op_type) { + // init + auto index = scope->Var("Index"); + auto tensor_index = index->GetMutable(); + + auto x = scope->Var("X"); + auto tensor_x = x->GetMutable(); + + auto dout = scope->Var("DOut"); + auto tensor_dout = dout->GetMutable(); + + //https://tensorflow.google.cn/api_docs/python/tf/raw_ops/TensorScatterUpdate + //https://tensorflow.google.cn/api_docs/python/tf/tensor_scatter_nd_update + std::vector init_index = {0, 1, 2, 0}; + paddle::framework::TensorFromVector(init_index, ctx, tensor_index); + tensor_index->Resize(paddle::framework::make_ddim({2, 2})); + + std::vector init_x = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + TensorFromVector(init_x, ctx, tensor_x); + tensor_x->Resize(paddle::framework::make_ddim({3, 2})); + + std::vector init_dout = {5.0, 10.0}; + TensorFromVector(init_dout, ctx, tensor_dout); + tensor_dout->Resize(paddle::framework::make_ddim({2})); + + ctx.Wait(); + + auto dx = scope->Var("DX"); + auto tensor_dx = dx->GetMutable(); + + // run + f::AttributeMap attrs; + auto op = f::OpRegistry::CreateOp(op_type, + {{"X", {"X"}}, {"Index", {"Index"}}, {"Out@GRAD", {"DOut"}}}, + {{"X@GRAD", {"DX"}}}, attrs); + + auto place = ctx.GetPlace(); + op->Run(*scope, place); + + std::vector dx_vec; + TensorToVector(*tensor_dx, ctx, &dx_vec); + + ctx.Wait(); + + uint32_t expected_size = 3 * 2; + EXPECT_EQ((uint32_t)dx_vec.size(), expected_size); + + std::vector expected_dx_vec = {0.0, 5.0, 0.0, 0.0, 10.0, 0.0}; + for (uint32_t i = 0; i < dx_vec.size(); i++) { + VLOG(3) << "dx_vec[i]=" << dx_vec[i]; + EXPECT_EQ(dx_vec[i], expected_dx_vec[i]); + } +} + +TEST(gather, NPU_fp32) { + f::Scope scope; + p::NPUDeviceContext ctx(p::NPUPlace(0)); + Compare(&scope, ctx, "gather"); +} + +TEST(gather, NPU_fp16) { + f::Scope scope; + p::NPUDeviceContext ctx(p::NPUPlace(0)); + Compare(&scope, ctx, "gather"); +} + +TEST(gather_grad, NPU) { + f::Scope scope; + p::NPUDeviceContext ctx(p::NPUPlace(0)); + CompareGrad(&scope, ctx, "gather_grad"); +} diff --git a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py b/python/paddle/fluid/tests/unittests/test_gather_op_npu.py new file mode 100644 index 0000000000000..b026861c7e9fa --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_gather_op_npu.py @@ -0,0 +1,109 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import unittest +import sys +sys.path.append("..") +from op_test import OpTest, _set_use_system_allocator +import paddle +import paddle.fluid as fluid + + +paddle.enable_static() + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestGatherOp(OpTest): + def setUp(self): + self.set_npu() + self.op_type = "gather" + self.place = paddle.NPUPlace(0) + self.init_dtype() + self.init_input_output() + + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + 'Index': OpTest.np_dtype_to_fluid_dtype(self.index) + } + self.attrs = {'validate_indices': True} + self.outputs = {'Out': self.out} + + def set_npu(self): + self.__class__.use_npu = True + + def init_input_output(self): + self.x = np.array([[1, 2], [3, 4], [5, 6]]).astype(self.dtype) + self.index = np.array([1, 2]).astype(np.int) + self.out = np.array([[3, 4], [5, 6]]).astype(self.dtype) + + def init_dtype(self): + self.dtype = np.float32 + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestGatherAPI(unittest.TestCase): + def test_name(self): + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(name="x", shape=[3, 2], dtype="float32") + index = paddle.static.data(name='index', shape=[1], dtype='int32') + + out = paddle.gather(x, index, name='gather') + self.assertEqual(('gather' in out.name), True) + + def test_static(self): + with paddle.static.program_guard(paddle.static.Program()): + + x_np = np.array([[1, 2], [3, 4], [5, 6]]).astype('float32') + index_np = np.array([1, 2]).astype('int32') + + x = paddle.static.data(name="x", shape=[3, 2], dtype='float32') + index = paddle.static.data(name="index", shape=[2], dtype='int32') + + z = paddle.gather(x, index) + + place = paddle.NPUPlace(0) + exe = paddle.static.Executor(place) + x_value, index_value, z_value = exe.run( + feed={"x": x_np, + "index": index_np}, fetch_list=[x, index, z]) + + z_expected = np.array([[3, 4], [5, 6]]) + self.assertEqual( + (x_value == x_np).all(), + True, + msg="x_value = {}, but expected {}".format(x_value, x_np)) + self.assertEqual( + (index_value == index_np).all(), + True, + msg="index_value = {}, but expected {}".format(index_value, + index_np)) + self.assertEqual( + (z_value == z_expected).all(), + True, + msg="z_value = {}, but expected {}".format(z_value, z_expected)) + + def test_backward(self): + # TODO(ascendrc): Test backward after add grad npu op implemented. + pass + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From eea89c124c9b47f646129ceffe62a76e82d0b28b Mon Sep 17 00:00:00 2001 From: xiayanming Date: Mon, 8 Mar 2021 15:14:15 +0800 Subject: [PATCH 2/6] code review done --- paddle/fluid/operators/gather_op_npu.cc | 5 ++--- python/paddle/fluid/tests/unittests/test_gather_op_npu.py | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/gather_op_npu.cc b/paddle/fluid/operators/gather_op_npu.cc index cb80c61796d38..796617efc1833 100644 --- a/paddle/fluid/operators/gather_op_npu.cc +++ b/paddle/fluid/operators/gather_op_npu.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_ASCEND_CL #include #include #include @@ -115,4 +114,4 @@ REGISTER_OP_NPU_KERNEL( ops::GatherGradOpNPUKernel, ops::GatherGradOpNPUKernel); -#endif + diff --git a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py b/python/paddle/fluid/tests/unittests/test_gather_op_npu.py index b026861c7e9fa..9f9e260b8cff9 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op_npu.py @@ -106,4 +106,5 @@ def test_backward(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() + \ No newline at end of file From 203e842fe82329ed943e7516beccdfa112cc97f2 Mon Sep 17 00:00:00 2001 From: xiayanming Date: Mon, 8 Mar 2021 15:19:48 +0800 Subject: [PATCH 3/6] update python new line --- python/paddle/fluid/tests/unittests/test_gather_op_npu.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py b/python/paddle/fluid/tests/unittests/test_gather_op_npu.py index 9f9e260b8cff9..87f0cd2359995 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py +++ b/python/paddle/fluid/tests/unittests/test_gather_op_npu.py @@ -107,4 +107,3 @@ def test_backward(self): if __name__ == '__main__': unittest.main() - \ No newline at end of file From f6d5f8545276704d1254267e64650970d8e48ab6 Mon Sep 17 00:00:00 2001 From: xiayanming Date: Tue, 9 Mar 2021 15:29:45 +0800 Subject: [PATCH 4/6] precommit --- paddle/fluid/operators/gather_op_npu.cc | 41 ++++++++-------- paddle/fluid/operators/gather_op_npu_test.cc | 49 ++++++++++---------- 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/paddle/fluid/operators/gather_op_npu.cc b/paddle/fluid/operators/gather_op_npu.cc index 796617efc1833..0ef54aca08528 100644 --- a/paddle/fluid/operators/gather_op_npu.cc +++ b/paddle/fluid/operators/gather_op_npu.cc @@ -12,18 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/gather_op.h" #include #include #include -#include "paddle/fluid/operators/npu_op_runner.h" -#include "paddle/fluid/operators/gather_op.h" #include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/npu_op_runner.h" namespace paddle { namespace operators { -inline framework::Tensor UnsqueezeTo(const framework::Tensor& src, int ndims) { - const framework::DDim& shape = src.dims(); +inline framework::Tensor UnsqueezeTo(const framework::Tensor &src, int ndims) { + const framework::DDim &shape = src.dims(); int rank = shape.size(); framework::Tensor res; res.ShareDataWith(src); @@ -50,9 +50,10 @@ class GatherOpNPUKernel : public framework::OpKernel { auto *x = ctx.Input("X"); auto *index = ctx.Input("Index"); auto *out = ctx.Output("Out"); - + out->mutable_data(ctx.GetPlace()); - auto runner = NpuOpRunner("Gather", {*x, *index}, {*out}, {{"validate_indices", true}}); + auto runner = NpuOpRunner("Gather", {*x, *index}, {*out}, + {{"validate_indices", true}}); auto stream = ctx.template device_context() .stream(); @@ -68,7 +69,7 @@ class GatherGradOpNPUKernel : public framework::OpKernel { auto *x = ctx.Input("X"); auto *dout = ctx.Input(framework::GradVarName("Out")); auto *dx = ctx.Output(framework::GradVarName("X")); - + // step1: Unsqueeze index const auto index_dims = index->dims(); if (index_dims.size() == 1) { @@ -79,22 +80,22 @@ class GatherGradOpNPUKernel : public framework::OpKernel { auto stream = ctx.template device_context() .stream(); - - // step2: ZerosLike x in device - Tensor* tmp_zerox = const_cast(x); + + // step2: ZerosLike x in device + Tensor *tmp_zerox = const_cast(x); Tensor zeroslike_xout(x->type()); zeroslike_xout.Resize(x->dims()); zeroslike_xout.mutable_data(ctx.GetPlace()); - auto runner_zeroslike = NpuOpRunner("ZerosLike", {*x}, {zeroslike_xout}, {}); + auto runner_zeroslike = + NpuOpRunner("ZerosLike", {*x}, {zeroslike_xout}, {}); runner_zeroslike.Run(stream); tmp_zerox = &zeroslike_xout; // step3: scatter(x_grad) dx->mutable_data(ctx.GetPlace()); - auto runner_scatter = NpuOpRunner("TensorScatterUpdate", - {*tmp_zerox, *index, *dout}, - {*dx}, {}); + auto runner_scatter = NpuOpRunner("TensorScatterUpdate", + {*tmp_zerox, *index, *dout}, {*dx}, {}); runner_scatter.Run(stream); } }; @@ -104,14 +105,12 @@ class GatherGradOpNPUKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_NPU_KERNEL( - gather, - ops::GatherOpNPUKernel, - ops::GatherOpNPUKernel, + ops::GatherOpNPUKernel); - + REGISTER_OP_NPU_KERNEL( - gather_grad, + gather_grad, ops::GatherGradOpNPUKernel, - ops::GatherGradOpNPUKernel); - diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc index 025b04ac77b75..9348e1c0b516c 100644 --- a/paddle/fluid/operators/gather_op_npu_test.cc +++ b/paddle/fluid/operators/gather_op_npu_test.cc @@ -39,14 +39,14 @@ USE_OP_DEVICE_KERNEL(gather_grad, NPU); template void Compare(f::Scope* scope, const p::DeviceContext& ctx, - std::string op_type) { + std::string op_type) { // init auto x = scope->Var("X"); auto tensor_x = x->GetMutable(); - + auto index = scope->Var("Index"); auto tensor_index = index->GetMutable(); - + std::vector init_x; for (int64_t i = 1; i < 7; ++i) { // 1,2,3,4,5,6 @@ -68,8 +68,8 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, // run f::AttributeMap attrs = {{"validate_indices", true}}; - auto op = f::OpRegistry::CreateOp(op_type, {{"X", {"X"}}, {"Index", {"Index"}}}, - {{"Out", {"Out"}}}, attrs); + auto op = f::OpRegistry::CreateOp( + op_type, {{"X", {"X"}}, {"Index", {"Index"}}}, {{"Out", {"Out"}}}, attrs); auto place = ctx.GetPlace(); op->Run(*scope, place); @@ -78,10 +78,10 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, TensorToVector(*tensor_out, ctx, &out_vec); ctx.Wait(); - + // ref:https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api/paddle/tensor/manipulation/gather_cn.html#gather - for(int i=0; i< static_cast(out_vec.size()); ++i){ - VLOG(3) << "out_vec[" << i<< "] : "<< out_vec[i]; + for (int i = 0; i < static_cast(out_vec.size()); ++i) { + VLOG(3) << "out_vec[" << i << "] : " << out_vec[i]; } uint32_t expected_size = 4; EXPECT_EQ((uint32_t)out_vec.size(), expected_size); @@ -96,7 +96,6 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx, } } - template void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, std::string op_type) { @@ -106,12 +105,12 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, auto x = scope->Var("X"); auto tensor_x = x->GetMutable(); - + auto dout = scope->Var("DOut"); auto tensor_dout = dout->GetMutable(); - //https://tensorflow.google.cn/api_docs/python/tf/raw_ops/TensorScatterUpdate - //https://tensorflow.google.cn/api_docs/python/tf/tensor_scatter_nd_update + // https://tensorflow.google.cn/api_docs/python/tf/raw_ops/TensorScatterUpdate + // https://tensorflow.google.cn/api_docs/python/tf/tensor_scatter_nd_update std::vector init_index = {0, 1, 2, 0}; paddle::framework::TensorFromVector(init_index, ctx, tensor_index); tensor_index->Resize(paddle::framework::make_ddim({2, 2})); @@ -131,9 +130,9 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, // run f::AttributeMap attrs; - auto op = f::OpRegistry::CreateOp(op_type, - {{"X", {"X"}}, {"Index", {"Index"}}, {"Out@GRAD", {"DOut"}}}, - {{"X@GRAD", {"DX"}}}, attrs); + auto op = f::OpRegistry::CreateOp( + op_type, {{"X", {"X"}}, {"Index", {"Index"}}, {"Out@GRAD", {"DOut"}}}, + {{"X@GRAD", {"DX"}}}, attrs); auto place = ctx.GetPlace(); op->Run(*scope, place); @@ -154,19 +153,19 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, } TEST(gather, NPU_fp32) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "gather"); + f::Scope scope; + p::NPUDeviceContext ctx(p::NPUPlace(0)); + Compare(&scope, ctx, "gather"); } TEST(gather, NPU_fp16) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - Compare(&scope, ctx, "gather"); + f::Scope scope; + p::NPUDeviceContext ctx(p::NPUPlace(0)); + Compare(&scope, ctx, "gather"); } -TEST(gather_grad, NPU) { - f::Scope scope; - p::NPUDeviceContext ctx(p::NPUPlace(0)); - CompareGrad(&scope, ctx, "gather_grad"); +TEST(gather_grad, NPU_fp32) { + f::Scope scope; + p::NPUDeviceContext ctx(p::NPUPlace(0)); + CompareGrad(&scope, ctx, "gather_grad"); } From 6941eb783a20111066abcad418078eefe42f96f0 Mon Sep 17 00:00:00 2001 From: xiayanming Date: Tue, 9 Mar 2021 15:57:43 +0800 Subject: [PATCH 5/6] fix review --- paddle/fluid/operators/gather_op_npu.cc | 22 +------------------ .../unittests/{ => npu}/test_gather_op_npu.py | 2 +- 2 files changed, 2 insertions(+), 22 deletions(-) rename python/paddle/fluid/tests/unittests/{ => npu}/test_gather_op_npu.py (100%) diff --git a/paddle/fluid/operators/gather_op_npu.cc b/paddle/fluid/operators/gather_op_npu.cc index 0ef54aca08528..2d7b5b93ad651 100644 --- a/paddle/fluid/operators/gather_op_npu.cc +++ b/paddle/fluid/operators/gather_op_npu.cc @@ -17,32 +17,12 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/kron_op.h" #include "paddle/fluid/operators/npu_op_runner.h" namespace paddle { namespace operators { -inline framework::Tensor UnsqueezeTo(const framework::Tensor &src, int ndims) { - const framework::DDim &shape = src.dims(); - int rank = shape.size(); - framework::Tensor res; - res.ShareDataWith(src); - PADDLE_ENFORCE_LE( - rank, ndims, - platform::errors::InvalidArgument( - "The input Tensor's rank should be less than or equal to ndims" - "Received input Tensor's rank = %d, ndims = %d", - rank, ndims)); - if (rank < ndims) { - std::vector new_dim(ndims, 1); - for (int i = ndims - rank; i < ndims; i++) { - new_dim[i] = shape[i - ndims + rank]; - } - res.Resize(framework::make_ddim(new_dim)); - } - return res; -} - template class GatherOpNPUKernel : public framework::OpKernel { public: diff --git a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/test_gather_op_npu.py rename to python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py index 87f0cd2359995..0fcb2bee658fa 100644 --- a/python/paddle/fluid/tests/unittests/test_gather_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_gather_op_npu.py @@ -22,9 +22,9 @@ import paddle import paddle.fluid as fluid - paddle.enable_static() + @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestGatherOp(OpTest): From 98b75bef92ac306216097b3f9c8555bf4476af35 Mon Sep 17 00:00:00 2001 From: xiayanming Date: Tue, 9 Mar 2021 19:53:57 +0800 Subject: [PATCH 6/6] del commit --- paddle/fluid/operators/gather_op_npu_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/paddle/fluid/operators/gather_op_npu_test.cc b/paddle/fluid/operators/gather_op_npu_test.cc index 9348e1c0b516c..4cd46da6f26f8 100644 --- a/paddle/fluid/operators/gather_op_npu_test.cc +++ b/paddle/fluid/operators/gather_op_npu_test.cc @@ -109,8 +109,6 @@ void CompareGrad(f::Scope* scope, const p::DeviceContext& ctx, auto dout = scope->Var("DOut"); auto tensor_dout = dout->GetMutable(); - // https://tensorflow.google.cn/api_docs/python/tf/raw_ops/TensorScatterUpdate - // https://tensorflow.google.cn/api_docs/python/tf/tensor_scatter_nd_update std::vector init_index = {0, 1, 2, 0}; paddle::framework::TensorFromVector(init_index, ctx, tensor_index); tensor_index->Resize(paddle::framework::make_ddim({2, 2}));