From 2d933a7eba6499b10d6d23ead1054b298ce1d77e Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Wed, 14 May 2025 14:25:33 +0000
Subject: [PATCH 1/7] fix 0-size Tensor for expand kernel in oneDNN

---
 paddle/phi/kernels/onednn/expand_kernel.cc | 37 ++++++++++++++++++-
 test/legacy_test/test_expand_v2_op.py      | 42 ++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/paddle/phi/kernels/onednn/expand_kernel.cc b/paddle/phi/kernels/onednn/expand_kernel.cc
index 91dc75409a5ba4..450b748a863307 100644
--- a/paddle/phi/kernels/onednn/expand_kernel.cc
+++ b/paddle/phi/kernels/onednn/expand_kernel.cc
@@ -39,16 +39,51 @@ void ExpandKernel(const Context& dev_ctx,
   auto x_vec_dims = common::vectorize(x.dims());
   auto out_new_dims = shape.GetData();
 
+  bool has_zero_size = false;
   for (size_t i = 0; i < out_new_dims.size(); ++i) {
-    out_new_dims[i] = out_new_dims[i] > 0 ? out_new_dims[i] : x_vec_dims[i];
+    out_new_dims[i] = out_new_dims[i] >= 0 ? out_new_dims[i] : x_vec_dims[i];
   }
 
   if (x_vec_dims.size() != out_new_dims.size()) {
     x_vec_dims = GetExtendedXDims(x_vec_dims, out_new_dims.size());  // NOLINT
   }
 
+  for (size_t i = 0; i < x_vec_dims.size(); ++i) {
+    PADDLE_ENFORCE_GE(
+        out_new_dims[i],
+        0,
+        common::errors::InvalidArgument(
+            "The expanded size (%d) must be non-negative "
+            "for expand_v2 op.",
+            out_new_dims[i]));
+
+    PADDLE_ENFORCE_GE(
+        x_vec_dims[i],
+        0,
+        common::errors::InvalidArgument(
+            "The input size (%d) must be non-negative "
+            "for expand_v2 op.",
+            x_vec_dims[i]));
+
+    PADDLE_ENFORCE_EQ(
+        x_vec_dims[i] == 1 || x_vec_dims[i] == out_new_dims[i],
+        true,
+        common::errors::InvalidArgument(
+            "The value (%d) of the non-singleton dimension does not match"
+            " the corresponding value (%d) in shape for expand_v2 op.",
+            x_vec_dims[i],
+            out_new_dims[i]));
+    if (out_new_dims[i] == 0) {
+      has_zero_size = true;
+    }
+  }
+
   out->Resize(common::make_ddim(out_new_dims));
+  if (has_zero_size) {
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   funcs::BroadcastDataOneDNNHandler<T> handler(dnnl::algorithm::binary_add,
                                                onednn_engine,
                                                dev_ctx.GetPlace(),
diff --git a/test/legacy_test/test_expand_v2_op.py b/test/legacy_test/test_expand_v2_op.py
index 275f18a5e3a282..3ddac9134b0840 100644
--- a/test/legacy_test/test_expand_v2_op.py
+++ b/test/legacy_test/test_expand_v2_op.py
@@ -684,6 +684,48 @@ def test_value_list_shape2(self):
             x = paddle.expand(x, shape=[shape1, 1, -1, -1])
             np.testing.assert_equal(tuple(x.shape), (-1, 1, -1, -1))
 
+
+class TestExpandV2OneDNNOp(OpTest):
+    def setUp(self):
+        self.op_type = "expand_v2"
+        self.init_data()
+        self.x = np.random.random(self.ori_shape).astype("float32")
+        self.attrs = {'shape': self.shape, 'use_mkldnn': True}
+        self.set_inputs()
+        self.set_additional_inputs()
+        output = np.zeros(self.expect_shape).astype("float32")
+        self.outputs = {'Out': output}
+
+    def set_inputs(self):
+        self.inputs = {'X': self.x}
+
+    def set_additional_inputs(self):
+        pass
+
+    def init_data(self):
+        self.ori_shape = [1, 1, 1, 140]
+        self.shape = [2, 3, 0, 140]
+        self.expect_shape = [2, 3, 0, 140]
+
+    def test_check_output(self):
+        self.check_output_with_place(core.CPUPlace(), check_pir_onednn=True,check_dygraph=False)
+
+    # def test_check_grad(self):
+    #     self.check_grad_with_place(
+    #         core.CPUPlace(), ["X"], "Out", check_pir_onednn=True, check_dygraph=False
+    #     )
+
+
+class TestExpandV2ZeroSizeOneDNNOp(TestExpandV2OneDNNOp):
+
+    def init_data(self):
+        self.ori_shape = (1, 3)
+        self.shape = (0, 3)
+        self.expect_shape = (0, 3)
+
+
+class TestExpandV2ZeroSizeOneDNNOp2(TestExpandV2OneDNNOp):
+
+    def init_data(self):
+        self.ori_shape = (1, 3)
+        self.shape = (1, 0, 3)
+        self.expect_shape = (1, 0, 3)
 
 if __name__ == "__main__":
     paddle.enable_static()

From fd73310013fab7bd3d1fd5d5bd1f23b3247b6dcd Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Wed, 14 May 2025 14:27:37 +0000
Subject: [PATCH 2/7] format

---
 paddle/phi/kernels/onednn/expand_kernel.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/kernels/onednn/expand_kernel.cc b/paddle/phi/kernels/onednn/expand_kernel.cc
index 450b748a863307..2c8fc702d7ff78 100644
--- a/paddle/phi/kernels/onednn/expand_kernel.cc
+++ b/paddle/phi/kernels/onednn/expand_kernel.cc
@@ -57,7 +57,7 @@ void ExpandKernel(const Context& dev_ctx,
             "The expanded size (%d) must be non-negative "
             "for expand_v2 op.",
             out_new_dims[i]));
-    
+
     PADDLE_ENFORCE_GE(
         x_vec_dims[i],
         0,

From 2fcecfbd29dca951c261ec2bb0f7e4e5522ccc09 Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Fri, 16 May 2025 04:33:53 +0000
Subject: [PATCH 3/7] expand_grad: support 0-size Tensor

---
 paddle/phi/kernels/gpu/expand_grad_kernel.cu |   6 ++
 .../kernels/impl/expand_grad_kernel_impl.h   |   7 ++
 .../phi/kernels/onednn/expand_grad_kernel.cc |   8 ++
 test/legacy_test/test_expand_v2_op.py        | 100 +++++++++++++++---
 4 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/paddle/phi/kernels/gpu/expand_grad_kernel.cu b/paddle/phi/kernels/gpu/expand_grad_kernel.cu
index 224e435e58c851..63f174a0129528 100644
--- a/paddle/phi/kernels/gpu/expand_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/expand_grad_kernel.cu
@@ -17,6 +17,7 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/reduce_function.h"
 #include "paddle/phi/kernels/reduce_sum_kernel.h"
 
@@ -29,6 +30,11 @@ void ExpandGradKernel(const Context& ctx,
                       const IntArray& shape,
                       DenseTensor* x_grad) {
   ctx.template Alloc<T>(x_grad);
+  if ((x_grad && x_grad->numel() == 0) || out_grad.numel() == 0) {
+    phi::Full<T, Context>(
+        ctx, phi::IntArray(common::vectorize(x_grad->dims())), 0, x_grad);
+    return;
+  }
   if (x_grad->dims() == out_grad.dims()) {
     phi::Copy(ctx, out_grad, ctx.GetPlace(), false, x_grad);
   } else {
diff --git a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
index 7bc5cc14f31b96..604805d486c1a2 100644
--- a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/phi/kernels/impl/expand_kernel_impl.h"
@@ -54,6 +55,12 @@ void ExpandGradKernel(const Context& ctx,
                       DenseTensor* in_grad) {
   auto expand_shape = shape.GetData();
   auto x_dims = x.dims();
+  if (out_grad.numel() == 0 || (in_grad && in_grad->numel() == 0)) {
+    ctx.template Alloc<T>(in_grad);
+    phi::Full<T, Context>(
+        ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
+    return;
+  }
 
   if (in_grad->dims() == out_grad.dims()) {
     phi::Copy(ctx, out_grad, ctx.GetPlace(), false, in_grad);
diff --git a/paddle/phi/kernels/onednn/expand_grad_kernel.cc b/paddle/phi/kernels/onednn/expand_grad_kernel.cc
index 7de901df9561d7..fd78a2e8f02928 100644
--- a/paddle/phi/kernels/onednn/expand_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/expand_grad_kernel.cc
@@ -16,6 +16,7 @@
 
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 
 namespace phi {
 
 template <typename T, typename Context>
@@ -26,6 +27,13 @@ void ExpandGradKernel(const Context& dev_ctx,
                       DenseTensor* in_grad) {
   const auto& onednn_engine = dev_ctx.GetEngine();
 
+  if ((in_grad && in_grad->numel() == 0) || out_grad.numel() == 0) {
+    dev_ctx.template Alloc<T>(in_grad);
+    phi::Full<T, Context>(
+        dev_ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
+    return;
+  }
+
   auto in_grad_vec_dims = common::vectorize(in_grad->dims());
   auto out_grad_vec_dims = common::vectorize(out_grad.dims());
diff --git a/test/legacy_test/test_expand_v2_op.py b/test/legacy_test/test_expand_v2_op.py
index 3ddac9134b0840..55f46a73d2b9f3 100644
--- a/test/legacy_test/test_expand_v2_op.py
+++ b/test/legacy_test/test_expand_v2_op.py
@@ -684,12 +684,18 @@ def test_value_list_shape2(self):
             x = paddle.expand(x, shape=[shape1, 1, -1, -1])
             np.testing.assert_equal(tuple(x.shape), (-1, 1, -1, -1))
 
+
 class TestExpandV2OneDNNOp(OpTest):
     def setUp(self):
         self.op_type = "expand_v2"
         self.init_data()
-        self.x = np.random.random(self.ori_shape).astype("float32")
-        self.attrs = {'shape': self.shape, 'use_mkldnn': True}
+        self.python_api = paddle.expand
+        self.x = np.zeros(self.ori_shape).astype("float32")
+        self.attrs = {
+            'shape': self.shape,
+            'use_mkldnn': True,
+            'dtype': int(paddle.float32),
+        }
         self.set_inputs()
         self.set_additional_inputs()
         output = np.zeros(self.expect_shape).astype("float32")
         self.outputs = {'Out': output}
@@ -702,30 +708,90 @@ def set_additional_inputs(self):
         pass
 
     def init_data(self):
-        self.ori_shape = [1, 1, 1, 140]
-        self.shape = [2, 3, 0, 140]
-        self.expect_shape = [2, 3, 0, 140]
+        self.ori_shape = [1, 0, 1, 140]
+        self.shape = [1, 0, 1, 140]
+        self.expect_shape = [1, 0, 1, 140]
 
     def test_check_output(self):
-        self.check_output_with_place(core.CPUPlace(), check_pir_onednn=True,check_dygraph=False)
-
-    # def test_check_grad(self):
-    #     self.check_grad_with_place(
-    #         core.CPUPlace(), ["X"], "Out", check_pir_onednn=True, check_dygraph=False
-    #     )
+        self.check_output_with_place(
+            core.CPUPlace(), check_pir_onednn=True, check_dygraph=False
+        )
+
+    def test_check_grad(self):
+        self.check_grad_with_place(
+            core.CPUPlace(),
+            ["X"],
+            "Out",
+            check_pir_onednn=True,
+            check_dygraph=False,
+        )
+
+
 class TestExpandV2ZeroSizeOneDNNOp(TestExpandV2OneDNNOp):
 
     def init_data(self):
-        self.ori_shape = (1, 3)
-        self.shape = (0, 3)
-        self.expect_shape = (0, 3)
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
+
 
 class TestExpandV2ZeroSizeOneDNNOp2(TestExpandV2OneDNNOp):
 
     def init_data(self):
-        self.ori_shape = (1, 3)
-        self.shape = (1, 0, 3)
-        self.expect_shape = (1, 0, 3)
+        self.ori_shape = (0, 1, 8)
+        self.shape = (0, 8, 8)
+        self.expect_shape = (0, 8, 8)
+
+
+class TestExpandV2GPUOp(TestExpandV2OneDNNOp):
+    def test_check_output(self):
+        self.check_output_with_place(core.CUDAPlace(0), check_dygraph=True)
+
+    def test_check_grad(self):
+        if core.is_compiled_with_cuda():
+            self.check_grad_with_place(
+                core.CUDAPlace(0), ["X"], "Out", check_dygraph=True
+            )
+
+
+class TestExpandV2ZeroSizeGPUOp(TestExpandV2GPUOp):
+    def init_data(self):
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
+
+
+class TestExpandV2ZeroSizeGPUOp2(TestExpandV2GPUOp):
+    def init_data(self):
+        self.ori_shape = (0, 1)
+        self.shape = (0, 8)
+        self.expect_shape = (0, 8)
+
+
+class TestExpandV2CPUOp(TestExpandV2OneDNNOp):
+    def test_check_output(self):
+        self.check_output_with_place(core.CPUPlace(), check_dygraph=True)
+
+    def test_check_grad(self):
+        if core.is_compiled_with_cuda():
+            self.check_grad_with_place(
+                core.CPUPlace(), ["X"], "Out", check_dygraph=True
+            )
+
+
+class TestExpandV2CPUOp1(TestExpandV2CPUOp):
+    def init_data(self):
+        self.ori_shape = (0, 1)
+        self.shape = (0, 8)
+        self.expect_shape = (0, 8)
+
+
+class TestExpandV2CPUOp2(TestExpandV2CPUOp):
+    def init_data(self):
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
+
 
 if __name__ == "__main__":
     paddle.enable_static()

From 37a37ff3f7fd4e8a4825e69b0310633a1fcc786f Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Sat, 17 May 2025 18:12:59 +0000
Subject: [PATCH 4/7] fix bug and add unittests

---
 .../kernels/impl/expand_grad_kernel_impl.h |  9 +-
 test/legacy_test/test_expand_v2_op.py      | 87 +++++++------------
 test/mkldnn/test_expand_v2_mkldnn_op.py    | 25 ++++++
 3 files changed, 64 insertions(+), 57 deletions(-)

diff --git a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
index 604805d486c1a2..653846680677af 100644
--- a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
@@ -55,10 +55,13 @@ void ExpandGradKernel(const Context& ctx,
                       DenseTensor* in_grad) {
   auto expand_shape = shape.GetData();
   auto x_dims = x.dims();
-  if (out_grad.numel() == 0 || (in_grad && in_grad->numel() == 0)) {
+  if (x.numel() == 0 || out_grad.numel() == 0 ||
+      (in_grad && in_grad->numel() == 0)) {
     ctx.template Alloc<T>(in_grad);
-    phi::Full<T, Context>(
-        ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
+    if (in_grad->numel() != 0) {
+      phi::Full<T, Context>(
+          ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
+    }
     return;
   }
 
diff --git a/test/legacy_test/test_expand_v2_op.py b/test/legacy_test/test_expand_v2_op.py
index 55f46a73d2b9f3..6cde4329ae327d 100644
--- a/test/legacy_test/test_expand_v2_op.py
+++ b/test/legacy_test/test_expand_v2_op.py
@@ -685,20 +685,19 @@ def test_value_list_shape2(self):
             np.testing.assert_equal(tuple(x.shape), (-1, 1, -1, -1))
 
 
-class TestExpandV2OneDNNOp(OpTest):
+class TestExpandV2ZeroSizeOp(OpTest):
     def setUp(self):
         self.op_type = "expand_v2"
         self.init_data()
+        self.init_place()
         self.python_api = paddle.expand
-        self.x = np.zeros(self.ori_shape).astype("float32")
+        self.x = np.zeros(self.ori_shape).astype("float64")
         self.attrs = {
             'shape': self.shape,
-            'use_mkldnn': True,
-            'dtype': int(paddle.float32),
         }
         self.set_inputs()
         self.set_additional_inputs()
-        output = np.zeros(self.expect_shape).astype("float32")
+        output = np.zeros(self.expect_shape).astype("float64")
         self.outputs = {'Out': output}
 
     def set_inputs(self):
@@ -712,87 +711,67 @@ def init_data(self):
         self.shape = [1, 0, 1, 140]
         self.expect_shape = [1, 0, 1, 140]
 
+    def init_place(self):
+        self.place = core.CPUPlace()
+
     def test_check_output(self):
-        self.check_output_with_place(
-            core.CPUPlace(), check_pir_onednn=True, check_dygraph=False
-        )
+        self.check_output_with_place(self.place, check_dygraph=False)
 
     def test_check_grad(self):
         self.check_grad_with_place(
-            core.CPUPlace(),
+            self.place,
             ["X"],
             "Out",
-            check_pir_onednn=True,
             check_dygraph=False,
         )
 
 
-class TestExpandV2ZeroSizeOneDNNOp(TestExpandV2OneDNNOp):
-
-    def init_data(self):
-        self.ori_shape = (0, 130)
-        self.shape = (4, 0, 130)
-        self.expect_shape = (4, 0, 130)
+class TestExpandV2CPUOp1(TestExpandV2ZeroSizeOp):
+    def init_data(self):
+        self.ori_shape = (0, 1)
+        self.shape = (0, 8)
+        self.expect_shape = (0, 8)
 
 
-class TestExpandV2ZeroSizeOneDNNOp2(TestExpandV2OneDNNOp):
-
-    def init_data(self):
-        self.ori_shape = (0, 1, 8)
-        self.shape = (0, 8, 8)
-        self.expect_shape = (0, 8, 8)
+class TestExpandV2CPUOp2(TestExpandV2ZeroSizeOp):
+    def init_data(self):
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
 
 
-class TestExpandV2GPUOp(TestExpandV2OneDNNOp):
-    def test_check_output(self):
-        self.check_output_with_place(core.CUDAPlace(0), check_dygraph=True)
-
-    def test_check_grad(self):
-        if core.is_compiled_with_cuda():
-            self.check_grad_with_place(
-                core.CUDAPlace(0), ["X"], "Out", check_dygraph=True
-            )
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(),
+    "core is not compiled with CUDA",
+)
+class TestExpandV2ZeroSizeGPUOp(TestExpandV2ZeroSizeOp):
 
+    def init_place(self):
+        self.place = core.CUDAPlace(0)
 
-class TestExpandV2ZeroSizeGPUOp(TestExpandV2GPUOp):
-    def init_data(self):
-        self.ori_shape = (0, 130)
-        self.shape = (4, 0, 130)
-        self.expect_shape = (4, 0, 130)
 
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(),
+    "core is not compiled with CUDA",
+)
+class TestExpandV2ZeroSizeGPUOp1(TestExpandV2ZeroSizeGPUOp):
+    def init_data(self):
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
 
-class TestExpandV2ZeroSizeGPUOp2(TestExpandV2GPUOp):
-    def init_data(self):
-        self.ori_shape = (0, 1)
-        self.shape = (0, 8)
-        self.expect_shape = (0, 8)
 
-
-class TestExpandV2CPUOp(TestExpandV2OneDNNOp):
-    def test_check_output(self):
-        self.check_output_with_place(core.CPUPlace(), check_dygraph=True)
-
-    def test_check_grad(self):
-        if core.is_compiled_with_cuda():
-            self.check_grad_with_place(
-                core.CPUPlace(), ["X"], "Out", check_dygraph=True
-            )
-
-
-class TestExpandV2CPUOp1(TestExpandV2CPUOp):
-    def init_data(self):
-        self.ori_shape = (0, 1)
-        self.shape = (0, 8)
-        self.expect_shape = (0, 8)
-
-
-class TestExpandV2CPUOp2(TestExpandV2CPUOp):
-    def init_data(self):
-        self.ori_shape = (0, 130)
-        self.shape = (4, 0, 130)
-        self.expect_shape = (4, 0, 130)
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(),
+    "core is not compiled with CUDA",
+)
+class TestExpandV2ZeroSizeGPUOp2(TestExpandV2ZeroSizeGPUOp):
+    def init_data(self):
+        self.ori_shape = (0, 1)
+        self.shape = (0, 8)
+        self.expect_shape = (0, 8)
 
 
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
diff --git a/test/mkldnn/test_expand_v2_mkldnn_op.py b/test/mkldnn/test_expand_v2_mkldnn_op.py
index 3855e9060ff20f..dcfa4844171c23 100644
--- a/test/mkldnn/test_expand_v2_mkldnn_op.py
+++ b/test/mkldnn/test_expand_v2_mkldnn_op.py
@@ -171,6 +171,31 @@ def test_check_grad(self):
 create_expand_v2_bf16_test_class(TestExpandV2ExpandShapesTensor2OneDNNOp)
 create_expand_v2_bf16_test_class(TestExpandV2ShapesTensorOneDNNOp)
 
+
+class TestExpandV2OneDNNOpZeroSize(TestExpandV2OneDNNOp):
+    def setUp(self):
+        self.op_type = "expand_v2"
+        self.init_data()
+        self.x = np.random.random(self.ori_shape).astype("float32")
+        self.attrs = {'shape': self.shape, 'use_mkldnn': True}
+        self.set_inputs()
+        output = np.zeros(self.expect_shape)
+        self.outputs = {'Out': output}
+
+    def init_data(self):
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
+
+
+class TestExpandV2OneDNNOpZeroSize1(TestExpandV2OneDNNOpZeroSize):
+
+    def init_data(self):
+        self.ori_shape = (0, 1, 8)
+        self.shape = (0, 8, 8)
+        self.expect_shape = (0, 8, 8)
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()

From 37f0042d25857e2ce4a280baf33f33fa976e2c6d Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Sun, 18 May 2025 17:37:51 +0000
Subject: [PATCH 5/7] modify the unittests for the onednn kernel

---
 test/legacy_test/test_expand_v2_op.py   | 55 +++++++++++++++++++++++++
 test/mkldnn/test_expand_v2_mkldnn_op.py | 24 ----------
 2 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/test/legacy_test/test_expand_v2_op.py b/test/legacy_test/test_expand_v2_op.py
index 6cde4329ae327d..ad19bb4497b243 100644
--- a/test/legacy_test/test_expand_v2_op.py
+++ b/test/legacy_test/test_expand_v2_op.py
@@ -772,6 +772,61 @@ def init_data(self):
         self.expect_shape = (0, 8)
 
 
+class TestExpandV2ZeroSizeOneDNNOp(TestExpandV2ZeroSizeOp):
+    def setUp(self):
+        self.op_type = "expand_v2"
+        self.init_data()
+        self.init_place()
+        self.python_api = paddle.expand
+        self.x = np.zeros(self.ori_shape).astype("float32")
+        self.attrs = {'shape': self.shape, 'use_mkldnn': True}
+        self.use_mkldnn = True
+        self.set_inputs()
+        self.set_additional_inputs()
+        output = np.zeros(self.expect_shape).astype("float32")
+        self.outputs = {'Out': output}
+
+    def init_data(self):
+        self.ori_shape = [1, 0, 1, 140]
+        self.shape = [1, 0, 1, 140]
+        self.expect_shape = [1, 0, 1, 140]
+
+    def init_place(self):
+        self.place = core.CPUPlace()
+
+    def test_check_output(self):
+        self.check_output_with_place(
+            self.place,
+            check_dygraph=False,
+            check_pir=True,
+            check_pir_onednn=True,
+        )
+
+    def test_check_grad(self):
+        self.check_grad_with_place(
+            self.place,
+            ["X"],
+            "Out",
+            check_dygraph=False,
+            check_pir=True,
+            check_pir_onednn=True,
+        )
+
+
+class TestExpandV2ZeroSizeOneDNNOp1(TestExpandV2ZeroSizeOneDNNOp):
+    def init_data(self):
+        self.ori_shape = (0, 130)
+        self.shape = (4, 0, 130)
+        self.expect_shape = (4, 0, 130)
+
+
+class TestExpandV2ZeroSizeOneDNNOp2(TestExpandV2ZeroSizeOneDNNOp):
+    def init_data(self):
+        self.ori_shape = (0, 1, 8)
+        self.shape = (0, 8, 8)
+        self.expect_shape = (0, 8, 8)
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
diff --git a/test/mkldnn/test_expand_v2_mkldnn_op.py b/test/mkldnn/test_expand_v2_mkldnn_op.py
index dcfa4844171c23..8d30412e510dd0 100644
--- a/test/mkldnn/test_expand_v2_mkldnn_op.py
+++ b/test/mkldnn/test_expand_v2_mkldnn_op.py
@@ -172,30 +172,6 @@ def test_check_grad(self):
 create_expand_v2_bf16_test_class(TestExpandV2ShapesTensorOneDNNOp)
 
 
-class TestExpandV2OneDNNOpZeroSize(TestExpandV2OneDNNOp):
-    def setUp(self):
-        self.op_type = "expand_v2"
-        self.init_data()
-        self.x = np.random.random(self.ori_shape).astype("float32")
-        self.attrs = {'shape': self.shape, 'use_mkldnn': True}
-        self.set_inputs()
-        output = np.zeros(self.expect_shape)
-        self.outputs = {'Out': output}
-
-    def init_data(self):
-        self.ori_shape = (0, 130)
-        self.shape = (4, 0, 130)
-        self.expect_shape = (4, 0, 130)
-
-
-class TestExpandV2OneDNNOpZeroSize1(TestExpandV2OneDNNOpZeroSize):
-
-    def init_data(self):
-        self.ori_shape = (0, 1, 8)
-        self.shape = (0, 8, 8)
-        self.expect_shape = (0, 8, 8)
-
-
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()

From f6e82daaeceb9c5904e66f19a4ec5d9b375c954e Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Mon, 19 May 2025 02:47:44 +0000
Subject: [PATCH 6/7] modify expand unittest to force-enable the onednn op

---
 test/legacy_test/test_expand_v2_op.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/test/legacy_test/test_expand_v2_op.py b/test/legacy_test/test_expand_v2_op.py
index ad19bb4497b243..d0be33d1221ccb 100644
--- a/test/legacy_test/test_expand_v2_op.py
+++ b/test/legacy_test/test_expand_v2_op.py
@@ -795,22 +795,28 @@ def init_place(self):
         self.place = core.CPUPlace()
 
     def test_check_output(self):
+        flags_use_mkldnn = core.globals()["FLAGS_use_mkldnn"]
+        paddle.set_flags({'FLAGS_use_mkldnn': True})
         self.check_output_with_place(
             self.place,
             check_dygraph=False,
-            check_pir=True,
+            check_pir=False,
             check_pir_onednn=True,
         )
+        paddle.set_flags({'FLAGS_use_mkldnn': flags_use_mkldnn})
 
     def test_check_grad(self):
+        flags_use_mkldnn = core.globals()["FLAGS_use_mkldnn"]
+        paddle.set_flags({'FLAGS_use_mkldnn': True})
         self.check_grad_with_place(
             self.place,
             ["X"],
             "Out",
             check_dygraph=False,
-            check_pir=True,
+            check_pir=False,
             check_pir_onednn=True,
         )
+        paddle.set_flags({'FLAGS_use_mkldnn': flags_use_mkldnn})

From c816f3ab94eb07b424c68d8e566c1586fc19c7a5 Mon Sep 17 00:00:00 2001
From: DanielSun11 <1395924413@qq.com>
Date: Fri, 23 May 2025 11:13:48 +0000
Subject: [PATCH 7/7] fix fmax/fmin grad kernels

---
 paddle/phi/kernels/funcs/elementwise_base.h |  3 ++
 .../impl/elementwise_grad_kernel_impl.h     | 42 ++++++++++++++++++
 test/legacy_test/test_fmax_op.py            | 43 +++++++++++++++++--
 test/legacy_test/test_fmin_op.py            | 21 +++++++--
 4 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h
index f1bd21d419b72b..91f233d6215177 100644
--- a/paddle/phi/kernels/funcs/elementwise_base.h
+++ b/paddle/phi/kernels/funcs/elementwise_base.h
@@ -373,6 +373,9 @@ void ElementwiseCompute(const CPUContext &dev_ctx,
                         DenseTensor *z,
                         int axis = -1) {
   dev_ctx.Alloc<OutType>(z);
+  if (z && z->numel() == 0) {
+    return;
+  }
   auto x_dims = x.dims();
   auto y_dims = y.dims();
   bool is_xsize_larger = true;
diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h
index 4d1ccfba701423..1e867e0a7c351a 100644
--- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h
@@ -746,6 +746,27 @@ void ElementwiseFMaxGradKernel(const Context& dev_ctx,
   auto x_dim = x.dims();
   auto y_dim = y.dims();
   int axis = -1;
+  if (out_grad.numel() == 0) {
+    if (x_grad) {
+      dev_ctx.template Alloc<T>(x_grad);
+      if (x_grad->numel() != 0) {
+        phi::Full<T, Context>(dev_ctx,
+                              phi::IntArray(common::vectorize(x_grad->dims())),
+                              0,
+                              x_grad);
+      }
+    }
+    if (y_grad) {
+      dev_ctx.template Alloc<T>(y_grad);
+      if (y_grad->numel() != 0) {
+        phi::Full<T, Context>(dev_ctx,
+                              phi::IntArray(common::vectorize(y_grad->dims())),
+                              0,
+                              y_grad);
+      }
+    }
+    return;
+  }
   if (x.dims() == y.dims()) {
     funcs::ElemwiseGradComputeNoBroadcast<Context,
                                           T,
@@ ... @@ void ElementwiseFMinGradKernel(const Context& dev_ctx,
+  if (out_grad.numel() == 0) {
+    if (x_grad) {
+      dev_ctx.template Alloc<T>(x_grad);
+      if (x_grad->numel() != 0) {
+        phi::Full<T, Context>(dev_ctx,
+                              phi::IntArray(common::vectorize(x_grad->dims())),
+                              0,
+                              x_grad);
+      }
+    }
+    if (y_grad) {
+      dev_ctx.template Alloc<T>(y_grad);
+      if (y_grad->numel() != 0) {
+        phi::Full<T, Context>(dev_ctx,
+                              phi::IntArray(common::vectorize(y_grad->dims())),
+                              0,
+                              y_grad);
+      }
+    }
+    return;
+  }
   auto x_dim = x.dims();
   auto y_dim = y.dims();
   int axis = -1;
diff --git a/test/legacy_test/test_fmax_op.py b/test/legacy_test/test_fmax_op.py
index 6047187457e1ab..0f76922ea39098 100644
--- a/test/legacy_test/test_fmax_op.py
+++ b/test/legacy_test/test_fmax_op.py
@@ -139,12 +139,16 @@ def setUp(self):
         # If x and y have the same value, the max() is not differentiable.
         # So we generate test data by the following method
         # to avoid them being too close to each other.
-        x = np.random.uniform(0.1, 1, [13, 17]).astype("float64")
-        sgn = np.random.choice([-1, 1], [13, 17]).astype("float64")
-        y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float64")
+        self.init_shape()
+        x = np.random.uniform(0.1, 1, self.shape).astype("float64")
+        sgn = np.random.choice([-1, 1], self.shape).astype("float64")
+        y = x + sgn * np.random.uniform(0.1, 1, self.shape).astype("float64")
         self.inputs = {'X': x, 'Y': y}
         self.outputs = {'Out': np.fmax(self.inputs['X'], self.inputs['Y'])}
 
+    def init_shape(self):
+        self.shape = [13, 17]
+
     def test_check_output(self):
         """test_check_output"""
         self.check_output(check_pir=True, check_symbol_infer=False)
@@ -286,5 +290,38 @@ def test_check_grad(self):
         )
 
 
+class TestElementwiseFmaxOpZeroSize(TestElementwiseFmaxOp):
+    def init_shape(self):
+        self.shape = [0, 15]
+
+
+class TestElementwiseFmaxOpZeroSize1(TestElementwiseFmaxOp):
+    def init_shape(self):
+        self.shape = [0, 15, 0]
+
+
+class ApiFMaxTestZeroSize(unittest.TestCase):
+    """ApiFMaxTestZeroSize"""
+
+    def setUp(self):
+        """setUp"""
+        if core.is_compiled_with_cuda():
+            self.place = core.CUDAPlace(0)
+        else:
+            self.place = core.CPUPlace()
+
+        self.input_x = np.random.rand(0, 15).astype("float32")
+        self.input_y = np.random.rand(0, 15).astype("float32")
+        self.input_z = np.random.rand(1, 15).astype("float32")
+        self.input_a = np.random.rand(15, 0).astype('int64')
+        self.input_b = np.random.rand(15, 0, 1).astype('int64')
+        self.input_c = np.random.rand(15, 0, 2).astype('int64')
+
+        self.np_expected1 = np.fmax(self.input_x, self.input_y)
+        self.np_expected2 = np.fmax(self.input_x, self.input_z)
+        self.np_expected3 = np.fmax(self.input_a, self.input_c)
+        self.np_expected4 = np.fmax(self.input_b, self.input_c)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/legacy_test/test_fmin_op.py b/test/legacy_test/test_fmin_op.py
index aef24f819c457e..2f6ba91fd60165 100644
--- a/test/legacy_test/test_fmin_op.py
+++ b/test/legacy_test/test_fmin_op.py
@@ -141,12 +141,17 @@ def setUp(self):
         # If x and y have the same value, the min() is not differentiable.
         # So we generate test data by the following method
         # to avoid them being too close to each other.
-        x = np.random.uniform(0.1, 1, [13, 17]).astype("float64")
-        sgn = np.random.choice([-1, 1], [13, 17]).astype("float64")
-        y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype("float64")
+        self.init_shape()
+        x = np.random.uniform(0.1, 1, self.shape).astype("float64")
+        sgn = np.random.choice([-1, 1], self.shape).astype("float64")
+        y = x + sgn * np.random.uniform(0.1, 1, self.shape).astype("float64")
         self.inputs = {'X': x, 'Y': y}
         self.outputs = {'Out': np.fmin(self.inputs['X'], self.inputs['Y'])}
 
+    def init_shape(self):
+        """init_shape"""
+        self.shape = [13, 17]
+
     def test_check_output(self):
         """test_check_output"""
         self.check_output(check_pir=True, check_symbol_infer=False)
@@ -288,6 +293,16 @@ def test_check_grad(self):
         )
 
 
+class TestElementwiseFminOpZeroSize(TestElementwiseFminOp):
+    def init_shape(self):
+        self.shape = [0, 9]
+
+
+class TestElementwiseFminOpZeroSize1(TestElementwiseFminOp):
+    def init_shape(self):
+        self.shape = [9, 0]
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
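
As an end-to-end sanity check of the behavior this series enables, here is a minimal dynamic-graph sketch (not part of the patches themselves; it assumes a Paddle build that includes these kernels and uses only the public `paddle.expand` API exercised by the tests above):

```python
import paddle

# A broadcastable dim of size 1 may now be expanded to 0,
# yielding a valid 0-size tensor instead of an error.
x = paddle.zeros([1, 3])
x.stop_gradient = False

y = paddle.expand(x, shape=[0, 3])
print(y.shape)  # [0, 3] -- 0-size output tensor

# The backward pass of a 0-size expand fills the input gradient
# with zeros of the input's shape, as the patched grad kernels do.
y.sum().backward()
print(x.grad.shape)         # [1, 3]
print(float(x.grad.sum()))  # 0.0
```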