Skip to content

Commit 2fcecfb

Browse files
committed
expand_grad: support 0-size Tensor
1 parent fd73310 commit 2fcecfb

File tree

4 files changed

+104
-17
lines changed

4 files changed

+104
-17
lines changed

paddle/phi/kernels/gpu/expand_grad_kernel.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "paddle/phi/backends/gpu/gpu_context.h"
1818
#include "paddle/phi/core/dense_tensor.h"
1919
#include "paddle/phi/core/kernel_registry.h"
20+
#include "paddle/phi/kernels/full_kernel.h"
2021
#include "paddle/phi/kernels/funcs/reduce_function.h"
2122
#include "paddle/phi/kernels/reduce_sum_kernel.h"
2223

@@ -29,6 +30,11 @@ void ExpandGradKernel(const Context& ctx,
2930
const IntArray& shape,
3031
DenseTensor* x_grad) {
3132
ctx.template Alloc<T>(x_grad);
33+
if ((x_grad && x_grad->numel() == 0) || out_grad.numel() == 0) {
34+
phi::Full<T, Context>(
35+
ctx, phi::IntArray(common::vectorize(x_grad->dims())), 0, x_grad);
36+
return;
37+
}
3238
if (x_grad->dims() == out_grad.dims()) {
3339
phi::Copy(ctx, out_grad, ctx.GetPlace(), false, x_grad);
3440
} else {

paddle/phi/kernels/impl/expand_grad_kernel_impl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#pragma once
1616

1717
#include "paddle/phi/core/tensor_utils.h"
18+
#include "paddle/phi/kernels/full_kernel.h"
1819
#include "paddle/phi/kernels/funcs/eigen/common.h"
1920
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
2021
#include "paddle/phi/kernels/impl/expand_kernel_impl.h"
@@ -54,6 +55,12 @@ void ExpandGradKernel(const Context& ctx,
5455
DenseTensor* in_grad) {
5556
auto expand_shape = shape.GetData();
5657
auto x_dims = x.dims();
58+
if (out_grad.numel() == 0 || (in_grad && in_grad->numel() == 0)) {
59+
ctx.template Alloc<T>(in_grad);
60+
phi::Full<T, Context>(
61+
ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
62+
return;
63+
}
5764

5865
if (in_grad->dims() == out_grad.dims()) {
5966
phi::Copy(ctx, out_grad, ctx.GetPlace(), false, in_grad);

paddle/phi/kernels/onednn/expand_grad_kernel.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "paddle/phi/backends/onednn/onednn_reuse.h"
1818
#include "paddle/phi/core/kernel_registry.h"
19+
#include "paddle/phi/kernels/full_kernel.h"
1920

2021
namespace phi {
2122
template <typename T, typename Context>
@@ -26,6 +27,13 @@ void ExpandGradKernel(const Context& dev_ctx,
2627
DenseTensor* in_grad) {
2728
const auto& onednn_engine = dev_ctx.GetEngine();
2829

30+
if ((in_grad && in_grad->numel() == 0) || out_grad.numel() == 0) {
31+
dev_ctx.template Alloc<T>(in_grad);
32+
phi::Full<T, Context>(
33+
dev_ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
34+
return;
35+
}
36+
2937
auto in_grad_vec_dims = common::vectorize(in_grad->dims());
3038
auto out_grad_vec_dims = common::vectorize(out_grad.dims());
3139

test/legacy_test/test_expand_v2_op.py

Lines changed: 83 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -684,12 +684,18 @@ def test_value_list_shape2(self):
684684
x = paddle.expand(x, shape=[shape1, 1, -1, -1])
685685
np.testing.assert_equal(tuple(x.shape), (-1, 1, -1, -1))
686686

687+
687688
class TestExpandV2OneDNNOp(OpTest):
688689
def setUp(self):
689690
self.op_type = "expand_v2"
690691
self.init_data()
691-
self.x = np.random.random(self.ori_shape).astype("float32")
692-
self.attrs = {'shape': self.shape, 'use_mkldnn': True}
692+
self.python_api = paddle.expand
693+
self.x = np.zeros(self.ori_shape).astype("float32")
694+
self.attrs = {
695+
'shape': self.shape,
696+
'use_mkldnn': True,
697+
'dtype': int(paddle.float32),
698+
}
693699
self.set_inputs()
694700
self.set_additional_inputs()
695701
output = np.zeros(self.expect_shape).astype("float32")
@@ -702,30 +708,90 @@ def set_additional_inputs(self):
702708
pass
703709

704710
def init_data(self):
705-
self.ori_shape = [1, 1, 1, 140]
706-
self.shape = [2, 3, 0, 140]
707-
self.expect_shape = [2, 3, 0, 140]
711+
self.ori_shape = [1, 0, 1, 140]
712+
self.shape = [1, 0, 1, 140]
713+
self.expect_shape = [1, 0, 1, 140]
708714

709715
def test_check_output(self):
710-
self.check_output_with_place(core.CPUPlace(), check_pir_onednn=True,check_dygraph=False)
711-
712-
# def test_check_grad(self):
713-
# self.check_grad_with_place(
714-
# core.CPUPlace(), ["X"], "Out", check_pir_onednn=True, check_dygraph=False
715-
# )
716+
self.check_output_with_place(
717+
core.CPUPlace(), check_pir_onednn=True, check_dygraph=False
718+
)
719+
720+
def test_check_grad(self):
721+
self.check_grad_with_place(
722+
core.CPUPlace(),
723+
["X"],
724+
"Out",
725+
check_pir_onednn=True,
726+
check_dygraph=False,
727+
)
728+
729+
716730
class TestExpandV2ZeroSizeOneDNNOp(TestExpandV2OneDNNOp):
717731

718732
def init_data(self):
719-
self.ori_shape = (1, 3)
720-
self.shape = (0, 3)
721-
self.expect_shape = (0, 3)
733+
self.ori_shape = (0, 130)
734+
self.shape = (4, 0, 130)
735+
self.expect_shape = (4, 0, 130)
736+
722737

723738
class TestExpandV2ZeroSizeOneDNNOp2(TestExpandV2OneDNNOp):
724739

725740
def init_data(self):
726-
self.ori_shape = (1, 3)
727-
self.shape = (1, 0, 3)
728-
self.expect_shape = (1, 0, 3)
741+
self.ori_shape = (0, 1, 8)
742+
self.shape = (0, 8, 8)
743+
self.expect_shape = (0, 8, 8)
744+
745+
746+
class TestExpandV2GPUOp(TestExpandV2OneDNNOp):
747+
def test_check_output(self):
748+
self.check_output_with_place(core.CUDAPlace(0), check_dygraph=True)
749+
750+
def test_check_grad(self):
751+
if core.is_compiled_with_cuda():
752+
self.check_grad_with_place(
753+
core.CUDAPlace(0), ["X"], "Out", check_dygraph=True
754+
)
755+
756+
757+
class TestExpandV2ZeroSizeGPUOp(TestExpandV2GPUOp):
758+
def init_data(self):
759+
self.ori_shape = (0, 130)
760+
self.shape = (4, 0, 130)
761+
self.expect_shape = (4, 0, 130)
762+
763+
764+
class TestExpandV2ZeroSizeGPUOp2(TestExpandV2GPUOp):
765+
def init_data(self):
766+
self.ori_shape = (0, 1)
767+
self.shape = (0, 8)
768+
self.expect_shape = (0, 8)
769+
770+
771+
class TestExpandV2CPUOp(TestExpandV2OneDNNOp):
772+
def test_check_output(self):
773+
self.check_output_with_place(core.CPUPlace(), check_dygraph=True)
774+
775+
def test_check_grad(self):
776+
if core.is_compiled_with_cuda():
777+
self.check_grad_with_place(
778+
core.CPUPlace(), ["X"], "Out", check_dygraph=True
779+
)
780+
781+
782+
class TestExpandV2CPUOp1(TestExpandV2CPUOp):
783+
def init_data(self):
784+
self.ori_shape = (0, 1)
785+
self.shape = (0, 8)
786+
self.expect_shape = (0, 8)
787+
788+
789+
class TestExpandV2CPUOp2(TestExpandV2CPUOp):
790+
def init_data(self):
791+
self.ori_shape = (0, 130)
792+
self.shape = (4, 0, 130)
793+
self.expect_shape = (4, 0, 130)
794+
729795

730796
if __name__ == "__main__":
731797
paddle.enable_static()

0 commit comments

Comments
 (0)