Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions paddle/phi/kernels/gpu/expand_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"

Expand All @@ -29,6 +30,11 @@ void ExpandGradKernel(const Context& ctx,
const IntArray& shape,
DenseTensor* x_grad) {
ctx.template Alloc<T>(x_grad);
if ((x_grad && x_grad->numel() == 0) || out_grad.numel() == 0) {
phi::Full<T, Context>(
ctx, phi::IntArray(common::vectorize(x_grad->dims())), 0, x_grad);
return;
}
if (x_grad->dims() == out_grad.dims()) {
phi::Copy(ctx, out_grad, ctx.GetPlace(), false, x_grad);
} else {
Expand Down
10 changes: 10 additions & 0 deletions paddle/phi/kernels/impl/expand_grad_kernel_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#pragma once

#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
#include "paddle/phi/kernels/impl/expand_kernel_impl.h"
Expand Down Expand Up @@ -54,6 +55,15 @@ void ExpandGradKernel(const Context& ctx,
DenseTensor* in_grad) {
auto expand_shape = shape.GetData();
auto x_dims = x.dims();
if (x.numel() == 0 || out_grad.numel() == 0 ||
(in_grad && in_grad->numel() == 0)) {
ctx.template Alloc<T>(in_grad);
if (in_grad->numel() != 0) {
phi::Full<T, Context>(
ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
}
return;
}

if (in_grad->dims() == out_grad.dims()) {
phi::Copy(ctx, out_grad, ctx.GetPlace(), false, in_grad);
Expand Down
8 changes: 8 additions & 0 deletions paddle/phi/kernels/onednn/expand_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"

namespace phi {
template <typename T, typename Context>
Expand All @@ -26,6 +27,13 @@ void ExpandGradKernel(const Context& dev_ctx,
DenseTensor* in_grad) {
const auto& onednn_engine = dev_ctx.GetEngine();

if ((in_grad && in_grad->numel() == 0) || out_grad.numel() == 0) {
dev_ctx.template Alloc<T>(in_grad);
phi::Full<T, Context>(
dev_ctx, phi::IntArray(common::vectorize(in_grad->dims())), 0, in_grad);
return;
}

auto in_grad_vec_dims = common::vectorize(in_grad->dims());
auto out_grad_vec_dims = common::vectorize(out_grad.dims());

Expand Down
37 changes: 36 additions & 1 deletion paddle/phi/kernels/onednn/expand_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,51 @@ void ExpandKernel(const Context& dev_ctx,
auto x_vec_dims = common::vectorize(x.dims());

auto out_new_dims = shape.GetData();
bool has_zero_size = false;

for (size_t i = 0; i < out_new_dims.size(); ++i) {
out_new_dims[i] = out_new_dims[i] > 0 ? out_new_dims[i] : x_vec_dims[i];
out_new_dims[i] = out_new_dims[i] >= 0 ? out_new_dims[i] : x_vec_dims[i];
}

if (x_vec_dims.size() != out_new_dims.size()) {
x_vec_dims = GetExtendedXDims(x_vec_dims, out_new_dims.size()); // NOLINT
}

for (size_t i = 0; i < x_vec_dims.size(); ++i) {
PADDLE_ENFORCE_GE(
out_new_dims[i],
0,
common::errors::InvalidArgument(
"The expanded size (%d) for non-existing dimensions must be "
"positive for expand_v2 op.",
out_new_dims[i]));

PADDLE_ENFORCE_GE(
x_vec_dims[i],
0,
common::errors::InvalidArgument(
"The expanded size (%d) for non-existing dimensions must be "
"positive for expand_v2 op.",
x_vec_dims[i]));

PADDLE_ENFORCE_EQ(
x_vec_dims[i] == 1 || x_vec_dims[i] == out_new_dims[i],
true,
common::errors::InvalidArgument(
"The value (%d) of the non-singleton dimension does not match"
" the corresponding value (%d) in shape for expand_v2 op.",
x_vec_dims[i],
out_new_dims[i]));
if (out_new_dims[i] == 0) {
has_zero_size = true;
}
}

out->Resize(common::make_ddim(out_new_dims));
if (has_zero_size) {
dev_ctx.template Alloc<T>(out);
return;
}
funcs::BroadcastDataOneDNNHandler<T> handler(dnnl::algorithm::binary_add,
onednn_engine,
dev_ctx.GetPlace(),
Expand Down
148 changes: 148 additions & 0 deletions test/legacy_test/test_expand_v2_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,154 @@ def test_value_list_shape2(self):
np.testing.assert_equal(tuple(x.shape), (-1, 1, -1, -1))


class TestExpandV2ZeroSizeOp(OpTest):
    """OpTest case for expand_v2 when input/output contain a zero-size dim.

    Subclasses override ``init_data`` (shapes) and ``init_place`` (device).
    """

    def setUp(self):
        self.op_type = "expand_v2"
        self.python_api = paddle.expand
        self.init_data()
        self.init_place()
        # A zero-size input tensor; expand must propagate the empty shape.
        self.x = np.zeros(self.ori_shape).astype("float64")
        self.attrs = {'shape': self.shape}
        self.set_inputs()
        self.set_additional_inputs()
        expected = np.zeros(self.expect_shape).astype("float64")
        self.outputs = {'Out': expected}

    def set_inputs(self):
        # Hook so subclasses can swap in tensor-typed shape inputs.
        self.inputs = {'X': self.x}

    def set_additional_inputs(self):
        # Hook for subclasses that feed Shape/expand_shapes_tensor inputs.
        pass

    def init_data(self):
        # Default: expand to the same (zero-size) shape.
        self.ori_shape = [1, 0, 1, 140]
        self.shape = [1, 0, 1, 140]
        self.expect_shape = [1, 0, 1, 140]

    def init_place(self):
        self.place = core.CPUPlace()

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)

    def test_check_grad(self):
        self.check_grad_with_place(
            self.place,
            ["X"],
            "Out",
            check_dygraph=False,
        )


class TestExpandV2CPUOp1(TestExpandV2ZeroSizeOp):
    """Broadcast a trailing singleton dim while keeping the zero-size dim."""

    def init_data(self):
        self.expect_shape = (0, 8)
        self.shape = (0, 8)
        self.ori_shape = (0, 1)


class TestExpandV2CPUOp2(TestExpandV2ZeroSizeOp):
    """Prepend a new leading dim in front of a zero-size input."""

    def init_data(self):
        self.expect_shape = (4, 0, 130)
        self.shape = (4, 0, 130)
        self.ori_shape = (0, 130)


@unittest.skipIf(
    not core.is_compiled_with_cuda(),
    "core is not compiled with CUDA",
)
class TestExpandV2ZeroSizeGPUOp(TestExpandV2ZeroSizeOp):
    """Re-run the base zero-size expand_v2 case on the GPU kernel."""

    def init_place(self):
        self.place = core.CUDAPlace(0)


@unittest.skipIf(
    not core.is_compiled_with_cuda(),
    "core is not compiled with CUDA",
)
class TestExpandV2ZeroSizeGPUOp1(TestExpandV2ZeroSizeGPUOp):
    """GPU: prepend a new leading dim in front of a zero-size input."""

    def init_data(self):
        self.expect_shape = (4, 0, 130)
        self.shape = (4, 0, 130)
        self.ori_shape = (0, 130)


@unittest.skipIf(
    not core.is_compiled_with_cuda(),
    "core is not compiled with CUDA",
)
class TestExpandV2ZeroSizeGPUOp2(TestExpandV2ZeroSizeGPUOp):
    """GPU: broadcast a trailing singleton dim next to the zero-size dim."""

    def init_data(self):
        self.expect_shape = (0, 8)
        self.shape = (0, 8)
        self.ori_shape = (0, 1)


class TestExpandV2ZeroSizeOneDNNOp(TestExpandV2ZeroSizeOp):
    """Zero-size expand_v2 exercised through the oneDNN (MKL-DNN) kernel.

    Uses float32 (the dtype registered for the oneDNN expand kernel) and
    toggles the global ``FLAGS_use_mkldnn`` flag around each check.
    """

    def setUp(self):
        self.op_type = "expand_v2"
        self.init_data()
        self.init_place()
        self.python_api = paddle.expand
        self.x = np.zeros(self.ori_shape).astype("float32")
        self.attrs = {'shape': self.shape, 'use_mkldnn': True}
        self.use_mkldnn = True
        self.set_inputs()
        self.set_additional_inputs()
        output = np.zeros(self.expect_shape).astype("float32")
        self.outputs = {'Out': output}

    def init_data(self):
        self.ori_shape = [1, 0, 1, 140]
        self.shape = [1, 0, 1, 140]
        self.expect_shape = [1, 0, 1, 140]

    def init_place(self):
        self.place = core.CPUPlace()

    def test_check_output(self):
        flags_use_mkldnn = core.globals()["FLAGS_use_mkldnn"]
        paddle.set_flags({'FLAGS_use_mkldnn': True})
        # try/finally so a failing check cannot leak the global flag state
        # into subsequently-run tests.
        try:
            self.check_output_with_place(
                self.place,
                check_dygraph=False,
                check_pir=False,
                check_pir_onednn=True,
            )
        finally:
            paddle.set_flags({'FLAGS_use_mkldnn': flags_use_mkldnn})

    def test_check_grad(self):
        flags_use_mkldnn = core.globals()["FLAGS_use_mkldnn"]
        paddle.set_flags({'FLAGS_use_mkldnn': True})
        # Restore the flag even when the gradient check raises.
        try:
            self.check_grad_with_place(
                self.place,
                ["X"],
                "Out",
                check_dygraph=False,
                check_pir=False,
                check_pir_onednn=True,
            )
        finally:
            paddle.set_flags({'FLAGS_use_mkldnn': flags_use_mkldnn})


class TestExpandV2ZeroSizeOneDNNOp1(TestExpandV2ZeroSizeOneDNNOp):
    """oneDNN: prepend a new leading dim in front of a zero-size input."""

    def init_data(self):
        self.expect_shape = (4, 0, 130)
        self.shape = (4, 0, 130)
        self.ori_shape = (0, 130)


class TestExpandV2ZeroSizeOneDNNOp2(TestExpandV2ZeroSizeOneDNNOp):
    """oneDNN: broadcast a middle singleton dim next to the zero-size dim."""

    def init_data(self):
        self.expect_shape = (0, 8, 8)
        self.shape = (0, 8, 8)
        self.ori_shape = (0, 1, 8)


if __name__ == "__main__":
    # OpTest-based cases here run under static-graph mode.
    paddle.enable_static()
    unittest.main()
1 change: 1 addition & 0 deletions test/mkldnn/test_expand_v2_mkldnn_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def test_check_grad(self):
create_expand_v2_bf16_test_class(TestExpandV2ExpandShapesTensor2OneDNNOp)
create_expand_v2_bf16_test_class(TestExpandV2ShapesTensorOneDNNOp)


if __name__ == '__main__':
paddle.enable_static()
unittest.main()
Loading