Skip to content

Commit eb9267a

Browse files
authored
[0-size Tensor No. 18, 42, 51, 83] Add 0-size Tensor support for the hessian, cumulative_trapezoid, einsum, and fused_feedforward APIs. (#73962)
* Add a cumulative_trapezoid test case
* Fix fused_feedforward and einsum
* Fix the test_einsum_0d_tensor error
* Fix the judgment condition
1 parent 9dac891 commit eb9267a

File tree

8 files changed

+91
-3
lines changed

8 files changed

+91
-3
lines changed

paddle/phi/infermeta/unary.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1251,7 +1251,7 @@ void EinsumInferMeta(const std::vector<const MetaTensor*>& inputs,
12511251
const std::string& equation,
12521252
MetaTensor* out) {
12531253
// collect the following information to prepare einsum.
1254-
LabelMap labelshape(0);
1254+
LabelMap labelshape(-1);
12551255
LabelMap labeltype(LabelType::Reduction);
12561256
std::vector<LabelMap> label2perms(inputs.size(), LabelMap(-1));
12571257
std::vector<char> all_labels;

paddle/phi/kernels/fusion/gpu/fused_feedforward_grad_kernel.cu

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -327,6 +327,8 @@ void FusedFeedForwardGradKernel(
327327
dev_ctx.template Alloc<T>(d_linear2_weight,
328328
d_linear2_weight->numel() * sizeof(T));
329329

330+
if (d_x->numel() == 0) return;
331+
330332
auto x_dim = x.dims();
331333
auto mat_dim_x = phi::funcs::CreateMatrixDescriptor(
332334
phi::RowMatrixFromVector(x_dim), 0, false);

paddle/phi/kernels/fusion/gpu/fused_feedforward_kernel.cu

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,10 @@ void FusedFeedForwardKernel(const Context& dev_ctx,
250250
dev_ctx.template Alloc<T>(dropout1_out, dropout1_out->numel() * sizeof(T));
251251
dev_ctx.template Alloc<T>(dropout2_out, dropout2_out->numel() * sizeof(T));
252252

253+
if (out->numel() == 0) {
254+
return;
255+
}
256+
253257
auto x_dim = x_ptr->dims();
254258
auto mat_dim_x = phi::funcs::CreateMatrixDescriptor(
255259
phi::RowMatrixFromVector(x_dim), 0, false);

paddle/phi/kernels/impl/einsum_grad_kernel_impl.h

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,11 @@
1717

1818
#include "paddle/phi/core/dense_tensor.h"
1919
#include "paddle/phi/kernels/complex_kernel.h"
20+
#include "paddle/phi/kernels/full_kernel.h"
2021
#include "paddle/phi/kernels/impl/einsum_kernel_impl.h"
2122
#include "paddle/phi/kernels/tile_grad_kernel.h"
2223
#include "paddle/phi/kernels/tile_kernel.h"
2324
#include "paddle/utils/string/string_helper.h"
24-
2525
namespace phi {
2626

2727
template <typename T, typename Context>
@@ -117,6 +117,17 @@ void EinsumGradKernel(const Context& dev_ctx,
117117
const std::string& equation,
118118
std::vector<DenseTensor*> x_grad) {
119119
VLOG(5) << "Start EinsumGradKernel:";
120+
bool has_zero_size_tensor = out_grad.numel() == 0;
121+
for (auto& i : x_grad) {
122+
if (i != nullptr) {
123+
if (i->numel() == 0) {
124+
has_zero_size_tensor = true;
125+
phi::Full<T, Context>(
126+
dev_ctx, phi::IntArray(common::vectorize(i->dims())), 0, i);
127+
}
128+
}
129+
}
130+
if (has_zero_size_tensor) return;
120131
LabelMap labelshape(0);
121132
LabelMap labeltype(LabelType::Reduction);
122133
std::vector<LabelMap> label2perms(x.size(), LabelMap(-1));

paddle/phi/kernels/impl/einsum_kernel_impl.h

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,11 +209,19 @@ inline static void InferLabelShape(
209209
for (size_t i = 0; i < op_labels.size(); ++i) {
210210
auto& op_str = op_labels[i];
211211
auto& op_dim = inputs[i];
212+
VLOG(5) << "i = " << i << " op_str " << op_str << " op_dim " << op_dim;
212213
int dim_ptr = 0;
213214
for (auto& c : op_str) {
214215
if (!labelshape->exist(c) || abs((*labelshape)[c]) == 1) {
215-
(*labelshape)[c] = op_dim[dim_ptr];
216+
VLOG(5)
217+
<< "if (!labelshape->exist(c) || abs((*labelshape)[c]) == 1) c = "
218+
<< c << " (*labelshape)[c] " << (*labelshape)[c]
219+
<< " op_dim[dim_ptr] " << op_dim[dim_ptr];
220+
(*labelshape)[c] = static_cast<int>(op_dim[dim_ptr]);
216221
} else if (abs(op_dim[dim_ptr]) != 1) {
222+
VLOG(5) << "if (abs(op_dim[dim_ptr]) != 1) c = " << c
223+
<< " (*labelshape)[c] " << (*labelshape)[c]
224+
<< " op_dim[dim_ptr] " << op_dim[dim_ptr];
217225
PADDLE_ENFORCE_EQ(
218226
(*labelshape)[c],
219227
op_dim[dim_ptr],

test/legacy_test/test_cumulative_trapezoid.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,22 @@ def set_api(self):
9797
self.ref_api = cumulative_trapezoid
9898

9999

100+
class TestCumulativeTrapezoidZeroSizeTensorCase1(TestCumulativeTrapezoidAPI):
101+
def set_args(self):
102+
self.y = np.random.random((3, 3, 0)).astype('float32')
103+
self.x = np.random.random(3).astype('float32')
104+
self.dx = None
105+
self.axis = 0
106+
107+
108+
class TestCumulativeTrapezoidZeroSizeTensorCase2(TestCumulativeTrapezoidAPI):
109+
def set_args(self):
110+
self.y = np.random.random((1, 3, 3)).astype('float32')
111+
self.x = np.random.random((0, 3, 3)).astype('float32')
112+
self.dx = None
113+
self.axis = -1
114+
115+
100116
if __name__ == '__main__':
101117
paddle.enable_static()
102118
unittest.main()

test/legacy_test/test_einsum.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,8 @@ def setUpClass(cls):
147147
"I": np.random.rand(2, 2),
148148
"J": np.random.rand(1, 3, 5),
149149
"K": np.random.rand(1, 2, 3, 4),
150+
"L": np.random.rand(2, 0, 13),
151+
"M": np.random.rand(13),
150152
}
151153

152154
def _get_place(self, force_to_use_cpu=False):
@@ -320,6 +322,42 @@ def setUp(self):
320322
self.sample = {"paradigm": "blq,bhlk->bhlqk", "data": ["J", "K"]}
321323

322324

325+
class TestEinsumZeroSizeTensor(TestEinsum):
326+
def setUp(self):
327+
self.sample = {"paradigm": "...i, ...i", "data": ["L", "M"]}
328+
329+
def test_backward(self):
330+
operands = [
331+
TestEinsum.TEST_SAMPLES[operand] for operand in self.sample["data"]
332+
]
333+
expected_result = np.einsum(self.sample["paradigm"], *operands)
334+
equation = self.sample["paradigm"]
335+
336+
with paddle.base.dygraph.guard(self._get_place(force_to_use_cpu=False)):
337+
pd_operands = [
338+
paddle.to_tensor(operand, stop_gradient=False)
339+
for operand in operands
340+
]
341+
result = paddle.einsum(equation, *pd_operands)
342+
self.check_output_equal(result.numpy(), expected_result)
343+
loss = result.sum()
344+
loss.backward()
345+
for x in pd_operands:
346+
np.testing.assert_allclose(x.grad.shape, x.shape)
347+
348+
with paddle.base.dygraph.guard(self._get_place(force_to_use_cpu=True)):
349+
pd_operands = [
350+
paddle.to_tensor(operand, stop_gradient=False)
351+
for operand in operands
352+
]
353+
result = paddle.einsum(equation, *pd_operands)
354+
self.check_output_equal(result.numpy(), expected_result)
355+
loss = result.sum()
356+
loss.backward()
357+
for x in pd_operands:
358+
np.testing.assert_allclose(x.grad.shape, x.shape)
359+
360+
323361
class TestNumpyTests(unittest.TestCase):
324362
def setUp(self):
325363
pass

test/legacy_test/test_fused_feedforward_op.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -496,5 +496,14 @@ def test_dropout_mode():
496496
self.assertRaises(ValueError, test_dropout_mode)
497497

498498

499+
class APITestStaticFusedFFNZeroSizeTensor(unittest.TestCase):
500+
def setUp(self):
501+
self.dtype = "float32"
502+
self.layer_norm_dtype = "float32"
503+
self.batch_size = 1
504+
self.d_model = 8
505+
self.dim_feedforward = 0
506+
507+
499508
if __name__ == "__main__":
500509
unittest.main()

0 commit comments

Comments (0)