PaddlePaddle · luotao1 · Jun 13, 2025 · Jun 12, 2025 · DanielSun11 · Jun 5, 2025
diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc
@@ -2992,7 +2992,12 @@ void MatmulInferMeta(const MetaTensor& x,
   } else {
     new_dims.reserve(ndims_x);
     for (size_t i = 0; i < ndims_x - 2; ++i) {
-      new_dims.push_back(std::max(dims_x[i], dims_y[i]));
+      // A zero-size leading dim wins the broadcast: 0 vs. N gives 0, not max.
+      if (dims_x[i] == 0 || dims_y[i] == 0) {
+        new_dims.push_back(0);
+      } else {
+        new_dims.push_back(std::max(dims_x[i], dims_y[i]));
+      }
     }
   }
   if (!x_broadcasted) {

diff --git a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
@@ -230,11 +230,26 @@ void MatmulGradKernel(const Context& dev_ctx,
                       DenseTensor* dx,
                       DenseTensor* dy) {
   if (x.numel() == 0) {
-    if (dy != nullptr) {
-      dev_ctx.template Alloc<T>(dx);
-      phi::FullKernel<T>(
-          dev_ctx, common::vectorize(y.dims()), 0.0, y.dtype(), dy);
-    }
+    // x is empty: dx is only allocated and dy is zero-filled with y's shape.
+    // Either output pointer may be null (the code this replaces checked dy).
+    if (dx != nullptr) {
+      dev_ctx.template Alloc<T>(dx);
+    }
+    if (dy != nullptr) {
+      phi::Full<T, Context>(
+          dev_ctx, phi::IntArray(common::vectorize(y.dims())), 0, dy);
+    }
+    return;
+  }
+  if (y.numel() == 0) {
+    // Mirror case: dy is only allocated and dx is zero-filled with x's shape.
+    if (dy != nullptr) {
+      dev_ctx.template Alloc<T>(dy);
+    }
+    if (dx != nullptr) {
+      phi::Full<T, Context>(
+          dev_ctx, phi::IntArray(common::vectorize(x.dims())), 0, dx);
+    }
     return;
   }
   // get dims

diff --git a/paddle/phi/kernels/impl/matmul_kernel_impl.h b/paddle/phi/kernels/impl/matmul_kernel_impl.h
@@ -38,6 +38,7 @@ limitations under the License. */
 #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060
 #include "paddle/phi/kernels/autotune/auto_tune_base.h"
 #endif
+#include "paddle/phi/kernels/full_kernel.h"
 
 COMMON_DECLARE_bool(cuda_core_int8_gemm);
 
@@ -2007,23 +2008,9 @@ void MatmulKernel(const Context& dev_ctx,
                   bool transpose_y,
                   DenseTensor* out) {
   if (x.numel() == 0 || y.numel() == 0) {
-    auto x_dims = x.dims();
-    auto y_dims = y.dims();
-    if (transpose_x) {
-      std::swap(x_dims[x_dims.size() - 1], x_dims[x_dims.size() - 2]);
-    }
-    if (transpose_y) {
-      std::swap(y_dims[y_dims.size() - 1], y_dims[y_dims.size() - 2]);
-    }
-    std::vector<std::int64_t> out_dims(x_dims.size() - 1 + y_dims.size() - 1);
-    for (int64_t i = 0; i < x_dims.size() - 1; ++i) {
-      out_dims[i] = x_dims[i];
-    }
-    for (int64_t i = 1; i < y_dims.size(); ++i) {
-      out_dims[x_dims.size() - 1 + i - 1] = y_dims[i];
-    }
-    out->Resize(phi::make_ddim(out_dims));
-    dev_ctx.template Alloc<T>(out);
+    // E.g. x [1, 1, 5, 0] @ y [1, 1, 0, 5]: out is [1, 1, 5, 5], zero-filled.
+    phi::Full<T, Context>(
+        dev_ctx, phi::IntArray(common::vectorize(out->dims())), 0, out);
     return;
   }
   PADDLE_ENFORCE_GE(

diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
@@ -2942,8 +2942,19 @@ def outer(x: Tensor, y: Tensor, name: str | None = None) -> Tensor:
 
 
     """
-    nx = x.reshape((-1, 1))
-    ny = y.reshape((1, -1))
+    xshape = x.shape
+    yshape = y.shape
+    # Empty operands are reshaped to (0, 0) instead of going through -1.
+    # NOTE(review): if only one operand is empty, the matmul below sees
+    # (0, 0) @ (1, n) or (m, 1) @ (0, 0) -- confirm those inner dims are valid.
+    if math.prod(xshape) == 0:  # If the size is 0
+        nx = x.reshape((0, 0))
+    else:
+        nx = x.reshape((-1, 1))
+    if math.prod(yshape) == 0:  # If the size is 0
+        ny = y.reshape((0, 0))
+    else:
+        ny = y.reshape((1, -1))
 
     if in_dynamic_mode():
         return _C_ops.matmul(nx, ny, False, False)

diff --git a/test/legacy_test/test_matmul_v2_op.py b/test/legacy_test/test_matmul_v2_op.py
@@ -943,6 +943,47 @@ def func_dygraph_matmul(self):
         paddle.enable_static()
 
 
+class TestMatMulOp_ZeroSize(OpTest):
+    """matmul_v2 OpTest in which one operand has a zero-size dimension."""
+
+    def setUp(self):
+        self.op_type = "matmul_v2"
+        self.python_api = paddle.matmul
+        self.init_input_output()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_base_dtype(self.x),
+            'Y': OpTest.np_dtype_to_base_dtype(self.y),
+        }
+        # NumPy's matmul on the same inputs serves as the reference output.
+        self.out = np.matmul(self.x, self.y)
+        self.attrs = {'axis': -1, 'use_mkldnn': False}
+        self.outputs = {'Out': self.out}
+
+    def init_input_output(self):
+        # y's second dimension is 0 while x's is 1.
+        self.x = np.random.random((1, 1, 2, 3))
+        self.y = np.random.random((1, 0, 3, 2))
+
+    def test_check_output(self):
+        self.check_output(check_pir=True)
+
+    def test_check_grad(self):
+        self.check_grad(
+            ['X', 'Y'],
+            'Out',
+            check_pir=True,
+        )
+
+
+class TestMatMulOp_ZeroSize2(TestMatMulOp_ZeroSize):
+    """Same checks with the zero-size dimension on x instead of y."""
+
+    def init_input_output(self):
+        self.x = np.random.random((0, 3, 2, 3))
+        self.y = np.random.random((1, 3, 3, 2))
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
diff --git a/test/legacy_test/test_outer.py b/test/legacy_test/test_outer.py
@@ -172,5 +172,30 @@ def test_errors_dynamic(self):
         self.assertRaises(Exception, paddle.outer, x_data, y_data)
 
 
+class TestMultiplyApi_ZeroSize(unittest.TestCase):
+    """Dygraph check of paddle.outer on zero-size inputs.
+
+    NOTE(review): the class and method names say "multiply", but the body
+    exercises paddle.outer -- likely a copy-paste leftover; confirm and rename.
+    """
+
+    def test_multiply_dynamic(self):
+        x_data = np.random.rand(5, 10, 0).astype(np.float64)
+        y_data = np.random.rand(0, 10).astype(np.float64)
+        paddle.disable_static()
+        x = paddle.to_tensor(x_data)
+        y = paddle.to_tensor(y_data)
+        x.stop_gradient = False
+        y.stop_gradient = False
+        res = paddle.outer(x, y)
+        np.testing.assert_allclose(
+            res.numpy(), np.outer(x_data, y_data), rtol=1e-05
+        )
+        loss = paddle.sum(res)
+        loss.backward()
+        # Only x.grad's shape is verified; y.grad is not checked here.
+        np.testing.assert_allclose(x.grad.shape, x.shape)
+
+
 if __name__ == '__main__':
     unittest.main()