Commit 8e557e2

Fix
1 parent 5172626 commit 8e557e2
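
Summary from the diff: the multi_dot forward and gradient kernels gain a zero-size shortcut. When any input has numel() == 0, each non-empty output is zero-filled via phi::Full and the kernel returns early instead of calling into the BLAS chain; matching zero-size cases are added to test_multi_dot_op.py.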

File tree

2 files changed: 65 additions & 17 deletions


paddle/phi/kernels/impl/multi_dot_kernel_impl.h

Lines changed: 28 additions & 15 deletions
@@ -12,25 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
 #pragma once
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
-
 namespace phi {
 
 template <typename Context, typename T>
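
The dropped block is a duplicate of the license header already present at the top of the file. The new include pulls in phi::Full, which the zero-size shortcuts added below use to fill tensors with zeros.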
@@ -195,6 +181,19 @@ void MultiDotKernel(const Context& ctx,
   std::vector<phi::DDim> ins_dims(n);
   GetDims<Context, T>(ins, &ins_dims);
 
+  // If any numel is 0, then return.
+  bool size_0 = false;
+  for (size_t i = 0; i < n; i++) {
+    if (x[i]->numel() == 0) size_0 = true;
+  }
+  if (size_0) {
+    // For example: [2, 0], [0, 4] -> [2, 4]
+    if (out && out->numel() > 0) {
+      phi::Full<T, Context>(
+          ctx, phi::IntArray(common::vectorize(out->dims())), 0, out);
+    }
+    return;
+  }
   const T scale = static_cast<T>(1.0);
   if (n == 2) {
     auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(ins_dims[0], 0, false);
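
The shortcut matches standard matrix-product semantics: with a zero inner dimension, every output element is an empty sum, so the result is a well-defined all-zero matrix. A minimal NumPy sketch of the expected forward behavior (NumPy stands in here only to illustrate the math, not the kernel itself):

import numpy as np

# A (2, 0) times B (0, 4): the contraction axis is empty,
# so each output element is an empty sum, i.e. exactly 0.0.
A = np.random.random((2, 0))
B = np.random.random((0, 4))

out = np.linalg.multi_dot([A, B])
print(out.shape)  # (2, 4)
print(out.any())  # False: every entry is zero
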
@@ -347,9 +346,23 @@ void MultiDotGradKernel(const Context& ctx,
 
   auto blas = phi::funcs::GetBlas<Context, T>(ctx);
 
+  bool size_0 = false;
   const auto n = ins.size();
   for (size_t i = 0; i < n; i++) {
     ctx.template Alloc<T>(dx[i]);
+
+    if (dx[i]->numel() == 0) {
+      size_0 = true;
+    }
+  }
+  if (size_0) {
+    for (size_t i = 0; i < n; i++) {
+      if (dx[i]->numel() > 0) {
+        phi::Full<T, Context>(
+            ctx, phi::IntArray(common::vectorize(dx[i]->dims())), 0, dx[i]);
+      }
+    }
+    return;
   }
 
   std::vector<phi::DDim> ins_dims(n);
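
The gradient path follows the same reasoning: every product contributing to a gradient contains an all-zero factor, so each non-empty dx[i] is exactly zero. A hand-derived NumPy check for the three-matrix chain used in the new tests:

import numpy as np

A = np.random.random((2, 10))
B = np.random.random((10, 0))
C = np.random.random((0, 3))

out = np.linalg.multi_dot([A, B, C])  # (2, 3), all zeros
grad_out = np.ones_like(out)          # some upstream gradient

# For L = sum(grad_out * (A @ B @ C)), dL/dA = grad_out @ (B @ C).T.
# B @ C is a (10, 3) all-zero matrix, so dL/dA is all zeros too,
# which is exactly what the kernel's zero-fill produces.
dA = grad_out @ (B @ C).T
print(dA.shape, dA.any())             # (2, 10) False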

test/legacy_test/test_multi_dot_op.py

Lines changed: 37 additions & 2 deletions
@@ -31,14 +31,19 @@ def setUp(self):
         self.op_type = "multi_dot"
         self.python_api = paddle.linalg.multi_dot
         self.dtype = self.get_dtype()
+        self.init_shape()
         self.get_inputs_and_outputs()
 
+    def init_shape(self):
+        self.A_shape = (2, 8)
+        self.B_shape = (8, 4)
+
     def get_dtype(self):
         return "float64"
 
     def get_inputs_and_outputs(self):
-        self.A = np.random.random((2, 8)).astype(self.dtype)
-        self.B = np.random.random((8, 4)).astype(self.dtype)
+        self.A = np.random.random(self.A_shape).astype(self.dtype)
+        self.B = np.random.random(self.B_shape).astype(self.dtype)
         self.inputs = {'X': [('x0', self.A), ('x1', self.B)]}
         self.outputs = {'Out': multi_dot([self.A, self.B])}
 
@@ -55,6 +60,36 @@ def get_dtype(self):
         return "float16"
 
 
+class TestMultiDotOp_ZeroSize1(TestMultiDotOp):
+    def get_inputs_and_outputs(self):
+        # result shape: [2, 3]
+        self.A = np.random.random((2, 10)).astype(self.dtype)
+        self.B = np.random.random((10, 0)).astype(self.dtype)
+        self.C = np.random.random((0, 3)).astype(self.dtype)
+        self.inputs = {'X': [('x0', self.A), ('x1', self.B), ('x2', self.C)]}
+        self.outputs = {'Out': multi_dot([self.A, self.B, self.C])}
+
+    def test_check_grad(self):
+        self.check_grad(['x0'], 'Out', check_pir=True)
+        self.check_grad(['x1'], 'Out', check_pir=True)
+        self.check_grad(['x2'], 'Out', check_pir=True)
+
+
+class TestMultiDotOp_ZeroSize2(TestMultiDotOp):
+    def get_inputs_and_outputs(self):
+        # result shape: [0, 3]
+        self.A = np.random.random((0, 10)).astype(self.dtype)
+        self.B = np.random.random((10, 4)).astype(self.dtype)
+        self.C = np.random.random((4, 3)).astype(self.dtype)
+        self.inputs = {'X': [('x0', self.A), ('x1', self.B), ('x2', self.C)]}
+        self.outputs = {'Out': multi_dot([self.A, self.B, self.C])}
+
+    def test_check_grad(self):
+        self.check_grad(['x0'], 'Out', check_pir=True)
+        self.check_grad(['x1'], 'Out', check_pir=True)
+        self.check_grad(['x2'], 'Out', check_pir=True)
+
+
 @unittest.skipIf(
     not core.is_compiled_with_cuda()
     or not core.is_bfloat16_supported(core.CUDAPlace(0)),
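
For reference, an end-to-end call that exercises the new path; a sketch assuming a Paddle build that includes this commit:

import paddle

a = paddle.randn([2, 10])
b = paddle.randn([10, 0])
c = paddle.randn([0, 3])

# With the zero-size shortcut, the kernel zero-fills the [2, 3]
# result instead of dispatching the empty matrices to BLAS.
out = paddle.linalg.multi_dot([a, b, c])
print(out.shape)         # [2, 3]
print(float(out.sum()))  # 0.0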
