diff --git a/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc b/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc
index 41dcc70deaa927..6952390fd87efb 100644
--- a/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/fill_diagonal_grad_kernel.cc
@@ -28,6 +28,7 @@ void FillDiagonalGradKernel(const Context& dev_ctx,
                             DenseTensor* x_grad) {
   if (x_grad) {
     T* data = dev_ctx.template Alloc<T>(x_grad);
+    if (x_grad->numel() == 0) return;
     phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, x_grad);
 
     auto dx_dims = x_grad->dims();
diff --git a/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc b/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc
index 0e7804c99736a9..fed6a03135d61c 100644
--- a/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc
+++ b/paddle/phi/kernels/cpu/fill_diagonal_kernel.cc
@@ -30,6 +30,8 @@ void FillDiagonalKernel(const Context& dev_ctx,
   T temp_var = static_cast<T>(value);
 
   T* out_data = dev_ctx.template Alloc<T>(out);
+  if (out && out->numel() == 0) return;
+
   phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
 
   auto out_dims = out->dims();
diff --git a/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu b/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu
index 5483d69460d8d1..39744870fdb568 100644
--- a/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu
@@ -52,6 +52,7 @@ void FillDiagonalGradKernel(const Context& dev_ctx,
                             DenseTensor* x_grad) {
   const int64_t kMaxBlockDim = 512;
   auto* in_data = dev_ctx.template Alloc<T>(x_grad);
+  if (x_grad && x_grad->numel() == 0) return;
 
   phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, x_grad);
 
diff --git a/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu b/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu
index 31e5b975afa10f..99c6b468a7cf7c 100644
--- a/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu
+++ b/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu
@@ -50,6 +50,10 @@ void FillDiagonalKernel(const Context& dev_ctx,
                         int offset,
                         bool wrap,
                         DenseTensor* out) {
+  if (out && out->numel() == 0) {
+    dev_ctx.template Alloc<T>(out);
+    return;
+  }
   const int64_t kMaxBlockDim = 512;
   phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
 
diff --git a/test/legacy_test/test_tensor_fill_diagonal_.py b/test/legacy_test/test_tensor_fill_diagonal_.py
index 6f743e9794647c..4d629c83cf5b6c 100644
--- a/test/legacy_test/test_tensor_fill_diagonal_.py
+++ b/test/legacy_test/test_tensor_fill_diagonal_.py
@@ -296,5 +296,47 @@ def test_dim_larger2_normal(self):
         )
 
 
+class TensorFillDiagonal_ZeroSize(unittest.TestCase):
+    def _test_normal(self, shape):
+        expected_np = np.random.random(shape)
+        expected_grad = np.random.random(shape)
+
+        places = []
+        if (
+            os.environ.get('FLAGS_CI_both_cpu_and_gpu', 'False').lower()
+            in ['1', 'true', 'on']
+            or not base.core.is_compiled_with_cuda()
+        ):
+            places.append(base.CPUPlace())
+        if base.core.is_compiled_with_cuda():
+            places.append(base.CUDAPlace(0))
+
+        for idx, p in enumerate(places):
+            if idx == 0:
+                paddle.set_device('cpu')
+            else:
+                paddle.set_device('gpu')
+
+            x = paddle.ones(shape)
+            x.stop_gradient = False
+            y = x * 2
+            y.retain_grads()
+            y.fill_diagonal_(1, offset=0, wrap=True)
+            loss = y.sum()
+            loss.backward()
+
+            self.assertEqual(
+                (y.numpy().astype('float32') == expected_np).all(), True
+            )
+            self.assertEqual(
+                (y.grad.numpy().astype('float32') == expected_grad).all(),
+                True,
+            )
+
+    def test_normal(self):
+        self._test_normal([0, 3])
+        self._test_normal([0, 0])
+
+
 if __name__ == '__main__':
     unittest.main()
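For reviewers who want to exercise the new zero-size path outside the CI harness, here is a minimal sketch. It assumes a build that includes this patch and uses the same dynamic-graph API as the new test (`paddle.ones`, `fill_diagonal_`, `backward`); the asserts on shapes are illustrative, not taken from the test itself.

```python
import paddle

# Zero-size tensor: the early returns added in this patch make the
# forward and backward kernels no-ops once numel() == 0, instead of
# proceeding into phi::Copy and the CUDA launch setup.
x = paddle.ones([0, 3])
x.stop_gradient = False

y = x * 2
y.fill_diagonal_(1, offset=0, wrap=True)  # no elements, so nothing to fill

loss = y.sum()   # sum of an empty tensor is 0
loss.backward()  # grad kernel also early-returns on the empty tensor

assert y.shape == [0, 3]
assert x.grad is not None and x.grad.shape == [0, 3]
```

Note that the GPU forward kernel still calls `dev_ctx.template Alloc<T>(out)` before returning, so `out` is a valid (empty) allocation that downstream ops such as `sum` can consume.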