diff --git a/paddle/phi/kernels/impl/kldiv_loss_grad_kernel_impl.h b/paddle/phi/kernels/impl/kldiv_loss_grad_kernel_impl.h
index dffb3f7b108559..490b4c255d19d6 100644
--- a/paddle/phi/kernels/impl/kldiv_loss_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/kldiv_loss_grad_kernel_impl.h
@@ -48,6 +48,11 @@ void KLDivLossGradKernel(const Context& dev_ctx,
                          const std::string& reduction,
                          bool log_target,
                          DenseTensor* d_x) {
+  if (d_x->numel() == 0) {
+    dev_ctx.template Alloc<T>(d_x);
+    return;
+  }
+
   auto& place = *dev_ctx.eigen_device();
   auto* target = &label;
   auto* input_grad = d_x;
diff --git a/paddle/phi/kernels/impl/kldiv_loss_kernel_impl.h b/paddle/phi/kernels/impl/kldiv_loss_kernel_impl.h
index 6afbfe5d529786..03c2f8dd935c10 100644
--- a/paddle/phi/kernels/impl/kldiv_loss_kernel_impl.h
+++ b/paddle/phi/kernels/impl/kldiv_loss_kernel_impl.h
@@ -18,8 +18,8 @@
 #include "paddle/common/hostdevice.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
-
 namespace phi {
 using Array1 = Eigen::DSizes<int, 1>;
 template <typename T>
@@ -48,6 +48,11 @@ void KLDivLossKernel(const Context& dev_ctx,
                      const std::string& reduction,
                      bool log_target,
                      DenseTensor* out) {
+  if (x.numel() == 0) {
+    phi::Full<T, Context>(
+        dev_ctx, phi::IntArray(common::vectorize(out->dims())), NAN, out);
+    return;
+  }
   auto& place = *(dev_ctx.eigen_device());
   auto* input = &x;
   auto* target = &label;
diff --git a/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc b/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc
index 30d441c3fd6dfa..23333e3c5826d0 100644
--- a/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc
+++ b/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc
@@ -15,8 +15,8 @@ limitations under the License.
  */
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/softmax_kernel.h"
-
 namespace phi {
 template <typename T, typename Context>
@@ -31,6 +31,11 @@ void KLDivLossKernel(const Context& dev_ctx,
   if (out->numel() == 0) {
     return;
   }
+  if (x.numel() == 0) {
+    phi::Full<T, Context>(
+        dev_ctx, phi::IntArray(common::vectorize(out->dims())), NAN, out);
+    return;
+  }
 
   int r = 0;
 
diff --git a/test/legacy_test/test_hinge_embedding_loss.py b/test/legacy_test/test_hinge_embedding_loss.py
index 436d4e82adaf5a..1bd2c27e84aaae 100644
--- a/test/legacy_test/test_hinge_embedding_loss.py
+++ b/test/legacy_test/test_hinge_embedding_loss.py
@@ -201,5 +201,44 @@ def test_value_error():
         self.assertRaises(ValueError, test_value_error)
 
 
+class TestFunctionalHingeEmbeddingLoss_ZeroSize(unittest.TestCase):
+    def setUp(self):
+        self.margin = 1.0
+        self.shape = (0, 10, 5)  # zero size
+        self.input_np = np.random.random(size=self.shape).astype(np.float64)
+        self.label_np = 2 * np.random.randint(0, 2, size=self.shape) - 1.0
+
+    def run_dynamic_check(self, place=paddle.CPUPlace()):
+        paddle.disable_static(place=place)
+        input = paddle.to_tensor(self.input_np)
+        input.stop_gradient = False
+        label = paddle.to_tensor(self.label_np, dtype="float64")
+
+        dy_result = paddle.nn.functional.hinge_embedding_loss(input, label)
+        expected = calc_hinge_embedding_loss(self.input_np, self.label_np)
+        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
+        self.assertEqual(dy_result.shape, [])
+
+        dy_result = paddle.nn.functional.hinge_embedding_loss(
+            input, label, reduction='none'
+        )
+        expected = calc_hinge_embedding_loss(
+            self.input_np, self.label_np, reduction='none'
+        )
+        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
+
+        loss = paddle.sum(dy_result)
+        loss.backward()
+        self.assertEqual(input.grad.shape, input.shape)
+
+    def test_cpu(self):
+        self.run_dynamic_check(place=paddle.CPUPlace())
+
+    def test_gpu(self):
+        if not paddle.is_compiled_with_cuda():
+            return
+        self.run_dynamic_check(place=paddle.CUDAPlace(0))
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/legacy_test/test_kldiv_loss_op.py b/test/legacy_test/test_kldiv_loss_op.py
index 780474221db1a9..8d35adddd4769c 100644
--- a/test/legacy_test/test_kldiv_loss_op.py
+++ b/test/legacy_test/test_kldiv_loss_op.py
@@ -115,6 +115,52 @@ def initTestCase(self):
         self.log_target = True
 
 
+class TestKLDivLossOp_ZeroSize1(TestKLDivLossOp):
+    def setUp(self):
+        self.initTestCase()
+        self.op_type = 'kldiv_loss'
+        self.python_api = kl_div
+        self.public_python_api = paddle.nn.functional.kl_div
+        x = np.random.uniform(-10, 10, self.x_shape).astype('float64')
+        target = np.random.uniform(-10, 10, self.x_shape).astype('float64')
+
+        self.attrs = {
+            "reduction": self.reduction,
+            "log_target": self.log_target,
+        }
+
+        self.inputs = {
+            'X': x,
+            'Target': target,
+        }
+        loss = kldiv_loss(x, target, self.reduction, self.log_target)
+        self.outputs = {'Loss': loss.astype('float64')}
+
+    def initTestCase(self):
+        # return NAN
+        self.x_shape = (0, 2, 7, 7)
+        self.reduction = 'mean'
+        self.log_target = False
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, equal_nan=True)
+
+    def test_check_grad(self):
+        self.check_grad(
+            ['X'],
+            'Loss',
+            no_grad_set={"Target"},
+            check_pir=True,
+        )
+
+
+class TestKLDivLossOp_ZeroSize2(TestKLDivLossOp_ZeroSize1):
+    def initTestCase(self):
+        self.x_shape = (0, 2, 7, 7)
+        self.reduction = 'none'
+        self.log_target = False
+
+
 class TestKLDivLossDygraph(unittest.TestCase):
     def run_kl_loss(self, reduction, shape=(5, 20), log_target=False):
         x = np.random.uniform(-10, 10, shape).astype('float64')
diff --git a/test/legacy_test/test_l1_loss.py b/test/legacy_test/test_l1_loss.py
index 4061652831f5b0..22236bb3f1c403 100644
--- a/test/legacy_test/test_l1_loss.py
+++ b/test/legacy_test/test_l1_loss.py
@@ -201,6 +201,44 @@ def test_value_error():
         self.assertRaises(ValueError, test_value_error)
 
 
+class TestClassL1Loss_ZeroSize(unittest.TestCase):
+    def setUp(self):
+        self.input_np = np.random.random(size=(0, 10, 5)).astype(np.float32)
+        self.label_np = np.random.random(size=(0, 10, 5)).astype(np.float32)
+
+    def run_imperative(self):
+        input = paddle.to_tensor(self.input_np)
+        label = paddle.to_tensor(self.label_np)
+        input.stop_gradient = False
+        l1_loss = paddle.nn.loss.L1Loss()
+        dy_result = l1_loss(input, label)
+        expected = np.mean(np.abs(self.input_np - self.label_np))
+        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
+        self.assertEqual(dy_result.shape, [])
+
+        l1_loss = paddle.nn.loss.L1Loss(reduction='sum')
+        dy_result = l1_loss(input, label)
+        expected = np.sum(np.abs(self.input_np - self.label_np))
+        np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
+        self.assertEqual(dy_result.shape, [])
+
+        loss = paddle.sum(dy_result)
+        loss.backward()
+        np.testing.assert_allclose(input.grad.shape, input.shape)
+
+    def test_cpu(self):
+        paddle.disable_static(place=paddle.base.CPUPlace())
+        self.run_imperative()
+        paddle.enable_static()
+
+    def test_gpu(self):
+        if not base.core.is_compiled_with_cuda():
+            return
+        paddle.disable_static(place=paddle.base.CUDAPlace(0))
+        self.run_imperative()
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()