From 569d3381f017acd734c8483bb11312fd654d8f0e Mon Sep 17 00:00:00 2001
From: zhiboniu
Date: Wed, 29 Sep 2021 06:56:08 +0000
Subject: [PATCH] fill_diagonal op: fix border crossing caused by offset

---
 paddle/fluid/operators/fill_diagonal_op.cc         | 18 ++++++++---
 paddle/fluid/operators/fill_diagonal_op.cu         | 16 +++++++---
 .../unittests/test_tensor_fill_diagonal_.py        | 30 +++++++++++++++++++
 3 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/operators/fill_diagonal_op.cc b/paddle/fluid/operators/fill_diagonal_op.cc
index db55c3e99693a..be3239d504844 100644
--- a/paddle/fluid/operators/fill_diagonal_op.cc
+++ b/paddle/fluid/operators/fill_diagonal_op.cc
@@ -108,8 +108,15 @@ class FillIDiagonalKernel : public framework::OpKernel<T> {
       size = std::min(size, out_dims[1] * out_dims[1]);
     }
 
-    for (int64_t i = offset; i < size; i += strides) {
-      out_data[i] = temp_var;
+    for (int64_t i = 0; i < size; i += strides) {
+      // Check that the position shifted by `offset` is still on the same
+      // line; the write must not cross line boundaries.
+      // out_dims[1] also works for tensors with dim > 2, because all of
+      // their dimensions are required to be equal.
+      if (i % out_dims[1] + offset >= 0 &&
+          i % out_dims[1] + offset < out_dims[1]) {
+        out_data[i + offset] = temp_var;
+      }
     }
   }
 };
@@ -176,8 +183,11 @@ class FillIDiagonalGradKernel : public framework::OpKernel<T> {
         wrapsize = size;
       }
 
-      for (int64_t i = offset; i < wrapsize; i += strides) {
-        data[i] = T(0);
+      for (int64_t i = 0; i < wrapsize; i += strides) {
+        if (i % dx_dims[1] + offset >= 0 &&
+            i % dx_dims[1] + offset < dx_dims[1]) {
+          data[i + offset] = T(0);
+        }
       }
     }
   }
diff --git a/paddle/fluid/operators/fill_diagonal_op.cu b/paddle/fluid/operators/fill_diagonal_op.cu
index 5047059fb364d..15eabd4216d0b 100644
--- a/paddle/fluid/operators/fill_diagonal_op.cu
+++ b/paddle/fluid/operators/fill_diagonal_op.cu
@@ -22,11 +22,19 @@ using CUDADeviceContext = paddle::platform::CUDADeviceContext;
 
 template <typename T>
 __global__ void fill_constant_kernel(const int64_t featuresize, T* in_data,
-                                     int64_t strides, int offset, T fillvar) {
+                                     int64_t strides, int offset, T fillvar,
+                                     int dims) {
   for (int64_t idx = blockIdx.x * featuresize + threadIdx.x;
        idx * strides + offset < (blockIdx.x + 1) * featuresize;
        idx += blockDim.x) {
-    in_data[idx * strides + offset] = fillvar;
+    // Check that the position shifted by `offset` is still on the same
+    // line; the write must not cross line boundaries.
+    // `dims` (= out_dims[1]) also works for tensors with dim > 2, because
+    // all of their dimensions are required to be equal.
+    if ((idx * strides) % dims + offset < dims &&
+        (idx * strides) % dims + offset >= 0) {
+      in_data[idx * strides + offset] = fillvar;
+    }
   }
 }
 
@@ -62,7 +70,7 @@ class FillIDiagonalCUDAKernel : public framework::OpKernel<T> {
 
     int64_t kBlockDim = std::min(int64_t(size / strides), kMaxBlockDim);
     fill_constant_kernel<<<1, kBlockDim, 0>>>(size, out_data, strides,
-                                              offset, temp_var);
+                                              offset, temp_var, out_dims[1]);
   }
 };
 
@@ -96,7 +104,7 @@ class FillIDiagonalGradCUDAKernel : public framework::OpKernel<T> {
 
     int64_t kBlockDim = std::min(int64_t(size), kMaxBlockDim);
     fill_constant_kernel<<<1, kBlockDim, 0>>>(wrapsize, in_data, strides,
-                                              offset, T(0));
+                                              offset, T(0), out_dims[1]);
   }
 };
 
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py
index 41a8a9750cb64..3beb6a537eca0 100644
--- a/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py
+++ b/python/paddle/fluid/tests/unittests/test_tensor_fill_diagonal_.py
@@ -50,6 +50,36 @@ def test_dim2_normal(self):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True)
 
+    def test_offset(self):
+        expected_np = np.array(
+            [[2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype('float32')
+        expected_grad = np.array(
+            [[1, 1, 0], [1, 1, 1], [1, 1, 1]]).astype('float32')
+
+        typelist = ['float32', 'float64', 'int32', 'int64']
+        places = [fluid.CPUPlace()]
+        if fluid.core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+
+        for idx, p in enumerate(places):
+            if idx == 0:
+                paddle.set_device('cpu')
+            else:
+                paddle.set_device('gpu')
+            for dtype in typelist:
+                x = paddle.ones((3, 3), dtype=dtype)
+                x.stop_gradient = False
+                y = x * 2
+                y.fill_diagonal_(1, offset=2, wrap=True)
+                loss = y.sum()
+                loss.backward()
+
+                self.assertEqual(
+                    (y.numpy().astype('float32') == expected_np).all(), True)
+                self.assertEqual(
+                    (y.grad.numpy().astype('float32') == expected_grad).all(),
+                    True)
+
     def test_bool(self):
         expected_np = np.array(
             [[False, True, True], [True, False, True], [True, True, False]])
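
Note for reviewers: the guard added above can be sketched in plain NumPy. This
is a minimal illustration, not Paddle's implementation; fill_diagonal_ref is a
hypothetical helper introduced only for this note. Like the kernels, it walks
the flattened tensor with stride ncols + 1 and writes only when
i % ncols + offset stays inside [0, ncols); it reproduces expected_np from
test_offset above.

import numpy as np


def fill_diagonal_ref(x, value, offset=0, wrap=False):
    """Hypothetical NumPy reference for Tensor.fill_diagonal_ (sketch only)."""
    out = x.copy()
    ncols = out.shape[1]
    flat = out.reshape(-1)  # a view over the copy, so writes land in `out`
    strides = ncols + 1     # step between consecutive diagonal elements
    # Without wrap, stop after the first ncols x ncols block, as the kernels do.
    size = flat.size if wrap else min(flat.size, ncols * ncols)
    for i in range(0, size, strides):
        # The patch's guard: the position shifted by `offset` must stay
        # within the same row; otherwise the write would cross a border.
        if 0 <= i % ncols + offset < ncols:
            flat[i + offset] = value
    return out


x = 2 * np.ones((3, 3), dtype='float32')
y = fill_diagonal_ref(x, 1, offset=2, wrap=True)
assert (y == np.array([[2, 2, 1], [2, 2, 2], [2, 2, 2]], 'float32')).all()

Without the guard, the step i = 4 (stride 4, offset 2) would write flat[6],
i.e. element (2, 0) of the 3 x 3 tensor, which is exactly the cross-border
write this patch prevents.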