Skip to content

Commit b040e89

Browse files
[Accuracy diff No.167] Fix accuracy (output type) diff for paddle.cumsum API (#74625)
* fix(math.py, unary.cc): fix output type diff for cumsum kernel
* fix(math.py): fix output type diff for cumsum kernel
* fix(math.py): fix `cumsum` documentation
* fix(cum/cum_grad.cc/cu, test_cumsum_op.py): fix output type diff for cumsum kernel and add unit test
1 parent 1d9d7a3 commit b040e89

File tree

6 files changed

+229
-1
lines changed

6 files changed

+229
-1
lines changed

paddle/phi/kernels/cpu/cum_grad_kernel.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ PD_REGISTER_KERNEL(cumsum_grad,
5454
phi::CumsumGradKernel,
5555
float,
5656
double,
57+
uint8_t,
58+
int8_t,
5759
int16_t,
5860
int,
5961
int64_t,

paddle/phi/kernels/cpu/cum_kernel.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ PD_REGISTER_KERNEL(cumsum,
273273
phi::CumsumKernel,
274274
float,
275275
double,
276+
uint8_t,
277+
int8_t,
276278
int16_t,
277279
int,
278280
int64_t,

paddle/phi/kernels/gpu/cum_grad_kernel.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ PD_REGISTER_KERNEL(cumsum_grad,
8181
phi::CumsumGradKernel,
8282
float,
8383
double,
84+
uint8_t,
85+
int8_t,
8486
int16_t,
8587
int,
8688
int64_t,

paddle/phi/kernels/gpu/cum_kernel.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,8 @@ PD_REGISTER_KERNEL(cumsum,
508508
phi::CumsumKernel,
509509
float,
510510
double,
511+
uint8_t,
512+
int8_t,
511513
int16_t,
512514
int,
513515
int64_t,

python/paddle/tensor/math.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4152,7 +4152,7 @@ def cumsum(
41524152
Args:
41534153
x (Tensor): The input tensor needed to be cumsumed.
41544154
axis (int, optional): The dimension to accumulate along. -1 means the last dimension. The default (None) is to compute the cumsum over the flattened array.
4155-
dtype (str|paddle.dtype|np.dtype|None, optional): The data type of the output tensor, can be bfloat16, float16, float32, float64, int32, int64, complex64, complex128. If specified, the input tensor is casted to dtype before the operation is performed. This is useful for preventing data type overflows. The default value is None.
4155+
dtype (str|paddle.dtype|np.dtype|None, optional): The data type of the output tensor, can be bfloat16, float16, float32, float64, int32, int64, complex64, complex128. By default, it is int64 if the input x is int8/int16/int32; otherwise, it is None. If it is not None, the input tensor is casted to dtype before the operation is performed. This is useful for preventing data type overflows.
41564156
name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
41574157
41584158
Returns:
@@ -4194,6 +4194,13 @@ def cumsum(
41944194
flatten = False
41954195
if dtype is not None and x.dtype != convert_np_dtype_to_dtype_(dtype):
41964196
x = cast(x, dtype)
4197+
elif isinstance(x, paddle.Tensor) and x.dtype in [
4198+
paddle.uint8,
4199+
paddle.int8,
4200+
paddle.int16,
4201+
paddle.int32,
4202+
]:
4203+
x = cast(x, "int64")
41974204

41984205
if in_dynamic_or_pir_mode():
41994206
if axis is None:

test/legacy_test/test_cumsum_op.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,219 @@ def test_name(self):
124124
self.assertTrue('out' in y.name)
125125

126126

127+
class TestCumsumOp_INT(unittest.TestCase):
128+
def run_cases(self):
129+
data_np = np.arange(12).reshape(3, 4).astype(np.uint8)
130+
data = paddle.to_tensor(data_np)
131+
y = paddle.cumsum(data)
132+
z = np.cumsum(data_np)
133+
np.testing.assert_array_equal(z, y.numpy())
134+
y = paddle.cumsum(data, axis=0)
135+
z = np.cumsum(data_np, axis=0)
136+
np.testing.assert_array_equal(z, y.numpy())
137+
y = paddle.cumsum(data, axis=-1)
138+
z = np.cumsum(data_np, axis=-1)
139+
np.testing.assert_array_equal(z, y.numpy())
140+
y = paddle.cumsum(data, axis=-2)
141+
z = np.cumsum(data_np, axis=-2)
142+
np.testing.assert_array_equal(z, y.numpy())
143+
144+
data_np = np.arange(12).reshape(3, 4).astype(np.int8)
145+
data = paddle.to_tensor(data_np)
146+
y = paddle.cumsum(data)
147+
z = np.cumsum(data_np)
148+
np.testing.assert_array_equal(z, y.numpy())
149+
y = paddle.cumsum(data, axis=0)
150+
z = np.cumsum(data_np, axis=0)
151+
np.testing.assert_array_equal(z, y.numpy())
152+
y = paddle.cumsum(data, axis=-1)
153+
z = np.cumsum(data_np, axis=-1)
154+
np.testing.assert_array_equal(z, y.numpy())
155+
y = paddle.cumsum(data, axis=-2)
156+
z = np.cumsum(data_np, axis=-2)
157+
np.testing.assert_array_equal(z, y.numpy())
158+
159+
data_np = np.arange(12).reshape(3, 4).astype(np.int16)
160+
data = paddle.to_tensor(data_np)
161+
y = paddle.cumsum(data)
162+
z = np.cumsum(data_np)
163+
np.testing.assert_array_equal(z, y.numpy())
164+
y = paddle.cumsum(data, axis=0)
165+
z = np.cumsum(data_np, axis=0)
166+
np.testing.assert_array_equal(z, y.numpy())
167+
y = paddle.cumsum(data, axis=-1)
168+
z = np.cumsum(data_np, axis=-1)
169+
np.testing.assert_array_equal(z, y.numpy())
170+
y = paddle.cumsum(data, axis=-2)
171+
z = np.cumsum(data_np, axis=-2)
172+
np.testing.assert_array_equal(z, y.numpy())
173+
174+
data_np = np.arange(12).reshape(3, 4).astype(np.int32)
175+
data = paddle.to_tensor(data_np)
176+
y = paddle.cumsum(data)
177+
z = np.cumsum(data_np)
178+
np.testing.assert_array_equal(z, y.numpy())
179+
y = paddle.cumsum(data, axis=0)
180+
z = np.cumsum(data_np, axis=0)
181+
np.testing.assert_array_equal(z, y.numpy())
182+
y = paddle.cumsum(data, axis=-1)
183+
z = np.cumsum(data_np, axis=-1)
184+
np.testing.assert_array_equal(z, y.numpy())
185+
y = paddle.cumsum(data, axis=-2)
186+
z = np.cumsum(data_np, axis=-2)
187+
np.testing.assert_array_equal(z, y.numpy())
188+
189+
def run_static_uint8(self, use_gpu=False):
190+
with paddle.static.program_guard(paddle.static.Program()):
191+
data_np = np.random.random((100, 100)).astype(np.uint8)
192+
x = paddle.static.data('X', [100, 100], dtype='uint8')
193+
y = paddle.cumsum(x)
194+
y2 = paddle.cumsum(x, axis=0)
195+
y3 = paddle.cumsum(x, axis=-1)
196+
y4 = paddle.cumsum(x, axis=-2)
197+
place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
198+
exe = base.Executor(place)
199+
exe.run(paddle.static.default_startup_program())
200+
out = exe.run(
201+
feed={'X': data_np},
202+
fetch_list=[
203+
y,
204+
y2,
205+
y3,
206+
y4,
207+
],
208+
)
209+
z = np.cumsum(data_np)
210+
np.testing.assert_allclose(z, out[0], rtol=1e-05)
211+
z = np.cumsum(data_np, axis=0)
212+
np.testing.assert_allclose(z, out[1], rtol=1e-05)
213+
z = np.cumsum(data_np, axis=-1)
214+
np.testing.assert_allclose(z, out[2], rtol=1e-05)
215+
z = np.cumsum(data_np, axis=-2)
216+
np.testing.assert_allclose(z, out[3], rtol=1e-05)
217+
218+
def run_static_int8(self, use_gpu=False):
219+
with paddle.static.program_guard(paddle.static.Program()):
220+
data_np = np.random.random((100, 100)).astype(np.int8)
221+
x = paddle.static.data('X', [100, 100], dtype='int8')
222+
y = paddle.cumsum(x)
223+
y2 = paddle.cumsum(x, axis=0)
224+
y3 = paddle.cumsum(x, axis=-1)
225+
y4 = paddle.cumsum(x, axis=-2)
226+
place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
227+
exe = base.Executor(place)
228+
exe.run(paddle.static.default_startup_program())
229+
out = exe.run(
230+
feed={'X': data_np},
231+
fetch_list=[
232+
y,
233+
y2,
234+
y3,
235+
y4,
236+
],
237+
)
238+
z = np.cumsum(data_np)
239+
np.testing.assert_allclose(z, out[0], rtol=1e-05)
240+
z = np.cumsum(data_np, axis=0)
241+
np.testing.assert_allclose(z, out[1], rtol=1e-05)
242+
z = np.cumsum(data_np, axis=-1)
243+
np.testing.assert_allclose(z, out[2], rtol=1e-05)
244+
z = np.cumsum(data_np, axis=-2)
245+
np.testing.assert_allclose(z, out[3], rtol=1e-05)
246+
247+
def run_static_int16(self, use_gpu=False):
248+
with paddle.static.program_guard(paddle.static.Program()):
249+
data_np = np.random.random((100, 100)).astype(np.int16)
250+
x = paddle.static.data('X', [100, 100], dtype='int16')
251+
y = paddle.cumsum(x)
252+
y2 = paddle.cumsum(x, axis=0)
253+
y3 = paddle.cumsum(x, axis=-1)
254+
y4 = paddle.cumsum(x, axis=-2)
255+
place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
256+
exe = base.Executor(place)
257+
exe.run(paddle.static.default_startup_program())
258+
out = exe.run(
259+
feed={'X': data_np},
260+
fetch_list=[
261+
y,
262+
y2,
263+
y3,
264+
y4,
265+
],
266+
)
267+
z = np.cumsum(data_np)
268+
np.testing.assert_allclose(z, out[0], rtol=1e-05)
269+
z = np.cumsum(data_np, axis=0)
270+
np.testing.assert_allclose(z, out[1], rtol=1e-05)
271+
z = np.cumsum(data_np, axis=-1)
272+
np.testing.assert_allclose(z, out[2], rtol=1e-05)
273+
z = np.cumsum(data_np, axis=-2)
274+
np.testing.assert_allclose(z, out[3], rtol=1e-05)
275+
276+
def run_static_uint16(self, use_gpu=False):
277+
with paddle.static.program_guard(paddle.static.Program()):
278+
data_np = np.random.random((100, 100)).astype(np.uint16)
279+
x = paddle.static.data('X', [100, 100], dtype='uint16')
280+
y = paddle.cumsum(x)
281+
y2 = paddle.cumsum(x, axis=0)
282+
y3 = paddle.cumsum(x, axis=-1)
283+
y4 = paddle.cumsum(x, axis=-2)
284+
place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
285+
exe = base.Executor(place)
286+
exe.run(paddle.static.default_startup_program())
287+
out = exe.run(
288+
feed={'X': data_np},
289+
fetch_list=[
290+
y,
291+
y2,
292+
y3,
293+
y4,
294+
],
295+
)
296+
z = np.cumsum(data_np)
297+
np.testing.assert_allclose(z, out[0], rtol=1e-05)
298+
z = np.cumsum(data_np, axis=0)
299+
np.testing.assert_allclose(z, out[1], rtol=1e-05)
300+
z = np.cumsum(data_np, axis=-1)
301+
np.testing.assert_allclose(z, out[2], rtol=1e-05)
302+
z = np.cumsum(data_np, axis=-2)
303+
np.testing.assert_allclose(z, out[3], rtol=1e-05)
304+
305+
def test_cpu_dygraph(self):
306+
paddle.disable_static(paddle.base.CPUPlace())
307+
self.run_cases()
308+
paddle.enable_static()
309+
310+
def test_cpu_static(self):
311+
self.run_static_uint8()
312+
self.run_static_int8()
313+
self.run_static_int16()
314+
315+
def test_gpu_dygraph(self):
316+
if not base.core.is_compiled_with_cuda():
317+
return
318+
paddle.disable_static(paddle.base.CUDAPlace(0))
319+
self.run_cases()
320+
paddle.enable_static()
321+
322+
def test_gpu_static(self):
323+
if not base.core.is_compiled_with_cuda():
324+
return
325+
self.run_static_uint8(use_gpu=True)
326+
self.run_static_int8(use_gpu=True)
327+
self.run_static_uint16(use_gpu=True)
328+
self.run_static_int16(use_gpu=True)
329+
330+
def test_name(self):
331+
with (
332+
paddle.pir_utils.OldIrGuard(),
333+
base.program_guard(base.Program()),
334+
):
335+
x = paddle.static.data('x', [3, 4])
336+
y = paddle.cumsum(x, name='out')
337+
self.assertTrue('out' in y.name)
338+
339+
127340
def cumsum_wrapper(x, axis=-1, flatten=False, exclusive=False, reverse=False):
128341
return paddle._C_ops.cumsum(x, axis, flatten, exclusive, reverse)
129342

0 commit comments

Comments (0)