2 changes: 2 additions & 0 deletions paddle/phi/kernels/cpu/cum_grad_kernel.cc
@@ -54,6 +54,8 @@ PD_REGISTER_KERNEL(cumsum_grad,
phi::CumsumGradKernel,
float,
double,
uint8_t,
int8_t,
int16_t,
int,
int64_t,
2 changes: 2 additions & 0 deletions paddle/phi/kernels/cpu/cum_kernel.cc
@@ -273,6 +273,8 @@ PD_REGISTER_KERNEL(cumsum,
phi::CumsumKernel,
float,
double,
uint8_t,
int8_t,
int16_t,
int,
int64_t,
2 changes: 2 additions & 0 deletions paddle/phi/kernels/gpu/cum_grad_kernel.cu
@@ -81,6 +81,8 @@ PD_REGISTER_KERNEL(cumsum_grad,
phi::CumsumGradKernel,
float,
double,
uint8_t,
int8_t,
int16_t,
int,
int64_t,
2 changes: 2 additions & 0 deletions paddle/phi/kernels/gpu/cum_kernel.cu
@@ -508,6 +508,8 @@ PD_REGISTER_KERNEL(cumsum,
phi::CumsumKernel,
float,
double,
uint8_t,
int8_t,
int16_t,
int,
int64_t,
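Taken together, the four hunks above register uint8_t and int8_t for the CPU and GPU cumsum forward and gradient kernels; previously the smallest registered integer type was int16_t. A minimal Python-level sketch of what this enables (the tensor values and the explicit dtype argument are illustrative, not taken from this diff): when the caller requests a small integer dtype explicitly, the input is cast before the op runs and the newly registered kernel performs the accumulation in that precision. The static-graph tests added later in this diff exercise the same kernels with uint8/int8 placeholders.

```python
import paddle

x = paddle.to_tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])  # float32 input

# Passing dtype casts the input to int8 first, so the newly registered
# int8 kernel performs the accumulation and the result stays int8.
y = paddle.cumsum(x, axis=1, dtype='int8')
print(y.dtype)    # paddle.int8
print(y.numpy())  # [[ 1  3  6]
                  #  [ 4  9 15]]
```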
9 changes: 8 additions & 1 deletion python/paddle/tensor/math.py
@@ -4308,7 +4308,7 @@ def cumsum(
Args:
x (Tensor): The input tensor needed to be cumsumed.
axis (int, optional): The dimension to accumulate along. -1 means the last dimension. The default (None) is to compute the cumsum over the flattened array.
dtype (str|paddle.dtype|np.dtype|None, optional): The data type of the output tensor, can be bfloat16, float16, float32, float64, int32, int64, complex64, complex128. If specified, the input tensor is casted to dtype before the operation is performed. This is useful for preventing data type overflows. The default value is None.
dtype (str|paddle.dtype|np.dtype|None, optional): The data type of the output tensor, which can be bfloat16, float16, float32, float64, int32, int64, complex64 or complex128. By default it is int64 if the input x is uint8/int8/int16/int32; otherwise it is None. If it is not None, the input tensor is cast to dtype before the operation is performed, which is useful for preventing data type overflows.
name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

Returns:
@@ -4350,6 +4350,13 @@ def cumsum(
        flatten = False
    if dtype is not None and x.dtype != convert_np_dtype_to_dtype_(dtype):
        x = cast(x, dtype)
    elif isinstance(x, paddle.Tensor) and x.dtype in [
        paddle.uint8,
        paddle.int8,
        paddle.int16,
        paddle.int32,
    ]:
        x = cast(x, "int64")

    if in_dynamic_or_pir_mode():
        if axis is None:
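The new elif branch gives uint8/int8/int16/int32 inputs the int64 default described in the updated docstring: when dtype is omitted, the input is cast to int64 before the op runs, so long running sums no longer overflow in the input precision. A short sketch of the effect, with illustrative values that are not part of the diff:

```python
import numpy as np
import paddle

x = paddle.to_tensor(np.full(10, 100, dtype=np.int8))  # each element fits in int8

y = paddle.cumsum(x)      # no dtype given: promoted to int64 by the branch above
print(y.dtype)            # paddle.int64
print(y.numpy()[-1])      # 1000 -- a value that would have overflowed int8
```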
213 changes: 213 additions & 0 deletions test/legacy_test/test_cumsum_op.py
@@ -124,6 +124,219 @@ def test_name(self):
self.assertTrue('out' in y.name)


class TestCumsumOp_INT(unittest.TestCase):
    def run_cases(self):
        data_np = np.arange(12).reshape(3, 4).astype(np.uint8)
        data = paddle.to_tensor(data_np)
        y = paddle.cumsum(data)
        z = np.cumsum(data_np)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=0)
        z = np.cumsum(data_np, axis=0)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-1)
        z = np.cumsum(data_np, axis=-1)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-2)
        z = np.cumsum(data_np, axis=-2)
        np.testing.assert_array_equal(z, y.numpy())

        data_np = np.arange(12).reshape(3, 4).astype(np.int8)
        data = paddle.to_tensor(data_np)
        y = paddle.cumsum(data)
        z = np.cumsum(data_np)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=0)
        z = np.cumsum(data_np, axis=0)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-1)
        z = np.cumsum(data_np, axis=-1)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-2)
        z = np.cumsum(data_np, axis=-2)
        np.testing.assert_array_equal(z, y.numpy())

        data_np = np.arange(12).reshape(3, 4).astype(np.int16)
        data = paddle.to_tensor(data_np)
        y = paddle.cumsum(data)
        z = np.cumsum(data_np)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=0)
        z = np.cumsum(data_np, axis=0)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-1)
        z = np.cumsum(data_np, axis=-1)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-2)
        z = np.cumsum(data_np, axis=-2)
        np.testing.assert_array_equal(z, y.numpy())

        data_np = np.arange(12).reshape(3, 4).astype(np.int32)
        data = paddle.to_tensor(data_np)
        y = paddle.cumsum(data)
        z = np.cumsum(data_np)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=0)
        z = np.cumsum(data_np, axis=0)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-1)
        z = np.cumsum(data_np, axis=-1)
        np.testing.assert_array_equal(z, y.numpy())
        y = paddle.cumsum(data, axis=-2)
        z = np.cumsum(data_np, axis=-2)
        np.testing.assert_array_equal(z, y.numpy())

    def run_static_uint8(self, use_gpu=False):
        with paddle.static.program_guard(paddle.static.Program()):
            data_np = np.random.random((100, 100)).astype(np.uint8)
            x = paddle.static.data('X', [100, 100], dtype='uint8')
            y = paddle.cumsum(x)
            y2 = paddle.cumsum(x, axis=0)
            y3 = paddle.cumsum(x, axis=-1)
            y4 = paddle.cumsum(x, axis=-2)
            place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
            exe = base.Executor(place)
            exe.run(paddle.static.default_startup_program())
            out = exe.run(
                feed={'X': data_np},
                fetch_list=[
                    y,
                    y2,
                    y3,
                    y4,
                ],
            )
            z = np.cumsum(data_np)
            np.testing.assert_allclose(z, out[0], rtol=1e-05)
            z = np.cumsum(data_np, axis=0)
            np.testing.assert_allclose(z, out[1], rtol=1e-05)
            z = np.cumsum(data_np, axis=-1)
            np.testing.assert_allclose(z, out[2], rtol=1e-05)
            z = np.cumsum(data_np, axis=-2)
            np.testing.assert_allclose(z, out[3], rtol=1e-05)

    def run_static_int8(self, use_gpu=False):
        with paddle.static.program_guard(paddle.static.Program()):
            data_np = np.random.random((100, 100)).astype(np.int8)
            x = paddle.static.data('X', [100, 100], dtype='int8')
            y = paddle.cumsum(x)
            y2 = paddle.cumsum(x, axis=0)
            y3 = paddle.cumsum(x, axis=-1)
            y4 = paddle.cumsum(x, axis=-2)
            place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
            exe = base.Executor(place)
            exe.run(paddle.static.default_startup_program())
            out = exe.run(
                feed={'X': data_np},
                fetch_list=[
                    y,
                    y2,
                    y3,
                    y4,
                ],
            )
            z = np.cumsum(data_np)
            np.testing.assert_allclose(z, out[0], rtol=1e-05)
            z = np.cumsum(data_np, axis=0)
            np.testing.assert_allclose(z, out[1], rtol=1e-05)
            z = np.cumsum(data_np, axis=-1)
            np.testing.assert_allclose(z, out[2], rtol=1e-05)
            z = np.cumsum(data_np, axis=-2)
            np.testing.assert_allclose(z, out[3], rtol=1e-05)

    def run_static_int16(self, use_gpu=False):
        with paddle.static.program_guard(paddle.static.Program()):
            data_np = np.random.random((100, 100)).astype(np.int16)
            x = paddle.static.data('X', [100, 100], dtype='int16')
            y = paddle.cumsum(x)
            y2 = paddle.cumsum(x, axis=0)
            y3 = paddle.cumsum(x, axis=-1)
            y4 = paddle.cumsum(x, axis=-2)
            place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
            exe = base.Executor(place)
            exe.run(paddle.static.default_startup_program())
            out = exe.run(
                feed={'X': data_np},
                fetch_list=[
                    y,
                    y2,
                    y3,
                    y4,
                ],
            )
            z = np.cumsum(data_np)
            np.testing.assert_allclose(z, out[0], rtol=1e-05)
            z = np.cumsum(data_np, axis=0)
            np.testing.assert_allclose(z, out[1], rtol=1e-05)
            z = np.cumsum(data_np, axis=-1)
            np.testing.assert_allclose(z, out[2], rtol=1e-05)
            z = np.cumsum(data_np, axis=-2)
            np.testing.assert_allclose(z, out[3], rtol=1e-05)

    def run_static_uint16(self, use_gpu=False):
        with paddle.static.program_guard(paddle.static.Program()):
            data_np = np.random.random((100, 100)).astype(np.uint16)
            x = paddle.static.data('X', [100, 100], dtype='uint16')
            y = paddle.cumsum(x)
            y2 = paddle.cumsum(x, axis=0)
            y3 = paddle.cumsum(x, axis=-1)
            y4 = paddle.cumsum(x, axis=-2)
            place = base.CUDAPlace(0) if use_gpu else base.CPUPlace()
            exe = base.Executor(place)
            exe.run(paddle.static.default_startup_program())
            out = exe.run(
                feed={'X': data_np},
                fetch_list=[
                    y,
                    y2,
                    y3,
                    y4,
                ],
            )
            z = np.cumsum(data_np)
            np.testing.assert_allclose(z, out[0], rtol=1e-05)
            z = np.cumsum(data_np, axis=0)
            np.testing.assert_allclose(z, out[1], rtol=1e-05)
            z = np.cumsum(data_np, axis=-1)
            np.testing.assert_allclose(z, out[2], rtol=1e-05)
            z = np.cumsum(data_np, axis=-2)
            np.testing.assert_allclose(z, out[3], rtol=1e-05)

    def test_cpu_dygraph(self):
        paddle.disable_static(paddle.base.CPUPlace())
        self.run_cases()
        paddle.enable_static()

    def test_cpu_static(self):
        self.run_static_uint8()
        self.run_static_int8()
        self.run_static_int16()

    def test_gpu_dygraph(self):
        if not base.core.is_compiled_with_cuda():
            return
        paddle.disable_static(paddle.base.CUDAPlace(0))
        self.run_cases()
        paddle.enable_static()

    def test_gpu_static(self):
        if not base.core.is_compiled_with_cuda():
            return
        self.run_static_uint8(use_gpu=True)
        self.run_static_int8(use_gpu=True)
        self.run_static_uint16(use_gpu=True)
        self.run_static_int16(use_gpu=True)

    def test_name(self):
        with (
            paddle.pir_utils.OldIrGuard(),
            base.program_guard(base.Program()),
        ):
            x = paddle.static.data('x', [3, 4])
            y = paddle.cumsum(x, name='out')
            self.assertTrue('out' in y.name)


def cumsum_wrapper(x, axis=-1, flatten=False, exclusive=False, reverse=False):
return paddle._C_ops.cumsum(x, axis, flatten, exclusive, reverse)
