diff --git a/projects/rocprim/rocprim/include/rocprim/device/detail/device_batch_memcpy.hpp b/projects/rocprim/rocprim/include/rocprim/device/detail/device_batch_memcpy.hpp index 84198f6cbc8..b47c893f089 100644 --- a/projects/rocprim/rocprim/include/rocprim/device/detail/device_batch_memcpy.hpp +++ b/projects/rocprim/rocprim/include/rocprim/device/detail/device_batch_memcpy.hpp @@ -341,6 +341,7 @@ struct batch_memcpy_impl typename std::iterator_traits::value_type>::value_type>::type; + // Offset over buffers. using buffer_offset_type = uint32_t; @@ -545,8 +546,9 @@ struct batch_memcpy_impl if(blev_buffer_offset < num_blev_buffers) { auto tile_buffer_id = buffer_by_size_class[blev_buffer_offset].buffer_id; + unsigned long long size = static_cast(buffers.sizes[tile_buffer_id]); tile_offsets[i] - = rocprim::detail::ceiling_div(buffers.sizes[tile_buffer_id], + = rocprim::detail::ceiling_div(size, blev_block_size * blev_bytes_per_thread); } else @@ -620,10 +622,11 @@ struct batch_memcpy_impl buffer_offset += warps_per_block) { const auto buffer_id = buffers_by_size_class[buffer_offset].buffer_id; - + + unsigned long long size = static_cast(tile_buffers.sizes[buffer_id]); batch_memcpy::copy_items(tile_buffers.srcs[buffer_id], - tile_buffers.dsts[buffer_id], - tile_buffers.sizes[buffer_id]); + tile_buffers.dsts[buffer_id], + size); } }