Skip to content

Commit

Permalink
Fix past-the-end reads in cub::DeviceSpmv indexing calculations.
Browse files (browse the repository at this point in the history)
This extends the solution in PR NVIDIA#160.
  • Loading branch information
alliepiper committed Jul 30, 2021
1 parent 57528d4 commit 72e8c79
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions cub/agent/agent_spmv_orig.cuh
Original file line number | Diff line number | Diff line change
Expand Up @@ -294,7 +294,10 @@ struct AgentSpmv
// Gather the row end-offsets for the merge tile into shared memory
for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS)
{
- s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item];
+ const OffsetT offset =
+ (cub::min)(static_cast<OffsetT>(tile_start_coord.x + item),
+ static_cast<OffsetT>(spmv_params.num_rows - 1));
+ s_tile_row_end_offsets[item] = wd_row_end_offsets[offset];
}

CTA_SYNC();
Expand Down Expand Up @@ -473,7 +476,10 @@ struct AgentSpmv
#pragma unroll 1
for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS)
{
- s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item];
+ const OffsetT offset =
+ (cub::min)(static_cast<OffsetT>(tile_start_coord.x + item),
+ static_cast<OffsetT>(spmv_params.num_rows - 1));
+ s_tile_row_end_offsets[item] = wd_row_end_offsets[offset];
}

CTA_SYNC();
Expand Down

0 comments on commit 72e8c79

Please sign in to comment.