diff --git a/cub/agent/agent_spmv_orig.cuh b/cub/agent/agent_spmv_orig.cuh index a77fb77f5..d596b0a71 100644 --- a/cub/agent/agent_spmv_orig.cuh +++ b/cub/agent/agent_spmv_orig.cuh @@ -647,9 +647,24 @@ struct AgentSpmv if (threadIdx.x == 0) { if (HAS_ALPHA) + { tile_carry.value *= spmv_params.alpha; + } tile_carry.key += tile_start_coord.x; + if (tile_carry.key >= spmv_params.num_rows) + { + // Prevent the AgentSegmentFixup kernel from updating + // out-of-bounds values. + // FIXME This creates several no-op atomicAdd(..., 0) calls. + // Ideally the fixup kernel would be aware of the keys. This + // should be possible as the fixup kernel is only used by spmv. + // This should give a perf boost by eliminating these + // unnecessary global atomic updates. + tile_carry.key = spmv_params.num_rows - 1; + tile_carry.value = ValueT{}; + }; + d_tile_carry_pairs[tile_idx] = tile_carry; } }