Skip to content

Commit f8a3fda

Browse files
committed
Bypass comparison of unsigned integer with zero if DECODED_ITEMS_PER_THREAD is 1
1 parent 11596d6 commit f8a3fda

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

cub/cub/block/block_run_length_decode.cuh

+7-1
Original file line number | Diff line number | Diff line change
@@ -382,7 +382,13 @@ public:
382382
{
383383
decoded_items[i] = val;
384384
item_offsets[i] = thread_decoded_offset - assigned_run_begin;
385-
if ((i == DECODED_ITEMS_PER_THREAD - 1) && (thread_decoded_offset == assigned_run_end - 1))
385+
386+
// A thread only needs to fetch the next run if this was not the last loop iteration
387+
// The cast to int32_t is needed to work around the compiler complaining about "pointless comparison of unsigned
388+
// integer with zero" when DECODED_ITEMS_PER_THREAD is 1
389+
const bool may_need_to_fetch_next_run =
390+
static_cast<int32_t>(i) < (static_cast<int32_t>(DECODED_ITEMS_PER_THREAD) - 1);
391+
if (may_need_to_fetch_next_run && (thread_decoded_offset == assigned_run_end - 1))
386392
{
387393
// We make sure that a thread is not re-entering this conditional when being assigned to the last run already by
388394
// extending the last run's length to cover all of the thread's items

0 commit comments

Comments
 (0)