Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Introduce DebugSyncStream for debug_synchronous syncs.
Browse files Browse the repository at this point in the history
`SyncStream` should continue to be used for required synchronizations.

`DebugSyncStream` should only be used for sync that are non-essential
and only used to handle the `debug_synchronous` flag of the device
algorithms.
  • Loading branch information
alliepiper committed May 18, 2022
1 parent ee5959f commit 24ca6c1
Show file tree
Hide file tree
Showing 15 changed files with 308 additions and 60 deletions.
4 changes: 2 additions & 2 deletions cub/device/dispatch/dispatch_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ struct DispatchAdjacentDifference : public SelectedPolicy

if (debug_synchronous)
{
if (CubDebug(error = SyncStream(stream)))
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
Expand Down Expand Up @@ -307,7 +307,7 @@ struct DispatchAdjacentDifference : public SelectedPolicy

if (debug_synchronous)
{
if (CubDebug(error = SyncStream(stream)))
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
Expand Down
13 changes: 11 additions & 2 deletions cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -617,10 +617,19 @@ struct DispatchHistogram
tile_queue);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

}
while (0);
Expand Down
6 changes: 3 additions & 3 deletions cub/device/dispatch/dispatch_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ struct DispatchMergeSort : SelectedPolicy

if (debug_synchronous)
{
if (CubDebug(error = SyncStream(stream)))
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
Expand Down Expand Up @@ -771,7 +771,7 @@ struct DispatchMergeSort : SelectedPolicy

if (debug_synchronous)
{
if (CubDebug(error = SyncStream(stream)))
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
Expand All @@ -788,7 +788,7 @@ struct DispatchMergeSort : SelectedPolicy

if (debug_synchronous)
{
if (CubDebug(error = SyncStream(stream)))
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
Expand Down
64 changes: 55 additions & 9 deletions cub/device/dispatch/dispatch_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,13 @@ struct DispatchRadixSort :
if (CubDebug(error = cudaPeekAtLastError())) break;

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Update selector
d_keys.selector ^= 1;
Expand Down Expand Up @@ -1134,10 +1140,20 @@ struct DispatchRadixSort :
pass_config.even_share);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Log scan_kernel configuration
if (debug_synchronous) _CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n",
Expand All @@ -1151,10 +1167,20 @@ struct DispatchRadixSort :
pass_spine_length);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Log downsweep_kernel configuration
if (debug_synchronous) _CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n",
Expand All @@ -1177,10 +1203,20 @@ struct DispatchRadixSort :
pass_config.even_share);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Update current bit
current_bit += pass_bits;
Expand Down Expand Up @@ -1774,10 +1810,20 @@ struct DispatchSegmentedRadixSort :
current_bit, pass_bits);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
// Sync the stream if specified to flush runtime errors
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Update current bit
current_bit += pass_bits;
Expand Down
52 changes: 44 additions & 8 deletions cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,19 @@ struct DispatchReduce :
init);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}
}
while (0);

Expand Down Expand Up @@ -501,10 +510,19 @@ struct DispatchReduce :
reduction_op);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Log single_reduce_sweep_kernel configuration
if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n",
Expand All @@ -523,10 +541,19 @@ struct DispatchReduce :
init);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}
}
while (0);

Expand Down Expand Up @@ -734,10 +761,19 @@ struct DispatchSegmentedReduce :
init);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}
}
while (0);

Expand Down
28 changes: 24 additions & 4 deletions cub/device/dispatch/dispatch_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,25 @@ struct DispatchReduceByKey
d_num_runs_out);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Return if empty problem
if (num_items == 0)
{
break;
}

// Get SM occupancy for reduce_by_key_kernel
int reduce_by_key_sm_occupancy;
Expand Down Expand Up @@ -346,10 +357,19 @@ struct DispatchReduceByKey
num_items);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}
}
}
while (0);
Expand Down
29 changes: 24 additions & 5 deletions cub/device/dispatch/dispatch_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -302,14 +302,25 @@ struct DeviceRleDispatch
d_num_runs_out);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}

// Return if empty problem
if (num_items == 0)
{
break;
}

// Get SM occupancy for device_rle_sweep_kernel
int device_rle_kernel_sm_occupancy;
Expand Down Expand Up @@ -346,11 +357,19 @@ struct DeviceRleDispatch
num_tiles);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;

if (debug_synchronous)
{
if (CubDebug(error = DebugSyncStream(stream)))
{
break;
}
}
}
while (0);

Expand Down
Loading

0 comments on commit 24ca6c1

Please sign in to comment.