Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Introduce DebugSyncStream for debug_synchronous syncs.
Browse files Browse the repository at this point in the history
`SyncStream` should continue to be used for required synchronizations.

`DebugSyncStream` should only be used for sync that are non-essential
and only used to handle the `debug_synchronous` flag of the device
algorithms.
  • Loading branch information
alliepiper committed Jun 28, 2022
1 parent 87644c6 commit d825c47
Show file tree
Hide file tree
Showing 15 changed files with 304 additions and 122 deletions.
17 changes: 7 additions & 10 deletions cub/device/dispatch/dispatch_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -259,12 +259,10 @@ struct DispatchAdjacentDifference : public SelectedPolicy
num_tiles,
tile_size);

if (debug_synchronous)
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// Check for failure to launch
Expand Down Expand Up @@ -302,12 +300,11 @@ struct DispatchAdjacentDifference : public SelectedPolicy
difference_op,
num_items);

if (debug_synchronous)

error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// Check for failure to launch
Expand Down
12 changes: 9 additions & 3 deletions cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -667,11 +667,17 @@ public:
tile_queue);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;

error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}
}
while (0);

Expand Down
24 changes: 9 additions & 15 deletions cub/device/dispatch/dispatch_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -704,12 +704,10 @@ struct DispatchMergeSort : SelectedPolicy

block_sort_launcher.launch();

if (debug_synchronous)
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// Check for failure to launch
Expand Down Expand Up @@ -769,12 +767,10 @@ struct DispatchMergeSort : SelectedPolicy
target_merged_tiles_number,
tile_size);

if (debug_synchronous)
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// Check for failure to launch
Expand All @@ -786,12 +782,10 @@ struct DispatchMergeSort : SelectedPolicy
// Merge
merge_launcher.launch(ping, target_merged_tiles_number);

if (debug_synchronous)
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// Check for failure to launch
Expand Down
85 changes: 58 additions & 27 deletions cub/device/dispatch/dispatch_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1073,10 +1073,17 @@ struct DispatchRadixSort :
end_bit);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Update selector
d_keys.selector ^= 1;
Expand Down Expand Up @@ -1134,10 +1141,17 @@ struct DispatchRadixSort :
pass_config.even_share);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Log scan_kernel configuration
if (debug_synchronous) _CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n",
Expand All @@ -1151,10 +1165,17 @@ struct DispatchRadixSort :
pass_spine_length);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Log downsweep_kernel configuration
if (debug_synchronous) _CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n",
Expand All @@ -1177,10 +1198,17 @@ struct DispatchRadixSort :
pass_config.even_share);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Update current bit
current_bit += pass_bits;
Expand Down Expand Up @@ -1345,14 +1373,13 @@ struct DispatchRadixSort :
{
break;
}
if (debug_synchronous)

error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// exclusive sums to determine starts
const int SCAN_BLOCK_THREADS = ActivePolicyT::ExclusiveSumPolicy::BLOCK_THREADS;

Expand All @@ -1370,14 +1397,13 @@ struct DispatchRadixSort :
d_bins);
if (CubDebug(error))
{
break;
break;
}
if (debug_synchronous)

error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}

// use the other buffer if no overwrite is allowed
Expand Down Expand Up @@ -1435,12 +1461,10 @@ struct DispatchRadixSort :
break;
}

if (debug_synchronous)
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
if (CubDebug(error = SyncStream(stream)))
{
break;
}
break;
}
}

Expand Down Expand Up @@ -1827,10 +1851,17 @@ struct DispatchSegmentedRadixSort :
current_bit, pass_bits);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Update current bit
current_bit += pass_bits;
Expand Down
44 changes: 36 additions & 8 deletions cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -414,10 +414,17 @@ struct DispatchReduce :
init);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}
}
while (0);

Expand Down Expand Up @@ -501,10 +508,17 @@ struct DispatchReduce :
reduction_op);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Log single_reduce_sweep_kernel configuration
if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n",
Expand All @@ -523,10 +537,17 @@ struct DispatchReduce :
init);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}
}
while (0);

Expand Down Expand Up @@ -734,10 +755,17 @@ struct DispatchSegmentedReduce :
init);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}
}
while (0);

Expand Down
24 changes: 20 additions & 4 deletions cub/device/dispatch/dispatch_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,23 @@ struct DispatchReduceByKey
d_num_runs_out);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}

// Return if empty problem
if (num_items == 0)
{
break;
}

// Get SM occupancy for reduce_by_key_kernel
int reduce_by_key_sm_occupancy;
Expand Down Expand Up @@ -346,10 +355,17 @@ struct DispatchReduceByKey
num_items);

// Check for failure to launch
if (CubDebug(error = cudaPeekAtLastError())) break;
if (CubDebug(error = cudaPeekAtLastError()))
{
break;
}

// Sync the stream if specified to flush runtime errors
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
error = detail::DebugSyncStream(stream, debug_synchronous);
if (CubDebug(error))
{
break;
}
}
}
while (0);
Expand Down
Loading

0 comments on commit d825c47

Please sign in to comment.