diff --git a/cub/device/dispatch/dispatch_adjacent_difference.cuh b/cub/device/dispatch/dispatch_adjacent_difference.cuh index 5438dcccd..cd7f78d45 100644 --- a/cub/device/dispatch/dispatch_adjacent_difference.cuh +++ b/cub/device/dispatch/dispatch_adjacent_difference.cuh @@ -264,7 +264,7 @@ struct DispatchAdjacentDifference : public SelectedPolicy if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { break; } @@ -307,7 +307,7 @@ struct DispatchAdjacentDifference : public SelectedPolicy if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { break; } diff --git a/cub/device/dispatch/dispatch_histogram.cuh b/cub/device/dispatch/dispatch_histogram.cuh index 4efb0e616..bb67b2289 100644 --- a/cub/device/dispatch/dispatch_histogram.cuh +++ b/cub/device/dispatch/dispatch_histogram.cuh @@ -617,10 +617,19 @@ struct DispatchHistogram tile_queue); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while (0); diff --git a/cub/device/dispatch/dispatch_merge_sort.cuh b/cub/device/dispatch/dispatch_merge_sort.cuh index 0b480899c..3ff78fc14 100644 --- a/cub/device/dispatch/dispatch_merge_sort.cuh +++ b/cub/device/dispatch/dispatch_merge_sort.cuh @@ -706,7 +706,7 @@ struct DispatchMergeSort : SelectedPolicy if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { break; } @@ -771,7 +771,7 @@ struct DispatchMergeSort : SelectedPolicy if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { break; } @@ -788,7 +788,7 @@ struct DispatchMergeSort : SelectedPolicy if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { break; } diff --git a/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/device/dispatch/dispatch_radix_sort.cuh index f042a4106..07a6c4d50 100644 --- a/cub/device/dispatch/dispatch_radix_sort.cuh +++ b/cub/device/dispatch/dispatch_radix_sort.cuh @@ -1076,7 +1076,13 @@ struct DispatchRadixSort : if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Update selector d_keys.selector ^= 1; @@ -1134,10 +1140,20 @@ struct DispatchRadixSort : pass_config.even_share); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + // Sync the stream if specified to flush runtime errors + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Log scan_kernel configuration if (debug_synchronous) _CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", @@ -1151,10 +1167,20 @@ struct DispatchRadixSort : pass_spine_length); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + // Sync the stream if specified to flush runtime errors + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Log downsweep_kernel configuration if (debug_synchronous) _CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", @@ -1177,10 +1203,20 @@ struct DispatchRadixSort : pass_config.even_share); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + // Sync the stream if specified to flush runtime errors + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Update current bit current_bit += pass_bits; @@ -1774,10 +1810,20 @@ struct DispatchSegmentedRadixSort : current_bit, pass_bits); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + // Sync the stream if specified to flush runtime errors + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Update current bit current_bit += pass_bits; diff --git a/cub/device/dispatch/dispatch_reduce.cuh b/cub/device/dispatch/dispatch_reduce.cuh index 68aa7949b..93130c59f 100644 --- a/cub/device/dispatch/dispatch_reduce.cuh +++ b/cub/device/dispatch/dispatch_reduce.cuh @@ -414,10 +414,19 @@ struct DispatchReduce : init); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while (0); @@ -501,10 +510,19 @@ struct DispatchReduce : reduction_op); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Log single_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", @@ -523,10 +541,19 @@ struct DispatchReduce : init); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while (0); @@ -734,10 +761,19 @@ struct DispatchSegmentedReduce : init); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while (0); diff --git a/cub/device/dispatch/dispatch_reduce_by_key.cuh b/cub/device/dispatch/dispatch_reduce_by_key.cuh index 30f823c86..5c0cb5fa8 100644 --- a/cub/device/dispatch/dispatch_reduce_by_key.cuh +++ b/cub/device/dispatch/dispatch_reduce_by_key.cuh @@ -301,14 +301,25 @@ struct DispatchReduceByKey d_num_runs_out); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Return if empty problem if (num_items == 0) + { break; + } // Get SM occupancy for reduce_by_key_kernel int reduce_by_key_sm_occupancy; @@ -346,10 +357,19 @@ struct DispatchReduceByKey num_items); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } } while (0); diff --git a/cub/device/dispatch/dispatch_rle.cuh b/cub/device/dispatch/dispatch_rle.cuh index 4e983bba7..19050975a 100644 --- a/cub/device/dispatch/dispatch_rle.cuh +++ b/cub/device/dispatch/dispatch_rle.cuh @@ -302,14 +302,25 @@ struct DeviceRleDispatch d_num_runs_out); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Return if empty problem if (num_items == 0) + { break; + } // Get SM occupancy for device_rle_sweep_kernel int device_rle_kernel_sm_occupancy; @@ -346,11 +357,19 @@ struct DeviceRleDispatch num_tiles); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; - + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while (0); diff --git a/cub/device/dispatch/dispatch_scan.cuh b/cub/device/dispatch/dispatch_scan.cuh index 8c5d19e65..f4ba7b53c 100644 --- a/cub/device/dispatch/dispatch_scan.cuh +++ b/cub/device/dispatch/dispatch_scan.cuh @@ -323,11 +323,19 @@ struct DispatchScan: num_tiles); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; - + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Get SM occupancy for scan_kernel int scan_sm_occupancy; @@ -361,10 +369,19 @@ struct DispatchScan: num_items); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } } while (0); diff --git a/cub/device/dispatch/dispatch_scan_by_key.cuh b/cub/device/dispatch/dispatch_scan_by_key.cuh index d694bc3cc..67a837400 100644 --- a/cub/device/dispatch/dispatch_scan_by_key.cuh +++ b/cub/device/dispatch/dispatch_scan_by_key.cuh @@ -304,10 +304,19 @@ struct DispatchScanByKey: ).doit(init_kernel, tile_state, num_tiles); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Get SM occupancy for scan_kernel @@ -345,10 +354,19 @@ struct DispatchScanByKey: num_items); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } } while (0); diff --git a/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/device/dispatch/dispatch_segmented_sort.cuh index 6e190cdd5..1a11b7551 100644 --- a/cub/device/dispatch/dispatch_segmented_sort.cuh +++ b/cub/device/dispatch/dispatch_segmented_sort.cuh @@ -607,7 +607,7 @@ DeviceSegmentedSortContinuation( // Sync the stream if specified to flush runtime errors if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { return error; } @@ -668,7 +668,7 @@ DeviceSegmentedSortContinuation( // Sync the stream if specified to flush runtime errors if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { return error; } @@ -1634,7 +1634,7 @@ private: \ if (debug_synchronous) \ { \ - if (CubDebug(error = SyncStream(stream))) \ + if (CubDebug(error = DebugSyncStream(stream))) \ { \ return error; \ } \ @@ -1658,7 +1658,7 @@ private: return error; } - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { return error; } @@ -1739,7 +1739,7 @@ private: // Sync the stream if specified to flush runtime errors if (debug_synchronous) { - if (CubDebug(error = SyncStream(stream))) + if (CubDebug(error = DebugSyncStream(stream))) { return error; } diff --git a/cub/device/dispatch/dispatch_select_if.cuh b/cub/device/dispatch/dispatch_select_if.cuh index 49f3eff12..c2d4e0af1 100644 --- a/cub/device/dispatch/dispatch_select_if.cuh +++ b/cub/device/dispatch/dispatch_select_if.cuh @@ -294,10 +294,19 @@ struct DispatchSelectIf d_num_selected_out); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Return if empty problem if (num_items == 0) @@ -351,10 +360,19 @@ struct DispatchSelectIf num_tiles); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while (0); diff --git a/cub/device/dispatch/dispatch_spmv_orig.cuh b/cub/device/dispatch/dispatch_spmv_orig.cuh index 7f6a29dd9..0216c5f27 100644 --- a/cub/device/dispatch/dispatch_spmv_orig.cuh +++ b/cub/device/dispatch/dispatch_spmv_orig.cuh @@ -525,10 +525,19 @@ struct DispatchSpmv spmv_params); // Check for failure to launch - if (CubDebug(error = cudaPeekAtLastError())) break; + if (CubDebug(error = cudaPeekAtLastError())) + { + break; + } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } break; } @@ -633,7 +642,13 @@ struct DispatchSpmv if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } // Log spmv_kernel configuration @@ -655,7 +670,13 @@ struct DispatchSpmv if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Run reduce-by-key fixup if necessary if (num_merge_tiles > 1) @@ -679,7 +700,13 @@ struct DispatchSpmv if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } } while (0); diff --git a/cub/device/dispatch/dispatch_three_way_partition.cuh b/cub/device/dispatch/dispatch_three_way_partition.cuh index cb0643806..3b9ff71fc 100644 --- a/cub/device/dispatch/dispatch_three_way_partition.cuh +++ b/cub/device/dispatch/dispatch_three_way_partition.cuh @@ -362,7 +362,7 @@ struct DispatchThreeWayPartitionIf // Sync the stream if specified to flush runtime errors if (debug_synchronous) { - if (CubDebug(error = cub::SyncStream(stream))) + if (CubDebug(error = cub::DebugSyncStream(stream))) { break; } @@ -432,7 +432,7 @@ struct DispatchThreeWayPartitionIf // Sync the stream if specified to flush runtime errors if (debug_synchronous) { - if (CubDebug(error = cub::SyncStream(stream))) + if (CubDebug(error = cub::DebugSyncStream(stream))) { break; } diff --git a/cub/device/dispatch/dispatch_unique_by_key.cuh b/cub/device/dispatch/dispatch_unique_by_key.cuh index 971424bca..ee548d330 100644 --- a/cub/device/dispatch/dispatch_unique_by_key.cuh +++ b/cub/device/dispatch/dispatch_unique_by_key.cuh @@ -282,7 +282,13 @@ struct DispatchUniqueByKey: SelectedPolicy if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } // Return if empty problem if (num_items == 0) break; @@ -341,7 +347,13 @@ struct DispatchUniqueByKey: SelectedPolicy } // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + if (debug_synchronous) + { + if (CubDebug(error = DebugSyncStream(stream))) + { + break; + } + } } while(0); diff --git a/cub/util_device.cuh b/cub/util_device.cuh index 6870cc49e..743dc378c 100644 --- a/cub/util_device.cuh +++ b/cub/util_device.cuh @@ -518,6 +518,32 @@ CUB_RUNTIME_FUNCTION inline cudaError_t SyncStream(cudaStream_t stream) } +/** + * Same as SyncStream, but intended for use with the debug_synchronous flags + * in device algorithms. This should not be used if synchronization is required + * for correctness. + * + * If synchronization is supported by the current compilation target and + * settings, the sync is performed and the sync result is returned. + * + * If syncs are not supported then no sync is performed, but a message is logged + * via _CubLog and cudaSuccess is returned. + */ +CUB_RUNTIME_FUNCTION inline cudaError_t DebugSyncStream(cudaStream_t stream) +{ +#if 1 // All valid targets currently support device-side synchronization + _CubLog("%s\n", "Synchronizing..."); + return SyncStream(stream); +#else + (void)stream; + _CubLog("%s\n", + "WARNING: Skipping CUB `debug_synchronous` synchronization " + "(unsupported target)."); + return cudaSuccess; +#endif +} + + /** * \brief Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer \p kernel_ptr on the current device with \p block_threads per thread block. *