Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor inline comments #581

Merged
merged 1 commit into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 230 additions & 90 deletions cub/cub/agent/agent_histogram.cuh

Large diffs are not rendered by default.

104 changes: 75 additions & 29 deletions cub/cub/agent/agent_radix_sort_downsweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -63,30 +63,61 @@ CUB_NAMESPACE_BEGIN
******************************************************************************/

/**
* Parameterizable tuning policy type for AgentRadixSortDownsweep
* @brief Parameterizable tuning policy type for AgentRadixSortDownsweep
*
* @tparam NOMINAL_BLOCK_THREADS_4B
* Threads per thread block
*
* @tparam NOMINAL_ITEMS_PER_THREAD_4B
* Items per thread (per tile of input)
*
* @tparam ComputeT
* Dominant compute type
*
* @tparam _LOAD_ALGORITHM
* The BlockLoad algorithm to use
*
* @tparam _LOAD_MODIFIER
* Cache load modifier for reading keys (and values)
*
* @tparam _RANK_ALGORITHM
* The radix ranking algorithm to use
*
* @tparam _SCAN_ALGORITHM
* The block scan algorithm to use
*
* @tparam _RADIX_BITS
* The number of radix bits, i.e., log2(bins)
*/
template <
int NOMINAL_BLOCK_THREADS_4B, ///< Threads per thread block
int NOMINAL_ITEMS_PER_THREAD_4B, ///< Items per thread (per tile of input)
typename ComputeT, ///< Dominant compute type
BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use
CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys (and values)
RadixRankAlgorithm _RANK_ALGORITHM, ///< The radix ranking algorithm to use
BlockScanAlgorithm _SCAN_ALGORITHM, ///< The block scan algorithm to use
int _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins)
typename ScalingType = RegBoundScaling<NOMINAL_BLOCK_THREADS_4B, NOMINAL_ITEMS_PER_THREAD_4B, ComputeT> >
struct AgentRadixSortDownsweepPolicy :
ScalingType
template <int NOMINAL_BLOCK_THREADS_4B,
int NOMINAL_ITEMS_PER_THREAD_4B,
typename ComputeT,
BlockLoadAlgorithm _LOAD_ALGORITHM,
CacheLoadModifier _LOAD_MODIFIER,
RadixRankAlgorithm _RANK_ALGORITHM,
BlockScanAlgorithm _SCAN_ALGORITHM,
int _RADIX_BITS,
typename ScalingType =
RegBoundScaling<NOMINAL_BLOCK_THREADS_4B, NOMINAL_ITEMS_PER_THREAD_4B, ComputeT>>
struct AgentRadixSortDownsweepPolicy : ScalingType
{
enum
{
RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins)
};
enum
{
/// The number of radix bits, i.e., log2(bins)
RADIX_BITS = _RADIX_BITS,
};

/// The BlockLoad algorithm to use
static constexpr BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;

/// Cache load modifier for reading keys (and values)
static constexpr CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;

static constexpr BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use
static constexpr CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys (and values)
static constexpr RadixRankAlgorithm RANK_ALGORITHM = _RANK_ALGORITHM; ///< The radix ranking algorithm to use
static constexpr BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use
/// The radix ranking algorithm to use
static constexpr RadixRankAlgorithm RANK_ALGORITHM = _RANK_ALGORITHM;

/// The BlockScan algorithm to use
static constexpr BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM;
Comment on lines +110 to +120
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not a fan of comments that are essentially respellings of the variable name.

That is the reason you just want good descriptive names

};


Expand All @@ -99,15 +130,30 @@ struct AgentRadixSortDownsweepPolicy :


/**
* \brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep .
* @brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in
* device-wide radix sort downsweep .
*
* @tparam AgentRadixSortDownsweepPolicy
* Parameterized AgentRadixSortDownsweepPolicy tuning policy type
*
* @tparam IS_DESCENDING
* Whether or not the sorted-order is high-to-low
*
* @tparam KeyT
* KeyT type
*
* @tparam ValueT
* ValueT type
*
* @tparam OffsetT
* Signed integer type for global offsets
*/
template <
typename AgentRadixSortDownsweepPolicy, ///< Parameterized AgentRadixSortDownsweepPolicy tuning policy type
bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low
typename KeyT, ///< KeyT type
typename ValueT, ///< ValueT type
typename OffsetT, ///< Signed integer type for global offsets
typename DecomposerT = detail::identity_decomposer_t>
template <typename AgentRadixSortDownsweepPolicy,
bool IS_DESCENDING,
typename KeyT,
typename ValueT,
typename OffsetT,
typename DecomposerT = detail::identity_decomposer_t>
struct AgentRadixSortDownsweep
{
//---------------------------------------------------------------------
Expand Down
79 changes: 53 additions & 26 deletions cub/cub/agent/agent_radix_sort_upsweep.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,39 +56,63 @@ CUB_NAMESPACE_BEGIN
******************************************************************************/

/**
* Parameterizable tuning policy type for AgentRadixSortUpsweep
* @brief Parameterizable tuning policy type for AgentRadixSortUpsweep
*
* @tparam NOMINAL_BLOCK_THREADS_4B
* Threads per thread block
*
* @tparam NOMINAL_ITEMS_PER_THREAD_4B
* Items per thread (per tile of input)
*
* @tparam ComputeT
* Dominant compute type
*
* @tparam _LOAD_MODIFIER
* Cache load modifier for reading keys
*
* @tparam _RADIX_BITS
* The number of radix bits, i.e., log2(bins)
*/
template <
int NOMINAL_BLOCK_THREADS_4B, ///< Threads per thread block
int NOMINAL_ITEMS_PER_THREAD_4B, ///< Items per thread (per tile of input)
typename ComputeT, ///< Dominant compute type
CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys
int _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins)
typename ScalingType = RegBoundScaling<NOMINAL_BLOCK_THREADS_4B, NOMINAL_ITEMS_PER_THREAD_4B, ComputeT> >
struct AgentRadixSortUpsweepPolicy :
ScalingType
template <int NOMINAL_BLOCK_THREADS_4B,
int NOMINAL_ITEMS_PER_THREAD_4B,
typename ComputeT,
CacheLoadModifier _LOAD_MODIFIER,
int _RADIX_BITS,
typename ScalingType =
RegBoundScaling<NOMINAL_BLOCK_THREADS_4B, NOMINAL_ITEMS_PER_THREAD_4B, ComputeT>>
struct AgentRadixSortUpsweepPolicy : ScalingType
{
enum
{
RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins)
};

static constexpr CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys
enum
{
/// The number of radix bits, i.e., log2(bins)
RADIX_BITS = _RADIX_BITS,
};

/// Cache load modifier for reading keys
static constexpr CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
};


/******************************************************************************
* Thread block abstractions
******************************************************************************/

/**
* \brief AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep .
* @brief AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for
* participating in device-wide radix sort upsweep .
*
* @tparam AgentRadixSortUpsweepPolicy
* Parameterized AgentRadixSortUpsweepPolicy tuning policy type
*
* @tparam KeyT
* KeyT type
*
* @tparam DecomposerT = detail::identity_decomposer_t
* Signed integer type for global offsets
*/
template <
typename AgentRadixSortUpsweepPolicy, ///< Parameterized AgentRadixSortUpsweepPolicy tuning policy type
typename KeyT, ///< KeyT type
typename OffsetT,
typename DecomposerT = detail::identity_decomposer_t> ///< Signed integer type for global offsets
template <typename AgentRadixSortUpsweepPolicy,
typename KeyT,
typename OffsetT,
typename DecomposerT = detail::identity_decomposer_t>
struct AgentRadixSortUpsweep
{

Expand Down Expand Up @@ -483,11 +507,14 @@ struct AgentRadixSortUpsweep


/**
* Extract counts
* @brief Extract counts
*
* @param[out] bin_count
* The exclusive prefix sum for the digits
* [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]
*/
template <int BINS_TRACKED_PER_THREAD>
__device__ __forceinline__ void ExtractCounts(
OffsetT (&bin_count)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]
__device__ __forceinline__ void ExtractCounts(OffsetT (&bin_count)[BINS_TRACKED_PER_THREAD])
{
unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS;
unsigned int warp_tid = LaneId();
Expand Down
Loading