Skip to content

Commit

Permalink
Remove CUB_PTX_ARCH, etc from cub::DeviceSpmv.
Browse files Browse the repository at this point in the history
`cub::DeviceSpmv` has not been ported to use cub::detail::ptx_dispatch
and uses an older dispatch mechanism that is incompatible with nvc++.

It was not ported because there are no tests and thus no way to verify
whether or not the ported version works. Rather than risk breaking
user code, we'll port this later, after we add tests. NV HPC doesn't
require this algorithm so it's low priority for now. Tracked by
NVIDIA#285.
  • Loading branch information
alliepiper committed Jul 20, 2021
1 parent 7faf799 commit 973e4cf
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 57 deletions.
6 changes: 3 additions & 3 deletions cub/agent/agent_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,8 @@ struct AgentSpmv
int tile_num_rows = tile_end_coord.x - tile_start_coord.x;
int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y;

#if (CUB_PTX_ARCH >= 520)
// Not porting these for NVC++; see note in DispatchSpmv in the "__NVCOMPILER_CUDA__" block.
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 520)

OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset;
ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero;
Expand Down Expand Up @@ -465,9 +466,8 @@ struct AgentSpmv
ValueT value = wd_values[tile_start_coord.y + nonzero_idx];

ValueT vector_value = spmv_params.t_vector_x[column_idx];
#if (CUB_PTX_ARCH >= 350)

vector_value = wd_vector_x[column_idx];
#endif
ValueT nonzero = value * vector_value;

s_tile_nonzeros[nonzero_idx] = nonzero;
Expand Down
115 changes: 61 additions & 54 deletions cub/device/dispatch/dispatch_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -368,19 +368,36 @@ struct DispatchSpmv
// Tuning policies of current PTX compiler pass
//---------------------------------------------------------------------

#if (CUB_PTX_ARCH >= 600)
typedef Policy600 PtxPolicy;

#elif (CUB_PTX_ARCH >= 500)
typedef Policy500 PtxPolicy;

#elif (CUB_PTX_ARCH >= 370)
typedef Policy370 PtxPolicy;

#else
#ifdef __NVCOMPILER_CUDA__

// cub::DeviceSpmv has not been ported to use cub::detail::ptx_dispatch
// and uses an older dispatch mechanism that is incompatible with nvc++.
//
// It was not ported because there are no tests and thus no way to verify
// whether or not the ported version works. Rather than risk breaking
// users code, we'll port this later, after we add tests. NV HPC doesn't
// require this algorithm so it's low priority for now. Tracked by
// NVIDIA/cub#285.
//
// This will fail to compile if the Spmv algorithm is instantiated on nvc++,
// but will still build if the header is just included:
template <typename> struct dependent_false : std::false_type {};
static_assert(dependent_false<ValueT>::value,
"cub::DeviceSpmv is not supported on nvc++.");
// Define this so the headers still parse:
typedef Policy350 PtxPolicy;

#endif
#else
#if (__CUDA_ARCH__ >= 600)
typedef Policy600 PtxPolicy;
#elif (__CUDA_ARCH__ >= 500)
typedef Policy500 PtxPolicy;
#elif (__CUDA_ARCH__ >= 370)
typedef Policy370 PtxPolicy;
#else
typedef Policy350 PtxPolicy;
#endif
#endif // __NVCOMPILER_CUDA__

// "Opaque" policies (whose parameterizations aren't reflected in the type signature)
struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {};
Expand All @@ -401,40 +418,34 @@ struct DispatchSpmv
KernelConfig &spmv_config,
KernelConfig &segment_fixup_config)
{
if (CUB_IS_DEVICE_CODE)
// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block.
#ifdef __CUDA_ARCH__
// We're on the device, so initialize the kernel dispatch configurations with the current PTX policy
spmv_config.template Init<PtxSpmvPolicyT>();
segment_fixup_config.template Init<PtxSegmentFixupPolicy>();
#else
// We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version
if (ptx_version >= 600)
{
#if CUB_INCLUDE_DEVICE_CODE
// We're on the device, so initialize the kernel dispatch configurations with the current PTX policy
spmv_config.template Init<PtxSpmvPolicyT>();
segment_fixup_config.template Init<PtxSegmentFixupPolicy>();
#endif
spmv_config.template Init<typename Policy600::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy600::SegmentFixupPolicyT>();
}
else if (ptx_version >= 500)
{
spmv_config.template Init<typename Policy500::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy500::SegmentFixupPolicyT>();
}
else if (ptx_version >= 370)
{
spmv_config.template Init<typename Policy370::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy370::SegmentFixupPolicyT>();
}
else
{
#if CUB_INCLUDE_HOST_CODE
// We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version
if (ptx_version >= 600)
{
spmv_config.template Init<typename Policy600::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy600::SegmentFixupPolicyT>();
}
else if (ptx_version >= 500)
{
spmv_config.template Init<typename Policy500::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy500::SegmentFixupPolicyT>();
}
else if (ptx_version >= 370)
{
spmv_config.template Init<typename Policy370::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy370::SegmentFixupPolicyT>();
}
else
{
spmv_config.template Init<typename Policy350::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy350::SegmentFixupPolicyT>();
}
#endif
spmv_config.template Init<typename Policy350::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy350::SegmentFixupPolicyT>();
}
#endif
}


Expand Down Expand Up @@ -603,13 +614,11 @@ struct DispatchSpmv
int search_block_size = INIT_KERNEL_THREADS;
int search_grid_size = cub::DivideAndRoundUp(num_merge_tiles + 1, search_block_size);

#if CUB_INCLUDE_HOST_CODE
if (CUB_IS_HOST_CODE)
{
// Init textures
if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break;
}
#endif
// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block.
#ifndef __CUDA_ARCH__
// Init textures
if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break;
#endif

if (search_grid_size < sm_count)
// if (num_merge_tiles < spmv_sm_occupancy * sm_count)
Expand Down Expand Up @@ -686,13 +695,11 @@ struct DispatchSpmv
if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
}

#if CUB_INCLUDE_HOST_CODE
if (CUB_IS_HOST_CODE)
{
// Free textures
if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break;
}
#endif
// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block.
#ifndef __CUDA_ARCH__
// Free textures
if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break;
#endif
}
while (0);

Expand Down

0 comments on commit 973e4cf

Please sign in to comment.