Remove CUB_PTX_ARCH, etc from cub::DeviceSpmv.

`cub::DeviceSpmv` has not been ported to use cub::detail::ptx_dispatch and uses an older dispatch mechanism that is incompatible with nvc++. It was not ported because there are no tests and thus no way to verify whether or not the ported version works. Rather than risk breaking user code, we'll port this later, after we add tests. NV HPC doesn't require this algorithm so it's low priority for now. Tracked by NVIDIA#285.
alliepiper · Jul 20, 2021 · 973e4cf · 973e4cf
1 parent 7faf799
commit 973e4cf
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 57 deletions.
diff --git a/cub/agent/agent_spmv_orig.cuh b/cub/agent/agent_spmv_orig.cuh
@@ -416,7 +416,8 @@ struct AgentSpmv
         int         tile_num_rows           = tile_end_coord.x - tile_start_coord.x;
         int         tile_num_nonzeros       = tile_end_coord.y - tile_start_coord.y;
 
-#if (CUB_PTX_ARCH >= 520)
+// Not porting these for NVC++; see note in DispatchSpmv in the "__NVCOMPILER_CUDA__" block.
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 520)
 
         OffsetT*    s_tile_row_end_offsets  = &temp_storage.aliasable.merge_items[0].row_end_offset;
         ValueT*     s_tile_nonzeros         = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero;
@@ -465,9 +466,8 @@ struct AgentSpmv
                 ValueT  value                   = wd_values[tile_start_coord.y + nonzero_idx];
 
                 ValueT  vector_value            = spmv_params.t_vector_x[column_idx];
-#if (CUB_PTX_ARCH >= 350)
+
                 vector_value                    = wd_vector_x[column_idx];
-#endif
                 ValueT  nonzero                 = value * vector_value;
 
                 s_tile_nonzeros[nonzero_idx]    = nonzero;

diff --git a/cub/device/dispatch/dispatch_spmv_orig.cuh b/cub/device/dispatch/dispatch_spmv_orig.cuh
@@ -368,19 +368,36 @@ struct DispatchSpmv
     // Tuning policies of current PTX compiler pass
     //---------------------------------------------------------------------
 
-#if (CUB_PTX_ARCH >= 600)
-    typedef Policy600 PtxPolicy;
-
-#elif (CUB_PTX_ARCH >= 500)
-    typedef Policy500 PtxPolicy;
-
-#elif (CUB_PTX_ARCH >= 370)
-    typedef Policy370 PtxPolicy;
-
-#else
+#ifdef __NVCOMPILER_CUDA__
+
+    // cub::DeviceSpmv has not been ported to use cub::detail::ptx_dispatch
+    // and uses an older dispatch mechanism that is incompatible with nvc++.
+    //
+    // It was not ported because there are no tests and thus no way to verify
+    // whether or not the ported version works. Rather than risk breaking
+    // users code, we'll port this later, after we add tests. NV HPC doesn't
+    // require this algorithm so it's low priority for now. Tracked by
+    // NVIDIA/cub#285.
+    //
+    // This will fail to compile if the Spmv algorithm is instantiated on nvc++,
+    // but will still build if the header is just included:
+    template <typename> struct dependent_false : std::false_type {};
+    static_assert(dependent_false<ValueT>::value,
+                  "cub::DeviceSpmv is not supported on nvc++.");
+    // Define this so the headers still parse:
     typedef Policy350 PtxPolicy;
 
-#endif
+#else
+    #if (__CUDA_ARCH__ >= 600)
+        typedef Policy600 PtxPolicy;
+    #elif (__CUDA_ARCH__ >= 500)
+        typedef Policy500 PtxPolicy;
+    #elif (__CUDA_ARCH__ >= 370)
+        typedef Policy370 PtxPolicy;
+    #else
+        typedef Policy350 PtxPolicy;
+    #endif
+#endif // __NVCOMPILER_CUDA__
 
     // "Opaque" policies (whose parameterizations aren't reflected in the type signature)
     struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {};
@@ -401,40 +418,34 @@ struct DispatchSpmv
         KernelConfig    &spmv_config,
         KernelConfig    &segment_fixup_config)
     {
-        if (CUB_IS_DEVICE_CODE)
+// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block.
+#ifdef __CUDA_ARCH__
+        // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy
+        spmv_config.template Init<PtxSpmvPolicyT>();
+        segment_fixup_config.template Init<PtxSegmentFixupPolicy>();
+#else
+        // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version
+        if (ptx_version >= 600)
         {
-            #if CUB_INCLUDE_DEVICE_CODE
-                // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy
-                spmv_config.template Init<PtxSpmvPolicyT>();
-                segment_fixup_config.template Init<PtxSegmentFixupPolicy>();
-            #endif
+            spmv_config.template            Init<typename Policy600::SpmvPolicyT>();
+            segment_fixup_config.template   Init<typename Policy600::SegmentFixupPolicyT>();
+        }
+        else if (ptx_version >= 500)
+        {
+            spmv_config.template            Init<typename Policy500::SpmvPolicyT>();
+            segment_fixup_config.template   Init<typename Policy500::SegmentFixupPolicyT>();
+        }
+        else if (ptx_version >= 370)
+        {
+            spmv_config.template            Init<typename Policy370::SpmvPolicyT>();
+            segment_fixup_config.template   Init<typename Policy370::SegmentFixupPolicyT>();
         }
         else
         {
-            #if CUB_INCLUDE_HOST_CODE
-                // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version
-                if (ptx_version >= 600)
-                {
-                    spmv_config.template            Init<typename Policy600::SpmvPolicyT>();
-                    segment_fixup_config.template   Init<typename Policy600::SegmentFixupPolicyT>();
-                }
-                else if (ptx_version >= 500)
-                {
-                    spmv_config.template            Init<typename Policy500::SpmvPolicyT>();
-                    segment_fixup_config.template   Init<typename Policy500::SegmentFixupPolicyT>();
-                }
-                else if (ptx_version >= 370)
-                {
-                    spmv_config.template            Init<typename Policy370::SpmvPolicyT>();
-                    segment_fixup_config.template   Init<typename Policy370::SegmentFixupPolicyT>();
-                }
-                else
-                {
-                    spmv_config.template            Init<typename Policy350::SpmvPolicyT>();
-                    segment_fixup_config.template   Init<typename Policy350::SegmentFixupPolicyT>();
-                }
-            #endif
+            spmv_config.template            Init<typename Policy350::SpmvPolicyT>();
+            segment_fixup_config.template   Init<typename Policy350::SegmentFixupPolicyT>();
         }
+#endif
     }
 
 
@@ -603,13 +614,11 @@ struct DispatchSpmv
             int search_block_size   = INIT_KERNEL_THREADS;
             int search_grid_size    = cub::DivideAndRoundUp(num_merge_tiles + 1, search_block_size);
 
-            #if CUB_INCLUDE_HOST_CODE
-                if (CUB_IS_HOST_CODE)
-                {
-                    // Init textures
-                    if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break;
-                }
-            #endif
+// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block.
+#ifndef __CUDA_ARCH__
+            // Init textures
+            if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break;
+#endif
 
             if (search_grid_size < sm_count)
 //            if (num_merge_tiles < spmv_sm_occupancy * sm_count)
@@ -686,13 +695,11 @@ struct DispatchSpmv
                 if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break;
             }
 
-            #if CUB_INCLUDE_HOST_CODE
-                if (CUB_IS_HOST_CODE)
-                {
-                    // Free textures
-                    if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break;
-                }
-            #endif
+// Not porting these for NVC++; see note above in the "__NVCOMPILER_CUDA__" block.
+#ifndef __CUDA_ARCH__
+            // Free textures
+            if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break;
+#endif
         }
         while (0);