NVIDIA · miscco · Jun 14, 2024 · May 15, 2024 · May 15, 2024 · May 15, 2024
@@ -1,4 +1,4 @@
 ctk_11_1: &ctk_11_1 '11.1'
 ctk_11_8: &ctk_11_8 '11.8'
 ctk_12_0: &ctk_12_0 '12.0'
 ctk_curr: &ctk_curr '12.4'
@@ -71,18 +71,18 @@
     # verify-codegen:
     - {jobs: ['verify_codegen'], project: 'libcudacxx'}
     # cudax has different CTK reqs:
-    # - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc9, *gcc10, *gcc11]}
-    # - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14]}
-    # - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*llvm15]}
-    # - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0,          ], std: 'all', cxx: [*msvc2022_1436]}
-    # - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*msvc2022]}
-    # - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0           ], std: 17,    cxx: [*gcc12], sm: "90"}
-    # - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 17,    cxx: [*gcc12], sm: "90a"}
-    # - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'}
-    # - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 17,    cxx: [*oneapi]}
-    # - {jobs: ['test'],  project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc12]}
-    # - {jobs: ['test'],  project: 'cudax', ctk: [*ctk_12_0           ], std: 'all', cxx: [*llvm14]}
-    # - {jobs: ['test'],  project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*llvm16]}
+    - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc9, *gcc10, *gcc11]}
+    - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14]}
+    - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*llvm15]}
+    - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0,          ], std: 20,    cxx: [*msvc2022_1436]}
+    - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 20,    cxx: [*msvc2022]}
+    - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0           ], std: 17,    cxx: [*gcc12], sm: "90"}
+    - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 17,    cxx: [*gcc12], sm: "90a"}
+    - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'}
+    - {jobs: ['build'], project: 'cudax', ctk: [           *ctk_curr], std: 17,    cxx: [*oneapi]}
+    - {jobs: ['test'],  project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc12]}
+    - {jobs: ['test'],  project: 'cudax', ctk: [*ctk_12_0           ], std: 'all', cxx: [*llvm14]}
+    - {jobs: ['test'],  project: 'cudax', ctk: [           *ctk_curr], std: 'all', cxx: [*llvm16]}
     # cccl-infra:
     - {jobs: ['infra'], project: 'cccl', ctk: *ctk_11_1, cxx: [*gcc-oldest, *llvm-oldest]}
     - {jobs: ['infra'], project: 'cccl', ctk: *ctk_curr, cxx: [*gcc-newest, *llvm-newest]}

@@ -0,0 +1,145 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of CUDA Experimental in CUDA C++ Core Libraries,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDAX__HIERARCHY_DIMENSIONS
+#define _CUDAX__HIERARCHY_DIMENSIONS
+
+#include <cuda/std/mdspan>
+
+#if _CCCL_STD_VER >= 2017
+namespace cuda::experimental
+{
+
+template <typename T, size_t... Extents>
+using dimensions = ::cuda::std::extents<T, Extents...>;
+
+// not unsigned because of a bug in ::cuda::std::extents
+using dimensions_index_type = int;
+
+/**
+ * @brief Type representing a result of a multi-dimensional hierarchy query.
+ *
+ * Returned from extents and index queries.
+ *
+ * @par Snippet
+ * @code
+ * #include <cudax/hierarchy_dimensions.cuh>
+ *
+ * template <typename Dimensions>
+ * __global__ void kernel(Dimensions dims)
+ * {
+ *     auto ext = dims.extents();
+ *
+ *     // Can be accessed like cuda::std::extents or like dim3
+ *     assert(ext.extent(0) == expected);
+ *     assert(ext.x == expected);
+ *
+ *     // Can be converted to dim3
+ *     dim3 dimensions = ext;
+ * }
+ * @endcode
+ * @par
+ *
+ * @tparam T
+ *   Type of the result for each dimension
+ *
+ * @tparam Extents
+ *   Extents of the result
+ */
+template <typename T, size_t... Extents>
+struct hierarchy_query_result : public dimensions<T, Extents...>
+{
+  using Dims = dimensions<T, Extents...>;
+  using Dims::Dims;
+  _CCCL_HOST_DEVICE explicit constexpr hierarchy_query_result(const Dims& dims)
+      : Dims(dims)
+  {}
+  static_assert(Dims::rank() > 0 && Dims::rank() <= 3);
+
+  const T x = Dims::extent(0);
+  const T y = Dims::rank() > 1 ? Dims::extent(1) : 1;
+  const T z = Dims::rank() > 2 ? Dims::extent(2) : 1;
+
+  _CCCL_HOST_DEVICE constexpr operator dim3() const
+  {
+    return dim3(static_cast<uint32_t>(x), static_cast<uint32_t>(y), static_cast<uint32_t>(z));
+  }
+};
+
+namespace detail
+{
+template <typename OpType>
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr size_t merge_extents(size_t e1, size_t e2)
+{
+  if (e1 == ::cuda::std::dynamic_extent || e2 == ::cuda::std::dynamic_extent)
+  {
+    return ::cuda::std::dynamic_extent;
+  }
+  else
+  {
+    OpType op;
+    return op(e1, e2);
+  }
+}
+
+template <typename DstType, typename OpType, typename T1, size_t... Extents1, typename T2, size_t... Extents2>
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto
+dims_op(const OpType& op, const dimensions<T1, Extents1...>& h1, const dimensions<T2, Extents2...>& h2) noexcept
+{
+  // For now target only 3 dim extents
+  static_assert(sizeof...(Extents1) == sizeof...(Extents2));
+  static_assert(sizeof...(Extents1) == 3);
+
+  return dimensions<DstType, merge_extents<OpType>(Extents1, Extents2)...>(
+    op(static_cast<DstType>(h1.extent(0)), h2.extent(0)),
+    op(static_cast<DstType>(h1.extent(1)), h2.extent(1)),
+    op(static_cast<DstType>(h1.extent(2)), h2.extent(2)));
+}
+
+template <typename DstType, typename T1, size_t... Extents1, typename T2, size_t... Extents2>
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto
+dims_product(const dimensions<T1, Extents1...>& h1, const dimensions<T2, Extents2...>& h2) noexcept
+{
+  return dims_op<DstType>(::cuda::std::multiplies(), h1, h2);
+}
+
+template <typename DstType, typename T1, size_t... Extents1, typename T2, size_t... Extents2>
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto
+dims_sum(const dimensions<T1, Extents1...>& h1, const dimensions<T2, Extents2...>& h2) noexcept
+{
+  return dims_op<DstType>(::cuda::std::plus(), h1, h2);
+}
+
+template <typename T, size_t... Extents>
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto convert_to_query_result(const dimensions<T, Extents...>& result)
+{
+  return hierarchy_query_result<T, Extents...>(result);
+}
+
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto dim3_to_dims(const dim3& dims)
+{
+  return dimensions<dimensions_index_type,
+                    ::cuda::std::dynamic_extent,
+                    ::cuda::std::dynamic_extent,
+                    ::cuda::std::dynamic_extent>(dims.x, dims.y, dims.z);
+}
+
+template <typename TyTrunc, typename Index, typename Dims>
+_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto index_to_linear(const Index& index, const Dims& dims)
+{
+  static_assert(Dims::rank() == 3);
+
+  return (static_cast<TyTrunc>(index.extent(2)) * dims.extent(1) + index.extent(1)) * dims.extent(0) + index.extent(0);
+}
+
+} // namespace detail
+} // namespace cuda::experimental
+#endif // _CCCL_STD_VER >= 2017
+#endif // _CUDAX__HIERARCHY_DIMENSIONS