From 0647f3ea62e76093f53dcf150fa776e998d3c9de Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Thu, 23 Oct 2025 20:30:17 -0400 Subject: [PATCH] Enable host-only rocThrust builds with g++/clang++ Currently, all rocThrust build configurations depend on rocPRIM. Since rocPRIM requires a GPU, this makes it tricky to use rocThrust for host-only computation. In addition, since building rocPRIM requires a hip-aware clang (eg. hipcc), so does building rocThrust. This means you can't use gcc or vanilla-clang to build rocThrust, even if you're only doing host-side compute. Upstream thrust does allow users to build for host-only compute, and this is typically done with gcc. It turns out that there are some differences between the way that clang/hipcc and gcc interpret the C++ standard. This can lead to situations where applications that work when built with Thrust do not work when built with rocThrust. Addressing these situations creates additional load for the compiler team. With this in mind, this change: - adds a new cmake option, `ROCTHRUST_DEVICE_SYSTEM`, which is similar to Thrust's `THRUST_DEVICE_SYSTEM` macro. It can be set to one of: `HIP`, `CUDA`, `TBB`, `OpenMP`, or `CPP`. If it's set to `HIP` or `CUDA`, then the code links against device dependencies (eg. for HIP, it looks for rocPRIM). If it's set to `CPP`, then it does not link against device dependencies. - Because the benchmarks, tests, and examples all currently depend on rocPRIM, I've added a cmake fatal error that triggers when rocThrust is built with device system `CPP` and benchmarks, tests, or examples are enabled. We can probably enable many of the tests going forward, but it'll require a bit of work, so it may be best to follow-up on that in a separate change. - The cmake dependencies file has been tweaked so we don't try to fetch rocPRIM when `ROCTHRUEST_DEVICE_SYSTEM == CPP`. - I've modified the cmake compiler verification and rocThrust device_system.h header to allow gcc to be used as a host compiler again. - I updated a number of cases where we were assuming that we could call into rocPRIM as a fallback. I've added a macro `_THRUST_USE_ROCPRIM` that's set to 1 in cases where have access to it. --- projects/rocthrust/CHANGELOG.md | 6 ++++ projects/rocthrust/CMakeLists.txt | 17 ++++++++++- projects/rocthrust/cmake/Dependencies.cmake | 6 ++-- projects/rocthrust/cmake/Summary.cmake | 1 + projects/rocthrust/cmake/VerifyCompiler.cmake | 29 ++++++++++++------- .../install/rocThrust-install-with-cmake.rst | 1 + projects/rocthrust/thrust/CMakeLists.txt | 28 ++++++++++++------ projects/rocthrust/thrust/detail/alignment.h | 6 ++-- .../thrust/detail/config/device_system.h | 2 +- .../rocthrust/thrust/detail/config/libcxx.h | 3 ++ .../rocthrust/thrust/detail/type_traits.h | 12 ++++++-- .../rocthrust/thrust/iterator/zip_iterator.h | 1 + projects/rocthrust/thrust/pair.h | 2 +- .../system/detail/sequential/iter_swap.h | 2 +- 14 files changed, 85 insertions(+), 31 deletions(-) diff --git a/projects/rocthrust/CHANGELOG.md b/projects/rocthrust/CHANGELOG.md index e69d45b151cc..dcab1c3aa18f 100644 --- a/projects/rocthrust/CHANGELOG.md +++ b/projects/rocthrust/CHANGELOG.md @@ -3,6 +3,12 @@ Documentation for rocThrust available at [https://rocm.docs.amd.com/projects/rocThrust/en/latest/](https://rocm.docs.amd.com/projects/rocThrust/en/latest/). +## rocThrust x.y.z for ROCm x.y + +### Added + +* If you are using rocThrust on the host-side only, you can now build using g++ or non-HIP-aware clang++. To configure rocThrust in this-way, set the new CMake option `ROCTHRUST_DEVICE_SYSTEM` to `CPP` (other options include `HIP`, `CUDA`, `OpenMP`, and `TBB`), and set `CXX` to g++ or clang++. Then install rocThrust via `make install`. When you compile your application, don't forget to include the rocThrust include directory (`-I /opt/rocm/include`), since this won't happen automatically like it does when building with hipcc. Note that currently, rocThrust tests and benchmarks cannot be built when configuring rocThrust for host-side-only use. + ## rocThrust 4.2.0 for ROCm 7.2 ### Added diff --git a/projects/rocthrust/CMakeLists.txt b/projects/rocthrust/CMakeLists.txt index 36cc0c8da250..eb3838ea0f60 100644 --- a/projects/rocthrust/CMakeLists.txt +++ b/projects/rocthrust/CMakeLists.txt @@ -64,7 +64,22 @@ cmake_dependent_option(ENABLE_UPSTREAM_TESTS "Enable upstream (thrust) tests" ON cmake_dependent_option(USE_SYSTEM_LIB "Use existing system ROCm library installation when building tests" OFF BUILD_TEST OFF) option(EXTERNAL_DEPS_FORCE_DOWNLOAD "Force download of non-ROCm dependencies (eg. Google Test, Google Benchmark)" OFF) -check_language(HIP) +set(DEVICE_SYSTEM_OPTIONS "HIP" "CUDA" "CPP" "OpenMP" "TBB") +set(ROCTHRUST_DEVICE_SYSTEM "HIP" CACHE STRING "Set device system for thrust (options are ${DEVICE_SYSTEM_OPTIONS})") + +if(NOT ${ROCTHRUST_DEVICE_SYSTEM} IN_LIST DEVICE_SYSTEM_OPTIONS) + message(FATAL_ERROR "Unrecognized ROCTHRUST_DEVICE_SYSTEM option \"${ROCTHRUST_DEVICE_SYSTEM}\". Valid options are: ${DEVICE_SYSTEM_OPTIONS}") +endif() + +if((BUILD_TEST OR BUILD_BENCHMARK OR BUILD_EXAMPLE) AND + NOT (${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP" OR $ROCTHRUST_DEVICE_SYSTEM STREQUAL "CUDA")) + message(FATAL_ERROR "rocThrust tests, benchmarks, and examples must be built with ROCTHRUST_DEVICE_SYSTEM=HIP|CUDA since they require device acceleration.") +endif() + +if(${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP") + check_language(HIP) +endif() + cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) include(CheckCXXCompilerFlag) diff --git a/projects/rocthrust/cmake/Dependencies.cmake b/projects/rocthrust/cmake/Dependencies.cmake index dece37f3112d..3291945f6e72 100644 --- a/projects/rocthrust/cmake/Dependencies.cmake +++ b/projects/rocthrust/cmake/Dependencies.cmake @@ -236,9 +236,10 @@ function(fetch_dep method repo_name repo_path download_branch) endif() endfunction() -fetch_dep(ROCPRIM_FETCH_METHOD rocprim ROCPRIM_PATH ROCM_DEP_RELEASE_BRANCH) +if(${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP") + fetch_dep(ROCPRIM_FETCH_METHOD rocprim ROCPRIM_PATH ROCM_DEP_RELEASE_BRANCH) -if(${ROCPRIM_FETCH_METHOD} STREQUAL "DOWNLOAD" OR ${ROCPRIM_FETCH_METHOD} STREQUAL "MONOREPO") + if(${ROCPRIM_FETCH_METHOD} STREQUAL "DOWNLOAD" OR ${ROCPRIM_FETCH_METHOD} STREQUAL "MONOREPO") # The fetch_dep call above should have downloaded/located the source. We just need to make it available. message(STATUS "Configuring rocPRIM") FetchContent_Declare( @@ -258,6 +259,7 @@ if(${ROCPRIM_FETCH_METHOD} STREQUAL "DOWNLOAD" OR ${ROCPRIM_FETCH_METHOD} STREQU add_library(roc::rocprim_hip ALIAS rocprim_hip) endif() endif() +endif() # Test dependencies if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) diff --git a/projects/rocthrust/cmake/Summary.cmake b/projects/rocthrust/cmake/Summary.cmake index c1abdaabb18b..4edaf01fbf35 100644 --- a/projects/rocthrust/cmake/Summary.cmake +++ b/projects/rocthrust/cmake/Summary.cmake @@ -107,6 +107,7 @@ endif() message(STATUS " USE_SYSTEM_LIB : ${USE_SYSTEM_LIB}") message(STATUS " BUILD_ADDRESS_SANITIZER : ${BUILD_ADDRESS_SANITIZER}") message(STATUS " EXTERNAL_DEPS_FORCE_DOWNLOAD : ${EXTERNAL_DEPS_FORCE_DOWNLOAD}") + message(STATUS " ROCTHRUST_DEVICE_SYSTEM : ${ROCTHRUST_DEVICE_SYSTEM}") message(STATUS "") message(STATUS "Detailed:") message(STATUS " C++ compiler details : \n${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") diff --git a/projects/rocthrust/cmake/VerifyCompiler.cmake b/projects/rocthrust/cmake/VerifyCompiler.cmake index 3be10785eb63..54a1e60bd3aa 100644 --- a/projects/rocthrust/cmake/VerifyCompiler.cmake +++ b/projects/rocthrust/cmake/VerifyCompiler.cmake @@ -20,21 +20,28 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip) -find_package(hip REQUIRED CONFIG PATHS /opt/rocm) +if(${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP") + list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip) + find_package(hip REQUIRED CONFIG PATHS /opt/rocm) +endif() if(HIP_COMPILER STREQUAL "nvcc") message(FATAL_ERROR "rocThrust does not support the CUDA backend.") -elseif(HIP_COMPILER STREQUAL "clang") - if(USE_HIPCXX) - if(NOT (CMAKE_HIP_COMPILER MATCHES ".*hipcc$" OR CMAKE_HIP_COMPILER MATCHES ".*clang\\+\\+")) - message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as HIP compiler.") +endif() + +if(${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP") + # When building for HIP, make sure we have a hip-aware clang. + if(HIP_COMPILER STREQUAL "clang") + if(USE_HIPCXX) + if(NOT (CMAKE_HIP_COMPILER MATCHES ".*hipcc$" OR CMAKE_HIP_COMPILER MATCHES ".*clang\\+\\+")) + message(FATAL_ERROR "When ROCTHRUST_DEVICE_SYSTEM is set to 'HIP', then 'hipcc' or a HIP-aware Clang must be used as the C++ compiler.") + endif() + else() + if(NOT (CMAKE_CXX_COMPILER MATCHES ".*hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+")) + message(FATAL_ERROR "When ROCTHRUST_DEVICE_SYSTEM is set to 'HIP', then 'hipcc' or a HIP-aware Clang must be used as the C++ compiler.") + endif() endif() else() - if(NOT (CMAKE_CXX_COMPILER MATCHES ".*hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+")) - message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.") - endif() + message(FATAL_ERROR "When ROCTHRUST_DEVICE_SYSTEM is set to 'HIP', HIP_COMPILER must be `clang` (AMD ROCm platform)") endif() -else() - message(FATAL_ERROR "HIP_COMPILER must be `clang` (AMD ROCm platform)") endif() diff --git a/projects/rocthrust/docs/install/rocThrust-install-with-cmake.rst b/projects/rocthrust/docs/install/rocThrust-install-with-cmake.rst index 70ef7ca63ab5..2fdf08a53d1f 100644 --- a/projects/rocthrust/docs/install/rocThrust-install-with-cmake.rst +++ b/projects/rocthrust/docs/install/rocThrust-install-with-cmake.rst @@ -44,6 +44,7 @@ The build options are: * ``EXTERNAL_DEPS_FORCE_DOWNLOAD``. Set this to ``ON`` to download the non-ROCm dependencies such as Google Test even if they're already installed. Default is ``OFF``. * ``USE_HIPCXX``. Set this to ``ON`` to build with CMake HIP language support. Setting this to ``ON`` eliminates the need to use ``CXX=hipcc``. Default is ``OFF``. * ``ROCPRIM_FETCH_METHOD`` and ``ROCRAND_FETCH_METHOD``. Set these to the method to use to download the rocPRIM and rocRAND components, respectively. Can be set to ``PACKAGE``, ``DOWNLOAD``, or ``MONOREPO``. Set to ``MONOREPO`` if the component isn't already installed and you're building rocThrust from within a clone of the `rocm-libraries `_ repository that includes the component. Set to ``DOWNLOAD`` if the component isn't installed and you aren't in a clone of the ``rocm-libraries`` repository that includes the component. ``DOWNLOAD`` will clone the repository using sparse checkout so that only the necessary files are downloaded. Set to ``PACKAGE`` if the component is already installed. If the component isn't installed, it'll be downloaded form the repository in the same way as using the ``DOWNLOAD`` option. The default method is ``PACKAGE``. +* ``ROCTHRUST_DEVICE_SYSTEM``. This option controls how rocThrust is linked. It may be set to one of: ``HIP``, ``CUDA``, ``TBB``, ``OpenMP``, or ``CPP``. If it's set to ``HIP`` or ``CUDA``, then the code links against device dependencies (eg. for ``HIP``, it looks for rocPRIM). If it's set to ``CPP``, then it does not link against device dependencies - it performs host-side compuation only. .. note:: diff --git a/projects/rocthrust/thrust/CMakeLists.txt b/projects/rocthrust/thrust/CMakeLists.txt index 1298b1d9fde9..dc4354604900 100644 --- a/projects/rocthrust/thrust/CMakeLists.txt +++ b/projects/rocthrust/thrust/CMakeLists.txt @@ -20,10 +20,12 @@ target_include_directories(rocthrust $ ) -target_link_libraries(rocthrust - INTERFACE +if(${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP") + target_link_libraries(rocthrust + INTERFACE roc::rocprim_hip -) + ) +endif() # hipstdpar header target add_library(hipstdpar INTERFACE) @@ -57,9 +59,17 @@ rocm_install( ) include(ROCMExportTargetsHeaderOnly) -# Export targets -rocm_export_targets_header_only( - TARGETS roc::rocthrust - DEPENDS PACKAGE rocprim - NAMESPACE roc:: -) + +if(${ROCTHRUST_DEVICE_SYSTEM} STREQUAL "HIP") + # Export targets + rocm_export_targets_header_only( + TARGETS roc::rocthrust + DEPENDS PACKAGE rocprim + NAMESPACE roc:: + ) +else() + rocm_export_targets_header_only( + TARGETS roc::rocthrust + NAMESPACE roc:: + ) +endif() diff --git a/projects/rocthrust/thrust/detail/alignment.h b/projects/rocthrust/thrust/detail/alignment.h index 41aec7cfbca5..ddcbd4bf0ece 100644 --- a/projects/rocthrust/thrust/detail/alignment.h +++ b/projects/rocthrust/thrust/detail/alignment.h @@ -34,7 +34,7 @@ #if _THRUST_HAS_DEVICE_SYSTEM_STD # include _THRUST_LIBCXX_INCLUDE(cmath) -#else +#elif _THRUST_USE_ROCPRIM # include #endif @@ -82,8 +82,10 @@ THRUST_HOST_DEVICE inline _THRUST_STD::size_t aligned_storage_size(_THRUST_STD:: { #if _THRUST_HAS_DEVICE_SYSTEM_STD return _THRUST_LIBCXX::ceil_div(n, align) * align; -#else +#elif _THRUST_USE_ROCPRIM return ::rocprim::detail::ceiling_div(n, align) * align; +#else + return (n / align + (n % align > 0 ? 1 : 0)) * align; #endif } } // end namespace detail diff --git a/projects/rocthrust/thrust/detail/config/device_system.h b/projects/rocthrust/thrust/detail/config/device_system.h index b5720f70137f..0950771033c0 100644 --- a/projects/rocthrust/thrust/detail/config/device_system.h +++ b/projects/rocthrust/thrust/detail/config/device_system.h @@ -29,7 +29,7 @@ #ifndef THRUST_DEVICE_SYSTEM # if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP # define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP -# elif THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_CLANG +# elif THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_CLANG || THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_GCC # define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CPP # else # define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CUDA diff --git a/projects/rocthrust/thrust/detail/config/libcxx.h b/projects/rocthrust/thrust/detail/config/libcxx.h index d7fadee3427c..99470f7c8f36 100644 --- a/projects/rocthrust/thrust/detail/config/libcxx.h +++ b/projects/rocthrust/thrust/detail/config/libcxx.h @@ -53,6 +53,7 @@ # define _THRUST_HAS_DEVICE_SYSTEM_STD 1 # define _THRUST_STD_NAMESPACE_BEGIN _LIBCUDACXX_BEGIN_NAMESPACE_STD # define _THRUST_STD_NAMESPACE_END _LIBCUDACXX_END_NAMESPACE_STD +# define _THRUST_USE_ROCPRIM 0 # endif // Otherwise, if the '::hip::std' namespace from 'libhipcxx' is available. @@ -71,6 +72,7 @@ # define _THRUST_HAS_DEVICE_SYSTEM_STD 1 # define _THRUST_STD_NAMESPACE_BEGIN _LIBCUDACXX_BEGIN_NAMESPACE_STD # define _THRUST_STD_NAMESPACE_END _LIBCUDACXX_END_NAMESPACE_STD +# define _THRUST_USE_ROCPRIM 0 # endif #endif @@ -85,4 +87,5 @@ namespace std \ { # define _THRUST_STD_NAMESPACE_END } +# define _THRUST_USE_ROCPRIM (THRUST_DEVICE_SYSTEM != THRUST_DEVICE_SYSTEM_CPP) #endif diff --git a/projects/rocthrust/thrust/detail/type_traits.h b/projects/rocthrust/thrust/detail/type_traits.h index 7ef586b2766f..b99765559b19 100644 --- a/projects/rocthrust/thrust/detail/type_traits.h +++ b/projects/rocthrust/thrust/detail/type_traits.h @@ -34,7 +34,7 @@ #include _THRUST_STD_INCLUDE(type_traits) -#if !_THRUST_HAS_DEVICE_SYSTEM_STD +#if !_THRUST_HAS_DEVICE_SYSTEM_STD && _THRUST_USE_ROCPRIM # include # include #endif // !_THRUST_HAS_DEVICE_SYSTEM_STD @@ -75,7 +75,11 @@ template using make_unsigned_t = typename ::std::make_unsigned::type; template +#if _THRUST_USE_ROCPRIM using accumulator_t = ::rocprim::accumulator_t; +#else +using accumulator_t = _THRUST_STD::decay_t<_THRUST_STD::invoke_result_t>; +#endif template // If we're not on Windows and we have libstdc++ >= 10, we can use the __decay_t // builtin to reduce compilation time. @@ -146,7 +150,7 @@ struct is_unbounded_array : public thrust::detail::true_type template struct is_bounded_array : public thrust::detail::false_type {}; -template +template struct is_bounded_array : public thrust::detail::true_type {}; @@ -233,8 +237,10 @@ struct larger_type template #if _THRUST_HAS_DEVICE_SYSTEM_STD using invoke_result = _THRUST_STD::__invoke_of; -#else // !_THRUST_HAS_DEVICE_SYSTEM_STD +#elif _THRUST_USE_ROCPRIM // !_THRUST_HAS_DEVICE_SYSTEM_STD using invoke_result = ::rocprim::invoke_result; +#else +using invoke_result = _THRUST_STD::invoke_result; #endif // _THRUST_HAS_DEVICE_SYSTEM_STD template diff --git a/projects/rocthrust/thrust/iterator/zip_iterator.h b/projects/rocthrust/thrust/iterator/zip_iterator.h index 7114fc7905a5..8c854d46fdc6 100644 --- a/projects/rocthrust/thrust/iterator/zip_iterator.h +++ b/projects/rocthrust/thrust/iterator/zip_iterator.h @@ -131,6 +131,7 @@ THRUST_NAMESPACE_BEGIN * \see tuple * \see get */ + template class THRUST_DECLSPEC_EMPTY_BASES zip_iterator : public detail::zip_iterator_base::type { diff --git a/projects/rocthrust/thrust/pair.h b/projects/rocthrust/thrust/pair.h index 19e6a4105f14..377af3a36f34 100644 --- a/projects/rocthrust/thrust/pair.h +++ b/projects/rocthrust/thrust/pair.h @@ -298,7 +298,7 @@ inline THRUST_HOST_DEVICE pair make_pair(T1 x, T2 y); * \tparam N This parameter selects the member of interest. * \tparam T A \c pair type of interest. */ -template +template <_THRUST_STD::size_t N, class T> struct tuple_element; /*! This convenience metafunction is included for compatibility with diff --git a/projects/rocthrust/thrust/system/detail/sequential/iter_swap.h b/projects/rocthrust/thrust/system/detail/sequential/iter_swap.h index ee40acad2062..c7eb2753aa3f 100644 --- a/projects/rocthrust/thrust/system/detail/sequential/iter_swap.h +++ b/projects/rocthrust/thrust/system/detail/sequential/iter_swap.h @@ -43,7 +43,7 @@ namespace sequential template THRUST_HOST_DEVICE void iter_swap(sequential::execution_policy&, Pointer1 a, Pointer2 b) { -#if _THRUST_HAS_DEVICE_SYSTEM_STD +#if _THRUST_HAS_DEVICE_SYSTEM_STD || THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CPP using _THRUST_STD::swap; #else using thrust::swap;