diff --git a/projects/rocthrust/CHANGELOG.md b/projects/rocthrust/CHANGELOG.md index ec4eeaeff72..e69d45b151c 100644 --- a/projects/rocthrust/CHANGELOG.md +++ b/projects/rocthrust/CHANGELOG.md @@ -8,6 +8,7 @@ Documentation for rocThrust available at ### Added * Added `thrust::unique_ptr` - a smart pointer for managing device memory with automatic cleanup. +* Added a new CMake option, `BUILD_OFFLOAD_COMPRESS`. When rocThrust is built with this option enabled, the `--offload-compress` switch is passed to the compiler. This causes the compiler to compress the binary that it generates. Compression can be useful in cases where you are compiling for a large number of targets, since this often results in a large binary. Without compression, in some cases, the generated binary may become so large that symbols are placed out of range, resulting in linking errors. The new `BUILD_OFFLOAD_COMPRESS` option is set to `ON` by default. ## rocThrust 4.1.0 for ROCm 7.1 diff --git a/projects/rocthrust/CMakeLists.txt b/projects/rocthrust/CMakeLists.txt index 3ff52a91b32..4df30f528eb 100644 --- a/projects/rocthrust/CMakeLists.txt +++ b/projects/rocthrust/CMakeLists.txt @@ -59,6 +59,7 @@ option(BUILD_HIPSTDPAR_TEST_WITH_TBB "Build hipstdpar tests with TBB" OFF) option(BUILD_EXAMPLE "Build example" OFF) option(BUILD_BENCHMARK "Build benchmark" OFF) option(BUILD_ADDRESS_SANITIZER "Build with address sanitizer enabled" OFF) +option(BUILD_OFFLOAD_COMPRESS "Build rocThrust with offload compression" ON) cmake_dependent_option(ENABLE_UPSTREAM_TESTS "Enable upstream (thrust) tests" ON BUILD_TEST OFF) cmake_dependent_option(USE_SYSTEM_LIB "Use existing system ROCm library installation when building tests" OFF BUILD_TEST OFF) option(EXTERNAL_DEPS_FORCE_DOWNLOAD "Force download of non-ROCm dependencies (eg. 
Google Test, Google Benchmark)" OFF) @@ -66,6 +67,18 @@ option(EXTERNAL_DEPS_FORCE_DOWNLOAD "Force download of non-ROCm dependencies (eg check_language(HIP) cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) +include(CheckCXXCompilerFlag) + +# Append --offload-compress to CMAKE_CXX_FLAGS only if the compiler accepts it; the probe passes -x hip alongside the flag. +if(BUILD_OFFLOAD_COMPRESS) + check_cxx_compiler_flag("--offload-compress -x hip" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) + if(CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) + string(APPEND CMAKE_CXX_FLAGS " --offload-compress") + else() + message(WARNING "BUILD_OFFLOAD_COMPRESS=ON but flag not supported by compiler. Ignoring option.") + endif() +endif() + # Allow the user to optionally select offset type dispatch to fixed 32 or 64 bit types set(THRUST_DISPATCH_TYPE "Dynamic" CACHE STRING "Select Thrust offset type dispatch." FORCE) set_property(CACHE THRUST_DISPATCH_TYPE PROPERTY STRINGS "Dynamic" "Force32bit" "Force64bit") @@ -127,6 +140,13 @@ else() endif() endif() +# Compressed offload binaries are currently not working with the SPIR-V target +if("amdgcnspirv" IN_LIST GPU_TARGETS) + if(BUILD_OFFLOAD_COMPRESS) + message(FATAL_ERROR "Cannot combine SPIR-V and BUILD_OFFLOAD_COMPRESS. Reconfigure with -DBUILD_OFFLOAD_COMPRESS=OFF.") + endif() +endif() + # Set up options for obtaining dependencies rocPRIM and rocRAND. # PACKAGE: Search for an install package that contains the dependency. # MONOREPO: Assume this is a monorepo checkout and search for the dependency in the directory at ../../projects/. 
diff --git a/projects/rocthrust/cmake/Summary.cmake b/projects/rocthrust/cmake/Summary.cmake index 9839ed543f3..c1abdaabb18 100644 --- a/projects/rocthrust/cmake/Summary.cmake +++ b/projects/rocthrust/cmake/Summary.cmake @@ -99,6 +99,7 @@ endif() message(STATUS " BUILD_HIPSTDPAR_TEST_WITH_TBB : ${BUILD_HIPSTDPAR_TEST_WITH_TBB}") message(STATUS " BUILD_EXAMPLE : ${BUILD_EXAMPLE}") message(STATUS " BUILD_BENCHMARK : ${BUILD_BENCHMARK}") + message(STATUS " BUILD_OFFLOAD_COMPRESS : ${BUILD_OFFLOAD_COMPRESS}") if(BUILD_BENCHMARK) message(STATUS " ROCRAND_FETCH_METHOD : ${ROCRAND_FETCH_METHOD}") endif() diff --git a/projects/rocthrust/rmake.py b/projects/rocthrust/rmake.py index b2dec125510..02b8629dfec 100644 --- a/projects/rocthrust/rmake.py +++ b/projects/rocthrust/rmake.py @@ -42,6 +42,7 @@ def parse_args(): help='Set GPU architectures, e.g. all, gfx000, gfx803, gfx906:xnack-;gfx1030 (optional, default: all)') parser.add_argument('-v', '--verbose', required=False, default=False, action='store_true', help='Verbose build (default: False)') + parser.add_argument('--no-offload-compress', required=False, default=False, action='store_true', help='Do not apply offload compression (default: False)') return parser.parse_args() def os_detect(): @@ -159,6 +160,10 @@ def config_cmd(): if args.build_clients: cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_BENCHMARK=ON -DBUILD_DIR={build_dir}" ) + # BUILD_OFFLOAD_COMPRESS defaults to ON in CMakeLists.txt, so it is only passed when opting out. + if args.no_offload_compress: + cmake_options.append( "-DBUILD_OFFLOAD_COMPRESS=OFF" ) + cmake_options.append( f"-DAMDGPU_TARGETS={args.gpu_architecture}" ) if args.cmake_dargs: