diff --git a/projects/rocprim/CMakeLists.txt b/projects/rocprim/CMakeLists.txt index f75fac64e19..4e1eff3ac44 100644 --- a/projects/rocprim/CMakeLists.txt +++ b/projects/rocprim/CMakeLists.txt @@ -1,309 +1,213 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT cmake_minimum_required(VERSION 3.16 FATAL_ERROR) cmake_policy(VERSION 3.16...3.25) -# Install prefix -set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(rocprim_shim) -# rocPRIM project -project(rocprim LANGUAGES CXX) - -# Set CXX flags -if (NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) -endif() -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -# Set HIP flags -set(CMAKE_HIP_STANDARD 14) -set(CMAKE_HIP_STANDARD_REQUIRED ON) -set(CMAKE_HIP_EXTENSIONS OFF) - -if(NOT CMAKE_CXX_STANDARD EQUAL 17) - message(FATAL_ERROR "Only C++17 is supported") +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + if(WIN32) + set(CMAKE_INSTALL_PREFIX "C:/hipSDK" CACHE PATH "Install path prefix" FORCE) + else() + set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix" FORCE) + endif() endif() -if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) - set(ROCPRIM_PROJECT_IS_TOP_LEVEL TRUE) -else() - set(ROCPRIM_PROJECT_IS_TOP_LEVEL FALSE) -endif() +project(rocprim LANGUAGES CXX) -#Adding CMAKE_PREFIX_PATH -if(WIN32) - set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation") +if("${CMAKE_SOURCE_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") + set(ROCPRIM_IS_SUBPROJECT FALSE) else() - set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation") + set(ROCPRIM_IS_SUBPROJECT TRUE) endif() include(CheckLanguage) include(CMakeDependentOption) +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +# Get dependencies (required here to get rocm-cmake) +include(Dependencies) + +set(VERSION_STRING "4.1.0") +rocm_setup_version(VERSION ${VERSION_STRING}) # Build options -# Disables building tests, benchmarks, examples -option(ONLY_INSTALL "Only install" OFF) -cmake_dependent_option(BUILD_TEST "Build tests (requires googletest)" OFF "NOT ONLY_INSTALL" OFF) -option(WITH_ROCRAND "Build tests with device-side data generation(requires rocRAND)" OFF) -cmake_dependent_option(BUILD_BENCHMARK "Build benchmarks" OFF "NOT ONLY_INSTALL" OFF) -cmake_dependent_option(BUILD_EXAMPLE "Build examples" OFF "NOT ONLY_INSTALL" OFF) -option(BUILD_NAIVE_BENCHMARK "Build naive benchmarks" OFF) -cmake_dependent_option(BUILD_DOCS "Build documentation (requires sphinx)" OFF "NOT ONLY_INSTALL" OFF) -option(BUILD_CODE_COVERAGE "Build with code coverage enabled" OFF) -option(ROCPRIM_INSTALL "Enable installation of rocPRIM (projects embedding rocPRIM may want to turn this OFF)" ON) -option(ROCPRIM_ENABLE_ASSERTS "Enable asserts in release build)" OFF) -option(BUILD_OFFLOAD_COMPRESS "Build rocPRIM with offload compression" ON) -cmake_dependent_option(USE_SYSTEM_LIB "Use installed ROCm libs when building tests" OFF BUILD_TEST OFF) +option(ROCPRIM_ENABLE_INSTALL "Enable installation of rocPRIM" ON) +cmake_dependent_option(ROCPRIM_BUILD_TESTING "Build tests" ON "NOT ROCPRIM_IS_SUBPROJECT" OFF) +cmake_dependent_option(ROCPRIM_ENABLE_BENCHMARK "Build benchmarks" OFF "NOT ROCPRIM_IS_SUBPROJECT" OFF) +cmake_dependent_option(ROCPRIM_ENABLE_EXAMPLES "Build examples" OFF "NOT ROCPRIM_IS_SUBPROJECT" OFF) +option(ROCPRIM_ENABLE_DOCS "Build documentation" OFF) +option(ROCPRIM_ENABLE_COVERAGE "Build with code coverage enabled" OFF) +option(ROCPRIM_ENABLE_ASSERTS "Enable asserts in release build" OFF) +option(ROCPRIM_ENABLE_OFFLOAD_COMPRESS "Build rocPRIM with offload compression" ON) +option(ROCPRIM_ENABLE_NAIVE_BENCHMARK "Build naive benchmarks" OFF) +option(ROCPRIM_ENABLE_CONFIG_TUNING "Benchmark device-level functions using various configs" OFF) +option(ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY "Benchmark autotuned types only" OFF) +cmake_dependent_option(ROCPRIM_ENABLE_ROCRAND "Build tests with device-side data generation (requires rocRAND)" OFF "ROCPRIM_BUILD_TESTING" OFF) +cmake_dependent_option(ROCPRIM_USE_SYSTEM_LIBS "Use installed ROCm libs when building tests" OFF "ROCPRIM_BUILD_TESTING" OFF) check_language(HIP) -cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) +cmake_dependent_option(ROCPRIM_USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) include(CheckCXXCompilerFlag) -if(BUILD_OFFLOAD_COMPRESS) - check_cxx_compiler_flag("--offload-compress -x hip" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) - if(CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --offload-compress") - else() - message(STATUS "Warning: BUILD_OFFLOAD_COMPRESS=ON but flag not supported by compiler. Ignoring option.") - endif() -endif() - -if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) - set(ROCPRIM_PROJECT_IS_TOP_LEVEL TRUE) -else() - set(ROCPRIM_PROJECT_IS_TOP_LEVEL FALSE) -endif() - -#Adding CMAKE_PREFIX_PATH -if(WIN32) - set(ROCM_ROOT "$ENV{HIP_PATH}" CACHE PATH "Root directory of the ROCm installation") -else() - set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation") -endif() - -# CMake modules -list(APPEND CMAKE_MODULE_PATH - ${CMAKE_CURRENT_SOURCE_DIR}/cmake - ${ROCM_PATH}/lib/cmake/hip ${HIP_PATH}/cmake ${ROCM_ROOT}/lib/cmake/hip ${ROCM_ROOT}/hip/cmake # FindHIP.cmake -) - -# Set a default build type if none was specified -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to 'Release' as none was specified.") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE) - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "" "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +if(ROCPRIM_ENABLE_OFFLOAD_COMPRESS) + check_cxx_compiler_flag("--offload-compress -x hip" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) + if(CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS) + add_compile_options(--offload-compress) + else() + message(STATUS "Warning: ROCPRIM_ENABLE_OFFLOAD_COMPRESS=ON but flag not supported by compiler. Ignoring option.") + endif() endif() if(ROCPRIM_ENABLE_ASSERTS) - if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") - string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE) - set(BUILD_TYPE_CXX_FLAGS "CMAKE_CXX_FLAGS_${BUILD_TYPE}") - set(BUILD_TYPE_C_FLAGS "CMAKE_C_FLAGS_${BUILD_TYPE}") - endif() - - string(REGEX REPLACE "-DNDEBUG( |$)" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REGEX REPLACE "-DNDEBUG( |$)" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") - string(REGEX REPLACE "-DNDEBUG( |$)" "" ${BUILD_TYPE_CXX_FLAGS} "${${BUILD_TYPE_CXX_FLAGS}}") - string(REGEX REPLACE "-DNDEBUG( |$)" "" ${BUILD_TYPE_C_FLAGS} "${${BUILD_TYPE_C_FLAGS}}") - endif() + add_compile_definitions($<$,$,$>:ROCPRIM_ENABLE_ASSERTS>) + if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "") + string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE) + set(BUILD_TYPE_CXX_FLAGS "CMAKE_CXX_FLAGS_${BUILD_TYPE}") + set(BUILD_TYPE_C_FLAGS "CMAKE_C_FLAGS_${BUILD_TYPE}") + string(REGEX REPLACE "-DNDEBUG( |$)" "" ${BUILD_TYPE_CXX_FLAGS} "${${BUILD_TYPE_CXX_FLAGS}}") + string(REGEX REPLACE "-DNDEBUG( |$)" "" ${BUILD_TYPE_C_FLAGS} "${${BUILD_TYPE_C_FLAGS}}") + endif() endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") -if(DEFINED BUILD_SHARED_LIBS) - set(PKG_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) -else() - set(PKG_BUILD_SHARED_LIBS ON) -endif() -set(BUILD_SHARED_LIBS OFF) # don't build client dependencies as shared - -# Get dependencies (required here to get rocm-cmake) -include(cmake/Dependencies.cmake) - # Use target ID syntax if supported for GPU_TARGETS -if(USE_HIPCXX) - enable_language(HIP) +if(ROCPRIM_USE_HIPCXX) + enable_language(HIP) else() - if (NOT DEFINED AMDGPU_TARGETS) - set(GPU_TARGETS "all" CACHE STRING "GPU architectures to compile for") - else() - set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for") - endif() - set_property(CACHE GPU_TARGETS PROPERTY STRINGS "all") - - if(GPU_TARGETS STREQUAL "all") - if(BUILD_ADDRESS_SANITIZER) - # ASAN builds require xnack - rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+;gfx950:xnack+" - ) + if(NOT DEFINED AMDGPU_TARGETS) + set(GPU_TARGETS "all" CACHE STRING "GPU architectures to compile for") else() - rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201" - ) + set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for") + endif() + set_property(CACHE GPU_TARGETS PROPERTY STRINGS "all") + + if(GPU_TARGETS STREQUAL "all") + if(BUILD_ADDRESS_SANITIZER) + rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS + TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+;gfx950:xnack+" + ) + else() + rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS + TARGETS "gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201" + ) + endif() + set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE) endif() - set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE) - endif() endif() # Compressed offload binaries are currently not working with the SPIR-V target if("amdgcnspirv" IN_LIST GPU_TARGETS) - if(BUILD_OFFLOAD_COMPRESS) - message(FATAL_ERROR "Cannot combine SPIR-V and BUILD_OFFLOAD_COMPRESS") - endif() + if(ROCPRIM_ENABLE_OFFLOAD_COMPRESS) + message(FATAL_ERROR "Cannot combine SPIR-V and ROCPRIM_ENABLE_OFFLOAD_COMPRESS") + endif() endif() -# TODO: Fix VerifyCompiler for HIP on Windows -if (NOT WIN32) - include(cmake/VerifyCompiler.cmake) -endif() -list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip ${ROCM_PATH}/llvm ${ROCM_ROOT}/llvm ${ROCM_ROOT} ${ROCM_ROOT}/hip) -find_package(hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm) +find_package(hip REQUIRED CONFIG) -if(BUILD_CODE_COVERAGE) - add_compile_options(-fprofile-arcs -ftest-coverage) - add_link_options(--coverage) +if(NOT WIN32) + include(VerifyCompiler) endif() -# Setup VERSION -set(VERSION_STRING "4.1.0") -rocm_setup_version(VERSION ${VERSION_STRING}) -math(EXPR rocprim_VERSION_NUMBER "${rocprim_VERSION_MAJOR} * 100000 + ${rocprim_VERSION_MINOR} * 100 + ${rocprim_VERSION_PATCH}") +if(ROCPRIM_ENABLE_COVERAGE) + add_compile_options(-fprofile-arcs -ftest-coverage) + add_link_options(--coverage) +endif() # Print configuration summary -include(cmake/Summary.cmake) +include(Summary) print_configuration_summary() # rocPRIM library add_subdirectory(rocprim) -if(ROCPRIM_PROJECT_IS_TOP_LEVEL AND (BUILD_TEST OR BUILD_BENCHMARK)) - rocm_package_setup_component(clients) +if(NOT ROCPRIM_IS_SUBPROJECT AND (ROCPRIM_BUILD_TESTING OR ROCPRIM_ENABLE_BENCHMARK)) + rocm_package_setup_component(clients) endif() # Tests -if(BUILD_TEST) - if(USE_SYSTEM_LIB) - find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") - if (NOT ${rocprim_VERSION} VERSION_EQUAL ${VERSION_STRING}) - message(WARNING "The installed rocprim version, ${rocprim_VERSION}, does not match project version ${VERSION_STRING}. Building tests with USE_SYSTEM_LIB=ON may not work properly.") +if(ROCPRIM_BUILD_TESTING) + if(ROCPRIM_USE_SYSTEM_LIBS) + find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") + if(NOT ${rocprim_VERSION} VERSION_EQUAL ${VERSION_STRING}) + message(WARNING "The installed rocprim version, ${rocprim_VERSION}, does not match project version ${VERSION_STRING}. Building tests with ROCPRIM_USE_SYSTEM_LIBS=ON may not work properly.") + endif() endif() - endif() - if (ROCPRIM_PROJECT_IS_TOP_LEVEL) - rocm_package_setup_client_component(tests) - endif() - enable_testing() - add_subdirectory(test) + if(NOT ROCPRIM_IS_SUBPROJECT) + rocm_package_setup_client_component(tests) + endif() + enable_testing() + add_subdirectory(test) endif() # Benchmarks -if(BUILD_BENCHMARK) - if (ROCPRIM_PROJECT_IS_TOP_LEVEL) - rocm_package_setup_client_component(benchmarks) - endif() - add_subdirectory(benchmark) +if(ROCPRIM_ENABLE_BENCHMARK) + if(NOT ROCPRIM_IS_SUBPROJECT) + rocm_package_setup_client_component(benchmarks) + endif() + add_subdirectory(benchmark) endif() # Examples -if(BUILD_EXAMPLE) - add_subdirectory(example) +if(ROCPRIM_ENABLE_EXAMPLES) + add_subdirectory(example) endif() # Docs -if(BUILD_DOCS) - add_subdirectory(docs) +if(ROCPRIM_ENABLE_DOCS) + add_subdirectory(docs) endif() -# set BUILD_SHARED_LIBS for packaging -set(BUILD_SHARED_LIBS ${PKG_BUILD_SHARED_LIBS}) # Package -if (ROCPRIM_PROJECT_IS_TOP_LEVEL) - # add dependency on HIP runtime - set(HIP_RUNTIME_MINIMUM 4.5.0) - if(BUILD_ADDRESS_SANITIZER) - set(DEPENDS_HIP_RUNTIME "hip-runtime-amd-asan" ) - else() - set(DEPENDS_HIP_RUNTIME "hip-runtime-amd" ) - endif() - - rocm_package_add_dependencies(SHARED_DEPENDS "${DEPENDS_HIP_RUNTIME} >= ${HIP_RUNTIME_MINIMUM}") - rocm_package_add_deb_dependencies(STATIC_DEPENDS "hip-static-dev >= ${HIP_RUNTIME_MINIMUM}") - rocm_package_add_rpm_dependencies(STATIC_DEPENDS "hip-static-devel >= ${HIP_RUNTIME_MINIMUM}") - - set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") - set(CPACK_RPM_PACKAGE_LICENSE "MIT") - - set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}" ) - - rocm_create_package( - NAME rocprim - DESCRIPTION "rocPRIM is a header-only library that provides HIP parallel primitives." - MAINTAINER "rocPRIM Maintainer " - HEADER_ONLY - ) +if(NOT ROCPRIM_IS_SUBPROJECT) + set(HIP_RUNTIME_MINIMUM 4.5.0) + if(BUILD_ADDRESS_SANITIZER) + set(DEPENDS_HIP_RUNTIME "hip-runtime-amd-asan") + else() + set(DEPENDS_HIP_RUNTIME "hip-runtime-amd") + endif() + + rocm_package_add_dependencies(SHARED_DEPENDS "${DEPENDS_HIP_RUNTIME} >= ${HIP_RUNTIME_MINIMUM}") + rocm_package_add_deb_dependencies(STATIC_DEPENDS "hip-static-dev >= ${HIP_RUNTIME_MINIMUM}") + rocm_package_add_rpm_dependencies(STATIC_DEPENDS "hip-static-devel >= ${HIP_RUNTIME_MINIMUM}") + + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") + set(CPACK_RPM_PACKAGE_LICENSE "MIT") + set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PREFIX}") + + rocm_create_package( + NAME rocprim + DESCRIPTION "rocPRIM is a header-only library that provides HIP parallel primitives." + MAINTAINER "rocPRIM Maintainer " + HEADER_ONLY + ) endif() -# -# ADDITIONAL TARGETS FOR CODE COVERAGE -# -if(BUILD_CODE_COVERAGE) - # - # > make coverage_cleanup (clean coverage related files.) - # > # run your tests - # > make coverage (generate html documentation) - # - - # - # Prepare coverage output - # This little script is generated because the option '--gcov-tool ' of lcov cannot take arguments. - # - add_custom_target(coverage - DEPENDS rocprim - COMMAND mkdir -p lcoverage - COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh - COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh - COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh - COMMAND chmod +x llvm-gcov.sh +# Additional targets for code coverage +if(ROCPRIM_ENABLE_COVERAGE) + add_custom_target(coverage + DEPENDS rocprim + COMMAND mkdir -p lcoverage + COMMAND echo "\\#!/bin/bash" > llvm-gcov.sh + COMMAND echo "\\# THIS FILE HAS BEEN GENERATED" >> llvm-gcov.sh + COMMAND printf "exec /opt/rocm/llvm/bin/llvm-cov gcov $$\\@" >> llvm-gcov.sh + COMMAND chmod +x llvm-gcov.sh ) - # - # Generate coverage output. - # - add_custom_command(TARGET coverage - COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info - COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info - COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage + add_custom_command(TARGET coverage + COMMAND lcov --directory . --base-directory . --gcov-tool ${CMAKE_BINARY_DIR}/llvm-gcov.sh --capture -o lcoverage/raw_main_coverage.info + COMMAND lcov --remove lcoverage/raw_main_coverage.info "'/opt/*'" "'/usr/*'" -o lcoverage/main_coverage.info + COMMAND genhtml lcoverage/main_coverage.info --output-directory lcoverage ) - # - # Coverage cleanup - # - add_custom_target(coverage_cleanup - COMMAND find ${CMAKE_BINARY_DIR} -name *.gcda -delete - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + add_custom_target(coverage_cleanup + COMMAND find ${CMAKE_BINARY_DIR} -name *.gcda -delete + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) endif() diff --git a/projects/rocprim/CMakePresets.json b/projects/rocprim/CMakePresets.json new file mode 100644 index 00000000000..79a6f6f90c8 --- /dev/null +++ b/projects/rocprim/CMakePresets.json @@ -0,0 +1,57 @@ +{ + "version": 3, + "configurePresets": [ + { + "name": "default:release", + "displayName": "Configure release build with installation to /opt/rocm", + "binaryDir": "${sourceDir}/build", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "/opt/rocm/bin/amdclang++", + "CMAKE_C_COMPILER": "/opt/rocm/bin/amdclang", + "CMAKE_PREFIX_PATH": "/opt/rocm", + "CMAKE_INSTALL_PREFIX": "/opt/rocm", + "CMAKE_BUILD_TYPE": "Release", + "GPU_TARGETS": "all" + } + }, + { + "name": "debug", + "displayName": "Configure debug build", + "inherits": "default:release", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "ROCPRIM_BUILD_TESTING": "ON", + "ROCPRIM_ENABLE_BENCHMARK": "ON", + "ROCPRIM_ENABLE_EXAMPLES": "ON" + } + }, + { + "name": "tests-only", + "displayName": "Configure tests only", + "inherits": "default:release", + "cacheVariables": { + "ROCPRIM_BUILD_TESTING": "ON", + "ROCPRIM_ENABLE_BENCHMARK": "OFF", + "ROCPRIM_ENABLE_EXAMPLES": "OFF" + } + }, + { + "name": "benchmarks-only", + "displayName": "Configure benchmarks only", + "inherits": "default:release", + "cacheVariables": { + "ROCPRIM_BUILD_TESTING": "OFF", + "ROCPRIM_ENABLE_BENCHMARK": "ON", + "ROCPRIM_ENABLE_EXAMPLES": "OFF" + } + }, + { + "name": "coverage", + "displayName": "Configure coverage build", + "inherits": "debug", + "cacheVariables": { + "ROCPRIM_ENABLE_COVERAGE": "ON" + } + } + ] +} diff --git a/projects/rocprim/benchmark/CMakeLists.txt b/projects/rocprim/benchmark/CMakeLists.txt index 928b0663a66..bbf81a9d7e8 100644 --- a/projects/rocprim/benchmark/CMakeLists.txt +++ b/projects/rocprim/benchmark/CMakeLists.txt @@ -1,151 +1,122 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -option(BENCHMARK_CONFIG_TUNING "Benchmark device-level functions using various configs" OFF) -option(BENCHMARK_AUTOTUNED_TYPES_ONLY "Benchmark autotuned types only, which lowers the benchmarking runtime" OFF) - -include(../cmake/ConfigAutotune.cmake) -include(ConfigAutotuneSettings.cmake) - -option(BENCHMARK_TUNE_PARAM_NAMES "Tuning parameter names" "") -option(BENCHMARK_TUNE_PARAMS "Tuning parameters" "") - -option(BENCHMARK_USE_AMDSMI "Let benchmarks use AMD SMI to output more GPU statistics" OFF) - -if(BENCHMARK_CONFIG_TUNING) - add_custom_target("benchmark_config_tuning") -endif() +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -function(add_rocprim_benchmark BENCHMARK_SOURCE) - get_filename_component(BENCHMARK_TARGET ${BENCHMARK_SOURCE} NAME_WE) +include(ConfigAutotune) +include(ConfigAutotuneSettings) - if(USE_HIPCXX) - set_source_files_properties(${BENCHMARK_SOURCE} PROPERTIES LANGUAGE HIP) - endif() +option(ROCPRIM_BENCHMARK_USE_AMDSMI "Let benchmarks use AMD SMI to output more GPU statistics" OFF) +set(ROCPRIM_BENCHMARK_TUNE_PARAM_NAMES "" CACHE STRING "Tuning parameter names") +set(ROCPRIM_BENCHMARK_TUNE_PARAMS "" CACHE STRING "Tuning parameters") - if(BENCHMARK_CONFIG_TUNING) - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${BENCHMARK_TARGET}.parallel.cpp.in") - message(STATUS "found ${BENCHMARK_TARGET}.parallel.cpp.in file, compiling in parallel.") - read_config_autotune_settings(${BENCHMARK_TARGET} list_across_names list_across output_pattern_suffix) +if(ROCPRIM_ENABLE_CONFIG_TUNING) + add_custom_target(benchmark_config_tuning) +endif() - if(BENCHMARK_TUNE_PARAM_NAMES AND BENCHMARK_TUNE_PARAMS) - set(list_across_names "${BENCHMARK_TUNE_PARAM_NAMES}") - set(list_across "${BENCHMARK_TUNE_PARAMS}") - endif() +function(add_rocprim_benchmark BENCHMARK_SOURCE) + get_filename_component(BENCHMARK_TARGET ${BENCHMARK_SOURCE} NAME_WE) - #make sure that variables are not empty, i.e. there actually is an entry for that benchmark in benchmark/ConfigAutotuneSettings.cmake - if(list_across_names) - add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) - target_compile_definitions(${BENCHMARK_TARGET} PRIVATE BENCHMARK_CONFIG_TUNING) - add_matrix(TARGET ${BENCHMARK_TARGET} - SHARDS 1 - CURRENT_SHARD 0 - INPUT "${BENCHMARK_TARGET}.parallel.cpp.in" - OUTPUT_PATTERN "${BENCHMARK_TARGET}_${output_pattern_suffix}" - NAMES ${list_across_names} - LISTS ${list_across}) - add_dependencies(benchmark_config_tuning ${BENCHMARK_TARGET}) - else() - message(WARNING "No config-tuning entry in benchmark/ConfigAutotuneSettings.cmake for ${BENCHMARK_TARGET}!") - return() - endif() - else() - #do nothing if BENCHMARK_CONFIG_TUNING is ON but no ${BENCHMARK_TARGET}.parallel.cpp.in exists - return() + if(ROCPRIM_USE_HIPCXX) + set_source_files_properties(${BENCHMARK_SOURCE} PROPERTIES LANGUAGE HIP) endif() - else() - add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) - endif() - if(BUILD_NAIVE_BENCHMARK) - target_compile_definitions(${BENCHMARK_TARGET} PUBLIC BUILD_NAIVE_BENCHMARK) - endif() + if(ROCPRIM_ENABLE_CONFIG_TUNING) + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${BENCHMARK_TARGET}.parallel.cpp.in") + message(STATUS "Found ${BENCHMARK_TARGET}.parallel.cpp.in file, compiling in parallel.") + read_config_autotune_settings(${BENCHMARK_TARGET} list_across_names list_across output_pattern_suffix) + + if(ROCPRIM_BENCHMARK_TUNE_PARAM_NAMES AND ROCPRIM_BENCHMARK_TUNE_PARAMS) + set(list_across_names "${ROCPRIM_BENCHMARK_TUNE_PARAM_NAMES}") + set(list_across "${ROCPRIM_BENCHMARK_TUNE_PARAMS}") + endif() + + if(list_across_names) + add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) + target_compile_definitions(${BENCHMARK_TARGET} PRIVATE BENCHMARK_CONFIG_TUNING) + add_matrix( + TARGET ${BENCHMARK_TARGET} + SHARDS 1 + CURRENT_SHARD 0 + INPUT "${BENCHMARK_TARGET}.parallel.cpp.in" + OUTPUT_PATTERN "${BENCHMARK_TARGET}_${output_pattern_suffix}" + NAMES ${list_across_names} + LISTS ${list_across} + ) + add_dependencies(benchmark_config_tuning ${BENCHMARK_TARGET}) + else() + message(WARNING "No config-tuning entry in benchmark/ConfigAutotuneSettings.cmake for ${BENCHMARK_TARGET}!") + return() + endif() + else() + return() + endif() + else() + add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) + endif() - if(BENCHMARK_AUTOTUNED_TYPES_ONLY) - target_compile_definitions(${BENCHMARK_TARGET} PUBLIC BENCHMARK_AUTOTUNED_TYPES_ONLY) - endif() + if(ROCPRIM_ENABLE_NAIVE_BENCHMARK) + target_compile_definitions(${BENCHMARK_TARGET} PRIVATE BUILD_NAIVE_BENCHMARK) + endif() - target_link_libraries(${BENCHMARK_TARGET} - PRIVATE - rocprim - benchmark::benchmark - ) + if(ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY) + target_compile_definitions(${BENCHMARK_TARGET} PRIVATE BENCHMARK_AUTOTUNED_TYPES_ONLY) + endif() - if(USE_HIPCXX) target_link_libraries(${BENCHMARK_TARGET} - PRIVATE - $,hip::host,hip::device> + PRIVATE + roc::rocprim + benchmark::benchmark ) - else() - target_link_libraries(${BENCHMARK_TARGET} - PRIVATE - hip::device) - endif() - - if(BENCHMARK_USE_AMDSMI) - find_library(AMDSMI_LIB NAMES amd_smi) - if(AMDSMI_LIB) - target_link_libraries(${BENCHMARK_TARGET} PRIVATE ${AMDSMI_LIB}) + + if(ROCPRIM_USE_HIPCXX) + target_link_libraries(${BENCHMARK_TARGET} + PRIVATE + $,hip::host,hip::device> + ) else() - message(FATAL_ERROR "BENCHMARK_USE_AMDSMI was ON, but AMD SMI is not installed") + target_link_libraries(${BENCHMARK_TARGET} + PRIVATE + hip::device + ) + endif() + + if(ROCPRIM_BENCHMARK_USE_AMDSMI) + find_library(AMDSMI_LIB NAMES amd_smi REQUIRED) + target_link_libraries(${BENCHMARK_TARGET} PRIVATE ${AMDSMI_LIB}) + target_compile_definitions(${BENCHMARK_TARGET} PRIVATE BENCHMARK_USE_AMDSMI) endif() - target_compile_definitions(${BENCHMARK_TARGET} PRIVATE BENCHMARK_USE_AMDSMI) - endif() - - target_compile_options(${BENCHMARK_TARGET} - PRIVATE - $<$: - /bigobj # number of sections exceeded object file format limit: compile with /bigobj - > - ) - - set_target_properties(${BENCHMARK_TARGET} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmark" - ) - - if (ROCPRIM_INSTALL) - rocm_install(TARGETS ${BENCHMARK_TARGET} COMPONENT benchmarks) - endif() - if (WIN32 AND NOT DEFINED DLLS_COPIED) - set(DLLS_COPIED "YES") - set(DLLS_COPIED ${DLLS_COPIED} PARENT_SCOPE) - # for now adding in all .dll as dependency chain is not cmake based on win32 - file( GLOB third_party_dlls - LIST_DIRECTORIES ON - CONFIGURE_DEPENDS - ${HIP_DIR}/bin/*.dll - ${CMAKE_SOURCE_DIR}/rtest.* + + target_compile_options(${BENCHMARK_TARGET} + PRIVATE + $<$:/bigobj> + ) + + set_target_properties(${BENCHMARK_TARGET} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmark" ) - foreach( file_i ${third_party_dlls}) - add_custom_command( TARGET ${BENCHMARK_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/benchmark ) - endforeach( file_i ) - endif() + + if(ROCPRIM_ENABLE_INSTALL) + rocm_install(TARGETS ${BENCHMARK_TARGET} COMPONENT benchmarks) + endif() + + if(WIN32 AND NOT DEFINED DLLS_COPIED) + set(DLLS_COPIED "YES" PARENT_SCOPE) + file(GLOB third_party_dlls + LIST_DIRECTORIES ON + CONFIGURE_DEPENDS + ${HIP_DIR}/bin/*.dll + ${CMAKE_SOURCE_DIR}/rtest.* + ) + foreach(file_i ${third_party_dlls}) + add_custom_command( + TARGET ${BENCHMARK_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/benchmark + ) + endforeach() + endif() endfunction() -# **************************************************************************** # Benchmarks -# **************************************************************************** - add_rocprim_benchmark(benchmark_block_adjacent_difference.cpp) add_rocprim_benchmark(benchmark_block_discontinuity.cpp) add_rocprim_benchmark(benchmark_block_exchange.cpp) diff --git a/projects/rocprim/benchmark/ConfigAutotuneSettings.cmake b/projects/rocprim/benchmark/ConfigAutotuneSettings.cmake index acd4f129286..cce64f85d0a 100644 --- a/projects/rocprim/benchmark/ConfigAutotuneSettings.cmake +++ b/projects/rocprim/benchmark/ConfigAutotuneSettings.cmake @@ -1,24 +1,5 @@ -# MIT License -# -# Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT # All default fallback types as listed in scripts/autotune/fallback_config.json set(TUNING_TYPES "rocprim::int128_t int64_t int short int8_t double float rocprim::half") @@ -28,124 +9,101 @@ set(TUNING_TYPES "rocprim::int128_t int64_t int short int8_t double float rocpri set(LIMITED_TUNING_TYPES "rocprim::int128_t int64_t int short int8_t") function(read_config_autotune_settings file list_across_names list_across output_pattern_suffix) - if(file STREQUAL "benchmark_device_adjacent_difference") - set(list_across_names "DataType;Left;Aliasing;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};\ -true;no_alias in_place;32 64 128 256 512 1024" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@Left@_@Aliasing@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_adjacent_find") - set(list_across_names "InputType;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};64 128 256 512 1024" PARENT_SCOPE) - set(output_pattern_suffix "@InputType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_histogram") - set(list_across_names "DataType;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};64 128 256" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_merge_sort_block_merge") - set(list_across_names "KeyType;ValueType;BlockSize;UseMergePath" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};\ -128 256 512 1024;true" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@_@UseMergePath@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_merge_sort_block_sort") - set(list_across_names "KeyType;ValueType;BlockSize;BlockSortMethod" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};\ -256 512 1024;rocprim::block_sort_algorithm::stable_merge_sort" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@_@BlockSortMethod@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_radix_sort_block_sort") - set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};\ -64 128 256 512 1024" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_radix_sort_onesweep") - set(list_across_names "KeyType;ValueType;BlockSize;RadixBits" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};\ -128 256 512 1024;4 5 6 7 8" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@_@RadixBits@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_reduce") - set(list_across_names "DataType;BlockSize;ItemsPerThread" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};64 128 256;1 2 4 8 16" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@BlockSize@_@ItemsPerThread@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_scan") - set(list_across_names "DataType;Algo" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};using_warp_scan reduce_then_scan" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@Algo@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_scan_by_key") - set(list_across_names "KeyType;ValueType;Algo" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};${LIMITED_TUNING_TYPES};using_warp_scan reduce_then_scan" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@Algo@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_binary_search") - set(list_across_names "SubAlgorithm;ValueType;OutputType;BlockSize;ItemsPerThread" PARENT_SCOPE) - set(list_across "\ -binary_search upper_bound lower_bound;${TUNING_TYPES};${LIMITED_TUNING_TYPES};64 128 256;1 2 4 8 16" PARENT_SCOPE) - set(output_pattern_suffix "@SubAlgorithm@_@ValueType@_@OutputType@_@BlockSize@_@ItemsPerThread@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_search_n") - set(list_across_names "InputType;BlockSize;ItemsPerThread;Threshold" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};64 128 256 512 1024;1 2 4 8 16;4 8 12 16" PARENT_SCOPE) - set(output_pattern_suffix "@InputType@_@BlockSize@_@ItemsPerThread@_@Threshold@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_segmented_radix_sort_keys") - set(list_across_names "\ -KeyType;RadixBits;BlockSize;ItemsPerThread;WarpSmallLWS;WarpSmallIPT;WarpSmallBS;WarpPartition;WarpMediumLWS;WarpMediumIPT;WarpMediumBS" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};8;256;4 8 16;8;4;256;64;16;8;256" PARENT_SCOPE) - set(output_pattern_suffix "\ -@KeyType@_@RadixBits@_@BlockSize@_@ItemsPerThread@_@WarpSmallLWS@_@WarpSmallIPT@_@WarpSmallBS@_@WarpPartition@_@WarpMediumLWS@_@WarpMediumIPT@_@WarpMediumBS@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_segmented_radix_sort_pairs") - set(list_across_names "\ -KeyType;ValueType;RadixBits;BlockSize;ItemsPerThread;WarpSmallLWS;WarpSmallIPT;WarpSmallBS;WarpPartition;WarpMediumLWS;WarpMediumIPT;WarpMediumBS" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};${LIMITED_TUNING_TYPES};8;256;4 8 16;8;4;256;64;16;8;256" PARENT_SCOPE) - set(output_pattern_suffix "\ -@KeyType@_@ValueType@_@RadixBits@_@BlockSize@_@ItemsPerThread@_@WarpSmallLWS@_@WarpSmallIPT@_@WarpSmallBS@_@WarpPartition@_@WarpMediumLWS@_@WarpMediumIPT@_@WarpMediumBS@" PARENT_SCOPE) -elseif(file STREQUAL "benchmark_device_segmented_reduce") - set(list_across_names "DataType;BlockSize;ItemsPerThread" PARENT_SCOPE) - set(list_across "\ -${TUNING_TYPES};64 128 256;1 2 4 8 16" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@BlockSize@_@ItemsPerThread@" PARENT_SCOPE) -elseif(file STREQUAL "benchmark_device_transform") - set(list_across_names "\ -DataType;BlockSize;" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};64 128 256 512 1024" PARENT_SCOPE) - set(output_pattern_suffix "\ -@DataType@_@BlockSize@" PARENT_SCOPE) -elseif(file STREQUAL "benchmark_device_transform_pointer") - set(list_across_names "\ -DataType;BlockSize;LoadType" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};64 128 256 512 1024;rocprim::load_default rocprim::load_nontemporal" PARENT_SCOPE) - set(output_pattern_suffix "\ -@DataType@_@BlockSize@_@LoadType@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_partition") - set(list_across_names "DataType;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_select") - set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_reduce_by_key") - set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) - set(list_across "${LIMITED_TUNING_TYPES};${TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_find_first_of") - set(list_across_names "DataType;BlockSize" PARENT_SCOPE) - set(list_across "${LIMITED_TUNING_TYPES};32 64 128 256 512 1024" PARENT_SCOPE) - set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_run_length_encode") - set(list_across_names "KeyType;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@BlockSize@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_run_length_encode_non_trivial_runs") - set(list_across_names "KeyType;BlockSize;BlockLoadMethod" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};64 128 256 512 1024;block_load_vectorize block_load_warp_transpose" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@BlockSize@_@BlockLoadMethod@" PARENT_SCOPE) - elseif(file STREQUAL "benchmark_device_merge") - set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) - set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};32 64 128 256 512 1024" PARENT_SCOPE) - set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) - endif() + if(file STREQUAL "benchmark_device_adjacent_difference") + set(list_across_names "DataType;Left;Aliasing;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};true;no_alias in_place;32 64 128 256 512 1024" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@Left@_@Aliasing@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_adjacent_find") + set(list_across_names "InputType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256 512 1024" PARENT_SCOPE) + set(output_pattern_suffix "@InputType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_histogram") + set(list_across_names "DataType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_merge_sort_block_merge") + set(list_across_names "KeyType;ValueType;BlockSize;UseMergePath" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};128 256 512 1024;true" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@_@UseMergePath@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_merge_sort_block_sort") + set(list_across_names "KeyType;ValueType;BlockSize;BlockSortMethod" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};256 512 1024;rocprim::block_sort_algorithm::stable_merge_sort" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@_@BlockSortMethod@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_radix_sort_block_sort") + set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};64 128 256 512 1024" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_radix_sort_onesweep") + set(list_across_names "KeyType;ValueType;BlockSize;RadixBits" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};128 256 512 1024;4 5 6 7 8" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@_@RadixBits@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_reduce") + set(list_across_names "DataType;BlockSize;ItemsPerThread" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256;1 2 4 8 16" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@_@ItemsPerThread@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_scan") + set(list_across_names "DataType;Algo" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};using_warp_scan reduce_then_scan" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@Algo@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_scan_by_key") + set(list_across_names "KeyType;ValueType;Algo" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};${LIMITED_TUNING_TYPES};using_warp_scan reduce_then_scan" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@Algo@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_binary_search") + set(list_across_names "SubAlgorithm;ValueType;OutputType;BlockSize;ItemsPerThread" PARENT_SCOPE) + set(list_across "binary_search upper_bound lower_bound;${TUNING_TYPES};${LIMITED_TUNING_TYPES};64 128 256;1 2 4 8 16" PARENT_SCOPE) + set(output_pattern_suffix "@SubAlgorithm@_@ValueType@_@OutputType@_@BlockSize@_@ItemsPerThread@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_search_n") + set(list_across_names "InputType;BlockSize;ItemsPerThread;Threshold" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256 512 1024;1 2 4 8 16;4 8 12 16" PARENT_SCOPE) + set(output_pattern_suffix "@InputType@_@BlockSize@_@ItemsPerThread@_@Threshold@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_segmented_radix_sort_keys") + set(list_across_names "KeyType;RadixBits;BlockSize;ItemsPerThread;WarpSmallLWS;WarpSmallIPT;WarpSmallBS;WarpPartition;WarpMediumLWS;WarpMediumIPT;WarpMediumBS" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};8;256;4 8 16;8;4;256;64;16;8;256" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@RadixBits@_@BlockSize@_@ItemsPerThread@_@WarpSmallLWS@_@WarpSmallIPT@_@WarpSmallBS@_@WarpPartition@_@WarpMediumLWS@_@WarpMediumIPT@_@WarpMediumBS@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_segmented_radix_sort_pairs") + set(list_across_names "KeyType;ValueType;RadixBits;BlockSize;ItemsPerThread;WarpSmallLWS;WarpSmallIPT;WarpSmallBS;WarpPartition;WarpMediumLWS;WarpMediumIPT;WarpMediumBS" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};${LIMITED_TUNING_TYPES};8;256;4 8 16;8;4;256;64;16;8;256" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@RadixBits@_@BlockSize@_@ItemsPerThread@_@WarpSmallLWS@_@WarpSmallIPT@_@WarpSmallBS@_@WarpPartition@_@WarpMediumLWS@_@WarpMediumIPT@_@WarpMediumBS@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_segmented_reduce") + set(list_across_names "DataType;BlockSize;ItemsPerThread" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256;1 2 4 8 16" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@_@ItemsPerThread@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_transform") + set(list_across_names "DataType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256 512 1024" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_transform_pointer") + set(list_across_names "DataType;BlockSize;LoadType" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256 512 1024;rocprim::load_default rocprim::load_nontemporal" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@_@LoadType@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_partition") + set(list_across_names "DataType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_select") + set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_reduce_by_key") + set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) + set(list_across "${LIMITED_TUNING_TYPES};${TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_find_first_of") + set(list_across_names "DataType;BlockSize" PARENT_SCOPE) + set(list_across "${LIMITED_TUNING_TYPES};32 64 128 256 512 1024" PARENT_SCOPE) + set(output_pattern_suffix "@DataType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_run_length_encode") + set(list_across_names "KeyType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};128 192 256 384 512" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@BlockSize@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_run_length_encode_non_trivial_runs") + set(list_across_names "KeyType;BlockSize;BlockLoadMethod" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};64 128 256 512 1024;block_load_vectorize block_load_warp_transpose" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@BlockSize@_@BlockLoadMethod@" PARENT_SCOPE) + elseif(file STREQUAL "benchmark_device_merge") + set(list_across_names "KeyType;ValueType;BlockSize" PARENT_SCOPE) + set(list_across "${TUNING_TYPES};rocprim::empty_type ${LIMITED_TUNING_TYPES};32 64 128 256 512 1024" PARENT_SCOPE) + set(output_pattern_suffix "@KeyType@_@ValueType@_@BlockSize@" PARENT_SCOPE) + endif() endfunction() diff --git a/projects/rocprim/cmake/ConfigAutotune.cmake b/projects/rocprim/cmake/ConfigAutotune.cmake index 56f3dc7ced9..ff37c59e7f2 100644 --- a/projects/rocprim/cmake/ConfigAutotune.cmake +++ b/projects/rocprim/cmake/ConfigAutotune.cmake @@ -1,123 +1,89 @@ -# MIT License -# -# Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -# Function to add a configured source file to a target. -# It parses arguments, prepares the output file name, and configures the file. function(add_configured_source) - # Parse arguments and ensure proper usage - cmake_parse_arguments(PARSE_ARGV 0 ARG "" "INPUT;TARGET;OUTPUT_PATTERN" "NAMES;VALUES") - list(LENGTH ARG_NAMES NAMES_LEN) - list(LENGTH ARG_VALUES VALS_LEN) - if (NOT NAMES_LEN EQUAL VALS_LEN) - message(FATAL_ERROR "add_configured_source: The same number of names (${NAMES_LEN}) and values (${VALS_LEN}) must be provided!") - endif() + cmake_parse_arguments(PARSE_ARGV 0 ARG "" "INPUT;TARGET;OUTPUT_PATTERN" "NAMES;VALUES") + list(LENGTH ARG_NAMES NAMES_LEN) + list(LENGTH ARG_VALUES VALS_LEN) + if(NOT NAMES_LEN EQUAL VALS_LEN) + message(FATAL_ERROR "add_configured_source: The same number of names (${NAMES_LEN}) and values (${VALS_LEN}) must be provided!") + endif() - # Loop through the names and values, preparing the output pattern - set(max ${VALS_LEN}) - math(EXPR max "${max} - 1") - foreach(i RANGE ${max}) - list(GET ARG_NAMES ${i} curr_name) - list(GET ARG_VALUES ${i} "${curr_name}") - endforeach() + set(max ${VALS_LEN}) + math(EXPR max "${max} - 1") + foreach(i RANGE ${max}) + list(GET ARG_NAMES ${i} curr_name) + list(GET ARG_VALUES ${i} "${curr_name}") + endforeach() - # Configure the output file and add it to the target - string(CONFIGURE "${ARG_OUTPUT_PATTERN}" output @ONLY) - string(MAKE_C_IDENTIFIER ${output} output) - set(output_path "${ARG_TARGET}.parallel/${output}.cpp") - configure_file("${ARG_INPUT}" "${output_path}" @ONLY) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_CLEAN_FILES "${ARG_TARGET}.parallel") - target_sources("${ARG_TARGET}" PRIVATE "${output_path}") - target_include_directories("${ARG_TARGET}" PRIVATE "../benchmark") + string(CONFIGURE "${ARG_OUTPUT_PATTERN}" output @ONLY) + string(MAKE_C_IDENTIFIER ${output} output) + set(output_path "${ARG_TARGET}.parallel/${output}.cpp") + configure_file("${ARG_INPUT}" "${output_path}" @ONLY) + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_CLEAN_FILES "${ARG_TARGET}.parallel") + target_sources("${ARG_TARGET}" PRIVATE "${output_path}") + target_include_directories("${ARG_TARGET}" PRIVATE "../benchmark") - # Ensure reconfiguration if necessary - set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${ARG_INPUT}" "${output_path}") + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${ARG_INPUT}" "${output_path}") endfunction() -# Function to divide two numbers and round up. function(div_round_up dividend divisor result_var) - math(EXPR result "(${dividend} + ${divisor} - 1) / ${divisor}") - set("${result_var}" "${result}" PARENT_SCOPE) + math(EXPR result "(${dividend} + ${divisor} - 1) / ${divisor}") + set("${result_var}" "${result}" PARENT_SCOPE) endfunction() -# Function to add a matrix of configured sources. -# It handles permutations of input parameters and calls add_configured_source accordingly. function(add_matrix) - set(single_value_args "TARGET" "INPUT" "OUTPUT_PATTERN" "SHARDS" "CURRENT_SHARD") - cmake_parse_arguments(PARSE_ARGV 0 ARG "" "${single_value_args}" "NAMES;LISTS") + set(single_value_args "TARGET" "INPUT" "OUTPUT_PATTERN" "SHARDS" "CURRENT_SHARD") + cmake_parse_arguments(PARSE_ARGV 0 ARG "" "${single_value_args}" "NAMES;LISTS") - # Validate argument lengths - list(LENGTH ARG_NAMES NAMES_LEN) - list(LENGTH ARG_LISTS LISTS_LEN) - if (NOT NAMES_LEN EQUAL LISTS_LEN) - message(FATAL_ERROR "add_matrix: The same number of names (${NAMES_LEN}) and lists (${LISTS_LEN}) must be provided!") - endif() + list(LENGTH ARG_NAMES NAMES_LEN) + list(LENGTH ARG_LISTS LISTS_LEN) + if(NOT NAMES_LEN EQUAL LISTS_LEN) + message(FATAL_ERROR "add_matrix: The same number of names (${NAMES_LEN}) and lists (${LISTS_LEN}) must be provided!") + endif() - # Calculate the total number of permutations - set(total_len 1) - foreach(LIST IN LISTS ARG_LISTS) - string(REPLACE " " ";" list ${LIST}) - list(LENGTH list LIST_LEN) - math(EXPR total_len "${total_len} * ${LIST_LEN}") - endforeach() + set(total_len 1) + foreach(LIST IN LISTS ARG_LISTS) + string(REPLACE " " ";" list ${LIST}) + list(LENGTH list LIST_LEN) + math(EXPR total_len "${total_len} * ${LIST_LEN}") + endforeach() - # Handle sharding - if(NOT DEFINED ARG_SHARDS) - set(ARG_SHARDS 1) - endif() - div_round_up("${total_len}" "${ARG_SHARDS}" per_shard) + if(NOT DEFINED ARG_SHARDS) + set(ARG_SHARDS 1) + endif() + div_round_up("${total_len}" "${ARG_SHARDS}" per_shard) - # Determine the range of permutations for the current shard - math(EXPR start "${ARG_CURRENT_SHARD} * ${per_shard}") - math(EXPR stop "${start} + ${per_shard} - 1") + math(EXPR start "${ARG_CURRENT_SHARD} * ${per_shard}") + math(EXPR stop "${start} + ${per_shard} - 1") - # Process each permutation - foreach(i RANGE ${start} ${stop}) - set(index ${i}) - set(values "") - foreach(input_list IN LISTS ARG_LISTS) - string(REPLACE " " ";" curr_list ${input_list}) - list(LENGTH curr_list curr_length) - math(EXPR curr_index "${index} % ${curr_length}") - list(GET curr_list ${curr_index} curr_item) - list(APPEND values "${curr_item}") - math(EXPR index "${index} / ${curr_length}") - endforeach() + foreach(i RANGE ${start} ${stop}) + set(index ${i}) + set(values "") + foreach(input_list IN LISTS ARG_LISTS) + string(REPLACE " " ";" curr_list ${input_list}) + list(LENGTH curr_list curr_length) + math(EXPR curr_index "${index} % ${curr_length}") + list(GET curr_list ${curr_index} curr_item) + list(APPEND values "${curr_item}") + math(EXPR index "${index} / ${curr_length}") + endforeach() - # Add the configured source for each permutation - add_configured_source(TARGET "${ARG_TARGET}" + add_configured_source( + TARGET "${ARG_TARGET}" INPUT "${ARG_INPUT}" OUTPUT_PATTERN "${ARG_OUTPUT_PATTERN}" NAMES ${ARG_NAMES} - VALUES ${values}) - endforeach() + VALUES ${values} + ) + endforeach() endfunction() -# Function to filter out odd block sizes. -# It sets a variable in the parent scope based on the condition. function(reject_odd_blocksize RESULT BlockSize) - math(EXPR res "${BlockSize} % 2") - if(res EQUAL 0) - set("${RESULT}" ON PARENT_SCOPE) - else() - set("${RESULT}" OFF PARENT_SCOPE) - endif() -endfunction() \ No newline at end of file + math(EXPR res "${BlockSize} % 2") + if(res EQUAL 0) + set("${RESULT}" ON PARENT_SCOPE) + else() + set("${RESULT}" OFF PARENT_SCOPE) + endif() +endfunction() diff --git a/projects/rocprim/cmake/Dependencies.cmake b/projects/rocprim/cmake/Dependencies.cmake index 3ee02289300..8f91907cf7e 100644 --- a/projects/rocprim/cmake/Dependencies.cmake +++ b/projects/rocprim/cmake/Dependencies.cmake @@ -1,235 +1,24 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# ########################### -# rocPRIM dependencies -# ########################### - -# NOTE1: the reason we don't scope global state meddling using add_subdirectory -# is because CMake < 3.24 lacks CMAKE_FIND_PACKAGE_TARGETS_GLOBAL which -# would promote IMPORTED targets of find_package(CONFIG) to be visible -# by other parts of the build. So we save and restore global state. -# -# NOTE2: We disable the ROCMChecks.cmake warning noting that we meddle with -# global state. This is consequence of abusing the CMake CXX language -# which HIP piggybacks on top of. This kind of HIP support has one chance -# at observing the global flags, at the find_package(HIP) invocation. -# The device compiler won't be able to pick up changes after that, hence -# the warning. -set(USER_CXX_FLAGS ${CMAKE_CXX_FLAGS}) -if(DEFINED BUILD_SHARED_LIBS) - set(USER_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) -endif() -set(USER_ROCM_WARN_TOOLCHAIN_VAR ${ROCM_WARN_TOOLCHAIN_VAR}) - -set(ROCM_WARN_TOOLCHAIN_VAR OFF CACHE BOOL "") -# Turn off warnings and errors for all warnings in dependencies -separate_arguments(CXX_FLAGS_LIST NATIVE_COMMAND ${CMAKE_CXX_FLAGS}) -list(REMOVE_ITEM CXX_FLAGS_LIST /WX -Werror -Werror=pendantic -pedantic-errors) -if(MSVC) - list(FILTER CXX_FLAGS_LIST EXCLUDE REGEX "/[Ww]([0-4]?)(all)?") # Remove MSVC warning flags - list(APPEND CXX_FLAGS_LIST /w) -else() - list(FILTER CXX_FLAGS_LIST EXCLUDE REGEX "-W(all|extra|everything)") # Remove GCC/LLVM flags - list(APPEND CXX_FLAGS_LIST -w) -endif() -list(JOIN CXX_FLAGS_LIST " " CMAKE_CXX_FLAGS) -# Don't build client dependencies as shared -set(BUILD_SHARED_LIBS OFF CACHE BOOL "Global flag to cause add_library() to create shared libraries if on." FORCE) - -# HIP dependency is handled earlier in the project cmake file -# when VerifyCompiler.cmake is included. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT +# ROCm CMake dependencies include(FetchContent) -# For downloading, building, and installing required dependencies -include(cmake/DownloadProject.cmake) - -# Test dependencies -if(BUILD_TEST) - # NOTE1: Google Test has created a mess with legacy FindGTest.cmake and newer GTestConfig.cmake - # - # FindGTest.cmake defines: GTest::GTest, GTest::Main, GTEST_FOUND - # - # GTestConfig.cmake defines: GTest::gtest, GTest::gtest_main, GTest::gmock, GTest::gmock_main - # - # NOTE2: Finding GTest in MODULE mode, one cannot invoke find_package in CONFIG mode, because targets - # will be duplicately defined. - # - # NOTE3: The following snippet first tries to find Google Test binary either in MODULE or CONFIG modes. - # If neither succeeds it goes on to import Google Test into this build either from a system - # source package (apt install googletest on Ubuntu 18.04 only) or GitHub and defines the MODULE - # mode targets. Otherwise if MODULE or CONFIG succeeded, then it prints the result to the - # console via a non-QUIET find_package call and if CONFIG succeeded, creates ALIAS targets - # with the MODULE IMPORTED names. - if(NOT DEPENDENCIES_FORCE_DOWNLOAD) - if(WIN32) - # Older versions of gtest on Windows does not support printing of 128-bit values, - # Causing compilation errors. - find_package(GTest 1.11.0 REQUIRED) - else() - find_package(GTest QUIET) - endif() - endif() - if(NOT TARGET GTest::GTest AND NOT TARGET GTest::gtest) - option(BUILD_GTEST "Builds the googletest subproject" ON) - option(BUILD_GMOCK "Builds the googlemock subproject" OFF) - option(INSTALL_GTEST "Enable installation of googletest." OFF) - if(EXISTS /usr/src/googletest AND NOT DEPENDENCIES_FORCE_DOWNLOAD) - FetchContent_Declare( - googletest - SOURCE_DIR /usr/src/googletest - ) - else() - message(STATUS "Google Test not found. Fetching...") - FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG e2239ee6043f73722e7aa812a459f54a28552929 # release-1.11.0 - ) - endif() - FetchContent_MakeAvailable(googletest) - add_library(GTest::GTest ALIAS gtest) - add_library(GTest::Main ALIAS gtest_main) - else() - find_package(GTest REQUIRED) - if(TARGET GTest::gtest_main AND NOT TARGET GTest::Main) - add_library(GTest::GTest ALIAS GTest::gtest) - add_library(GTest::Main ALIAS GTest::gtest_main) - endif() - endif() -endif(BUILD_TEST) - -if(BUILD_BENCHMARK) - set(BENCHMARK_VERSION 1.8.0) - if(NOT DEPENDENCIES_FORCE_DOWNLOAD) - find_package(benchmark ${BENCHMARK_VERSION} CONFIG QUIET) - endif() - if(NOT TARGET benchmark::benchmark) - message(STATUS "Google Benchmark not found. Fetching...") - option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." OFF) - option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark." OFF) - FetchContent_Declare( - googlebench - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v${BENCHMARK_VERSION} - ) - set(HAVE_STD_REGEX ON) - set(RUN_HAVE_STD_REGEX 1) - FetchContent_MakeAvailable(googlebench) - if(NOT TARGET benchmark::benchmark) - add_library(benchmark::benchmark ALIAS benchmark) - endif() - else() - find_package(benchmark CONFIG REQUIRED) - endif() -endif(BUILD_BENCHMARK) - -if(NOT DEPENDENCIES_FORCE_DOWNLOAD) - find_package(ROCmCMakeBuildTools 0.11.0 CONFIG QUIET PATHS "${ROCM_ROOT}") # rocm-cmake -endif() +find_package(ROCmCMakeBuildTools 0.11.0 CONFIG QUIET) if(NOT ROCmCMakeBuildTools_FOUND) - message(STATUS "ROCm CMake not found. Fetching...") - # We don't really want to consume the build and test targets of ROCm CMake. - # CMake 3.18 allows omitting them, even though there's a CMakeLists.txt in source root. - if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18) - set(SOURCE_SUBDIR_ARG SOURCE_SUBDIR "DISABLE ADDING TO BUILD") - else() - set(SOURCE_SUBDIR_ARG) - endif() - set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") - FetchContent_Declare( - rocm-cmake - GIT_REPOSITORY https://github.com/ROCm/rocm-cmake.git - GIT_TAG rocm-6.4.4 - ${SOURCE_SUBDIR_ARG} - ) - FetchContent_GetProperties(rocm-cmake) - if(NOT rocm-cmake_POPULATED) - # rocm-cmake 0.12.0 and higher needs to built from source - FetchContent_Populate(rocm-cmake) - message("Populated: ${rocm-cmake_SOURCE_DIR}") - execute_process( - WORKING_DIRECTORY ${rocm-cmake_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} ${rocm-cmake_SOURCE_DIR} -DCMAKE_INSTALL_PREFIX=. - ) - execute_process( - WORKING_DIRECTORY ${rocm-cmake_SOURCE_DIR} - COMMAND ${CMAKE_COMMAND} --build ${rocm-cmake_SOURCE_DIR} --target install + message(STATUS "ROCm CMake not found. Fetching...") + FetchContent_Declare( + rocm-cmake + GIT_REPOSITORY https://github.com/ROCm/rocm-cmake.git + GIT_TAG rocm-6.4.4 + SOURCE_SUBDIR "DISABLE ADDING TO BUILD" ) - endif() - FetchContent_MakeAvailable(rocm-cmake) - find_package(ROCmCMakeBuildTools CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}") + FetchContent_MakeAvailable(rocm-cmake) + find_package(ROCmCMakeBuildTools CONFIG REQUIRED NO_DEFAULT_PATH PATHS "${rocm-cmake_SOURCE_DIR}") else() - find_package(ROCmCMakeBuildTools 0.11.0 CONFIG REQUIRED PATHS "${ROCM_ROOT}") + find_package(ROCmCMakeBuildTools 0.11.0 CONFIG REQUIRED) endif() - -# rocRAND (https://github.com/ROCmSoftwarePlatform/rocRAND) -if(WITH_ROCRAND) - find_package(rocrand QUIET) -endif() -if(WITH_ROCRAND AND NOT rocrand_FOUND) - message(STATUS "Downloading and building rocrand.") - set(ROCRAND_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/rocrand CACHE PATH "") - - set(EXTRA_CMAKE_ARGS "-DGPU_TARGETS=${GPU_TARGETS}") - # CMAKE_ARGS of download_project (or ExternalProject_Add) can't contain ; so another separator - # is needed and LIST_SEPARATOR is passed to download_project() - string(REPLACE ";" "|" EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS}") - # Pass launcher so sccache can be used to speed up building rocRAND - if(CMAKE_CXX_COMPILER_LAUNCHER) - set(EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}") - endif() - download_project( - PROJ rocrand - GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rocRAND.git - GIT_TAG develop - GIT_SHALLOW TRUE - INSTALL_DIR ${ROCRAND_ROOT} - LIST_SEPARATOR | - CMAKE_ARGS -DCMAKE_CXX_COMPILER=hipcc -DBUILD_TEST=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_PREFIX_PATH=/opt/rocm ${EXTRA_CMAKE_ARGS} - LOG_DOWNLOAD TRUE - LOG_CONFIGURE TRUE - LOG_BUILD TRUE - LOG_INSTALL TRUE - LOG_OUTPUT_ON_FAILURE TRUE - BUILD_PROJECT TRUE - UPDATE_DISCONNECTED TRUE - ) - find_package(rocrand REQUIRED CONFIG PATHS ${ROCRAND_ROOT}) -endif() - - - -# Restore user global state -set(CMAKE_CXX_FLAGS ${USER_CXX_FLAGS}) -if(DEFINED USER_BUILD_SHARED_LIBS) - set(BUILD_SHARED_LIBS ${USER_BUILD_SHARED_LIBS}) -else() - unset(BUILD_SHARED_LIBS CACHE ) -endif() -set(ROCM_WARN_TOOLCHAIN_VAR ${USER_ROCM_WARN_TOOLCHAIN_VAR} CACHE BOOL "") - include(ROCMSetupVersion) include(ROCMCreatePackage) include(ROCMInstallTargets) @@ -237,6 +26,98 @@ include(ROCMPackageConfigHelpers) include(ROCMInstallSymlinks) include(ROCMCheckTargetIds) include(ROCMClients) -if(BUILD_DOCS) - include(ROCMSphinxDoc) +if(ROCPRIM_ENABLE_DOCS) + include(ROCMSphinxDoc) +endif() + +# Test dependencies +if(ROCPRIM_BUILD_TESTING) + find_package(GTest QUIET) + if(NOT GTest_FOUND) + if(EXISTS /usr/src/googletest AND NOT DEPENDENCIES_FORCE_DOWNLOAD) + FetchContent_Declare( + googletest + SOURCE_DIR /usr/src/googletest + ) + else() + message(STATUS "Google Test not found. Fetching...") + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG e2239ee6043f73722e7aa812a459f54a28552929 # release-1.11.0 + ) + endif() + set(BUILD_GMOCK OFF CACHE BOOL "") + set(INSTALL_GTEST OFF CACHE BOOL "") + FetchContent_MakeAvailable(googletest) + if(NOT TARGET GTest::GTest) + add_library(GTest::GTest ALIAS gtest) + add_library(GTest::Main ALIAS gtest_main) + endif() + else() + find_package(GTest REQUIRED) + if(TARGET GTest::gtest_main AND NOT TARGET GTest::Main) + add_library(GTest::GTest ALIAS GTest::gtest) + add_library(GTest::Main ALIAS GTest::gtest_main) + endif() + endif() +endif() + +# Benchmark dependencies +if(ROCPRIM_ENABLE_BENCHMARK) + set(BENCHMARK_VERSION 1.8.0) + find_package(benchmark ${BENCHMARK_VERSION} CONFIG QUIET) + if(NOT benchmark_FOUND) + message(STATUS "Google Benchmark not found. Fetching...") + FetchContent_Declare( + googlebench + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v${BENCHMARK_VERSION} + ) + set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "") + set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "") + set(HAVE_STD_REGEX ON) + set(RUN_HAVE_STD_REGEX 1) + FetchContent_MakeAvailable(googlebench) + if(NOT TARGET benchmark::benchmark) + add_library(benchmark::benchmark ALIAS benchmark) + endif() + else() + find_package(benchmark CONFIG REQUIRED) + endif() +endif() + +# rocRAND dependency +if(ROCPRIM_ENABLE_ROCRAND) + find_package(rocrand QUIET) + if(NOT rocrand_FOUND) + message(STATUS "rocRAND not found. Fetching and building...") + include(DownloadProject) + + set(ROCRAND_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/rocrand CACHE PATH "") + set(EXTRA_CMAKE_ARGS "-DGPU_TARGETS=${GPU_TARGETS}") + string(REPLACE ";" "|" EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS}") + + if(CMAKE_CXX_COMPILER_LAUNCHER) + set(EXTRA_CMAKE_ARGS "${EXTRA_CMAKE_ARGS} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}") + endif() + + download_project( + PROJ rocrand + GIT_REPOSITORY https://github.com/ROCmSoftwarePlatform/rocRAND.git + GIT_TAG develop + GIT_SHALLOW TRUE + INSTALL_DIR ${ROCRAND_ROOT} + LIST_SEPARATOR | + CMAKE_ARGS -DCMAKE_CXX_COMPILER=hipcc -DBUILD_TEST=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_PREFIX_PATH=/opt/rocm ${EXTRA_CMAKE_ARGS} + LOG_DOWNLOAD TRUE + LOG_CONFIGURE TRUE + LOG_BUILD TRUE + LOG_INSTALL TRUE + LOG_OUTPUT_ON_FAILURE TRUE + BUILD_PROJECT TRUE + UPDATE_DISCONNECTED TRUE + ) + find_package(rocrand REQUIRED CONFIG PATHS ${ROCRAND_ROOT}) + endif() endif() diff --git a/projects/rocprim/cmake/GenerateResourceSpec.cmake b/projects/rocprim/cmake/GenerateResourceSpec.cmake index d485ca47a90..86fbe39630d 100755 --- a/projects/rocprim/cmake/GenerateResourceSpec.cmake +++ b/projects/rocprim/cmake/GenerateResourceSpec.cmake @@ -1,116 +1,68 @@ #!/usr/bin/cmake -P +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -find_program(ROCMINFO_EXECUTABLE - rocminfo -) +find_program(ROCMINFO_EXECUTABLE rocminfo) if(NOT ROCMINFO_EXECUTABLE) - message(FATAL_ERROR "rocminfo not found") + message(FATAL_ERROR "rocminfo not found") endif() execute_process( - COMMAND ${ROCMINFO_EXECUTABLE} - RESULT_VARIABLE ROCMINFO_EXIT_CODE - OUTPUT_VARIABLE ROCMINFO_STDOUT - ERROR_VARIABLE ROCMINFO_STDERR + COMMAND ${ROCMINFO_EXECUTABLE} + RESULT_VARIABLE ROCMINFO_EXIT_CODE + OUTPUT_VARIABLE ROCMINFO_STDOUT + ERROR_VARIABLE ROCMINFO_STDERR ) if(ROCMINFO_EXIT_CODE) - message(SEND_ERROR "rocminfo exited with ${ROCMINFO_EXIT_CODE}") - message(SEND_ERROR ${ROCMINFO_STDOUT}) - message(FATAL_ERROR ${ROCMINFO_STDERR}) + message(SEND_ERROR "rocminfo exited with ${ROCMINFO_EXIT_CODE}") + message(SEND_ERROR ${ROCMINFO_STDOUT}) + message(FATAL_ERROR ${ROCMINFO_STDERR}) endif() string(REGEX MATCHALL [[--(gfx[0-9a-f]+)]] - ROCMINFO_MATCHES - ${ROCMINFO_STDOUT} + ROCMINFO_MATCHES + ${ROCMINFO_STDOUT} ) -# NOTE: Unfortunately we don't have structs in CMake, -# neither do we have std::partition only list(SORT) -# - -# Transform raw regex matches to pairs of gfx IP and device id -# This will be our struct emulation. In C++ it would be -# -# struct device -# { -# std::string ip; -# int id; -# }; -# -# std::vector GFXIP_AND_ID{ {"gfx900",0},{"gfx803",1},{"gfx900",2} }; -# std::sort(GFXIP_AND_ID.begin(), GFXIP_AND_ID.end(), -# [](const device& lhs, const device& rhs) -# { -# return std::lexicographical_compare(lhs.ip.begin(), lhs.ip.end(), -# rhs.ip.begin(), rhs.ip.end()); -# }); -# set(GFXIP_AND_ID) set(ID 0) foreach(ROCMINFO_MATCH IN LISTS ROCMINFO_MATCHES) - string(REGEX REPLACE - "--" - "" - ROCMINFO_MATCH - ${ROCMINFO_MATCH} - ) - list(APPEND GFXIP_AND_ID "${ROCMINFO_MATCH}:${ID}") - math(EXPR ID "${ID} + 1") + string(REGEX REPLACE "--" "" ROCMINFO_MATCH ${ROCMINFO_MATCH}) + list(APPEND GFXIP_AND_ID "${ROCMINFO_MATCH}:${ID}") + math(EXPR ID "${ID} + 1") endforeach() list(SORT GFXIP_AND_ID) -# Now comes the tricky part: implementing the following C++ logic -# -# std::stringstream JSON_PAYLOAD; -# auto it = GFXIP_AND_ID.begin(); -# while (it != GFXIP_AND_ID.end()) -# { -# auto IT = std::find_if(it, GFXIP_AND_ID.end(), -# [=](const device& ip_id){ return ip_id.ip.compare(it->ip) != 0; }); -# JSON_PAYLOAD << "\n \"" << it->ip << "\": ["; -# std::for_each(it, IT, [&](const device& ip_id) -# { -# JSON_PAYLOAD << -# "\n {\n" << -# " \"id\": \"" << ip_id.id << "\"\n" << -# " },"; -# }); -# JSON_PAYLOAD.seekp(-1, std::ios_base::end); // discard trailing comma -# JSON_PAYLOAD << "\n ],"; -# it = IT; -# } -# JSON_PAYLOAD.seekp(-1, std::ios_base::end); // discard trailing comma -# set(JSON_PAYLOAD) set(IT1 0) list(GET GFXIP_AND_ID ${IT1} I1) string(REGEX REPLACE ":[0-9a-f]+" "" IP1 ${I1}) list(LENGTH GFXIP_AND_ID COUNT) while(IT1 LESS COUNT) - string(APPEND JSON_PAYLOAD "\n \"${IP1}\": [") - set(IT2 ${IT1}) - list(GET GFXIP_AND_ID ${IT2} I2) - string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2}) - string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2}) - while(${IP2} STREQUAL ${IP1} AND IT2 LESS COUNT) - string(APPEND JSON_PAYLOAD - "\n {\n" - " \"id\": \"${ID2}\"\n" - " }," - ) - math(EXPR IT2 "${IT2} + 1") - if(IT2 LESS COUNT) - list(GET GFXIP_AND_ID ${IT2} I2) - string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2}) - string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2}) - endif() - endwhile() - string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD}) - string(APPEND JSON_PAYLOAD "\n ],") - set(IT1 ${IT2}) - set(IP1 ${IP2}) + string(APPEND JSON_PAYLOAD "\n \"${IP1}\": [") + set(IT2 ${IT1}) + list(GET GFXIP_AND_ID ${IT2} I2) + string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2}) + string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2}) + while(${IP2} STREQUAL ${IP1} AND IT2 LESS COUNT) + string(APPEND JSON_PAYLOAD + "\n {\n" + " \"id\": \"${ID2}\"\n" + " }," + ) + math(EXPR IT2 "${IT2} + 1") + if(IT2 LESS COUNT) + list(GET GFXIP_AND_ID ${IT2} I2) + string(REGEX REPLACE [[:[0-9a-f]+$]] "" IP2 ${I2}) + string(REGEX REPLACE [[^gfx[0-9a-f]+:]] "" ID2 ${I2}) + endif() + endwhile() + string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD}) + string(APPEND JSON_PAYLOAD "\n ],") + set(IT1 ${IT2}) + set(IP1 ${IP2}) endwhile() string(REGEX REPLACE [[,$]] "" JSON_PAYLOAD ${JSON_PAYLOAD}) @@ -130,7 +82,7 @@ set(JSON_TAIL [[ ) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/resources.json - ${JSON_HEAD} - ${JSON_PAYLOAD} - ${JSON_TAIL} + ${JSON_HEAD} + ${JSON_PAYLOAD} + ${JSON_TAIL} ) diff --git a/projects/rocprim/cmake/Summary.cmake b/projects/rocprim/cmake/Summary.cmake index a2304a38b6d..61e763b765d 100644 --- a/projects/rocprim/cmake/Summary.cmake +++ b/projects/rocprim/cmake/Summary.cmake @@ -1,110 +1,89 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT function(print_configuration_summary) - find_package(Git) - if(GIT_FOUND) - execute_process( - COMMAND ${GIT_EXECUTABLE} show --format=%H --no-patch - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - OUTPUT_VARIABLE COMMIT_HASH - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - execute_process( - COMMAND ${GIT_EXECUTABLE} show --format=%s --no-patch - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - OUTPUT_VARIABLE COMMIT_SUBJECT - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - endif() + find_package(Git) + if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} show --format=%H --no-patch + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + execute_process( + COMMAND ${GIT_EXECUTABLE} show --format=%s --no-patch + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE COMMIT_SUBJECT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endif() - execute_process( - COMMAND ${CMAKE_CXX_COMPILER} --version - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERBOSE_DETAILS - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - find_program(UNAME_EXECUTABLE uname) - if(UNAME_EXECUTABLE) execute_process( - COMMAND ${UNAME_EXECUTABLE} -a - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - OUTPUT_VARIABLE LINUX_KERNEL_DETAILS - OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND ${CMAKE_CXX_COMPILER} --version + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERBOSE_DETAILS + OUTPUT_STRIP_TRAILING_WHITESPACE ) - endif() - string(REPLACE "\n" ";" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") - list(TRANSFORM CMAKE_CXX_COMPILER_VERBOSE_DETAILS PREPEND "-- ") - string(REPLACE ";" "\n" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") + find_program(UNAME_EXECUTABLE uname) + if(UNAME_EXECUTABLE) + execute_process( + COMMAND ${UNAME_EXECUTABLE} -a + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE LINUX_KERNEL_DETAILS + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endif() - message(STATUS "") - message(STATUS "******** Summary ********") - message(STATUS "General:") - message(STATUS " System : ${CMAKE_SYSTEM_NAME}") - if(USE_HIPCXX) - message(STATUS " HIP compiler : ${CMAKE_HIP_COMPILER}") - message(STATUS " HIP compiler version : ${CMAKE_HIP_COMPILER_VERSION}") - string(STRIP "${CMAKE_HIP_FLAGS}" CMAKE_HIP_FLAGS_STRIP) - message(STATUS " HIP flags : ${CMAKE_HIP_FLAGS_STRIP}") - else() - message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") - message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") - string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP) - message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}") - endif() - get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) - if(GENERATOR_IS_MULTI_CONFIG) - message(STATUS " Build types : ${CMAKE_CONFIGURATION_TYPES}") - else() - message(STATUS " Build type : ${CMAKE_BUILD_TYPE}") - endif() - message(STATUS " Install prefix : ${CMAKE_INSTALL_PREFIX}") - if(USE_HIPCXX) - message(STATUS " Device targets : ${CMAKE_HIP_ARCHITECTURES}") - else() - message(STATUS " Device targets : ${GPU_TARGETS}") - endif() - message(STATUS "") - message(STATUS " ONLY_INSTALL : ${ONLY_INSTALL}") - message(STATUS " BUILD_TEST : ${BUILD_TEST}") - message(STATUS " WITH_ROCRAND : ${WITH_ROCRAND}") - message(STATUS " BUILD_BENCHMARK : ${BUILD_BENCHMARK}") - message(STATUS " BUILD_NAIVE_BENCHMARK : ${BUILD_NAIVE_BENCHMARK}") - message(STATUS " BUILD_EXAMPLE : ${BUILD_EXAMPLE}") - message(STATUS " BUILD_DOCS : ${BUILD_DOCS}") - message(STATUS " BUILD_OFFLOAD_COMPRESS : ${BUILD_OFFLOAD_COMPRESS}") - message(STATUS " USE_SYSTEM_LIB : ${USE_SYSTEM_LIB}") - message(STATUS "") - message(STATUS "Detailed:") - message(STATUS " C++ compiler details : \n${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") - if(GIT_FOUND) - message(STATUS " Commit : ${COMMIT_HASH}") - message(STATUS " ${COMMIT_SUBJECT}") - endif() - if(UNAME_EXECUTABLE) - message(STATUS " Unix name : ${LINUX_KERNEL_DETAILS}") - endif() - -endfunction() + string(REPLACE "\n" ";" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") + list(TRANSFORM CMAKE_CXX_COMPILER_VERBOSE_DETAILS PREPEND "-- ") + string(REPLACE ";" "\n" CMAKE_CXX_COMPILER_VERBOSE_DETAILS "${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") + message(STATUS "") + message(STATUS "******** Summary ********") + message(STATUS "General:") + message(STATUS " System : ${CMAKE_SYSTEM_NAME}") + if(ROCPRIM_USE_HIPCXX) + message(STATUS " HIP compiler : ${CMAKE_HIP_COMPILER}") + message(STATUS " HIP compiler version : ${CMAKE_HIP_COMPILER_VERSION}") + string(STRIP "${CMAKE_HIP_FLAGS}" CMAKE_HIP_FLAGS_STRIP) + message(STATUS " HIP flags : ${CMAKE_HIP_FLAGS_STRIP}") + else() + message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") + message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") + string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS_STRIP) + message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS_STRIP}") + endif() + get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) + if(GENERATOR_IS_MULTI_CONFIG) + message(STATUS " Build types : ${CMAKE_CONFIGURATION_TYPES}") + else() + message(STATUS " Build type : ${CMAKE_BUILD_TYPE}") + endif() + message(STATUS " Install prefix : ${CMAKE_INSTALL_PREFIX}") + if(ROCPRIM_USE_HIPCXX) + message(STATUS " Device targets : ${CMAKE_HIP_ARCHITECTURES}") + else() + message(STATUS " Device targets : ${GPU_TARGETS}") + endif() + message(STATUS "") + message(STATUS " ROCPRIM_ENABLE_INSTALL : ${ROCPRIM_ENABLE_INSTALL}") + message(STATUS " ROCPRIM_BUILD_TESTING : ${ROCPRIM_BUILD_TESTING}") + message(STATUS " ROCPRIM_ENABLE_ROCRAND : ${ROCPRIM_ENABLE_ROCRAND}") + message(STATUS " ROCPRIM_ENABLE_BENCHMARK : ${ROCPRIM_ENABLE_BENCHMARK}") + message(STATUS " ROCPRIM_ENABLE_NAIVE_BENCHMARK : ${ROCPRIM_ENABLE_NAIVE_BENCHMARK}") + message(STATUS " ROCPRIM_ENABLE_EXAMPLES : ${ROCPRIM_ENABLE_EXAMPLES}") + message(STATUS " ROCPRIM_ENABLE_DOCS : ${ROCPRIM_ENABLE_DOCS}") + message(STATUS " ROCPRIM_ENABLE_OFFLOAD_COMPRESS : ${ROCPRIM_ENABLE_OFFLOAD_COMPRESS}") + message(STATUS " ROCPRIM_USE_SYSTEM_LIBS : ${ROCPRIM_USE_SYSTEM_LIBS}") + message(STATUS "") + message(STATUS "Detailed:") + message(STATUS " C++ compiler details : \n${CMAKE_CXX_COMPILER_VERBOSE_DETAILS}") + if(GIT_FOUND) + message(STATUS " Commit : ${COMMIT_HASH}") + message(STATUS " ${COMMIT_SUBJECT}") + endif() + if(UNAME_EXECUTABLE) + message(STATUS " Unix name : ${LINUX_KERNEL_DETAILS}") + endif() +endfunction() diff --git a/projects/rocprim/cmake/VerifyCompiler.cmake b/projects/rocprim/cmake/VerifyCompiler.cmake index 98bd368f582..4a69447cb2b 100644 --- a/projects/rocprim/cmake/VerifyCompiler.cmake +++ b/projects/rocprim/cmake/VerifyCompiler.cmake @@ -1,30 +1,11 @@ -# MIT License -# -# Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT - -list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip ${ROCM_PATH}/llvm /opt/rocm/llvm /opt/rocm /opt/rocm/hip) -find_package(hip REQUIRED CONFIG PATHS ${HIP_DIR} ${ROCM_PATH} /opt/rocm) - -if(NOT USE_HIPCXX) +if(NOT ROCPRIM_USE_HIPCXX) + if(NOT DEFINED HIP_COMPILER) + message(FATAL_ERROR "HIP_COMPILER is not defined. Please ensure find_package(hip) has been called.") + endif() + if(HIP_COMPILER STREQUAL "clang") if(NOT (HIP_CXX_COMPILER MATCHES ".*hipcc" OR HIP_CXX_COMPILER MATCHES ".*clang\\+\\+")) message(FATAL_ERROR "On ROCm platform 'hipcc' or HIP-aware Clang must be used as C++ compiler.") diff --git a/projects/rocprim/cmake/rocprim_shim.cmake b/projects/rocprim/cmake/rocprim_shim.cmake new file mode 100644 index 00000000000..3ac7d5938ec --- /dev/null +++ b/projects/rocprim/cmake/rocprim_shim.cmake @@ -0,0 +1,159 @@ +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +# Backward compatibility shim for rocPRIM option names +# This file maps legacy option names to modern equivalents + +macro(_rocprim_deprecation_warning old_var new_var) + message(DEPRECATION "Use '${new_var}' instead of '${old_var}'. The old option will be removed in a future release.") +endmacro() + +macro(_rocprim_check_conflict old_var new_var) + if(DEFINED ${old_var} AND DEFINED ${new_var}) + if(NOT "${${old_var}}" STREQUAL "${${new_var}}") + message(FATAL_ERROR "Conflicting options: ${old_var}=${${old_var}} vs ${new_var}=${${new_var}}. Please use only ${new_var}.") + endif() + endif() +endmacro() + +# Map BUILD_TEST -> ROCPRIM_BUILD_TESTING +if(DEFINED BUILD_TEST) + _rocprim_check_conflict(BUILD_TEST ROCPRIM_BUILD_TESTING) + if(NOT DEFINED ROCPRIM_BUILD_TESTING) + set(ROCPRIM_BUILD_TESTING ${BUILD_TEST} CACHE BOOL "Build tests" FORCE) + _rocprim_deprecation_warning(BUILD_TEST ROCPRIM_BUILD_TESTING) + endif() +endif() + +# Map WITH_ROCRAND -> ROCPRIM_ENABLE_ROCRAND +if(DEFINED WITH_ROCRAND) + _rocprim_check_conflict(WITH_ROCRAND ROCPRIM_ENABLE_ROCRAND) + if(NOT DEFINED ROCPRIM_ENABLE_ROCRAND) + set(ROCPRIM_ENABLE_ROCRAND ${WITH_ROCRAND} CACHE BOOL "Build tests with device-side data generation (requires rocRAND)" FORCE) + _rocprim_deprecation_warning(WITH_ROCRAND ROCPRIM_ENABLE_ROCRAND) + endif() +endif() + +# Map BUILD_BENCHMARK -> ROCPRIM_ENABLE_BENCHMARK +if(DEFINED BUILD_BENCHMARK) + _rocprim_check_conflict(BUILD_BENCHMARK ROCPRIM_ENABLE_BENCHMARK) + if(NOT DEFINED ROCPRIM_ENABLE_BENCHMARK) + set(ROCPRIM_ENABLE_BENCHMARK ${BUILD_BENCHMARK} CACHE BOOL "Build benchmarks" FORCE) + _rocprim_deprecation_warning(BUILD_BENCHMARK ROCPRIM_ENABLE_BENCHMARK) + endif() +endif() + +# Map BUILD_EXAMPLE -> ROCPRIM_ENABLE_EXAMPLES +if(DEFINED BUILD_EXAMPLE) + _rocprim_check_conflict(BUILD_EXAMPLE ROCPRIM_ENABLE_EXAMPLES) + if(NOT DEFINED ROCPRIM_ENABLE_EXAMPLES) + set(ROCPRIM_ENABLE_EXAMPLES ${BUILD_EXAMPLE} CACHE BOOL "Build examples" FORCE) + _rocprim_deprecation_warning(BUILD_EXAMPLE ROCPRIM_ENABLE_EXAMPLES) + endif() +endif() + +# Map BUILD_DOCS -> ROCPRIM_ENABLE_DOCS +if(DEFINED BUILD_DOCS) + _rocprim_check_conflict(BUILD_DOCS ROCPRIM_ENABLE_DOCS) + if(NOT DEFINED ROCPRIM_ENABLE_DOCS) + set(ROCPRIM_ENABLE_DOCS ${BUILD_DOCS} CACHE BOOL "Build documentation" FORCE) + _rocprim_deprecation_warning(BUILD_DOCS ROCPRIM_ENABLE_DOCS) + endif() +endif() + +# Map BUILD_CODE_COVERAGE -> ROCPRIM_ENABLE_COVERAGE +if(DEFINED BUILD_CODE_COVERAGE) + _rocprim_check_conflict(BUILD_CODE_COVERAGE ROCPRIM_ENABLE_COVERAGE) + if(NOT DEFINED ROCPRIM_ENABLE_COVERAGE) + set(ROCPRIM_ENABLE_COVERAGE ${BUILD_CODE_COVERAGE} CACHE BOOL "Build with code coverage enabled" FORCE) + _rocprim_deprecation_warning(BUILD_CODE_COVERAGE ROCPRIM_ENABLE_COVERAGE) + endif() +endif() + +# Map ROCPRIM_INSTALL -> ROCPRIM_ENABLE_INSTALL +if(DEFINED ROCPRIM_INSTALL) + _rocprim_check_conflict(ROCPRIM_INSTALL ROCPRIM_ENABLE_INSTALL) + if(NOT DEFINED ROCPRIM_ENABLE_INSTALL) + set(ROCPRIM_ENABLE_INSTALL ${ROCPRIM_INSTALL} CACHE BOOL "Enable installation of rocPRIM" FORCE) + _rocprim_deprecation_warning(ROCPRIM_INSTALL ROCPRIM_ENABLE_INSTALL) + endif() +endif() + +# Map BUILD_OFFLOAD_COMPRESS -> ROCPRIM_ENABLE_OFFLOAD_COMPRESS +if(DEFINED BUILD_OFFLOAD_COMPRESS) + _rocprim_check_conflict(BUILD_OFFLOAD_COMPRESS ROCPRIM_ENABLE_OFFLOAD_COMPRESS) + if(NOT DEFINED ROCPRIM_ENABLE_OFFLOAD_COMPRESS) + set(ROCPRIM_ENABLE_OFFLOAD_COMPRESS ${BUILD_OFFLOAD_COMPRESS} CACHE BOOL "Build rocPRIM with offload compression" FORCE) + _rocprim_deprecation_warning(BUILD_OFFLOAD_COMPRESS ROCPRIM_ENABLE_OFFLOAD_COMPRESS) + endif() +endif() + +# Map BUILD_NAIVE_BENCHMARK -> ROCPRIM_ENABLE_NAIVE_BENCHMARK +if(DEFINED BUILD_NAIVE_BENCHMARK) + _rocprim_check_conflict(BUILD_NAIVE_BENCHMARK ROCPRIM_ENABLE_NAIVE_BENCHMARK) + if(NOT DEFINED ROCPRIM_ENABLE_NAIVE_BENCHMARK) + set(ROCPRIM_ENABLE_NAIVE_BENCHMARK ${BUILD_NAIVE_BENCHMARK} CACHE BOOL "Build naive benchmarks" FORCE) + _rocprim_deprecation_warning(BUILD_NAIVE_BENCHMARK ROCPRIM_ENABLE_NAIVE_BENCHMARK) + endif() +endif() + +# Map USE_SYSTEM_LIB -> ROCPRIM_USE_SYSTEM_LIBS +if(DEFINED USE_SYSTEM_LIB) + _rocprim_check_conflict(USE_SYSTEM_LIB ROCPRIM_USE_SYSTEM_LIBS) + if(NOT DEFINED ROCPRIM_USE_SYSTEM_LIBS) + set(ROCPRIM_USE_SYSTEM_LIBS ${USE_SYSTEM_LIB} CACHE BOOL "Use installed ROCm libs when building tests" FORCE) + _rocprim_deprecation_warning(USE_SYSTEM_LIB ROCPRIM_USE_SYSTEM_LIBS) + endif() +endif() + +# Map USE_HIPCXX -> ROCPRIM_USE_HIPCXX +if(DEFINED USE_HIPCXX) + _rocprim_check_conflict(USE_HIPCXX ROCPRIM_USE_HIPCXX) + if(NOT DEFINED ROCPRIM_USE_HIPCXX) + set(ROCPRIM_USE_HIPCXX ${USE_HIPCXX} CACHE BOOL "Use CMake HIP language support" FORCE) + _rocprim_deprecation_warning(USE_HIPCXX ROCPRIM_USE_HIPCXX) + endif() +endif() + +# Handle ONLY_INSTALL special case +if(DEFINED ONLY_INSTALL) + if(ONLY_INSTALL) + message(DEPRECATION "ONLY_INSTALL is deprecated. Set ROCPRIM_BUILD_TESTING=OFF, ROCPRIM_ENABLE_BENCHMARK=OFF, and ROCPRIM_ENABLE_EXAMPLES=OFF instead.") + if(NOT DEFINED ROCPRIM_BUILD_TESTING) + set(ROCPRIM_BUILD_TESTING OFF CACHE BOOL "Build tests" FORCE) + endif() + if(NOT DEFINED ROCPRIM_ENABLE_BENCHMARK) + set(ROCPRIM_ENABLE_BENCHMARK OFF CACHE BOOL "Build benchmarks" FORCE) + endif() + if(NOT DEFINED ROCPRIM_ENABLE_EXAMPLES) + set(ROCPRIM_ENABLE_EXAMPLES OFF CACHE BOOL "Build examples" FORCE) + endif() + endif() +endif() + +# Map BENCHMARK_CONFIG_TUNING -> ROCPRIM_ENABLE_CONFIG_TUNING +if(DEFINED BENCHMARK_CONFIG_TUNING) + _rocprim_check_conflict(BENCHMARK_CONFIG_TUNING ROCPRIM_ENABLE_CONFIG_TUNING) + if(NOT DEFINED ROCPRIM_ENABLE_CONFIG_TUNING) + set(ROCPRIM_ENABLE_CONFIG_TUNING ${BENCHMARK_CONFIG_TUNING} CACHE BOOL "Benchmark device-level functions using various configs" FORCE) + _rocprim_deprecation_warning(BENCHMARK_CONFIG_TUNING ROCPRIM_ENABLE_CONFIG_TUNING) + endif() +endif() + +# Map BENCHMARK_AUTOTUNED_TYPES_ONLY -> ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY +if(DEFINED BENCHMARK_AUTOTUNED_TYPES_ONLY) + _rocprim_check_conflict(BENCHMARK_AUTOTUNED_TYPES_ONLY ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY) + if(NOT DEFINED ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY) + set(ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY ${BENCHMARK_AUTOTUNED_TYPES_ONLY} CACHE BOOL "Benchmark autotuned types only" FORCE) + _rocprim_deprecation_warning(BENCHMARK_AUTOTUNED_TYPES_ONLY ROCPRIM_ENABLE_AUTOTUNED_TYPES_ONLY) + endif() +endif() + +# Map BENCHMARK_USE_AMDSMI -> ROCPRIM_BENCHMARK_USE_AMDSMI +if(DEFINED BENCHMARK_USE_AMDSMI) + _rocprim_check_conflict(BENCHMARK_USE_AMDSMI ROCPRIM_BENCHMARK_USE_AMDSMI) + if(NOT DEFINED ROCPRIM_BENCHMARK_USE_AMDSMI) + set(ROCPRIM_BENCHMARK_USE_AMDSMI ${BENCHMARK_USE_AMDSMI} CACHE BOOL "Let benchmarks use AMD SMI to output more GPU statistics" FORCE) + _rocprim_deprecation_warning(BENCHMARK_USE_AMDSMI ROCPRIM_BENCHMARK_USE_AMDSMI) + endif() +endif() diff --git a/projects/rocprim/docs/CMakeLists.txt b/projects/rocprim/docs/CMakeLists.txt index b41862c02b8..fd254630d93 100644 --- a/projects/rocprim/docs/CMakeLists.txt +++ b/projects/rocprim/docs/CMakeLists.txt @@ -1,37 +1,16 @@ -# MIT License -# -# Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# rocPRIM documentation +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT include(GNUInstallDirs) rocm_add_sphinx_doc( - "${CMAKE_CURRENT_SOURCE_DIR}" - BUILDER html - OUTPUT_DIR html - USES_DOXYGEN + "${CMAKE_CURRENT_SOURCE_DIR}" + BUILDER html + OUTPUT_DIR html + USES_DOXYGEN ) install( - DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html" - DESTINATION ${CMAKE_INSTALL_DOCDIR} + DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html" + DESTINATION ${CMAKE_INSTALL_DOCDIR} ) diff --git a/projects/rocprim/example/CMakeLists.txt b/projects/rocprim/example/CMakeLists.txt index 7ed0d29fe47..1d253617cde 100644 --- a/projects/rocprim/example/CMakeLists.txt +++ b/projects/rocprim/example/CMakeLists.txt @@ -1,84 +1,62 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT function(add_example EXAMPLE_SOURCE) - get_filename_component(EXAMPLE_TARGET ${EXAMPLE_SOURCE} NAME_WE) + get_filename_component(EXAMPLE_TARGET ${EXAMPLE_SOURCE} NAME_WE) - if(USE_HIPCXX) - set_source_files_properties(${EXAMPLE_SOURCE} PROPERTIES LANGUAGES HIP) - endif() + if(ROCPRIM_USE_HIPCXX) + set_source_files_properties(${EXAMPLE_SOURCE} PROPERTIES LANGUAGE HIP) + endif() - add_executable(${EXAMPLE_TARGET} ${EXAMPLE_SOURCE}) + add_executable(${EXAMPLE_TARGET} ${EXAMPLE_SOURCE}) - target_link_libraries(${EXAMPLE_TARGET} - PRIVATE - rocprim - ) - - if(USE_HIPCXX) target_link_libraries(${EXAMPLE_TARGET} - PRIVATE - $,hip::host,hip::device> + PRIVATE + roc::rocprim ) - else() - target_link_libraries(${EXAMPLE_TARGET} - PRIVATE - hip::device) - endif() - - set_target_properties(${EXAMPLE_TARGET} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example" - ) - # Use an EXAMPLE_SOURCE like "rocprim/device/example_device_search.cpp" - # to create a test named "example.rocprim.device.example_device_search" - get_filename_component(DIR_PATH "${EXAMPLE_SOURCE}" PATH) - string(REPLACE "/" "." DIR_PATH_DOTS "${DIR_PATH}") - set(TEST_NAME "example.${DIR_PATH_DOTS}.${EXAMPLE_TARGET}") - add_test(NAME ${TEST_NAME} COMMAND ${EXAMPLE_TARGET}) - - if (WIN32 AND NOT DEFINED DLLS_COPIED) - set(DLLS_COPIED "YES") - set(DLLS_COPIED ${DLLS_COPIED} PARENT_SCOPE) - - # for now adding in all .dll as dependency chain is not cmake based on win32 - file( GLOB third_party_dlls - LIST_DIRECTORIES ON - CONFIGURE_DEPENDS - ${HIP_DIR}/bin/*.dll - ${CMAKE_SOURCE_DIR}/rtest.* + if(ROCPRIM_USE_HIPCXX) + target_link_libraries(${EXAMPLE_TARGET} + PRIVATE + $,hip::host,hip::device> + ) + else() + target_link_libraries(${EXAMPLE_TARGET} + PRIVATE + hip::device + ) + endif() + + set_target_properties(${EXAMPLE_TARGET} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example" ) - foreach( file_i ${third_party_dlls}) - add_custom_command( TARGET ${EXAMPLE_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/example ) - endforeach( file_i ) - endif() + # Use an EXAMPLE_SOURCE like "rocprim/device/example_device_search.cpp" + # to create a test named "example.rocprim.device.example_device_search" + get_filename_component(DIR_PATH "${EXAMPLE_SOURCE}" PATH) + string(REPLACE "/" "." DIR_PATH_DOTS "${DIR_PATH}") + set(TEST_NAME "example.${DIR_PATH_DOTS}.${EXAMPLE_TARGET}") + add_test(NAME ${TEST_NAME} COMMAND ${EXAMPLE_TARGET}) + + if(WIN32 AND NOT DEFINED DLLS_COPIED) + set(DLLS_COPIED "YES" PARENT_SCOPE) + file(GLOB third_party_dlls + LIST_DIRECTORIES ON + CONFIGURE_DEPENDS + ${HIP_DIR}/bin/*.dll + ${CMAKE_SOURCE_DIR}/rtest.* + ) + foreach(file_i ${third_party_dlls}) + add_custom_command( + TARGET ${EXAMPLE_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/example + ) + endforeach() + endif() endfunction() -# **************************************************************************** # rocPRIM examples -# **************************************************************************** - add_example(extra/example_temporary_storage.cpp) add_example(extra/example_type_traits_interface.cpp) add_example(rocprim/device/example_device_search.cpp) diff --git a/projects/rocprim/rocprim/CMakeLists.txt b/projects/rocprim/rocprim/CMakeLists.txt index 0eda121b570..fbf8e05356f 100644 --- a/projects/rocprim/rocprim/CMakeLists.txt +++ b/projects/rocprim/rocprim/CMakeLists.txt @@ -1,72 +1,57 @@ -# MIT License -# -# Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -# rocPRIM header-only library - -# Configure a header file to pass the rocPRIM version +# Configure version header configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/include/rocprim/rocprim_version.hpp.in" - "${CMAKE_CURRENT_BINARY_DIR}/include/rocprim/rocprim_version.hpp" - @ONLY + "${CMAKE_CURRENT_SOURCE_DIR}/include/rocprim/rocprim_version.hpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/include/rocprim/rocprim_version.hpp" + @ONLY ) -# Only header target, does not include dependencies +# Header-only library target add_library(rocprim INTERFACE) +add_library(roc::rocprim ALIAS rocprim) + target_include_directories(rocprim - INTERFACE - $ - $ - $ - $ + INTERFACE + $ + $ + $ + $ ) -# This target links against HIP library +target_compile_features(rocprim INTERFACE cxx_std_17) + +# Library target that includes HIP dependency add_library(rocprim_hip INTERFACE) -target_link_libraries(rocprim_hip INTERFACE rocprim hip::device) +add_library(roc::rocprim_hip ALIAS rocprim_hip) +target_link_libraries(rocprim_hip + INTERFACE + roc::rocprim + hip::device +) # Installation -if (ROCPRIM_INSTALL) - # We need to install headers manually as rocm_install_targets - # does not support header-only libraries (INTERFACE targets) - rocm_install_targets( - TARGETS rocprim rocprim_hip - ) - - rocm_install( - DIRECTORY - "${CMAKE_CURRENT_SOURCE_DIR}/include/" - "${PROJECT_BINARY_DIR}/rocprim/include/" - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - FILES_MATCHING - PATTERN "*.h" - PATTERN "*.hpp" - PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ - ) - - # Export targets - rocm_export_targets( - TARGETS roc::rocprim roc::rocprim_hip - DEPENDS PACKAGE hip - NAMESPACE roc:: - ) +if(ROCPRIM_ENABLE_INSTALL) + rocm_install_targets( + TARGETS rocprim rocprim_hip + ) + + rocm_install( + DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/include/" + "${CMAKE_CURRENT_BINARY_DIR}/include/" + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING + PATTERN "*.h" + PATTERN "*.hpp" + PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ + ) + + rocm_export_targets( + TARGETS roc::rocprim roc::rocprim_hip + DEPENDS PACKAGE hip + NAMESPACE roc:: + ) endif() diff --git a/projects/rocprim/test/CMakeLists.txt b/projects/rocprim/test/CMakeLists.txt index 9ca2f908d29..9e1ce59a597 100644 --- a/projects/rocprim/test/CMakeLists.txt +++ b/projects/rocprim/test/CMakeLists.txt @@ -1,33 +1,13 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + set(INSTALL_TEST_FILE "${CMAKE_CURRENT_BINARY_DIR}/install_CTestTestfile.cmake") file(WRITE "${INSTALL_TEST_FILE}" -[=[ -# This is a test file generated by rocPRIM for install time. +"# This is a test file generated by rocPRIM for install time. # It differs slightly from the default testfile, and you may encounter issues because of that. -]=] +" ) -# This variable is defined here to be accessible to other test sub-modules set(COMMON_TEST_HEADER_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) function(add_relative_test test_name test_target) @@ -49,100 +29,96 @@ function(add_relative_test test_name test_target) endfunction() function(add_hip_test TEST_NAME TEST_SOURCES) - list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) + list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) + get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) + if(ROCPRIM_USE_HIPCXX) + set_source_files_properties(${TEST_SOURCES} PROPERTIES LANGUAGE HIP) + endif() - if(USE_HIPCXX) - set_source_files_properties(${TEST_SOURCES} PROPERTIES LANGUAGE HIP) - endif() + add_executable(${TEST_TARGET} ${TEST_SOURCES}) - add_executable(${TEST_TARGET} ${TEST_SOURCES}) + target_compile_definitions(${TEST_TARGET} PRIVATE USE_GTEST=ON) - target_compile_definitions(${TEST_TARGET} PRIVATE USE_GTEST=ON) + if(ROCPRIM_ENABLE_INSTALL) + rocm_install(TARGETS ${TEST_TARGET} COMPONENT tests) + endif() - if (ROCPRIM_INSTALL) - rocm_install(TARGETS ${TEST_TARGET} COMPONENT tests) - endif() + target_include_directories(${TEST_TARGET} + PRIVATE + $ + ) - target_include_directories(${TEST_TARGET} SYSTEM BEFORE - PUBLIC - $ - ) + target_link_libraries(${TEST_TARGET} + PRIVATE + GTest::GTest + GTest::Main + ) - target_link_libraries(${TEST_TARGET} - PRIVATE - GTest::GTest - GTest::Main - ) + if(ROCPRIM_USE_SYSTEM_LIBS) + target_link_libraries(${TEST_TARGET} PRIVATE roc::rocprim) + else() + target_link_libraries(${TEST_TARGET} PRIVATE roc::rocprim) + endif() - if(USE_SYSTEM_LIB) - target_link_libraries(${TEST_TARGET} PRIVATE roc::rocprim) - else() - target_link_libraries(${TEST_TARGET} PRIVATE rocprim) - endif() + if(ROCPRIM_USE_HIPCXX) + target_link_libraries(${TEST_TARGET} + PRIVATE + $,hip::host,hip::device> + ) + else() + target_link_libraries(${TEST_TARGET} + PRIVATE + hip::device + ) + endif() - if(USE_HIPCXX) - target_link_libraries(${TEST_TARGET} - PRIVATE - $,hip::host,hip::device> + target_compile_options(${TEST_TARGET} + PRIVATE + $<$:/bigobj> ) - else() - target_link_libraries(${TEST_TARGET} - PRIVATE - hip::device) - endif() - - target_compile_options(${TEST_TARGET} - PRIVATE - $<$: - /bigobj # number of sections exceeded object file format limit: compile with /bigobj - > - ) - - set_target_properties(${TEST_TARGET} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test" - ) - - add_relative_test(${TEST_NAME} ${TEST_TARGET}) - if (WIN32 AND NOT DEFINED DLLS_COPIED) - set(DLLS_COPIED "YES") - set(DLLS_COPIED ${DLLS_COPIED} PARENT_SCOPE) - # for now adding in all .dll as dependency chain is not cmake based on win32 - file( GLOB third_party_dlls - LIST_DIRECTORIES ON - CONFIGURE_DEPENDS - ${HIP_DIR}/bin/*.dll - ${CMAKE_SOURCE_DIR}/rtest.* + + set_target_properties(${TEST_TARGET} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test" ) - foreach( file_i ${third_party_dlls}) - add_custom_command( TARGET ${TEST_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/test ) - endforeach( file_i ) - endif() -endfunction() -# **************************************************************************** -# Tests -# **************************************************************************** + add_relative_test(${TEST_NAME} ${TEST_TARGET}) + + if(WIN32 AND NOT DEFINED DLLS_COPIED) + set(DLLS_COPIED "YES" PARENT_SCOPE) + file(GLOB third_party_dlls + LIST_DIRECTORIES ON + CONFIGURE_DEPENDS + ${HIP_DIR}/bin/*.dll + ${CMAKE_SOURCE_DIR}/rtest.* + ) + foreach(file_i ${third_party_dlls}) + add_custom_command( + TARGET ${TEST_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/test + ) + endforeach() + endif() +endfunction() # HIP tests without using rocPRIM add_hip_test("hip.device_api" hip/test_hip_api.cpp) add_hip_test("hip.async_copy" hip/test_hip_async_copy.cpp) add_hip_test("hip.ordered_block_id" hip/test_ordered_block_id.cpp) -# rocPRIM test +# rocPRIM tests add_subdirectory(rocprim) # hipGraph tests add_hip_test("hipgraph.basic" hipgraph/test_hipgraph_basic.cpp) add_hip_test("hipgraph.algs" hipgraph/test_hipgraph_algs.cpp) -if (ROCPRIM_INSTALL) - rocm_install( - FILES "${INSTALL_TEST_FILE}" - DESTINATION "${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}" - COMPONENT tests - RENAME "CTestTestfile.cmake" - ) +if(ROCPRIM_ENABLE_INSTALL) + rocm_install( + FILES "${INSTALL_TEST_FILE}" + DESTINATION "${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}" + COMPONENT tests + RENAME "CTestTestfile.cmake" + ) endif() diff --git a/projects/rocprim/test/extra/CMakeLists.txt b/projects/rocprim/test/extra/CMakeLists.txt index 9dbbb5b8022..f0b18dde430 100644 --- a/projects/rocprim/test/extra/CMakeLists.txt +++ b/projects/rocprim/test/extra/CMakeLists.txt @@ -1,26 +1,7 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) include(CheckLanguage) include(CMakeDependentOption) @@ -28,60 +9,52 @@ include(CMakeDependentOption) check_language(HIP) cmake_dependent_option(USE_HIPCXX "Use CMake HIP language support" OFF CMAKE_HIP_COMPILER OFF) if(USE_HIPCXX) - set(PROJECT_LANG HIP) + set(PROJECT_LANG HIP) else() - set(PROJECT_LANG CXX) + set(PROJECT_LANG CXX) endif() -# This project includes tests that should be run after -# rocPRIM is installed from package or using `make install` project(rocprim_package_install_test ${PROJECT_LANG}) -# CMake modules list(APPEND CMAKE_MODULE_PATH - ${CMAKE_CURRENT_SOURCE_DIR}/../../cmake - ${HIP_PATH}/lib/cmake/hip ${HIP_PATH}/cmake /opt/rocm/lib/cmake/hip /opt/rocm/hip/cmake # FindHIP.cmake + "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake" ) -# Use target ID syntax if supported for AMDGPU_TARGETS -if(TARGET_ID_SUPPORT) - set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030 CACHE STRING "List of specific machine types for library to target") -else() - set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") +if(USE_HIPCXX) + enable_language(HIP) endif() -# Verify that supported compilers are being used include(VerifyCompiler) -# Find rocPRIM -find_package(rocprim REQUIRED CONFIG PATHS "/opt/rocm/rocprim") +find_package(rocprim REQUIRED CONFIG) -# Build CXX flags -if (NOT DEFINED CMAKE_${PROJECT_LANG}_STANDARD) - set(CMAKE_${PROJECT_LANG}_STANDARD 17) +if(NOT DEFINED CMAKE_${PROJECT_LANG}_STANDARD) + set(CMAKE_${PROJECT_LANG}_STANDARD 17) endif() set(CMAKE_${PROJECT_LANG}_STANDARD_REQUIRED ON) set(CMAKE_${PROJECT_LANG}_EXTENSIONS OFF) -set(CMAKE_${PROJECT_LANG}_FLAGS "${CMAKE_${PROJECT_LANG}_FLAGS} -Wall -Wextra -Werror") if(NOT CMAKE_CXX_STANDARD EQUAL 17) - message(FATAL_ERROR "Only C++17 is supported") + message(FATAL_ERROR "Only C++17 is supported") endif() -# Enable testing (ctest) enable_testing() -# Build function(add_rocprim_test TEST_NAME TEST_SOURCES) - list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) - get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - add_executable(${TEST_TARGET} ${TEST_SOURCES}) - target_link_libraries(${TEST_TARGET} - PRIVATE - ${rocprim_LIBRARIES} # roc::rocprim_hip - ) - add_test(${TEST_NAME} ${TEST_TARGET}) + list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) + get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) + add_executable(${TEST_TARGET} ${TEST_SOURCES}) + target_link_libraries(${TEST_TARGET} + PRIVATE + roc::rocprim_hip + ) + target_compile_options(${TEST_TARGET} + PRIVATE + -Wall + -Wextra + -Werror + ) + add_test(${TEST_NAME} ${TEST_TARGET}) endfunction() - add_rocprim_test("test_rocprim_package" test_rocprim_package.cpp) diff --git a/projects/rocprim/test/rocprim/CMakeLists.txt b/projects/rocprim/test/rocprim/CMakeLists.txt index fd75fec91db..4fd2c821538 100644 --- a/projects/rocprim/test/rocprim/CMakeLists.txt +++ b/projects/rocprim/test/rocprim/CMakeLists.txt @@ -1,278 +1,213 @@ -# MIT License -# -# Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -set(AMDGPU_TEST_TARGETS "" CACHE STRING "List of specific device types to test for") # Leave empty for default system device - -# Gets a test target name based on the first source file. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT + +set(AMDGPU_TEST_TARGETS "" CACHE STRING "List of specific device types to test for") + function(get_rocprim_test_target TEST_SOURCES TEST_TARGET) - list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) - get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - set(TEST_TARGET ${TEST_TARGET} PARENT_SCOPE) + list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) + get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) + set(TEST_TARGET ${TEST_TARGET} PARENT_SCOPE) endfunction() function(add_rocprim_test TEST_NAME TEST_SOURCES) - get_rocprim_test_target(${TEST_SOURCES} TEST_TARGET) - add_rocprim_test_internal(${TEST_NAME} "${TEST_SOURCES}" ${TEST_TARGET}) - if(WITH_ROCRAND) - find_package(rocrand REQUIRED) - target_link_libraries(${TEST_TARGET} PRIVATE roc::rocrand) - target_compile_definitions(${TEST_TARGET} PRIVATE WITH_ROCRAND) - endif() + get_rocprim_test_target(${TEST_SOURCES} TEST_TARGET) + add_rocprim_test_internal(${TEST_NAME} "${TEST_SOURCES}" ${TEST_TARGET}) + if(ROCPRIM_ENABLE_ROCRAND) + find_package(rocrand REQUIRED) + target_link_libraries(${TEST_TARGET} PRIVATE roc::rocrand) + target_compile_definitions(${TEST_TARGET} PRIVATE WITH_ROCRAND) + endif() endfunction() function(add_rocprim_test_internal TEST_NAME TEST_SOURCES TEST_TARGET) - if(USE_HIPCXX) - set_source_files_properties(${TEST_SOURCES} PROPERTIES LANGUAGE HIP) - endif() - add_executable(${TEST_TARGET} ${TEST_SOURCES}) - if (ROCPRIM_INSTALL) - rocm_install(TARGETS ${TEST_TARGET} COMPONENT tests) - endif() - - target_include_directories(${TEST_TARGET} SYSTEM BEFORE - PUBLIC - $ - ) - - if(USE_SYSTEM_LIB) - target_link_libraries(${TEST_TARGET} PRIVATE roc::rocprim) - # We need this include to allow tests to find the common utilities folder - target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) - else() - target_link_libraries(${TEST_TARGET} PRIVATE rocprim) - endif() - target_link_libraries(${TEST_TARGET} - PRIVATE - GTest::GTest - GTest::Main - ) - - if(USE_HIPCXX) - target_link_libraries(${TEST_TARGET} - PRIVATE - $,hip::host,hip::device> + if(ROCPRIM_USE_HIPCXX) + set_source_files_properties(${TEST_SOURCES} PROPERTIES LANGUAGE HIP) + endif() + + add_executable(${TEST_TARGET} ${TEST_SOURCES}) + + if(ROCPRIM_ENABLE_INSTALL) + rocm_install(TARGETS ${TEST_TARGET} COMPONENT tests) + endif() + + target_include_directories(${TEST_TARGET} + PRIVATE + $ ) - else() + + if(ROCPRIM_USE_SYSTEM_LIBS) + target_link_libraries(${TEST_TARGET} PRIVATE roc::rocprim) + target_include_directories(${TEST_TARGET} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) + else() + target_link_libraries(${TEST_TARGET} PRIVATE roc::rocprim) + endif() + target_link_libraries(${TEST_TARGET} - PRIVATE - hip::device) - endif() - - target_compile_options(${TEST_TARGET} - PRIVATE - $<$: - /bigobj # number of sections exceeded object file format limit: compile with /bigobj - > - $<$:$<$:/bigobj>> - ) - - function(add_rocprim_test_disable_dpp TEST_NAME TEST_SOURCES) + PRIVATE + GTest::GTest + GTest::Main + ) + + if(ROCPRIM_USE_HIPCXX) + target_link_libraries(${TEST_TARGET} + PRIVATE + $,hip::host,hip::device> + ) + else() + target_link_libraries(${TEST_TARGET} + PRIVATE + hip::device + ) + endif() + + target_compile_options(${TEST_TARGET} + PRIVATE + $<$:/bigobj> + $<$:$<$:/bigobj>> + ) + + if(WIN32) + target_compile_definitions(${TEST_TARGET} PRIVATE _ENABLE_EXTENDED_ALIGNED_STORAGE) + endif() + + set_target_properties(${TEST_TARGET} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/rocprim" + ) + + if(AMDGPU_TEST_TARGETS) + foreach(AMDGPU_TARGET IN LISTS AMDGPU_TEST_TARGETS) + add_relative_test("${AMDGPU_TARGET}-${TEST_NAME}" ${TEST_TARGET}) + set_tests_properties("${AMDGPU_TARGET}-${TEST_NAME}" + PROPERTIES + RESOURCE_GROUPS "1,${AMDGPU_TARGET}:1" + LABELS "hip;${AMDGPU_TARGET}" + ) + endforeach() + else() + add_relative_test(${TEST_NAME} ${TEST_TARGET}) + set_tests_properties(${TEST_NAME} + PROPERTIES + LABELS "hip" + ) + endif() + + if(WIN32 AND NOT DEFINED DLLS_COPIED_2) + set(DLLS_COPIED_2 "YES" PARENT_SCOPE) + file(GLOB third_party_dlls + LIST_DIRECTORIES ON + CONFIGURE_DEPENDS + ${HIP_DIR}/bin/*.dll + ${CMAKE_SOURCE_DIR}/rtest.* + ) + foreach(file_i ${third_party_dlls}) + add_custom_command( + TARGET ${TEST_TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/test/rocprim + ) + endforeach() + endif() +endfunction() + +function(add_rocprim_test_disable_dpp TEST_NAME TEST_SOURCES) get_rocprim_test_target(${TEST_SOURCES} TEST_TARGET) set(TEST_TARGET "${TEST_TARGET}_disable_dpp") add_rocprim_test_internal("${TEST_NAME}_disable_dpp" "${TEST_SOURCES}" ${TEST_TARGET}) target_compile_definitions(${TEST_TARGET} PRIVATE ROCPRIM_DISABLE_DPP=1) - endfunction() - - if(WIN32) - # Usage of 128-bit integral types (__int128_t and __uint128_t, whose alignment is 16 bytes) - # requires an extended alignment support. Otherwise, a static assert will be triggered in - # functions like std::stable_sort. _ENABLE_EXTENDED_ALIGNED_STORAGE turns it on: - target_compile_definitions(${TEST_TARGET} PRIVATE _ENABLE_EXTENDED_ALIGNED_STORAGE) - endif() - - set_target_properties(${TEST_TARGET} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/rocprim" - ) - if(AMDGPU_TEST_TARGETS) - foreach(AMDGPU_TARGET IN LISTS AMDGPU_TEST_TARGETS) - add_relative_test("${AMDGPU_TARGET}-${TEST_NAME}" ${TEST_TARGET}) - set_tests_properties("${AMDGPU_TARGET}-${TEST_NAME}" - PROPERTIES - RESOURCE_GROUPS "1,${AMDGPU_TARGET}:1" - LABELS "hip;${AMDGPU_TARGET}" - ) - endforeach() - else() - add_relative_test(${TEST_NAME} ${TEST_TARGET}) - set_tests_properties(${TEST_NAME} - PROPERTIES - LABELS "hip" - ) - endif() - - if (WIN32 AND NOT DEFINED DLLS_COPIED_2) - set(DLLS_COPIED_2 "YES") - set(DLLS_COPIED_2 ${DLLS_COPIED_2} PARENT_SCOPE) - # for now adding in all .dll as dependency chain is not cmake based on win32 - file( GLOB third_party_dlls - LIST_DIRECTORIES ON - CONFIGURE_DEPENDS - ${HIP_DIR}/bin/*.dll - ${CMAKE_SOURCE_DIR}/rtest.* - ) - foreach( file_i ${third_party_dlls}) - add_custom_command( TARGET ${TEST_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/test/rocprim ) - endforeach( file_i ) - endif() endfunction() -# Returns a list of values that match the pattern -# "if ${WORD} == " with optional whitespace between the tokens. function(get_match_list FILE_STRING WORD MATCH_LIST) - # collect all substrings that match the pattern - string(REGEX MATCHALL "${WORD}[ ]*==[ ]*[0-9]*" LINE_MATCHES "${${FILE_STRING}}") - set(${MATCH_LIST} "") - # iterate over the substrings, record the values using the same regex - foreach(LINE IN LISTS LINE_MATCHES) - string(REGEX MATCH "${WORD}[ ]*==[ ]*([0-9]*)" TMP "${LINE}") - list(APPEND ${MATCH_LIST} "${CMAKE_MATCH_1}") - endforeach() - set(${MATCH_LIST} ${${MATCH_LIST}} PARENT_SCOPE) + string(REGEX MATCHALL "${WORD}[ ]*==[ ]*[0-9]*" LINE_MATCHES "${${FILE_STRING}}") + set(${MATCH_LIST} "") + foreach(LINE IN LISTS LINE_MATCHES) + string(REGEX MATCH "${WORD}[ ]*==[ ]*([0-9]*)" TMP "${LINE}") + list(APPEND ${MATCH_LIST} "${CMAKE_MATCH_1}") + endforeach() + set(${MATCH_LIST} ${${MATCH_LIST}} PARENT_SCOPE) endfunction() -# Replacement for add_rocprim_test that splits up test cases allowing them to be compiled in parallel. -# A single .cpp.in file is provided containing valid C++ code with the addition of slice definitions. -# The slice definitions ROCPRIM_TEST_SLICE, ROCPRIM_TEST_SUITE_SLICE, and ROCPRIM_TEST_TYPE_SLICE demarkate -# slices of non-typed tests, typed test suites, and test types respectively. The slice cases must be -# marked with an "(el)if" statement (no "else") that has a unique value to ensure that the correct slice -# gets enabled. This function will generate a separate .cpp file for all non-typed test slices and the -# product of the typed test suites and test types. -# -# This example will generate five files: -# -# #cmakedefine ROCPRIM_TEST_SUITE_SLICE @ROCPRIM_TEST_SUITE_SLICE@ -# #cmakedefine ROCPRIM_TEST_TYPE_SLICE @ROCPRIM_TEST_TYPE_SLICE@ -# #cmakedefine ROCPRIM_TEST_SLICE @ROCPRIM_TEST_SLICE@ -# -# #if ROCPRIM_TEST_SLICE == 0 -# DEFINE_NAMED_TEST(TestSuiteName, NonTypedTest) -# #endif -# #if ROCPRIM_TEST_SUITE_SLICE == 0 -# REGISTER_TYPED_TEST(TestSuiteName, TestZero, test_zero) -# REGISTER_TYPED_TEST_SUITE(TestSuiteName, TestZero) -# #elif ROCPRIM_TEST_SUITE_SLICE == 1 -# REGISTER_TYPED_TEST(TestSuiteName, TestOne, test_one) -# REGISTER_TYPED_TEST(TestSuiteName, TestTwo, test_two) -# REGISTER_TYPED_TEST_SUITE(TestSuiteName, TestOne, TestTwo) -# #endif -# #if ROCPRIM_TEST_TYPE_SLICE == 0 -# INSTANTIATE_TYPED_TEST(TestSuiteName, double) -# #elif ROCPRIM_TEST_TYPE_SLICE == 1 -# INSTANTIATE_TYPED_TEST(TestSuiteName, float) -# INSTANTIATE_TYPED_TEST(TestSuiteName, int) -# #endif function(add_rocprim_test_parallel TEST_NAME TEST_SOURCE) - get_rocprim_test_target(${TEST_SOURCE} TEST_TARGET) - - file(READ ${TEST_SOURCE} FILE_CONTENTS) - - set(SOURCES "") - - # first, handle all non-typed tests - # disable typed test, generate one file for each non-typed test - set(ROCPRIM_TEST_SUITE_SLICE -1) - set(ROCPRIM_TEST_TYPE_SLICE -1) - get_match_list(FILE_CONTENTS "ROCPRIM_TEST_SLICE" TEST_SLICE_LIST) - list(LENGTH TEST_SLICE_LIST TEST_SLICE_COUNT) - if(TEST_SLICE_COUNT EQUAL 0) - message(VERBOSE "found no non-typed tests for test target ${TEST_TARGET}") - else() - message(VERBOSE "found ${TEST_SLICE_COUNT} non-typed test slice(s) for test target ${TEST_TARGET}") - foreach(ROCPRIM_TEST_SLICE IN LISTS TEST_SLICE_LIST) - set(FILENAME "${TEST_TARGET}.parallel/${TEST_TARGET}_${ROCPRIM_TEST_SLICE}.cpp") - configure_file(${TEST_SOURCE} ${FILENAME} @ONLY) - list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}") - endforeach() - endif() - - # second, handle all typed tests - # disable non-typed test, generate one file for each test suite and test type pair - set(ROCPRIM_TEST_SLICE -1) - get_match_list(FILE_CONTENTS "ROCPRIM_TEST_SUITE_SLICE" TEST_SUITE_SLICE_LIST) - list(LENGTH TEST_SUITE_SLICE_LIST TEST_SUITE_SLICE_COUNT) - get_match_list(FILE_CONTENTS "ROCPRIM_TEST_TYPE_SLICE" TEST_TYPE_SLICE_LIST) - list(LENGTH TEST_TYPE_SLICE_LIST TEST_TYPE_SLICE_COUNT) - if(TEST_SUITE_SLICE_COUNT EQUAL 0 OR TEST_TYPE_SLICE_COUNT EQUAL 0) - message(VERBOSE "found no typed tests for test target ${TEST_TARGET}") - else() - message(VERBOSE "found ${TEST_SUITE_SLICE_COUNT} test suite slice(s) and \ -${TEST_TYPE_SLICE_COUNT} test type slice(s) for test target ${TEST_TARGET}") - foreach(ROCPRIM_TEST_SUITE_SLICE IN LISTS TEST_SUITE_SLICE_LIST) - foreach(ROCPRIM_TEST_TYPE_SLICE IN LISTS TEST_TYPE_SLICE_LIST) - set(FILENAME "${TEST_TARGET}.parallel/${TEST_TARGET}_typed_${ROCPRIM_TEST_SUITE_SLICE}_${ROCPRIM_TEST_TYPE_SLICE}.cpp") - configure_file(${TEST_SOURCE} ${FILENAME} @ONLY) - list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}") - endforeach() - endforeach() - endif() - - # if no files are generated, nothing is built for the target - list(LENGTH SOURCES SOURCES_COUNT) - if(${SOURCES_COUNT} EQUAL 0) - message(FATAL_ERROR "no .cpp files generated for target ${TEST_TARGET}") - endif() - - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_CLEAN_FILES "${TEST_TARGET}.parallel") - add_rocprim_test_internal(${TEST_NAME} "${SOURCES}" ${TEST_TARGET}) - target_include_directories("${TEST_TARGET}" PRIVATE "../../test/rocprim") - # Make sure the configured files are re-configured if they are removed (via clean). - set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${SOURCES}) + get_rocprim_test_target(${TEST_SOURCE} TEST_TARGET) + + file(READ ${TEST_SOURCE} FILE_CONTENTS) + + set(SOURCES "") + + # Handle all non-typed tests + set(ROCPRIM_TEST_SUITE_SLICE -1) + set(ROCPRIM_TEST_TYPE_SLICE -1) + get_match_list(FILE_CONTENTS "ROCPRIM_TEST_SLICE" TEST_SLICE_LIST) + list(LENGTH TEST_SLICE_LIST TEST_SLICE_COUNT) + if(TEST_SLICE_COUNT EQUAL 0) + message(VERBOSE "Found no non-typed tests for test target ${TEST_TARGET}") + else() + message(VERBOSE "Found ${TEST_SLICE_COUNT} non-typed test slice(s) for test target ${TEST_TARGET}") + foreach(ROCPRIM_TEST_SLICE IN LISTS TEST_SLICE_LIST) + set(FILENAME "${TEST_TARGET}.parallel/${TEST_TARGET}_${ROCPRIM_TEST_SLICE}.cpp") + configure_file(${TEST_SOURCE} ${FILENAME} @ONLY) + list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}") + endforeach() + endif() + + # Handle all typed tests + set(ROCPRIM_TEST_SLICE -1) + get_match_list(FILE_CONTENTS "ROCPRIM_TEST_SUITE_SLICE" TEST_SUITE_SLICE_LIST) + list(LENGTH TEST_SUITE_SLICE_LIST TEST_SUITE_SLICE_COUNT) + get_match_list(FILE_CONTENTS "ROCPRIM_TEST_TYPE_SLICE" TEST_TYPE_SLICE_LIST) + list(LENGTH TEST_TYPE_SLICE_LIST TEST_TYPE_SLICE_COUNT) + if(TEST_SUITE_SLICE_COUNT EQUAL 0 OR TEST_TYPE_SLICE_COUNT EQUAL 0) + message(VERBOSE "Found no typed tests for test target ${TEST_TARGET}") + else() + message(VERBOSE "Found ${TEST_SUITE_SLICE_COUNT} test suite slice(s) and ${TEST_TYPE_SLICE_COUNT} test type slice(s) for test target ${TEST_TARGET}") + foreach(ROCPRIM_TEST_SUITE_SLICE IN LISTS TEST_SUITE_SLICE_LIST) + foreach(ROCPRIM_TEST_TYPE_SLICE IN LISTS TEST_TYPE_SLICE_LIST) + set(FILENAME "${TEST_TARGET}.parallel/${TEST_TARGET}_typed_${ROCPRIM_TEST_SUITE_SLICE}_${ROCPRIM_TEST_TYPE_SLICE}.cpp") + configure_file(${TEST_SOURCE} ${FILENAME} @ONLY) + list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}") + endforeach() + endforeach() + endif() + + list(LENGTH SOURCES SOURCES_COUNT) + if(${SOURCES_COUNT} EQUAL 0) + message(FATAL_ERROR "No .cpp files generated for target ${TEST_TARGET}") + endif() + + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_CLEAN_FILES "${TEST_TARGET}.parallel") + add_rocprim_test_internal(${TEST_NAME} "${SOURCES}" ${TEST_TARGET}) + target_include_directories("${TEST_TARGET}" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${SOURCES}) endfunction() function(add_rocprim_cpp_standard_test STANDARD EXTENSIONS TARGET_SUFFIX TEST_NAME TEST_SOURCES) - get_rocprim_test_target(${TEST_SOURCES} TEST_TARGET) - set(TEST_TARGET ${TEST_TARGET}${TARGET_SUFFIX}) - add_rocprim_test_internal(${TEST_NAME}${TARGET_SUFFIX} "${TEST_SOURCES}" ${TEST_TARGET}) - # Request C++ standard 17, but decay to a previous version if not available: - set_target_properties(${TEST_TARGET} - PROPERTIES - CXX_STANDARD ${STANDARD} - CXX_EXTENSIONS ${EXTENSIONS}) - if(USE_HIPCXX) + get_rocprim_test_target(${TEST_SOURCES} TEST_TARGET) + set(TEST_TARGET ${TEST_TARGET}${TARGET_SUFFIX}) + add_rocprim_test_internal(${TEST_NAME}${TARGET_SUFFIX} "${TEST_SOURCES}" ${TEST_TARGET}) set_target_properties(${TEST_TARGET} - PROPERTIES - HIP_STANDARD ${STANDARD} - HIP_EXTENSIONS ${EXTENSIONS}) - endif() + PROPERTIES + CXX_STANDARD ${STANDARD} + CXX_EXTENSIONS ${EXTENSIONS} + ) + if(ROCPRIM_USE_HIPCXX) + set_target_properties(${TEST_TARGET} + PROPERTIES + HIP_STANDARD ${STANDARD} + HIP_EXTENSIONS ${EXTENSIONS} + ) + endif() endfunction() -# **************************************************************************** # Tests -# **************************************************************************** - -# Internal test to check internal behaviour add_rocprim_test("rocprim.internal_merge_path" "internal/test_internal_merge_path.cpp") - -# HIP basic test, which also checks if there are no linkage problems when there are multiple sources add_rocprim_test("rocprim.basic_test" "test_basic.cpp;detail/get_rocprim_version.cpp") - add_rocprim_test("rocprim.arg_index_iterator" test_arg_index_iterator.cpp) add_rocprim_test("rocprim.temporary_storage_partitioning" test_temporary_storage_partitioning.cpp) + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") - # clang++ from ROCm 6.1+ takes too long to build these tests in Debug mode (which passes -O0) - add_rocprim_test_parallel("rocprim.block_adjacent_difference" test_block_adjacent_difference.cpp.in) - add_rocprim_test_parallel("rocprim.block_discontinuity" test_block_discontinuity.cpp.in) + add_rocprim_test_parallel("rocprim.block_adjacent_difference" test_block_adjacent_difference.cpp.in) + add_rocprim_test_parallel("rocprim.block_discontinuity" test_block_discontinuity.cpp.in) endif() + add_rocprim_test("rocprim.bit_cast" test_bit_cast.cpp) add_rocprim_test("rocprim.block_exchange" test_block_exchange.cpp) add_rocprim_test("rocprim.block_histogram" test_block_histogram.cpp) @@ -347,95 +282,74 @@ add_rocprim_test("rocprim.zip_iterator" test_zip_iterator.cpp) add_rocprim_test("rocprim.accumulator_t" test_accumulator_t.cpp) if(NOT WIN32) - # Linking tests check if all external rocPRIM symbols are in the inline namespace, kernel are not - # visible and if it is possible to link two libraries that use different rocPRIM versions. - # See comments in test_linking_new_scan.hpp for more details. - - # For testing how the old behavior (without inline namespace) works set it to ON, the tests will - # fail - set(WITHOUT_INLINE_NAMESPACE OFF) - - if(WITHOUT_INLINE_NAMESPACE) - set(COMMON_DEFINITIONS "ROCPRIM_DISABLE_INLINE_NAMESPACE") - endif() - - add_library(test_linking_lib1 SHARED test_linking_lib.cpp) - if(ROCPRIM_INSTALL) - # Use a dummy export file to avoid adding the library to rocprim-targets.cmake - rocm_install(TARGETS test_linking_lib1 COMPONENT tests EXPORT rocprim-tests) - endif() - target_include_directories(test_linking_lib1 SYSTEM BEFORE - PRIVATE - $ - $ - ) - target_link_libraries(test_linking_lib1 PRIVATE rocprim_hip) - set_target_properties(test_linking_lib1 - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/rocprim" - COMPILE_DEFINITIONS "TEST_FUNC=test1;${COMMON_DEFINITIONS};" - ) - - add_library(test_linking_lib2 SHARED test_linking_lib.cpp) - if(ROCPRIM_INSTALL) - # Use a dummy export file to avoid adding the library to rocprim-targets.cmake - rocm_install(TARGETS test_linking_lib2 COMPONENT tests EXPORT rocprim-tests) - endif() - target_include_directories(test_linking_lib2 SYSTEM BEFORE - PRIVATE - $ - $ - ) - target_link_libraries(test_linking_lib2 PRIVATE rocprim_hip) - # Pretend that we build this library with another version of rocPRIM which uses another - # implementation of scan - if(NOT WITHOUT_INLINE_NAMESPACE) - set(LIB2_NAMESPACE_DEFINITION "ROCPRIM_INLINE_NAMESPACE=ROCPRIM_300201_NS") - endif() - set_target_properties(test_linking_lib2 - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/rocprim" - COMPILE_DEFINITIONS "TEST_FUNC=test2;TEST_LINKING_EMULATE_ANOTHER_VERSION;${COMMON_DEFINITIONS};${LIB2_NAMESPACE_DEFINITION};" - ) - - add_rocprim_test_internal("rocprim.linking" "test_linking.cpp;test_linking_lib.cpp" test_linking) - target_link_libraries(test_linking PUBLIC test_linking_lib1 test_linking_lib2) - set_target_properties(test_linking - PROPERTIES - COMPILE_DEFINITIONS "TEST_FUNC=test0;${COMMON_DEFINITIONS};" - ) - - # Check what symbols are exported by the libraries and what their names are - # nm --demangle --defined-only --extern-only is used to print - # the symbols that are exported from a library. - - set(NM_ARGS --demangle --defined-only --extern-only) - - # The tests fail if - # * there are symbols in the rocprim namespace but not in the inline namespace with the - # corresponding rocPRIM version; - # * any kernels are visible (usually they have "kernel" in their names). - - # The current compiler ignores `__attribute__((__visibility__("hidden")))` for executables, - # but not for libraries, so we remove the `|kernel` from the regex in rocprim.linking_symbols. - string(REPLACE ";" " " NM_ARGS_STR "${NM_ARGS}") - add_test( - NAME "rocprim.linking_lib1_symbols" - COMMAND sh -c "${CMAKE_NM} ${NM_ARGS_STR} $ | grep -c -P \"(rocprim::(?!ROCPRIM_${rocprim_VERSION_NUMBER}_NS))|kernel\"" - ) - add_test( - NAME "rocprim.linking_lib2_symbols" - COMMAND sh -c "${CMAKE_NM} ${NM_ARGS_STR} $ | grep -c -P \"(rocprim::(?!ROCPRIM_300201_NS))|kernel\"" - ) - add_test( - NAME "rocprim.linking_symbols" - COMMAND sh -c "${CMAKE_NM} ${NM_ARGS_STR} $ | grep -c -P \"(rocprim::(?!ROCPRIM_${rocprim_VERSION_NUMBER}_NS))\"" - ) - set_tests_properties( - "rocprim.linking_lib1_symbols" - "rocprim.linking_lib2_symbols" - "rocprim.linking_symbols" - PROPERTIES - PASS_REGULAR_EXPRESSION "^0" # no such symbols must be found (grep -c returns 0) - ) + set(WITHOUT_INLINE_NAMESPACE OFF) + + if(WITHOUT_INLINE_NAMESPACE) + set(COMMON_DEFINITIONS "ROCPRIM_DISABLE_INLINE_NAMESPACE") + endif() + + add_library(test_linking_lib1 SHARED test_linking_lib.cpp) + if(ROCPRIM_ENABLE_INSTALL) + rocm_install(TARGETS test_linking_lib1 COMPONENT tests EXPORT rocprim-tests) + endif() + target_include_directories(test_linking_lib1 + PRIVATE + $ + $ + ) + target_link_libraries(test_linking_lib1 PRIVATE roc::rocprim_hip) + set_target_properties(test_linking_lib1 + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/rocprim" + COMPILE_DEFINITIONS "TEST_FUNC=test1;${COMMON_DEFINITIONS};" + ) + + add_library(test_linking_lib2 SHARED test_linking_lib.cpp) + if(ROCPRIM_ENABLE_INSTALL) + rocm_install(TARGETS test_linking_lib2 COMPONENT tests EXPORT rocprim-tests) + endif() + target_include_directories(test_linking_lib2 + PRIVATE + $ + $ + ) + target_link_libraries(test_linking_lib2 PRIVATE roc::rocprim_hip) + if(NOT WITHOUT_INLINE_NAMESPACE) + set(LIB2_NAMESPACE_DEFINITION "ROCPRIM_INLINE_NAMESPACE=ROCPRIM_300201_NS") + endif() + set_target_properties(test_linking_lib2 + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/rocprim" + COMPILE_DEFINITIONS "TEST_FUNC=test2;TEST_LINKING_EMULATE_ANOTHER_VERSION;${COMMON_DEFINITIONS};${LIB2_NAMESPACE_DEFINITION};" + ) + + add_rocprim_test_internal("rocprim.linking" "test_linking.cpp;test_linking_lib.cpp" test_linking) + target_link_libraries(test_linking PUBLIC test_linking_lib1 test_linking_lib2) + set_target_properties(test_linking + PROPERTIES + COMPILE_DEFINITIONS "TEST_FUNC=test0;${COMMON_DEFINITIONS};" + ) + + set(NM_ARGS --demangle --defined-only --extern-only) + string(REPLACE ";" " " NM_ARGS_STR "${NM_ARGS}") + + add_test( + NAME "rocprim.linking_lib1_symbols" + COMMAND sh -c "${CMAKE_NM} ${NM_ARGS_STR} $ | grep -c -P \"(rocprim::(?!ROCPRIM_${rocprim_VERSION_NUMBER}_NS))|kernel\"" + ) + add_test( + NAME "rocprim.linking_lib2_symbols" + COMMAND sh -c "${CMAKE_NM} ${NM_ARGS_STR} $ | grep -c -P \"(rocprim::(?!ROCPRIM_300201_NS))|kernel\"" + ) + add_test( + NAME "rocprim.linking_symbols" + COMMAND sh -c "${CMAKE_NM} ${NM_ARGS_STR} $ | grep -c -P \"(rocprim::(?!ROCPRIM_${rocprim_VERSION_NUMBER}_NS))\"" + ) + set_tests_properties( + "rocprim.linking_lib1_symbols" + "rocprim.linking_lib2_symbols" + "rocprim.linking_symbols" + PROPERTIES + PASS_REGULAR_EXPRESSION "^0" + ) endif() diff --git a/projects/rocprim/test/rocprim/internal/CMakeLists.txt b/projects/rocprim/test/rocprim/internal/CMakeLists.txt index fa360ab3f67..66e78f7ae2c 100644 --- a/projects/rocprim/test/rocprim/internal/CMakeLists.txt +++ b/projects/rocprim/test/rocprim/internal/CMakeLists.txt @@ -1,23 +1,4 @@ -# MIT License -# -# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT add_rocprim_test("rocprim.internal.merge_path" test_internal_merge_path.hpp) diff --git a/projects/rocprim/toolchain-linux.cmake b/projects/rocprim/toolchain-linux.cmake index 6a71c2aac39..2c2cf7d2f31 100644 --- a/projects/rocprim/toolchain-linux.cmake +++ b/projects/rocprim/toolchain-linux.cmake @@ -1,17 +1,19 @@ +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -if (DEFINED ENV{ROCM_PATH}) - set(rocm_bin "$ENV{ROCM_PATH}/bin") +if(DEFINED ENV{ROCM_PATH}) + set(rocm_bin "$ENV{ROCM_PATH}/bin") else() - set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to the ROCm installation.") - set(rocm_bin "/opt/rocm/bin") + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to the ROCm installation.") + set(rocm_bin "/opt/rocm/bin") endif() -if (NOT DEFINED ENV{CXX}) - set(CMAKE_CXX_COMPILER "${rocm_bin}/amdclang++" CACHE PATH "Path to the C++ compiler") - set(CMAKE_CXX_FLAGS_INIT "-mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false") - if (DEFINED ENV{HIPCC_COMPILE_FLAGS_APPEND}) - set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} $ENV{HIPCC_COMPILE_FLAGS_APPEND}") - endif() +if(NOT DEFINED ENV{CXX}) + set(CMAKE_CXX_COMPILER "${rocm_bin}/amdclang++" CACHE PATH "Path to the C++ compiler") + set(CMAKE_CXX_FLAGS_INIT "-mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false") + if(DEFINED ENV{HIPCC_COMPILE_FLAGS_APPEND}) + set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} $ENV{HIPCC_COMPILE_FLAGS_APPEND}") + endif() else() - set(CMAKE_CXX_COMPILER "$ENV{CXX}" CACHE PATH "Path to the C++ compiler") + set(CMAKE_CXX_COMPILER "$ENV{CXX}" CACHE PATH "Path to the C++ compiler") endif() diff --git a/projects/rocprim/toolchain-windows.cmake b/projects/rocprim/toolchain-windows.cmake index 974fb7b3374..4c6ec44d86d 100644 --- a/projects/rocprim/toolchain-windows.cmake +++ b/projects/rocprim/toolchain-windows.cmake @@ -1,37 +1,35 @@ -#set(CMAKE_MAKE_PROGRAM "nmake.exe") -#set(CMAKE_GENERATOR "Ninja") -# Ninja doesn't support platform -#set(CMAKE_GENERATOR_PLATFORM x64) +# Copyright Advanced Micro Devices, Inc., or its affiliates. +# SPDX-License-Identifier: MIT -if (DEFINED ENV{HIP_PATH}) - file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR) - set(rocm_bin "${HIP_DIR}/bin") -elseif (DEFINED ENV{HIP_DIR}) - file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR) - set(rocm_bin "${HIP_DIR}/bin") +if(DEFINED ENV{HIP_PATH}) + file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR) + set(rocm_bin "${HIP_DIR}/bin") +elseif(DEFINED ENV{HIP_DIR}) + file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR) + set(rocm_bin "${HIP_DIR}/bin") else() - set(HIP_DIR "C:/hip") - set(rocm_bin "C:/hip/bin") + set(HIP_DIR "C:/hip") + set(rocm_bin "C:/hip/bin") endif() set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe") -if (NOT python) - set(python "python3") # take default for windows +if(NOT python) + set(python "python3") endif() -# our usage flags +# Our usage flags set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS") -# flags for clang direct use -# -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler +# Flags for clang direct use +# -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fms-extensions -fms-compatibility -Wno-ignored-attributes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__") -if (DEFINED ENV{VCPKG_PATH}) - file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH) +if(DEFINED ENV{VCPKG_PATH}) + file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH) else() - set(VCPKG_PATH "C:/github/vcpkg") + set(VCPKG_PATH "C:/github/vcpkg") endif() include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake")