From 311fe6817df5a69f026f355f48236ad583a7be78 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Wed, 27 Apr 2022 08:58:59 -0700 Subject: [PATCH 1/3] Working benchmark build for windows --- benchmark/benchmark_device_segmented_sort.cpp | 1 + benchmark/benchmark_utils.hpp | 65 ++++++++++++++++- benchmark/cmdparser.hpp | 7 ++ benchmark/common_benchmark_header.hpp | 1 + cmake/Dependencies.cmake | 70 ++++++++++--------- rmake.py | 2 +- 6 files changed, 110 insertions(+), 36 deletions(-) diff --git a/benchmark/benchmark_device_segmented_sort.cpp b/benchmark/benchmark_device_segmented_sort.cpp index cc86950a..ba78fa6d 100644 --- a/benchmark/benchmark_device_segmented_sort.cpp +++ b/benchmark/benchmark_device_segmented_sort.cpp @@ -25,6 +25,7 @@ // HIP API #include "hipcub/hipcub.hpp" + #ifndef DEFAULT_N const size_t DEFAULT_N = 1024 * 1024 * 32; #endif diff --git a/benchmark/benchmark_utils.hpp b/benchmark/benchmark_utils.hpp index 461ad743..f55c68ef 100644 --- a/benchmark/benchmark_utils.hpp +++ b/benchmark/benchmark_utils.hpp @@ -43,11 +43,12 @@ namespace benchmark_utils { +const size_t default_max_random_size = 1024 * 1024; // get_random_data() generates only part of sequence and replicates it, // because benchmarks usually do not need "true" random sequence. template -inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024) +inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size) -> typename std::enable_if::value, std::vector>::type { std::random_device rd; @@ -66,7 +67,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = } template -inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024) +inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size) -> typename std::enable_if::value, std::vector>::type { std::random_device rd; @@ -84,8 +85,66 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = return data; } +#if defined(_WIN32) && defined(__clang__) +// get_random_data() generates only part of sequence and replicates it, +// because benchmarks usually do not need "true" random sequence. +template<> +inline std::vector get_random_data(size_t size, unsigned char min, unsigned char max, size_t max_random_size) +{ + std::random_device rd; + std::default_random_engine gen(rd()); + std::uniform_int_distribution distribution(min, max); + std::vector data(size); + std::generate( + data.begin(), data.begin() + std::min(size, max_random_size), + [&]() { return distribution(gen); } + ); + for(size_t i = max_random_size; i < size; i += max_random_size) + { + std::copy_n(data.begin(), std::min(size - i, max_random_size), data.begin() + i); + } + return data; +} + +template<> +inline std::vector get_random_data(size_t size, signed char min, signed char max, size_t max_random_size) +{ + std::random_device rd; + std::default_random_engine gen(rd()); + std::uniform_int_distribution distribution(min, max); + std::vector data(size); + std::generate( + data.begin(), data.begin() + std::min(size, max_random_size), + [&]() { return distribution(gen); } + ); + for(size_t i = max_random_size; i < size; i += max_random_size) + { + std::copy_n(data.begin(), std::min(size - i, max_random_size), data.begin() + i); + } + return data; +} + +template<> +inline std::vector get_random_data(size_t size, char min, char max, size_t max_random_size) +{ + std::random_device rd; + std::default_random_engine gen(rd()); + std::uniform_int_distribution distribution(min, max); + std::vector data(size); + std::generate( + data.begin(), data.begin() + std::min(size, max_random_size), + [&]() { return distribution(gen); } + ); + for(size_t i = max_random_size; i < size; i += max_random_size) + { + std::copy_n(data.begin(), std::min(size - i, max_random_size), data.begin() + i); + } + return data; +} +#endif + template -inline std::vector get_random_data01(size_t size, float p, size_t max_random_size = 1024 * 1024) +inline std::vector get_random_data01(size_t size, float p, size_t max_random_size = default_max_random_size) { std::random_device rd; std::default_random_engine gen(rd()); diff --git a/benchmark/cmdparser.hpp b/benchmark/cmdparser.hpp index 5ffc24f6..50a6cb7e 100644 --- a/benchmark/cmdparser.hpp +++ b/benchmark/cmdparser.hpp @@ -185,6 +185,13 @@ namespace cli { return std::stoul(elements[0]); } + static unsigned long long parse(const std::vector& elements, const unsigned long long&) { + if (elements.size() != 1) + throw std::bad_cast(); + + return std::stoull(elements[0]); + } + static long parse(const std::vector& elements, const long&) { if (elements.size() != 1) throw std::bad_cast(); diff --git a/benchmark/common_benchmark_header.hpp b/benchmark/common_benchmark_header.hpp index 08780184..457e0fe0 100644 --- a/benchmark/common_benchmark_header.hpp +++ b/benchmark/common_benchmark_header.hpp @@ -31,6 +31,7 @@ #include #include #include +#include // Google Benchmark #include "benchmark/benchmark.h" diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 3090a777..8c69683f 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -46,54 +46,54 @@ if(HIP_COMPILER STREQUAL "nvcc") if(NOT DEFINED CUB_INCLUDE_DIR) file( - DOWNLOAD https://github.com/NVIDIA/cub/archive/1.16.0.zip - ${CMAKE_CURRENT_BINARY_DIR}/cub-1.16.0.zip + DOWNLOAD https://github.com/NVIDIA/cub/archive/1.15.0.zip + ${CMAKE_CURRENT_BINARY_DIR}/cub-1.15.0.zip STATUS cub_download_status LOG cub_download_log ) list(GET cub_download_status 0 cub_download_error_code) if(cub_download_error_code) message(FATAL_ERROR "Error: downloading " - "https://github.com/NVIDIA/cub/archive/1.16.0.zip failed " + "https://github.com/NVIDIA/cub/archive/1.15.0.zip failed " "error_code: ${cub_download_error_code} " "log: ${cub_download_log} " ) endif() execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.16.0.zip + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.15.0.zip WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} RESULT_VARIABLE cub_unpack_error_code ) if(cub_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.16.0.zip failed") + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.15.0.zip failed") endif() - set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.16.0/ CACHE PATH "") + set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.15.0/ CACHE PATH "") endif() if(NOT DEFINED THRUST_INCLUDE_DIR) file( - DOWNLOAD https://github.com/NVIDIA/thrust/archive/1.16.0.zip - ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.16.0.zip + DOWNLOAD https://github.com/NVIDIA/thrust/archive/1.15.0.zip + ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.15.0.zip STATUS thrust_download_status LOG thrust_download_log ) list(GET thrust_download_status 0 thrust_download_error_code) if(thrust_download_error_code) message(FATAL_ERROR "Error: downloading " - "https://github.com/NVIDIA/thrust/archive/1.16.0.zip failed " + "https://github.com/NVIDIA/thrust/archive/1.15.0.zip failed " "error_code: ${thrust_download_error_code} " "log: ${thrust_download_log} " ) endif() execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.16.0.zip + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.15.0.zip WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} RESULT_VARIABLE thrust_unpack_error_code ) if(thrust_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.16.0.zip failed") + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.15.0.zip failed") endif() - set(THRUST_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.16.0/ CACHE PATH "") + set(THRUST_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/thrust-1.15.0/ CACHE PATH "") endif() else() # rocPRIM (only for ROCm platform) @@ -156,26 +156,32 @@ endif() # Benchmark dependencies if(BUILD_BENCHMARK) # Google Benchmark (https://github.com/google/benchmark.git) - message(STATUS "Downloading and building Google Benchmark.") - if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$|.*/nvcc$") - # hip-clang cannot compile googlebenchmark for some reason - set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") + find_package(benchmark QUIET) + + if(NOT benchmark_FOUND) + message(STATUS "Google Benchmark not found or force download Google Benchmark on. Downloading and building Google Benchmark.") + if(CMAKE_CONFIGURATION_TYPES) + message(FATAL_ERROR "DownloadProject.cmake doesn't support multi-configuration generators.") + endif() + set(GOOGLEBENCHMARK_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/googlebenchmark CACHE PATH "") + if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$") + # hip-clang cannot compile googlebenchmark for some reason + set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") + endif() + + download_project( + PROJ googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.6.1 + INSTALL_DIR ${GOOGLEBENCHMARK_ROOT} + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} -DBENCHMARK_ENABLE_TESTING=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE} + LOG_DOWNLOAD TRUE + LOG_CONFIGURE TRUE + LOG_BUILD TRUE + LOG_INSTALL TRUE + BUILD_PROJECT TRUE + ${UPDATE_DISCONNECTED_IF_AVAILABLE} + ) endif() - # Download, build and install googlebenchmark library - set(GOOGLEBENCHMARK_ROOT ${CMAKE_CURRENT_BINARY_DIR}/googlebenchmark CACHE PATH "") - download_project( - PROJ googlebenchmark - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.5.2 - GIT_SHALLOW TRUE - INSTALL_DIR ${GOOGLEBENCHMARK_ROOT} - CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX= ${COMPILER_OVERRIDE} - LOG_DOWNLOAD TRUE - LOG_CONFIGURE TRUE - LOG_BUILD TRUE - LOG_INSTALL TRUE - BUILD_PROJECT TRUE - ${UPDATE_DISCONNECTED_IF_AVAILABLE} - ) find_package(benchmark REQUIRED CONFIG PATHS ${GOOGLEBENCHMARK_ROOT}) endif() diff --git a/rmake.py b/rmake.py index 9a5b90af..c71a1658 100644 --- a/rmake.py +++ b/rmake.py @@ -143,7 +143,7 @@ def config_cmd(): cmake_options.append( f"-DROCM_DISABLE_LDCONFIG=ON" ) if args.build_clients: - cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_DIR={build_dir}" ) + cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_BENCHMARK=ON -DBUILD_DIR={build_dir}" ) cmake_options.append( f"-DAMDGPU_TARGETS={args.gpu_architecture}" ) From 222f9ff168ded810ab2cdd4a7f08db6fcca9e55b Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Wed, 27 Apr 2022 09:58:06 -0700 Subject: [PATCH 2/3] Removing whitespace --- benchmark/benchmark_device_segmented_sort.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmark/benchmark_device_segmented_sort.cpp b/benchmark/benchmark_device_segmented_sort.cpp index ba78fa6d..cc86950a 100644 --- a/benchmark/benchmark_device_segmented_sort.cpp +++ b/benchmark/benchmark_device_segmented_sort.cpp @@ -25,7 +25,6 @@ // HIP API #include "hipcub/hipcub.hpp" - #ifndef DEFAULT_N const size_t DEFAULT_N = 1024 * 1024 * 32; #endif From 99a676089428b6c93538b5548f617ac1cf61b6c8 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Thu, 10 Nov 2022 15:41:49 -0800 Subject: [PATCH 3/3] Updating compiler for benchmarks to cl for windows --- cmake/Dependencies.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index ff0f1d2d..ee8ccb51 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -160,9 +160,13 @@ if(BUILD_BENCHMARK) message(FATAL_ERROR "DownloadProject.cmake doesn't support multi-configuration generators.") endif() set(GOOGLEBENCHMARK_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/googlebenchmark CACHE PATH "") - if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (WIN32 AND CMAKE_CXX_COMPILER MATCHES ".*/hipcc$")) + if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) # hip-clang cannot compile googlebenchmark for some reason - set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") + if(WIN32) + set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=cl") + else() + set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") + endif() endif() download_project(