Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ See README.md on how to build the hipCUB documentation using Doxygen.
### Changed
- CUB backend references CUB and Thrust version 1.17.2.
- Improved benchmark coverage of `BlockScan` by adding `ExclusiveScan`, benchmark coverage of `BlockRadixSort` by adding `SortBlockedToStriped`, and benchmark coverage of `WarpScan` by adding `Broadcast`.
### Fixed
- Building hipCUB with the Windows HIP SDK.
### Known Issues
- `BlockRadixRankMatch` is currently broken under the rocPRIM backend.
- `BlockRadixRankMatch` with a warp size that does not exactly divide the block size is broken under the CUB backend.
Expand Down
11 changes: 6 additions & 5 deletions benchmark/benchmark_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@

namespace benchmark_utils
{

const size_t default_max_random_size = 1024 * 1024;
// get_random_data() generates only part of the sequence and replicates it,
// because benchmarks usually do not need a "true" random sequence.
template<class T>
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024)
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size)
-> typename std::enable_if<std::is_integral<T>::value, std::vector<T>>::type
{
std::random_device rd;
std::default_random_engine gen(rd());
std::uniform_int_distribution<T> distribution(min, max);
using distribution_type = typename std::conditional<(sizeof(T)==1), short, T>::type;
std::uniform_int_distribution<distribution_type> distribution(min, max);
std::vector<T> data(size);
std::generate(
data.begin(), data.begin() + std::min(size, max_random_size),
Expand All @@ -66,7 +67,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size =
}

template<class T>
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024)
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size)
-> typename std::enable_if<std::is_floating_point<T>::value, std::vector<T>>::type
{
std::random_device rd;
Expand All @@ -85,7 +86,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size =
}

template<class T>
inline std::vector<T> get_random_data01(size_t size, float p, size_t max_random_size = 1024 * 1024)
inline std::vector<T> get_random_data01(size_t size, float p, size_t max_random_size = default_max_random_size)
{
std::random_device rd;
std::default_random_engine gen(rd());
Expand Down
7 changes: 7 additions & 0 deletions benchmark/cmdparser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ namespace cli {
return std::stoul(elements[0]);
}

// Parses a command-line value as an unsigned long long.
// Exactly one element is expected; any other element count throws
// std::bad_cast. The conversion itself is delegated to std::stoull, so
// whatever exceptions std::stoull raises propagate unchanged to the caller.
// The second parameter is unused and only selects this overload by type.
static unsigned long long parse(const std::vector<std::string>& elements, const unsigned long long&) {
    const bool has_single_value = (elements.size() == 1);
    if (has_single_value) {
        return std::stoull(elements.front());
    }

    throw std::bad_cast();
}

static long parse(const std::vector<std::string>& elements, const long&) {
if (elements.size() != 1)
throw std::bad_cast();
Expand Down
1 change: 1 addition & 0 deletions benchmark/common_benchmark_header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <limits>
#include <cmath>
#include <cstdlib>
#include <numeric>

// Google Benchmark
#include "benchmark/benchmark.h"
Expand Down
8 changes: 6 additions & 2 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,11 @@ if(BUILD_BENCHMARK)
set(GOOGLEBENCHMARK_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/googlebenchmark CACHE PATH "")
if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
# hip-clang cannot compile googlebenchmark for some reason
set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++")
if(WIN32)
set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=cl")
else()
set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++")
endif()
endif()

download_project(
Expand All @@ -191,7 +195,7 @@ if(BUILD_BENCHMARK)
GIT_TAG v1.6.1
GIT_SHALLOW TRUE
INSTALL_DIR ${GOOGLEBENCHMARK_ROOT}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${COMPILER_OVERRIDE}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE}
LOG_DOWNLOAD TRUE
LOG_CONFIGURE TRUE
LOG_BUILD TRUE
Expand Down
29 changes: 29 additions & 0 deletions examples/example_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
* Modifications Copyright (c) 2021-2023, Advanced Micro Devices, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#ifndef EXAMPLES_EXAMPLE_UTILS_HPP
#define EXAMPLES_EXAMPLE_UTILS_HPP
#include "mersenne.h"
Expand Down
14 changes: 11 additions & 3 deletions rmake.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/python3
"""Copyright 2020-2021 Advanced Micro Devices, Inc.
"""Copyright 2020-2023 Advanced Micro Devices, Inc.
Manage build and installation"""

import re
Expand Down Expand Up @@ -72,6 +72,12 @@ def delete_dir(dir_path) :
linux_path = pathlib.Path(dir_path).absolute()
#print( linux_path )
run_cmd( "rm" , f"-rf {linux_path}")

def cmake_path(os_path):
    """Convert a path string for use on the CMake command line.

    When OS_info["ID"] is "windows", every backslash in os_path is replaced
    with a forward slash and nothing else is changed. On any other OS the
    result of os.path.realpath(os_path) is returned.
    """
    on_windows = OS_info["ID"] == "windows"
    if not on_windows:
        return os.path.realpath(os_path)
    # Swap the Windows separator for a forward slash.
    return os_path.replace("\\", "/")

def config_cmd():
global args
Expand All @@ -85,7 +91,9 @@ def config_cmd():
cmake_platform_opts = []
if (OS_info["ID"] == 'windows'):
# we don't have ROCM on windows but have hip, ROCM can be downloaded if required
rocm_path = os.getenv( 'ROCM_PATH', "C:/hipsdk/rocm-cmake-master") #C:/hip") # rocm/Utils/cmake-rocm4.2.0"
# CMAKE_PREFIX_PATH set to rocm_path and HIP_PATH set BY SDK Installer
raw_rocm_path = cmake_path(os.getenv('HIP_PATH', "C:/hip"))
rocm_path = f'"{raw_rocm_path}"' # guard against spaces in path
cmake_executable = "cmake.exe"
toolchain = os.path.join( src_path, "toolchain-windows.cmake" )
#set CPACK_PACKAGING_INSTALL_PREFIX= defined as blank as it is appended to end of path for archive creation
Expand Down Expand Up @@ -143,7 +151,7 @@ def config_cmd():
cmake_options.append( f"-DROCM_DISABLE_LDCONFIG=ON" )

if args.build_clients:
cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_DIR={build_dir}" )
cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_BENCHMARK=ON -DBUILD_DIR={build_dir}" )

cmake_options.append( f"-DAMDGPU_TARGETS={args.gpu_architecture}" )

Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ function(add_relative_test test_name test_target)
endif()
file(RELATIVE_PATH rel_path "${CMAKE_CURRENT_BINARY_DIR}" "${EXE_PATH}/${EXE_NAME}")
add_test(NAME "${test_name}" COMMAND "./${rel_path}")
rocm_install(TARGETS ${test_target} COMPONENT tests)
file(APPEND "${INSTALL_TEST_FILE}" "add_test(${test_name} \"../${EXE_NAME}\")\n")
endfunction()

Expand Down
9 changes: 5 additions & 4 deletions test/hipcub/test_hipcub_block_run_length_decode.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -101,7 +101,7 @@ void block_run_length_decode_kernel(
const unsigned global_thread_idx = BlockSize * hipBlockIdx_x + hipThreadIdx_x;
hipcub::LoadDirectBlocked(global_thread_idx, d_run_items, run_items);
hipcub::LoadDirectBlocked(global_thread_idx, d_run_lengths, run_lengths);

unsigned total_decoded_size{};
BlockRunLengthDecodeT block_run_length_decode(
temp_storage,
Expand Down Expand Up @@ -140,7 +140,8 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode)

for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++)
{
const unsigned seed_value = seed_index >= random_seeds_count ? seeds[seed_index] : rand();
const unsigned int seed_value
= seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count];
SCOPED_TRACE(testing::Message() << "with seed= " << seed_value);

const LengthT max_run_length = static_cast<LengthT>(
Expand All @@ -164,7 +165,7 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode)
std::uniform_int_distribution<size_t> num_empty_runs_dist(1, 4);
const size_t num_trailing_empty_runs = num_empty_runs_dist(prng);
num_runs += num_trailing_empty_runs;

const auto empty_run_items = test_utils::get_random_data<ItemT>(
num_trailing_empty_runs,
std::numeric_limits<ItemT>::min(),
Expand Down
31 changes: 10 additions & 21 deletions toolchain-windows.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,36 @@
# Ninja doesn't support platform
#set(CMAKE_GENERATOR_PLATFORM x64)

if (DEFINED ENV{HIP_DIR})
if (DEFINED ENV{HIP_PATH})
file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
elseif (DEFINED ENV{HIP_DIR})
file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
else()
set(HIP_DIR "C:/hip")
set(rocm_bin "C:/hip/bin")
endif()

#set(CMAKE_CXX_COMPILER "${rocm_bin}/hipcc.bat")
#set(CMAKE_C_COMPILER "${rocm_bin}/hipcc.bat")
set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe")
set(CMAKE_C_COMPILER "${rocm_bin}/clang.exe")

#set(CMAKE_CXX_LINKER "${rocm_bin}/hipcc.bat" )

# TODO remove, just to speed up slow cmake
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
#
if (NOT python)
set(python "python3") # take default for windows
endif()

#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -IC:/hip/include -IC:/hip/lib/clang/12.0.0 -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
# our usage flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS")

# flags for clang direct use
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility")
# -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility -Wno-ignored-attributes")

# flags for clang direct use with hip
# -x hip causes linker error
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -x hip -IC:/hip/include/hip -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include/hip -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")

if (DEFINED ENV{VCPKG_PATH})
file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH)
else()
set(VCPKG_PATH "C:/github/vcpkg")
endif()
include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake")
# set(GTEST_DIR "C:/rocm/Utils/GTestMSVC")
# set(GTEST_INCLUDE_DIR "${GTEST_DIR}/include")
# set(GTEST_LIBRARY "${GTEST_DIR}/lib/Release/gtest.lib")
# set(GTEST_MAIN_LIBRARY "${GTEST_DIR}/lib/Release/gtest_main.lib")
# set(GTEST_LIBRARIES "${GTEST_DIR}/lib/Release/gtest.lib;${GTEST_DIR}/lib/Release/gtest_main.lib")